In [1]:
import pandas as pd
from pathlib import Path
# Read the receipts export, expected in the current working directory.
input_file = Path.cwd().joinpath('receipts.csv')
df = pd.read_csv(input_file)

## Remove unwanted columns, add blank column and reorder

In [2]:
# Discard the columns the export does not need. Rebinding the result (rather
# than using inplace=True) follows the recommended pandas idiom and keeps the
# statement chainable.
df = df.drop(columns=['Given', 'Surname', 'Income', 'GST'])

In [3]:
# Add a 'Cumulative' column holding empty strings (a blank placeholder column).
df["Cumulative"] = ""

In [4]:
# Keep only the columns needed for the export, in report order. The explicit
# .copy() makes the result an independent frame, so the column assignments in
# later cells cannot trigger pandas' SettingWithCopyWarning.
column_order = [
    'Number', 'Paid by', 'Date', 'Paid', 'Cumulative', 'Type',
    'Payment_Type', 'Issued_By', 'Merchant Ref', 'Notes',
]
df = df[column_order].copy()

In [5]:
# Preview the first five rows to confirm the column selection and order.
df.head(n=5)

Unnamed: 0,Number,Paid by,Date,Paid,Cumulative,Type,Payment_Type,Issued_By,Merchant Ref,Notes
0,10667,Sri Hart,2022-5-9 14:54:27,-10.0,,Refund,cash,Didi Mudigdo,,"Enrolment Fee adjustment for ""The Inquisitive ..."
1,10668,Bev Smith,2022-5-10 15:06:45,10.0,,Income,credit card,Bev Smith,CM794843544_Smith,"1 ticket for ""Floating on the Canals of Europ..."
2,10669,Bev Smith,2022-5-10 15:12:32,10.0,,Income,credit card,Bev Smith,CM533006953_Smith,"1 ticket for ""The Inquisitive Traveller - Eas..."
3,10670,Marion Sarcich,2022-5-11 10:50:04,15.0,,Income,credit card,Didi Mudigdo,CM121290048_Sarcich,Membership Payment for (1-May-2022 to 23-Jul-2...
4,10670,Marion Sarcich,2022-5-11 10:50:04,160.0,,Income,credit card,Didi Mudigdo,CM121290048_Sarcich,"Enrolment into ""French B2 Intermediate Ongoing..."


Add the column `MYOB Narration` to contain MYOB allocation information

In [6]:
# Create the 'MYOB Narration' column filled with empty strings; a later cell
# overwrites it with the values returned by myobnarr.
df['MYOB Narration']=""

Populate `MYOB Narration` with the course code or membership type

In [7]:
def myobnarr(notes, cost, term_fee=15.0):
    """Derive the MYOB narration for a single receipt line.

    Parameters
    ----------
    notes : str
        Free-text note attached to the receipt.
    cost : float
        Amount paid on the receipt line.
    term_fee : float, optional
        Amount that marks a membership payment as a term membership.
        Defaults to 15.0.

    Returns
    -------
    str
        * 'Term Membership' when the note mentions "Membership" and ``cost``
          equals ``term_fee``; 'Membership' for any other membership note.
        * Otherwise the text inside the first "(...)" group of the note.
        * The note unchanged when it has no complete "(...)" group.
        * An empty string when ``notes`` is not a string.
    """
    if not isinstance(notes, str):
        # A missing note arrives from pandas as NaN (a float); the substring
        # test below would raise TypeError on it.
        return ""

    if "Membership" in notes:
        return 'Term Membership' if cost == term_fee else 'Membership'

    start = notes.find("(")
    # Only look for the closing bracket after the opening one.
    end = notes.find(")", start + 1) if start != -1 else -1
    if end == -1:
        # str.find returns -1 on a miss; slicing with that value would
        # silently chop the last character off the note.
        return notes
    return notes[start + 1:end]

In [8]:
# Row by row, derive the narration from the receipt's note text and amount paid.
narration = df.apply(lambda row: myobnarr(row['Notes'], row['Paid']), axis=1)
df['MYOB Narration'] = narration

In [9]:
# For the record, this is the earlier approach: a plain regex extract with no helper function, so it did not handle membership payments

In [10]:
#df['Allocate'] = df['Notes'].str.extract(r'\((.*?)\)', expand=False)

Check results

In [11]:
# Preview the first five rows to check the generated 'MYOB Narration' values.
df.head(n=5)

Unnamed: 0,Number,Paid by,Date,Paid,Cumulative,Type,Payment_Type,Issued_By,Merchant Ref,Notes,MYOB Narration
0,10667,Sri Hart,2022-5-9 14:54:27,-10.0,,Refund,cash,Didi Mudigdo,,"Enrolment Fee adjustment for ""The Inquisitive ...","Enrolment Fee adjustment for ""The Inquisitive ..."
1,10668,Bev Smith,2022-5-10 15:06:45,10.0,,Income,credit card,Bev Smith,CM794843544_Smith,"1 ticket for ""Floating on the Canals of Europ...",T32
2,10669,Bev Smith,2022-5-10 15:12:32,10.0,,Income,credit card,Bev Smith,CM533006953_Smith,"1 ticket for ""The Inquisitive Traveller - Eas...",T31b
3,10670,Marion Sarcich,2022-5-11 10:50:04,15.0,,Income,credit card,Didi Mudigdo,CM121290048_Sarcich,Membership Payment for (1-May-2022 to 23-Jul-2...,Term Membership
4,10670,Marion Sarcich,2022-5-11 10:50:04,160.0,,Income,credit card,Didi Mudigdo,CM121290048_Sarcich,"Enrolment into ""French B2 Intermediate Ongoing...",L7


For the record, here is how to strip the digits; maybe in the future we can use this to specify the allocation.

In [12]:
# df['Allocate'] = df['Allocate'].replace('(\d+)','',regex=True)

In [13]:
# Display the frame as it stands before the non-credit-card rows are filtered out.
df

Unnamed: 0,Number,Paid by,Date,Paid,Cumulative,Type,Payment_Type,Issued_By,Merchant Ref,Notes,MYOB Narration
0,10667,Sri Hart,2022-5-9 14:54:27,-10.0,,Refund,cash,Didi Mudigdo,,"Enrolment Fee adjustment for ""The Inquisitive ...","Enrolment Fee adjustment for ""The Inquisitive ..."
1,10668,Bev Smith,2022-5-10 15:06:45,10.0,,Income,credit card,Bev Smith,CM794843544_Smith,"1 ticket for ""Floating on the Canals of Europ...",T32
2,10669,Bev Smith,2022-5-10 15:12:32,10.0,,Income,credit card,Bev Smith,CM533006953_Smith,"1 ticket for ""The Inquisitive Traveller - Eas...",T31b
3,10670,Marion Sarcich,2022-5-11 10:50:04,15.0,,Income,credit card,Didi Mudigdo,CM121290048_Sarcich,Membership Payment for (1-May-2022 to 23-Jul-2...,Term Membership
4,10670,Marion Sarcich,2022-5-11 10:50:04,160.0,,Income,credit card,Didi Mudigdo,CM121290048_Sarcich,"Enrolment into ""French B2 Intermediate Ongoing...",L7
5,10671,Bell Mackness,2022-5-11 11:13:57,40.0,,Income,credit card,Elfie Collins,CM122913344_Mackness,Membership Payment 5-May-2023 to 5-May-2024,Membership
6,10672,Maxine O'connell,2022-5-11 13:56:16,160.0,,Income,credit card,Danielle Desvaux,CM730090586_O'connell,"Enrolment into ""Spanish B1 Intermediate - Wedn...",L32
7,10673,MMaxine O'connell,2022-5-11 14:05:37,-40.0,,Refund,EFT,Didi Mudigdo,,Membership refund from terminated membership,Membership
8,10674,Maxine O'Connell,2022-5-11 14:06:24,40.0,,Income,cheque,Didi Mudigdo,,Membership Payment 3-Feb-2022 to 3-Feb-2023,Membership
9,10675,MMaxine O'connell,2022-5-11 14:09:13,-160.0,,Refund,EFT,Didi Mudigdo,,"Enrolment Fee adjustment for ""Spanish A2 Begin...","Enrolment Fee adjustment for ""Spanish A2 Begin..."


## Filter out non-credit card transactions
Now for the Boolean indexing. The rows we want to remove are 'Deposit'/'Saved' (from the Type column) and 'cash'/'cheque'/'EFT' (from the Payment_Type column). Constructing the Boolean index:

In [14]:
# One boolean mask per category of receipt to be excluded from the export.
payment_type = df['Payment_Type']
receipt_type = df['Type']

cash = payment_type.eq('cash')
cheque = payment_type.eq('cheque')
saved = receipt_type.eq('Saved')
eft = payment_type.eq('EFT')
deposit = receipt_type.eq('Deposit')

And now apply and export:

In [15]:
# Combine the masks, keep only the rows that match none of them, and write the
# result to CSV without the index column.
excluded = cash | cheque | eft | saved | deposit
df.loc[~excluded].to_csv('hurrah.csv', index=False)