In [1]:
import pandas as pd
from pathlib import Path
# Resolve the receipts export relative to the current working directory,
# then read it into the working DataFrame.
input_file = Path.cwd().joinpath('receipts.csv')
df = pd.read_csv(input_file)

## Remove unwanted columns, add blank column and reorder

In [2]:
# Remove the columns that are not needed downstream.
# Rebinding `df` (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second execution no longer raises
# KeyError for columns that were already removed.
df = df.drop(columns=['Given', 'Surname', 'Income', 'GST'], errors='ignore')

In [3]:
# Append an empty-string "Cumulative" column (every row gets "").
df = df.assign(Cumulative="")

In [4]:
# Keep exactly these columns, in this order.
column_order = [
    'Number',
    'Paid by',
    'Date',
    'Paid',
    'Cumulative',
    'Type',
    'Payment_Type',
    'Issued_By',
    'Merchant Ref',
    'Notes',
]
df = df.loc[:, column_order]

In [5]:
# Preview the first five rows after dropping and reordering columns.
df.head(n=5)

Unnamed: 0,Number,Paid by,Date,Paid,Cumulative,Type,Payment_Type,Issued_By,Merchant Ref,Notes
0,11009,Brenda Hansell,2022-7-4 13:19:28,160.0,,Income,credit card,Brenda Hansell,CM833380782_Hansell,"Enrolment into ""French For Travellers (L3)"", s..."
1,11010,Narina Sidhu,2022-7-4 14:30:57,40.0,,Income,credit card,Sarah Dunne,CM472538171_Sidhu,Membership Payment for (4-Jul-2022 to 4-Jul-20...
2,11010,Narina Sidhu,2022-7-4 14:30:57,160.0,,Income,credit card,Sarah Dunne,CM472538171_Sidhu,"Enrolment into ""Italian A2 Beginners(L15)"", st..."
3,11011,Janet Murray,2022-7-5 9:24:52,-20.0,,Used,credit account,Danielle Desvaux,,"Enrolment into ""French Beginners Conversation(..."
4,11011,Janet Murray,2022-7-5 9:24:53,160.0,,Income,credit card,Danielle Desvaux,CM735077395_Murray,"Enrolment into ""French Beginners Conversation(..."


Add the column `MYOB Narration` to contain MYOB allocation information

In [6]:
# Append an empty-string "MYOB Narration" column; the name contains a space,
# so it is passed to assign() through a dict rather than as a keyword.
df = df.assign(**{'MYOB Narration': ""})

Populate `MYOB Narration` with the course code or membership type, using a function and lambda

In [7]:
def myobnarr(notes, cost):
    """Return the MYOB narration for a single receipt line.

    Parameters
    ----------
    notes : str
        Free-text description of the receipt line. Non-string values
        (for example the NaN pandas yields for an empty CSV cell) are
        treated as carrying no narration.
    cost : float
        Amount paid on the line.

    Returns
    -------
    str
        'Term Membership' or 'Membership' when ``notes`` contains the word
        "Membership"; otherwise the text between the first "(" and the
        first ")" that follows it, or '' when no such pair exists.
    """
    # Empty cells arrive as float NaN; `in` and `.find` would fail on them.
    if not isinstance(notes, str):
        return ''

    if "Membership" in notes:
        # 15.0 is presumably the per-term membership fee -- TODO confirm.
        return 'Term Membership' if cost == 15.0 else 'Membership'

    opening = notes.find("(")
    if opening == -1:
        # No code present; avoid slicing with find()'s -1 sentinel.
        return ''

    # Search for the closing bracket only after the opening one so a stray
    # ")" earlier in the text cannot produce an empty or reversed slice.
    closing = notes.find(")", opening + 1)
    if closing == -1:
        return ''

    return notes[opening + 1:closing]

In [8]:
# Compute each row's narration from its Notes text and Paid amount.
df['MYOB Narration'] = df.apply(
    lambda row: myobnarr(notes=row['Notes'], cost=row['Paid']),
    axis='columns',
)

In [9]:
# For the record: the commented-out line in the next cell is what we used to do before introducing the function and lambda; it could not handle membership payments.

In [10]:
# df['Allocate'] = df['Notes'].str.extract(r'\((.*?)\)', expand=False)

Check results

In [11]:
# Preview the first five rows to check the new MYOB Narration values.
df.head(n=5)

Unnamed: 0,Number,Paid by,Date,Paid,Cumulative,Type,Payment_Type,Issued_By,Merchant Ref,Notes,MYOB Narration
0,11009,Brenda Hansell,2022-7-4 13:19:28,160.0,,Income,credit card,Brenda Hansell,CM833380782_Hansell,"Enrolment into ""French For Travellers (L3)"", s...",L3
1,11010,Narina Sidhu,2022-7-4 14:30:57,40.0,,Income,credit card,Sarah Dunne,CM472538171_Sidhu,Membership Payment for (4-Jul-2022 to 4-Jul-20...,Membership
2,11010,Narina Sidhu,2022-7-4 14:30:57,160.0,,Income,credit card,Sarah Dunne,CM472538171_Sidhu,"Enrolment into ""Italian A2 Beginners(L15)"", st...",L15
3,11011,Janet Murray,2022-7-5 9:24:52,-20.0,,Used,credit account,Danielle Desvaux,,"Enrolment into ""French Beginners Conversation(...",L4
4,11011,Janet Murray,2022-7-5 9:24:53,160.0,,Income,credit card,Danielle Desvaux,CM735077395_Murray,"Enrolment into ""French Beginners Conversation(...",L4


For the record, here is how to strip the digits from the extracted code; in the future we may want to use this to specify the allocation.

In [12]:
# df['Allocate'] = df['Allocate'].replace('(\d+)','',regex=True)

In [13]:
# Show a bounded sample rather than the whole frame: a bare `df` writes every
# row (including customer names) into the saved notebook output.
df.head(10)

Unnamed: 0,Number,Paid by,Date,Paid,Cumulative,Type,Payment_Type,Issued_By,Merchant Ref,Notes,MYOB Narration
0,11009,Brenda Hansell,2022-7-4 13:19:28,160.0,,Income,credit card,Brenda Hansell,CM833380782_Hansell,"Enrolment into ""French For Travellers (L3)"", s...",L3
1,11010,Narina Sidhu,2022-7-4 14:30:57,40.0,,Income,credit card,Sarah Dunne,CM472538171_Sidhu,Membership Payment for (4-Jul-2022 to 4-Jul-20...,Membership
2,11010,Narina Sidhu,2022-7-4 14:30:57,160.0,,Income,credit card,Sarah Dunne,CM472538171_Sidhu,"Enrolment into ""Italian A2 Beginners(L15)"", st...",L15
3,11011,Janet Murray,2022-7-5 9:24:52,-20.0,,Used,credit account,Danielle Desvaux,,"Enrolment into ""French Beginners Conversation(...",L4
4,11011,Janet Murray,2022-7-5 9:24:53,160.0,,Income,credit card,Danielle Desvaux,CM735077395_Murray,"Enrolment into ""French Beginners Conversation(...",L4
5,11012,Chris Holland,2022-7-5 9:53:23,0.0,,Income,credit account,Danielle Desvaux,,"Enrolment into ""Yoga Tuesday(HW15)"", starting ...",HW15
6,11013,Didi Mudigdo,2022-7-5 9:54:39,0.0,,Income,credit account,Danielle Desvaux,,"Enrolment into ""Yoga Tuesday(HW15)"", starting ...",HW15
7,11014,Danielle Desvaux,2022-7-5 9:55:47,0.0,,Income,credit account,Danielle Desvaux,,"Enrolment into ""Yoga Tuesday(HW15)"", starting ...",HW15
8,11015,Cheryl Davenport,2022-7-5 10:36:30,0.0,,Income,credit card,Cheryl Davenport,,"Enrolment into ""eBooks: 10,000 Library Books i...",CT9
9,11016,Elena Starina,2022-7-5 11:24:43,10.0,,Income,credit card,Elena Starina,CM180086164_Starina,"1 ticket for ""French Movie Classics - Notre ...",WB1a


## Filter out non-credit card transactions
Now for the Boolean indexing. The rows we want to keep are 'credit card' and 'Used'. So the rows we want to remove are 'Deposit'/'Saved' (from the Type column) and 'cash', 'EFT' and 'cheque' (from the Payment_Type column). Constructing the Boolean index:

In [14]:
# One boolean mask per category of row to exclude from the export.
# Exclusions based on the Payment_Type column:
cash = df['Payment_Type'].eq('cash')
cheque = df['Payment_Type'].eq('cheque')
eft = df['Payment_Type'].eq('EFT')
# Exclusions based on the Type column:
saved = df['Type'].eq('Saved')
deposit = df['Type'].eq('Deposit')

And now apply and export:

In [15]:
# A row is kept only when none of the exclusion masks match it; the
# surviving rows are written to CSV without the index column.
rows_to_keep = ~cash & ~cheque & ~eft & ~saved & ~deposit
df.loc[rows_to_keep].to_csv('hurrah.csv', index=False)