In [1]:
import pandas as pd
import re

In [2]:
# Load the raw transaction export from the data/ directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='data/transactions.csv')
# Bare trailing expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,Date,Description,Amount
0,2025-04-01,"NEWNHAM MAIN TORONTO, ON",5.03
1,2025-03-31,"UBER CANADA/UBEREATS TORONTO, ON",32.54
2,2025-03-28,"NEWNHAM MAIN TORONTO, ON",6.73
3,2025-03-28,"WINGSTOP CANADA 2595 YON NORTH YORK, ON",13.38
4,2025-03-27,"HUDSON'S BAY #1554 NORTH YORK, ON",129.95
...,...,...,...
534,2024-04-01,"TIM HORTONS #4852 BRAMPTON, ON",15.88
535,2024-04-01,"ASHBY FIELD CONVENIENCE BRAMPTON, ON",12.31
536,2024-04-01,"TIM HORTONS #4852 BRAMPTON, ON",13.79
537,2024-04-01,"NEWNHAM MAIN TORONTO, ON",8.74


In [3]:
# Structural overview of the loaded frame, one item per line in this order:
#   1. (rows, cols) shape
#   2. dtype of each column
#   3. count of null values in each column
print(df.shape, df.dtypes, df.isnull().sum(), sep='\n')

(539, 3)
Date            object
Description     object
Amount         float64
dtype: object
Date           0
Description    0
Amount         0
dtype: int64


In [4]:
def clean_description(desc):
    """Reduce a transaction description to its lowercase letters.

    The text is lowercased first, then every character that is not one of
    ``a``-``z`` (digits, whitespace, punctuation, symbols) is removed, so
    the words end up joined together without separators.

    Args:
        desc: Raw description string.

    Returns:
        The lowercased description containing only the letters ``a``-``z``.
    """
    non_letters = re.compile(r'[^a-z]')
    return non_letters.sub('', desc.lower())

In [5]:
# Case-insensitive matcher for descriptions that mark a row as a payment
# (matches "payment", "thank you" with any whitespace between, or "cashback").
payment_kws = re.compile(r'(payment|thank\s+you|cashback)', re.IGNORECASE)

# Build the cleaned frame in a single non-mutating chain rather than assigning
# columns one by one and sorting with inplace=True.
#   Date           - parsed with pd.to_datetime
#   NewDescription - Description passed through clean_description
#   Amount         - coerced with pd.to_numeric
#   Type           - 'payment' when Description matches payment_kws, else 'purchase'
df = (
    df.assign(
        Date=pd.to_datetime(df['Date']),
        NewDescription=df['Description'].apply(clean_description),
        Amount=pd.to_numeric(df['Amount']),
        Type=df['Description'].apply(
            lambda x: 'payment' if payment_kws.search(x) else 'purchase'
        ),
    )
    # Order rows from oldest to newest transaction.
    # NOTE: this uses sort_values' default algorithm, which pandas documents as
    # not stable, so rows sharing a Date are not guaranteed to keep their
    # relative order from the input file.
    .sort_values(by='Date')
    # Renumber rows 0..n-1 to match the new order, discarding the old index.
    .reset_index(drop=True)
)

# Persist the cleaned, sorted frame without the index column, then display it.
df.to_csv('final_transactions.csv', index=False)
df

Unnamed: 0,Date,Description,Amount,NewDescription,Type
0,2024-04-01,"PRESTO FARE/5512K4P7GT TORONTO, ON",4.50,prestofarekpgttorontoon,purchase
1,2024-04-01,"CINEPLEX REC ROOM ROUN TORONTO, ON",31.64,cineplexrecroomrountorontoon,purchase
2,2024-04-01,"KELLYS LANDING #6415 TORONTO, ON",38.34,kellyslandingtorontoon,purchase
3,2024-04-01,"OISHII TEA BRAMPTON, ON",15.42,oishiiteabramptonon,purchase
4,2024-04-01,"NEWNHAM MAIN TORONTO, ON",8.74,newnhammaintorontoon,purchase
...,...,...,...,...,...
534,2025-03-27,"HUDSON'S BAY #1554 NORTH YORK, ON",129.95,hudsonsbaynorthyorkon,purchase
535,2025-03-28,"WINGSTOP CANADA 2595 YON NORTH YORK, ON",13.38,wingstopcanadayonnorthyorkon,purchase
536,2025-03-28,"NEWNHAM MAIN TORONTO, ON",6.73,newnhammaintorontoon,purchase
537,2025-03-31,"UBER CANADA/UBEREATS TORONTO, ON",32.54,ubercanadaubereatstorontoon,purchase
