In [1]:
import glob
import os
from typing import Optional

import numpy as np
import pandas as pd

In [2]:
# Folder that holds the exported account-activity CSV files.
subdir = '../expenses'

# The CSVs are read with header=None, so column names are supplied explicitly.
columnNames = [
    'Date',
    'Payee',
    'Debit',
    'Credit',
    'Balance',
]

# Wildcard matching every CSV file directly inside `subdir`.
globPattern = os.path.join(subdir, '*.csv')

# Rename CSV files

In [3]:
# Rename each export to accountactivity_YYYY-MM.csv, where YYYY-MM comes from
# the date on the file's first row.
# The matches are collected with glob.glob() up front so that renaming files
# cannot disturb a directory scan that is still in progress (as could happen
# with the lazy glob.iglob()).
for file in glob.glob(globPattern):
    # Only the first row is needed to identify the month; a separate local
    # name is used so the notebook-wide `df` is not clobbered by this cell.
    firstRow = pd.read_csv(file, names=columnNames, header=None, nrows=1)
    month = pd.Timestamp(firstRow['Date'].iloc[0])
    newFile = os.path.join(subdir, f'accountactivity_{month:%Y-%m}.csv')

    if os.path.normpath(file) == os.path.normpath(newFile):
        # Already carries its canonical name (e.g. the cell was re-run).
        continue

    if os.path.exists(newFile):
        # os.rename() silently replaces an existing file on POSIX systems,
        # which would discard a previously renamed export whose first row
        # falls in the same month. Fail loudly instead of losing data.
        raise FileExistsError(
            f'{newFile} already exists; refusing to overwrite it with {file}'
        )

    os.rename(file, newFile)

# Process the data

In [4]:
# Load every CSV export in file-name order. The order is chronological only
# if the files carry the accountactivity_YYYY-MM names produced by the rename
# cell -- TODO confirm that cell has been run first.
csvFiles = sorted(glob.glob(globPattern))
if not csvFiles:
    # pd.concat() on an empty list fails with an opaque "No objects to
    # concatenate"; say what is actually wrong instead.
    raise ValueError(f'No CSV files match {globPattern!r}')

# Separate name for the list of per-file frames, so `df` is only ever a DataFrame.
frames = [
    pd.read_csv(file, names=columnNames, header=None)
    for file in csvFiles
]

df = pd.concat(frames).reset_index(drop=True)
df['Date'] = pd.to_datetime(df['Date'])

# Signed amount: a debit becomes a negative number; where Debit is empty the
# Credit value is used unchanged (positive). When both are present the debit wins.
df['Amount'] = (-df['Debit']).combine_first(df['Credit'])

df = df[['Date', 'Payee', 'Amount']]

In [5]:
def categorizer(row) -> Optional[str]:
    """Map a transaction row to a household expense category.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a 'Payee' entry.

    Returns:
        The category whose key is a case-insensitive prefix of the payee,
        or None when the payee is missing / not a string or matches no key.
    """
    payee = row['Payee']
    if not isinstance(payee, str):
        # pandas represents a blank CSV cell as NaN (a float), which has no
        # .upper(); treat such rows as uncategorised instead of crashing.
        return None

    payee = payee.upper()
    # (payee prefix, category) pairs, checked in order; first match wins.
    categories = (
        ('TOR HYD ELEC', 'Hydro'),
        ('TOR UTILITY', 'Water'),
        ('TORONTO TAX', 'Property Tax'),
        ('WAWANESA INS', 'Insurance'),
        ('ENBRIDGE', 'Heat'),
    )
    for prefix, category in categories:
        if payee.startswith(prefix):
            return category
    return None

# Label every transaction row by row; a payee that matches none of the known
# prefixes in `categorizer` gets None.
df['Category'] = df.apply(categorizer, axis=1)

# Analysis

Last 16 months

In [9]:
# Length of the look-back window, also used as the divisor for the monthly average.
nmonths = 16
# NOTE: despite its name this is the start of the `nmonths`-month window, not
# one year ago. It is relative to the current time, so results change between runs.
oneYearAgo = pd.Timestamp.now() - pd.offsets.DateOffset(months=nmonths)
# Uncategorised rows hold None (not ''), so test for missing values explicitly.
cond = (df['Date'] >= oneYearAgo) & df['Category'].notna()
group = df[cond].groupby('Category')

# Sum only 'Amount': summing every column would also try to add up the
# 'Date' and 'Payee' columns, which fails on datetimes in pandas 2.x.
summary = group[['Amount']].sum().assign(
    Monthly=lambda x: x['Amount'] / nmonths,
    Yearly=lambda x: x['Monthly'] * 12,
)[['Yearly', 'Monthly']]

# Append a 'Total' row. DataFrame.append() was removed in pandas 2.0, so build
# a one-row frame and concatenate it, keeping the 'Category' index name.
total = summary.sum()
total.name = 'Total'
totalRow = total.to_frame().T.rename_axis(summary.index.name)
summary = pd.concat([summary, totalRow])

# Last expression of the cell: rendered as a formatted table. `summary` itself
# stays a plain DataFrame.
summary.style.format({
    'Yearly': '{:,.2f}',
    'Monthly': '{:,.2f}'
})

Unnamed: 0_level_0,Yearly,Monthly
Category,Unnamed: 1_level_1,Unnamed: 2_level_1
Heat,-1180.93,-98.41
Hydro,-1279.18,-106.6
Insurance,-860.22,-71.69
Property Tax,-3423.48,-285.29
Water,-674.55,-56.21
Total,-7418.36,-618.2


In [7]:
# Write the `summary` table to an Excel workbook in the working directory.
# `summary` is a plain DataFrame here, so the '{:,.2f}' display formatting
# applied via `summary.style` is not part of what gets written.
summary.to_excel('home_expenses.xlsx')

# Playground

In [17]:
# Dump the full transaction table (without the index column) for ad-hoc inspection.
# NOTE(review): the "~$" file-name prefix is the pattern Excel uses for its own
# lock/owner files -- presumably chosen so this scratch file is treated as a
# temporary (e.g. ignored by version control); confirm this is intentional.
df.to_excel('./~$expenses.xlsx', index=False)