In [None]:
import pandas as pd
from datetime import datetime

import IPython
HTML = IPython.display.HTML

import re
import gspread

In [None]:
# Authenticate against Google Sheets using the service-account key at this relative path.
gc = gspread.service_account(filename='service_account.json')

# Each spreadsheet is opened by its year; only the current year is loaded by default.
years = [datetime.now().year]
#years.append(2022)

# Collect one raw DataFrame per year from that spreadsheet's 'USD' worksheet.
rdfs = []
for year in years:
    sh = gc.open(str(year))
    worksheet = sh.worksheet('USD')
    records = worksheet.get_all_records()
    rdfs.append(pd.DataFrame(records))
    #rdfs.append(pd.read_excel(f'~/playground/Expenses/{year}.xlsx', sheet_name="USD"))

In [None]:
# Stack the per-year frames, tidy the text columns, parse the dates and
# discard the free-form "Notes" column, all in one chain.
rdf = (
    pd.concat(rdfs)
    .assign(
        Merchant=lambda frame: frame["Merchant"].str.strip(),
        Description=lambda frame: frame["Description"].str.strip(),
        Date=lambda frame: pd.DatetimeIndex(frame.Date),
    )
    .drop(columns=["Notes"])
)

In [None]:
# Credits other than salary, with the sign of Amount flipped.
is_credit = rdf["Transaction Type"] == "credit"
not_salary = rdf["Category"] != "Salary"
credits = rdf.loc[is_credit & not_salary].assign(Amount=lambda x: x.Amount.mul(-1))

# Last expression of the cell: total of the sign-flipped credits.
credits['Amount'].sum()

In [None]:
# Keep only the debit transactions.
is_debit = rdf["Transaction Type"].eq("debit")
debits = rdf.loc[is_debit]

# Last expression of the cell: total debit amount.
debits['Amount'].sum()

In [None]:
# One item per match: a name made of words (letters, digits, hyphen, en dash,
# straight or curly apostrophe), optionally followed by a parenthesised cost
# expression, e.g. "Coffee beans (2*4.5)".
#
# The pattern deliberately has NO trailing repetition of the whole group: with
# an outer `+`, adjacent items such as "Coffee(3)Tea" collapsed into a single
# match whose named groups only kept the last name ("Tea") and could carry over
# an earlier cost ("3"). `re.finditer` already provides the repetition.
ITEMS_REGEX = r"((?P<item>([a-zA-Z0-9\–\'\-\’]+\s*)+)(\((?P<expr>[0-9.+* ]*)\))?)"

def eval_expr(expr):
    """Turn the text found inside an item's parentheses into a number.

    Parameters
    ----------
    expr : str or None
        A plain number ("4.5") or a small arithmetic expression ("2*4.5"),
        or None when there was no expression.

    Returns
    -------
    None if ``expr`` is None; a float if ``expr`` parses as a plain number;
    otherwise the value of ``expr`` evaluated as a Python expression. If it
    cannot be evaluated either (e.g. "" or "1+"), ``expr`` is printed and
    returned unchanged so the offending text can be spotted and fixed.
    """
    if expr is None:
        return expr

    # Fast path: a plain number.
    try:
        return float(expr)
    except (TypeError, ValueError):
        pass

    try:
        # NOTE(review): eval() executes arbitrary Python. That is only
        # acceptable while callers pass text already restricted to digits and
        # arithmetic operators; do not feed it untrusted strings.
        return eval(expr)
    except (TypeError, ValueError, SyntaxError, NameError, ArithmeticError):
        # Malformed expressions (empty "()", dangling operator, ...) raise
        # SyntaxError, which previously escaped and aborted the whole cell.
        print(expr)
        return expr

def items(description):
    """Parse every description in a column into a list of (name, cost) pairs.

    This function is called directly with a whole column (any iterable of
    descriptions), not once per row, so it loops over the entries itself and
    returns one result per entry, in order.

    Parameters
    ----------
    description : iterable
        Description strings. Entries that are not strings (e.g. NaN/None)
        are treated as having no items instead of raising TypeError.

    Returns
    -------
    list of list of tuple
        For each entry, the ``(item name, cost)`` pairs matched by
        ITEMS_REGEX; cost is whatever ``eval_expr`` returns for the
        parenthesised expression (None when there is none).
    """
    parsed = []
    for desc in description:
        if not isinstance(desc, str):
            # re.finditer would raise on NaN/None; keep the row, with no items.
            parsed.append([])
            continue
        parsed.append([
            (m['item'].strip(), eval_expr(m['expr']))
            for m in re.finditer(ITEMS_REGEX, desc)
        ])
    return parsed

def item_cost(item):
    """Return the cost part (position 1) of a ``(name, cost)`` item.

    ``item`` only needs to support indexing, so a tuple or a pandas ``.str``
    accessor over a column of tuples both work.
    """
    cost = item[1]
    return cost

def item_name(item):
    """Return the name part (position 0) of a ``(name, cost)`` item.

    ``item`` only needs to support indexing, so a tuple or a pandas ``.str``
    accessor over a column of tuples both work.
    """
    name = item[0]
    return name

In [None]:
# The item text of a debit is its description; rows without one fall back to
# their category.
# Blank cells read through gspread arrive as '' rather than NaN, so they are
# turned into missing values first; otherwise the fallback would never apply.
# The result is assigned back explicitly: calling fillna(inplace=True) on a
# selected column is chained assignment, which pandas deprecates and which does
# not update the frame under Copy-on-Write.
descriptions = debits['Description']
debits = (
    debits
    .assign(Items=descriptions.mask(descriptions.eq('')).fillna(debits['Category']))
    .drop(columns=['Description', 'Account Name', 'Category'])
)

In [None]:
# Parse each row's item text into (name, cost) pairs and give every pair its
# own row.
debits = debits.assign(Item=items(debits.Items)).explode('Item')

# Split the pairs into columns. Items without an explicit cost take the
# transaction amount. The fill is done inside assign() rather than with
# fillna(inplace=True) on a selected column, which is chained assignment and
# does not update the frame under pandas Copy-on-Write.
debits = debits.assign(
    ItemCost=lambda x: item_cost(x.Item.str).fillna(x['Amount']),
    ItemName=lambda y: item_name(y.Item.str),
)
debits = debits.drop(columns=['Amount', 'Items', 'Item'])

# with pd.option_context("display.max_rows", 1000):
#    display(HTML(debits.to_html(index=False, header=True)))

In [None]:
# `df` is a second name for the exploded debits frame (no copy is made).
df = debits

In [None]:
# Spend per (month, item); rows ordered by month, then cost, both descending.
month = df.Date.dt.to_period('M')
monthly_report = (
    df.groupby([month, "ItemName"])['ItemCost']
    .sum()
    .reset_index(name='ItemCost')
    .sort_values(by=['Date', 'ItemCost'], ascending=False)
)

# Same numbers reshaped: one row per item, one column per month, 0 where an
# item has no spend in a month.
pv_table = pd.pivot_table(
    monthly_report,
    index='ItemName',
    columns='Date',
    values='ItemCost',
    fill_value=0,
    dropna=False,
)

# Total spend per item over the whole period, largest first.
item_report = (
    df.groupby(["ItemName"])['ItemCost']
    .sum()
    .reset_index(name='ItemCost')
    .sort_values(by=['ItemCost'], ascending=False)
)

# Per-item total alongside its month-by-month breakdown.
annual_item_report = (
    item_report
    .merge(pv_table, on='ItemName')
    .sort_values(by=['ItemCost'], ascending=False)
)

In [None]:
# Render the full per-item report as an HTML table without the index column.
with pd.option_context("display.max_rows", 2000):
    report_html = annual_item_report.to_html(index=False, header=True)
    display(HTML(report_html))

In [None]:
# Print the item-level total next to the monthly report total for comparison.
debit_total = debits.ItemCost.sum()
report_total = monthly_report.ItemCost.sum()
print(debit_total, report_total)