# Analysis of recent bank activity

- Transactions pulled from 6/1/2022 - 7/16/2022
- Only includes Checking Account Activity

In [3]:
from abc import ABCMeta, abstractmethod
import csv
from dataclasses import dataclass, field
from datetime import datetime
from decimal import Decimal as decimal
from enum import Enum, IntEnum
import re
from typing import Any

from pandas import DataFrame, Series
from rich import print

from models.filters import Filter, Condition, StringMatchCondition, ValueCondition, DateRangeCondition
from models.enums import Category, Column, ColumnIndex, Comparison, Month
from helpers import handle_ex
from constants import DEFAULT_DATA_DIRECTORY, DEFAULT_CLEANSED_DIRECTORY, DEFAULT_PARSED_DIRECTORY, DEFAULT_RAW_DIRECTORY

In [4]:
# Show the value of DEFAULT_DATA_DIRECTORY (imported from the constants module).
# Note that `print` here is rich's print, which the imports cell brings in over the builtin.
print(DEFAULT_DATA_DIRECTORY)

## Define Constants for working with transactions

In [None]:
# Location of the statement file at each pipeline stage. Each path is built from
# the directory constant that matches its name (raw -> cleansed -> parsed).
RAW_FILEPATH = f"{DEFAULT_RAW_DIRECTORY}/stmt.csv"
CLEANSED_FILEPATH = f"{DEFAULT_CLEANSED_DIRECTORY}/stmt.csv"
PARSED_FILEPATH = f"{DEFAULT_PARSED_DIRECTORY}/stmt.csv"

# Column labels for the transactions DataFrame. The last three (category, tags, id)
# are not in the statement file; they are appended to every row during cleansing.
COLUMN_NAMES = ["date", "description", "amount", "running balance", "category", "tags", "id"]

## Load and Clean up data
- Remove header rows
- Remove commas
- Convert empty values for amount and balance to 0.0
- Save the cleansed data

In [None]:
# Number of leading rows in the statement file that precede the first transaction.
HEADER_ROW_COUNT = 8
ZERO = decimal(0)
CENTS = decimal("0.01")


def parse_money(text: str) -> decimal:
    """Convert a statement money string (e.g. "1,234.56") to a Decimal in cents precision.

    Args:
        text: The raw cell value; may contain thousands-separator commas or be empty.

    Returns:
        ZERO for an empty string, otherwise the value rounded to two decimal places.

    Raises:
        decimal.InvalidOperation: If the text is not a valid number.
    """
    if text == "":
        return ZERO
    # Parse the text directly as a Decimal: going through float and a "2f" format
    # (a width, not a precision) produced six-decimal values and risked float error.
    return decimal(text.replace(",", "")).quantize(CENTS)


# Read-only access is sufficient; newline="" lets the csv module handle line endings itself.
with open(RAW_FILEPATH, "r", newline="") as f:
    raw_dataset = list(csv.reader(f, dialect="excel"))

# Drop the header rows and any blank lines (csv.reader yields [] for those).
dataset = [row for row in raw_dataset[HEADER_ROW_COUNT:] if row]
for i, row in enumerate(dataset):
    month, day, year = row[ColumnIndex.DATE].split("/")
    row[ColumnIndex.DATE] = datetime(int(year), int(month), int(day))
    # Append the derived columns: category, tags and a 1-based id.
    row.extend([Category.UNCATEGORIZED, [], i + 1])

    amount = parse_money(row[ColumnIndex.AMOUNT])
    if amount > ZERO:
        # Positive amounts are money coming in.
        row[ColumnIndex.CATEGORY] = Category.INCOME
    # Amounts are stored unsigned; the category carries the direction for income.
    row[ColumnIndex.AMOUNT] = abs(amount)

    row[ColumnIndex.RUNNING_BALANCE] = parse_money(row[ColumnIndex.RUNNING_BALANCE])

# Persist the cleansed rows; newline="" prevents blank lines between rows on Windows.
with open(CLEANSED_FILEPATH, "w", newline="") as f:
    writer = csv.writer(f, dialect="excel")
    writer.writerows(dataset)

## Load The DataFrame

In [None]:
# Build the transactions frame from the cleansed rows, labelling columns per COLUMN_NAMES.
df = DataFrame(data=dataset, columns=COLUMN_NAMES)
transaction_count = len(df.index)
print(f"There are {transaction_count} transactions in the dataset")

## Create Filters
- Create list of filters

In [None]:
# AMAZON_SUBFILTER = DescriptionFilter("Amazon", "AMZN")
# VENMO_SUBFILTER = DescriptionFilter("VenmoFilter", "VENMO")

# Description conditions shared by several filters below.
AMAZON_CONDITION = StringMatchCondition("AMZN")
# Must match Venmo descriptions; this previously duplicated the Amazon pattern ("AMZN"),
# so the Venmo-based filter could only ever match Amazon transactions.
VENMO_CONDITION = StringMatchCondition("VENMO")

# Filters are applied in list order to the transactions that are still uncategorized.
# NOTE(review): amounts in the dataset are Decimal values; confirm ValueCondition
# compares as intended against the float 120.0.
filters: list[Filter] = [
    Filter("Fraud", Category.FRAUD, [AMAZON_CONDITION, DateRangeCondition(datetime(2022, 6, 6))]),
    Filter("Fraud", Category.FRAUD, [AMAZON_CONDITION, DateRangeCondition(datetime(2022, 7, 11))]),
    Filter("Fraud", Category.FRAUD, [AMAZON_CONDITION, DateRangeCondition(datetime(2022, 6, 21))]),
    Filter("Chris", Category.WEED, [VENMO_CONDITION, ValueCondition(120.0)]),
]
# filters: list[DescriptionFilter] = [
#     DateRangeFilter(datetime(2022, 6, 5), datetime(2022, 6, 7), AmazonFilter(float(27.87), Category.FRAUD)),
#     DateRangeFilter(datetime(2022, 6, 10), datetime(2022, 7, 12), AmazonFilter(float(8.50), Category.FRAUD)),
#     DateRangeFilter(datetime(2022, 6, 20), datetime(2022, 6, 22), AmazonFilter(float(9.56), Category.FRAUD)),    
#     ValueFilter("Chris", float(120.0), Category.WEED, subfilter=VENMO_SUBFILTER),
#     ValueFilter("Aza", float(15.0), Category.GIFTS, subfilter=VENMO_SUBFILTER),
#     DescriptionFilter("GrubHub", "GRUBHUB", Category.DELIVERY),
#     DescriptionFilter("Snacks", "CHESHIRE GAS", Category.GROCERIES),
#     DescriptionFilter("Steam", "STEAMGAMES", Category.ENTERTAINMENT),
#     DescriptionFilter("McCue-2", "MCCUE", Category.MORTGAGE),
#     DescriptionFilter("AllState", "ALLSTATE", Category.INSURANCE),
#     DescriptionFilter("Gym", "TENNIS", Category.FITNESS),
#     DescriptionFilter("Gym-2", "EDGE\sFITNESS", Category.FITNESS),
#     DescriptionFilter("SavingsDeposit", "transfer\sto\sSAV", Category.SAVINGS),
#     DescriptionFilter("Mortgage", "McCue", Category.MORTGAGE),
#     DescriptionFilter("PSN", "PLAYSTATION", Category.ENTERTAINMENT),
#     DescriptionFilter("Dunkin", "DUNKIN", Category.TAKEOUT),
#     DescriptionFilter("Snacks", "SAM'S\sFOOD", Category.TAKEOUT),
#     DescriptionFilter("KeepTheChange", "KEEP\sTHE\sCHANGE", Category.SAVINGS),
#     DescriptionFilter("SavingsWithdraw", "transfer\sfrom\sSAV", Category.SAVINGS),
#     DescriptionFilter("Comcast", "COMCAST", Category.INTERNET),
#     DescriptionFilter("Att", "ATT\sDES", Category.PHONE),
#     DescriptionFilter("Peapod", "PEAPOD", Category.GROCERIES),
#     DescriptionFilter("Patreon", "PATREON\sMEMBER", Category.ENTERTAINMENT),
#     DescriptionFilter("Juli'sBills", "MAGRATH", Category.LOANS),
#     DescriptionFilter("Juli'sBills-2", "Magrath", Category.LOANS),
#     DescriptionFilter("PetSupplies", "PETCO", Category.PETS),
#     DescriptionFilter("HouseWork", "SM\sMECHANICAL\sSERVICES", Category.GAS),
#     DescriptionFilter("Aresco", "ARESCO", Category.GROCERIES),
#     DescriptionFilter("Sunoco", "SUNOCO", Category.GROCERIES),
#     DescriptionFilter("Affirm", "AFFIRM", Category.LOANS),
#     DescriptionFilter("LifeInsurance", "NEW\sYORK\sLIFE\sDES", Category.INSURANCE),
#     DescriptionFilter("Kindle", "KINDLE", Category.ENTERTAINMENT),
#     DescriptionFilter("YouTube", "YOUTUBE", Category.ENTERTAINMENT),
#     DescriptionFilter("Fuel", "CITGO", Category.AUTO),
#     DescriptionFilter("AMEXCreditCard", "AMERICA\sCREDIT\sCARD", Category.LOANS),
#     DescriptionFilter("MCCreditCard", "PAYPAL\sEXTRAS\sMASTERCARD", Category.LOANS),
#     DescriptionFilter("Bilaton", "STRYVEFOODS", Category.GROCERIES),
#     DescriptionFilter("Theater", "THOMASTON\sOPERA\sHOUSE", Category.ENTERTAINMENT),
#     DescriptionFilter("Electric", "CL&P", Category.ELECTRIC),
#     DescriptionFilter("PriceChopper", "PRICE\sCHOPPER", Category.GROCERIES),
#     DescriptionFilter("IRS", "IRS\sDES", Category.TAXES),
#     DescriptionFilter("Vivint", "VIVINT", Category.INSURANCE),
#     DescriptionFilter("Dropbox", "DROPBOX", Category.INTERNET),
#     DescriptionFilter("VPN", "MOZILLACORP", Category.INTERNET),
#     DescriptionFilter("Fairview", "FAIRVIEW", Category.GIFTS),
#     DescriptionFilter("UpgradeLoad", "UPGRADE", Category.LOANS),
#     DescriptionFilter("Eversource", "EVERSOURCE", Category.GAS),
#     DescriptionFilter("CarLoan", "CAPITAL\sONE\sAUTO", Category.LOANS),
#     DescriptionFilter("Chris", "TO\sCHRIS", Category.WEED),
#     DescriptionFilter("Aza", "TO\sAZA", Category.ENTERTAINMENT),
#     DescriptionFilter("Stryve-2", "STRYVE", Category.GROCERIES),
#     DescriptionFilter("GasStation", "FUEL\sPLUS", Category.AUTO),
#     DescriptionFilter("NetFlix", "Netflix", Category.ENTERTAINMENT), 
#     AmazonFilter(10.62, Category.ENTERTAINMENT), 
#     AmazonFilter(11.29, Category.GROCERIES),
#     AmazonFilter(53.15, Category.ENTERTAINMENT),
#     AmazonFilter(16.84, Category.GROCERIES),
#     AmazonFilter(14.13, Category.WEED),
#     AmazonFilter(26.12, Category.GROCERIES),
#     AmazonFilter(57.40, Category.GROCERIES),
#     AmazonFilter(19.30, Category.GROCERIES),
#     AmazonFilter(180.78, Category.ENTERTAINMENT),
#     AmazonFilter(17.01, Category.ENTERTAINMENT),    
# ]


- Automatically categorize Income as anything > $0.00 
- Run uncategorized transactions against filters, matches get category set by filter

In [None]:
# Apply each filter, in order, to the transactions that are still uncategorized.
# The loop variable is not called `filter` so the builtin stays usable in later cells.
for trx_filter in filters:
    uncategorized = df[df.category == Category.UNCATEGORIZED]
    if uncategorized.empty:
        # Everything has a category already; the remaining filters have nothing to do.
        break
    matches = trx_filter.match(uncategorized)
    # Write the category back through df.loc. Assigning on `matches` would only
    # modify a filtered copy and leave df unchanged.
    # NOTE(review): assumes Filter.match returns the matching rows with their
    # original index preserved -- confirm against models.filters.
    df.loc[matches.index, "category"] = trx_filter.category

categorized = df[df.category != Category.UNCATEGORIZED]
uncategorized = df[df.category == Category.UNCATEGORIZED]
# len() counts rows (transactions); .size would count rows x columns.
print(f"There are {len(categorized)} categorized transactions")
print(f"There are {len(uncategorized)} uncategorized transactions")


## Save the results

## Analysis of Categorized Transactions

In [None]:
# CATEGORY_HEADER_TEXT = "Category"
# ZERO_TOTAL_TEXT = "$0.00"
# with open(PARSED_FILEPATH, "r+") as f:
#     trx_list = TransactionList.from_json(f.read())

# totals_by_category_by_month: dict[Month, dict[Category, float]] = {}
# months = set([Month(trx.date.month) for trx in trx_list.transactions])
# for month in months:
#     totals_by_category = {}    
#     for trx in trx_list.transactions:    
#         if trx.date.month == month:
#             if trx.category not in totals_by_category.keys():
#                 totals_by_category[trx.category] = trx.amount
#             else:
#                 totals_by_category[trx.category] += trx.amount
#     totals_by_category_by_month[month] = totals_by_category
    
# totals_by_category: dict[Category, dict[Month, float]] = {}
# for month, totals_for_month in totals_by_category_by_month.items():    
#     for category, total in totals_for_month.items():
#         if category not in totals_by_category.keys():
#             totals_by_category[category] = {Month(month): total}
#         else:
#             totals_by_category[category][Month(month)] = total

# header_row: list[str] = [
#     CATEGORY_HEADER_TEXT
# ]
# for month in months:
#     header_row.append(month.name)

# data: list[list[str]] = []
# for category, month_totals in totals_by_category.items():
#     row = [category.value]
#     for month in months:
#         if month in month_totals.keys():            
#             total = month_totals[month]
#             row.append(f"${total:.2f}")
#         else:
#             row.append(ZERO_TOTAL_TEXT)
#     data.append(row)

# DataFrame(data, columns=header_row)