# Financial Statements Analyzer

By Ken Burchfiel

Released under the MIT license

(More documentation to come!)

In [1]:
import pandas as pd
import numpy as np
import os
import time
import plotly.express as px
start_time = time.time()
import matplotlib.pyplot as plt



In [2]:
expenses_path = 'coded_transactions\\finances_updated_in_python_edited.csv' 
# The above path points to an edited copy of finances_updated_in_python.csv
# in which I filled in missing subcodes.
expense_codes_path = 'sample_finance_codes.csv'
chart_data_folder = 'data_charts'

In [3]:
# pd.set_option('display.max_rows', 300)

In [4]:
expense_codes = pd.read_csv(expense_codes_path).drop('Notes', axis = 1)

expense_codes

Unnamed: 0,Code,Subcode,Code_Description,Subcode_Description
0,A,A-X,ATM,ATM Withdrawals
1,B,B-C,Basics,Convenience Stores
2,B,B-B,Basics,Books
3,B,B-F,Basics,Pets
4,B,B-G,Basics,Gifts
...,...,...,...,...
99,W,W-S,Clothing,Shoes
100,W,W-X,Clothing,Clothing--Other
101,X,X-X,Other,Other Expenses
102,Z,Z-C,Excluded,Credit Card Payments


## Reading in financial records with manual edits:

In [5]:
df_expenses = pd.read_csv(expenses_path)
df_expenses

Unnamed: 0,Date,Amount,Description,Account_Type,Subcode,Month
0,6/15/2022,-1.31,FOREIGN ATM SURCHARGE REBATE,truist_1,A-X,6
1,4/27/2022,13.50,JOES GOURMET DELI ATM NETWORK CASH WITHDRAWAL,truist_1,A-X,4
2,9/29/2022,-0.52,NEW YORK NY JOES GOURMET DELI FOREIGN ATM SURC...,truist_1,A-X,9
3,7/17/2022,0.12,LIBRERIA SAN PABLO 06-14 SEVILLA DEBIT CARD IN...,truist_1,B-B,7
4,4/1/2022,9.40,MAGNIFICAT,amex_1,B-B,4
...,...,...,...,...,...,...
299,5/4/2022,-17.34,ONLINE PAYMENT - THANK YOU,amex_1,Z-C,5
300,7/3/2022,-123.91,ONLINE PAYMENT - THANK YOU,amex_1,Z-C,7
301,10/29/2022,-163.57,ONLINE PAYMENT - THANK YOU,amex_1,Z-C,10
302,9/21/2022,-78.51,ONLINE PAYMENT - THANK YOU,amex_1,Z-C,9


## Merging in expense codes, subcodes, and descriptions:

In [6]:
df_expenses_merged = df_expenses.merge(expense_codes, on = 'Subcode', how = 'left')
subcode_col = df_expenses_merged.columns.get_loc('Subcode')
df_expenses_merged.insert(subcode_col, 'Code', df_expenses_merged.pop('Code')) 
# The above line positions the Code column before the Subcode column.
df_expenses_merged

Unnamed: 0,Date,Amount,Description,Account_Type,Code,Subcode,Month,Code_Description,Subcode_Description
0,6/15/2022,-1.31,FOREIGN ATM SURCHARGE REBATE,truist_1,A,A-X,6,ATM,ATM Withdrawals
1,4/27/2022,13.50,JOES GOURMET DELI ATM NETWORK CASH WITHDRAWAL,truist_1,A,A-X,4,ATM,ATM Withdrawals
2,9/29/2022,-0.52,NEW YORK NY JOES GOURMET DELI FOREIGN ATM SURC...,truist_1,A,A-X,9,ATM,ATM Withdrawals
3,7/17/2022,0.12,LIBRERIA SAN PABLO 06-14 SEVILLA DEBIT CARD IN...,truist_1,B,B-B,7,Basics,Books
4,4/1/2022,9.40,MAGNIFICAT,amex_1,B,B-B,4,Basics,Books
...,...,...,...,...,...,...,...,...,...
299,5/4/2022,-17.34,ONLINE PAYMENT - THANK YOU,amex_1,Z,Z-C,5,Excluded,Credit Card Payments
300,7/3/2022,-123.91,ONLINE PAYMENT - THANK YOU,amex_1,Z,Z-C,7,Excluded,Credit Card Payments
301,10/29/2022,-163.57,ONLINE PAYMENT - THANK YOU,amex_1,Z,Z-C,10,Excluded,Credit Card Payments
302,9/21/2022,-78.51,ONLINE PAYMENT - THANK YOU,amex_1,Z,Z-C,9,Excluded,Credit Card Payments


In [7]:
pd.set_option('display.float_format', str)
# Source: https://stackoverflow.com/a/21140339/13097194

## Monthly spending/receipts per subcode:

In [8]:
df_expenses_by_subcode_by_month = df_expenses_merged.pivot_table(index = ['Month', 'Code', 'Subcode', 'Code_Description', 'Subcode_Description'], values = 'Amount', aggfunc = 'sum').reset_index()
df_expenses_by_subcode_by_month

Unnamed: 0,Month,Code,Subcode,Code_Description,Subcode_Description,Amount
0,1,B,B-C,Basics,Convenience Stores,21.68
1,1,B,B-T,Basics,Local Public Transit,13.75
2,1,D,D-X,Dining,Dining Out,74.33
3,1,F,F-G,Food,Grocery Stores,3.71
4,1,H,H-F,Home,House Furnishings,8.29
...,...,...,...,...,...,...
118,12,H,H-F,Home,House Furnishings,48.39
119,12,H,H-M,Home,Moving,12.52
120,12,J,J-S,Non-Work Income,Investment Account Withdrawals,-155.0
121,12,L,L-X,Charity,Other Charitable Expenses,101.46000000000001


In [9]:
df_expenses_by_subcode_by_year = df_expenses_merged.pivot_table(index = ['Code', 'Subcode', 'Code_Description', 'Subcode_Description'], values = 'Amount', aggfunc = 'sum').reset_index()
df_expenses_by_subcode_by_year

Unnamed: 0,Code,Subcode,Code_Description,Subcode_Description,Amount
0,A,A-X,ATM,ATM Withdrawals,11.67
1,B,B-B,Basics,Books,15.44
2,B,B-C,Basics,Convenience Stores,82.26
3,B,B-G,Basics,Gifts,90.39
4,B,B-R,Basics,Cabs and Rideshare,206.98
5,B,B-T,Basics,Local Public Transit,335.27
6,B,B-X,Basics,Basics--Other,11.28
7,D,D-X,Dining,Dining Out,689.43
8,E,E-S,Entertainment,Sports,22.32
9,E,E-W,Entertainment,Newspapers,12.0


## Creating subsets of these lists that include only selected codes:

In [10]:
df_expenses_by_selected_subcodes_by_month = df_expenses_by_subcode_by_month.query("~Code.isin(['I', 'G', 'J', 'S', 'V', 'Z'])").dropna(subset = 'Code').copy()
df_expenses_by_selected_subcodes_by_month

Unnamed: 0,Month,Code,Subcode,Code_Description,Subcode_Description,Amount
0,1,B,B-C,Basics,Convenience Stores,21.68
1,1,B,B-T,Basics,Local Public Transit,13.75
2,1,D,D-X,Dining,Dining Out,74.33
3,1,F,F-G,Food,Grocery Stores,3.71
4,1,H,H-F,Home,House Furnishings,8.29
...,...,...,...,...,...,...
116,12,F,F-A,Food,Beer/Wine/Liquor,4.73
117,12,F,F-V,Food,Vending Machines,1.57
118,12,H,H-F,Home,House Furnishings,48.39
119,12,H,H-M,Home,Moving,12.52


In [11]:
df_expenses_by_selected_codes_by_month = df_expenses_by_selected_subcodes_by_month.pivot_table(index = ['Month', 'Code', 'Code_Description'], values = 'Amount', aggfunc = 'sum').reset_index()
df_expenses_by_selected_codes_by_month
# Useful for line charts

Unnamed: 0,Month,Code,Code_Description,Amount
0,1,B,Basics,35.43
1,1,D,Dining,74.33
2,1,F,Food,3.71
3,1,H,Home,8.29
4,1,L,Charity,168.99
...,...,...,...,...
68,12,D,Dining,80.64999999999999
69,12,E,Entertainment,8.0
70,12,F,Food,6.300000000000001
71,12,H,Home,60.91


In [12]:
df_expenses_by_selected_subcodes_by_year = df_expenses_by_subcode_by_year.query("~Code.isin(['I', 'G', 'J', 'S', 'V', 'Z'])").dropna(subset = 'Code').copy()
df_expenses_by_selected_subcodes_by_year

Unnamed: 0,Code,Subcode,Code_Description,Subcode_Description,Amount
0,A,A-X,ATM,ATM Withdrawals,11.67
1,B,B-B,Basics,Books,15.44
2,B,B-C,Basics,Convenience Stores,82.26
3,B,B-G,Basics,Gifts,90.39
4,B,B-R,Basics,Cabs and Rideshare,206.98
5,B,B-T,Basics,Local Public Transit,335.27
6,B,B-X,Basics,Basics--Other,11.28
7,D,D-X,Dining,Dining Out,689.43
8,E,E-S,Entertainment,Sports,22.32
9,E,E-W,Entertainment,Newspapers,12.0


In [13]:
sum(df_expenses_by_selected_subcodes_by_year['Amount'])

4463.37

In [14]:
sum(df_expenses_by_selected_subcodes_by_month['Amount'])

4463.369999999999

## Visualizing spending through interactive Plotly bar charts:

In [15]:
# See https://plotly.com/python/bar-charts/
fig_yearly_spending_by_selected_subcodes = px.bar(df_expenses_by_selected_subcodes_by_year, x = 'Code_Description', y = 'Amount', color = 'Subcode_Description')
fig_yearly_spending_by_selected_subcodes.show()

In [16]:
fig_monthly_spending_by_selected_subcodes = px.bar(df_expenses_by_selected_subcodes_by_month, x = 'Month', y = 'Amount', color = 'Subcode_Description')
fig_monthly_spending_by_selected_subcodes.write_html(chart_data_folder + '\\monthly_spending_by_subcode.html') 
fig_monthly_spending_by_selected_subcodes.show()

In [17]:
fig_monthly_spending_by_selected_codes = px.histogram(df_expenses_by_selected_subcodes_by_month, x = 'Month', y = 'Amount', color = 'Code_Description', histfunc = 'sum')
# See https://plotly.com/python/histograms/
fig_monthly_spending_by_selected_codes.update_layout(bargroupgap = 0.2)
fig_monthly_spending_by_selected_codes.write_html(chart_data_folder + '\\fig_monthly_spending_by_selected_codes.html') 
fig_monthly_spending_by_selected_codes.show()

In [18]:
fig_monthly_spending_by_selected_codes_line_chart = px.line(df_expenses_by_selected_codes_by_month, x = 'Month', y = 'Amount', color = 'Code_Description')
# See https://plotly.com/python/histograms/

fig_monthly_spending_by_selected_codes_line_chart.write_html(chart_data_folder + '\\fig_monthly_spending_by_selected_codes_line_chart.html') 
fig_monthly_spending_by_selected_codes_line_chart.show()