# Get Category Transactions
This notebook provides a scratch area to dump the transactions for a provided year and spending group.

As input, it uses the PATH_TO_SPENDING_DATA CSV file that is created by extract_spending_and_income.py.

The output of the predict_future_spending.py program includes a table of spending by category, year over year, with color coding that highlights when a particular year's spending deviates from the average or from the previous year.

This notebook is useful for a deep dive into the individual transactions, to understand the reason for a deviation and possibly catch mis-categorized transactions.

In [101]:
# Pick the year and the spending group to inspect
YEAR = 2022
SPENDING_GROUP = "Health Care"

# Derived from YEAR; normally there is no reason to edit anything below.
# year_col is the per-year amount column that later cells filter on and
# rename to 'Amount'.
year_col = '{} Amount'.format(YEAR)
COLUMNS_OF_INTEREST = [
    'Description',
    year_col,
    'Category',
]

In [102]:
import pandas as pd
# Import shared configuration file
import expenses_config as ec
import visualization_methods as vms

# Show every row when a frame is printed, and widen the text output to 200 characters
pd.options.display.max_rows = None
pd.options.display.width = 200

# Load all of the spending transactions from the csv into a dataframe
df = vms.read_structured_transactions(
    ec.PATH_TO_SPENDING_DATA, ec.PATH_TO_YOUR_TRANSACTIONS, "Date", "spending transaction data"
)

Reading spending transaction data from spending.csv


In [103]:
# Narrow transactions down to just the spending group and year of interest.
# Rows with no value (NaN) in the selected year's amount column are excluded.
in_group_and_year = (df['Spending Group'] == SPENDING_GROUP) & df[year_col].notna()

# Keep only the useful columns, expose the year column as 'Amount' and sort by index.
# NOTE: 'Transaction Type' is not in the default COLUMNS_OF_INTEREST, so that part of
# the rename only takes effect if the column is added there (rename ignores labels
# that are not present).
filtered_df = (
    df.loc[in_group_and_year, COLUMNS_OF_INTEREST]
    .rename(columns={'Transaction Type': 'Type', year_col: 'Amount'})
    .sort_index()
)
print(filtered_df)

# Report the count and the total as two separate statements; the previous single
# message ran them together ("... transactions for Total spending on ...").
print(f'There were {len(filtered_df)} transactions for {SPENDING_GROUP} in {YEAR}')
print(f'Total spending on {SPENDING_GROUP} for {YEAR}: {filtered_df["Amount"].sum():.2f}')

                                                          Description  Amount                Category
Date                                                                                                 
2022-01-02                                        Amazon - Covid Test   24.99                  Doctor
2022-01-03                                                        CVS    7.54                Pharmacy
2022-01-05                                                        CVS    5.12                Pharmacy
2022-01-05                                                        CVS   13.77                Pharmacy
2022-01-10                Patelco Check 1407 - Sam Gialanella Dentist  196.00                 Dentist
2022-01-10                         KARNER PSYCHOLOGICALGUILDERLAND NY   35.00                  Doctor
2022-01-10                                 Amazon - Nose Hair Trimmer   10.79                Pharmacy
2022-01-11                                        Amazon - Covid Test   17.98     

In [104]:
# Switch to the second year to compare against
YEAR = 2023

# Re-derive the year-dependent names for the new YEAR; normally there is no
# reason to edit anything below.
year_col = '{} Amount'.format(YEAR)
COLUMNS_OF_INTEREST = [
    'Description',
    year_col,
    'Category',
]


In [105]:
# Narrow transactions down to just the spending group and year of interest.
# Rows with no value (NaN) in the selected year's amount column are excluded.
in_group_and_year = (df['Spending Group'] == SPENDING_GROUP) & df[year_col].notna()

# Keep only the useful columns, expose the year column as 'Amount' and sort by index.
# NOTE: 'Transaction Type' is only renamed if it is present in COLUMNS_OF_INTEREST
# (rename ignores labels that are not present).
filtered_df = (
    df.loc[in_group_and_year, COLUMNS_OF_INTEREST]
    .rename(columns={'Transaction Type': 'Type', year_col: 'Amount'})
    .sort_index()
)
print(filtered_df)

# Report the count and the total as two separate statements; the previous single
# message ran them together ("... transactions for Total spending on ...").
print(f'There were {len(filtered_df)} transactions for {SPENDING_GROUP} in {YEAR}')
print(f'Total spending on {SPENDING_GROUP} for {YEAR}: {filtered_df["Amount"].sum():.2f}')

                                                     Description  Amount                Category
Date                                                                                            
2023-01-04                                                   CVS    3.88                Pharmacy
2023-01-05                                                   CVS   13.26                Pharmacy
2023-01-08                                                   CVS   16.22                Pharmacy
2023-01-16            PATRICK NEWMAN      CLIFTON PARK        NY   49.65  Alternative Healthcare
2023-01-17                      Check # 1020 - Willow Podiatrist  117.29                  Doctor
2023-01-19                      GAETANO GIALANELLA DDS ALBANY NY  212.00                 Dentist
2023-01-20                                                   CVS  100.00                Pharmacy
2023-01-21                                                   CVS   15.40                Pharmacy
2023-01-23                    