In [47]:
import pandas as pd
from datetime import datetime
import os
import shutil
from openpyxl import Workbook, load_workbook
from dateutil.relativedelta import relativedelta

In [None]:
###### CSV Header Names ######
## Global Columns, or one that should be normalized to be global, such as date and cost
category = 'Category'
retailer = 'Description'
date = 'Date'
cost = 'Amount'

## Credit Card CSV Columns
transaction_date = 'Transaction Date'
transaction_cost = 'Debit'

## Bank Account CSV Columns
banking_date = "Date"
banking_cost = "Amount"

## THESE ARE THE NAMES OF GROCERIES AS THEY APPEAR ON THE TRANSACTIONS CSV FILE
grocery_keywords = ['KROGER', 'GIANT', 'SAFEWAY', 'HELLOFRESH', 'WEGMANS', 'FOOD LION']

## Names of Sheets to output in the Excel Workbook
net_sheet_name = "Net"
income_sheet_name = "Income"
expenses_sheet_name = "Expenses"
analysis_sheet_name = "Analysis"

###### Mappings and Lookups ######
banking_to_income_lookup = {
    "SAFECO": "Car Insurance",
    "PAYMENT AT CAPITAL ONE ONLINE PMT": "Credit Card",
    "PAYMENT AT CAPITAL ONE CRCARDPMT": "Credit Card",
    "PAYMENT AT CAPITAL ONE MOBILE PMT" : "Credit Card",
    "PAYMENT AT CAPITAL ONE TRANSFER": "HYSA Transfer",
    "PAYMENT AT SCHWAB": "Investments",
    "SCHWAB BROKERAGE MONEYLINK" : "Investments",
    "PAYMENT AT GOVERNORS GREEN": "Rent",
    "HUMANA PAYROLL": "Payroll",
    "XAVIER UNIVERSIT PAYROLL" : "Payroll",
    "PLANET FIT CLUB FEES" : "Fitness",
    "COMCAST": "Internet",
    "RECURRING PURCHASE AT SPECTRUM" : "Internet",
    "5/3 ONLINE TRANSFER" : "Fifth Third Transfer",
    "ZELLE PMT" : "Zelle",
    "VENMO" : "Venmo",
    "MOBILE DEPOSIT" : "Deposit"
}

In [49]:
print("Spending Tool. Enter start and end date, the location of the CSV transaction files, and if you want an Excel spreadsheet.")
print("Ensure all file headers match what is declared in the Python script")
# date_input = input("Start date (YYYY-MM-DD): ")
date_format = "%Y-%m-%d"
start_date = pd.to_datetime("2025-11-03")
# date_input = input("End date (YYYY-MM-DD): ")
end_date = pd.to_datetime("2025-12-03")
transaction_path = "../../Finances/Transactions"
banking_path = "../../Finances/Banking"
budget_tracking_path = "../../Finances/Official Budget.xlsx"
excel_filename = "budget.xlsx"
# excel = input("Write to Excel Sheet? (Y/N): ")
excel="Y"
if excel == "Y":
    excel = True
else:
    excel = False

Spending Tool. Enter start and end date, the location of the CSV transaction files, and if you want an Excel spreadsheet.
Ensure all file headers match what is declared in the Python script


In [50]:
global income_df, expenses_df, net_df
# # Create a backup of the file at budget_tracking_path
# backup_file_path = budget_tracking_path + ".bak"
# print("Backup file:", backup_file_path)
# shutil.copy2(budget_tracking_path, backup_file_path)
# Read in the file at budget_tracking_path to a series of dataframes
expenses_df = pd.read_excel(budget_tracking_path, sheet_name=expenses_sheet_name)
income_df = pd.read_excel(budget_tracking_path, sheet_name=income_sheet_name)
net_df = pd.DataFrame(columns=['Month', 'Net', 'Income', 'Expenses'])

In [51]:
def sort_df_by_date(df, date_field):
    df[date_field] = pd.to_datetime(df[date_field])
    df = df.sort_values(by=date_field, ascending=True)
    df = df.reset_index(drop=True)
    df[date_field] = df[date_field].dt.strftime('%Y-%m-%d')
    return df

In [52]:
def merge_events(input_file_path, dedup_fields):
    # Ingest CSV lines
    input_events = [os.path.join(input_file_path, f) for f in os.listdir(input_file_path) if os.path.isfile(os.path.join(input_file_path, f))]
    input_df = [pd.read_csv(file) for file in input_events]
    merged_events = pd.concat(input_df).drop_duplicates(subset=dedup_fields)
    # Normalize Date and Amount columns
    if banking_date in merged_events.columns and cost in merged_events.columns:
        merged_events = merged_events.rename(columns={banking_date: date, cost: cost})
    elif transaction_date in merged_events.columns and transaction_cost in merged_events.columns:
        merged_events = merged_events.rename(columns={transaction_date: date, transaction_cost: cost})
    # Remove $0 events
    merged_events = merged_events[merged_events[cost].notna()]
    
    # Fill in Banking categories
    if not category in merged_events.columns:
        print("Adding categories to merged_banking")
        # Populate the 'category' column in filtered_banking based on retailer
        for lookup, category_value in banking_to_income_lookup.items():
            merged_events.loc[merged_events[retailer].str.contains(lookup), category] = category_value
        # Fill in "Other" for events not defined in the lookup
        merged_events.loc[merged_events[category].isna(), category] = "Other"
        merged_events = sort_df_by_date(merged_events, date)
    return merged_events


In [53]:
def filter_events_by_date(start_date, end_date, merged_events):
    filtered_events = merged_events[(pd.to_datetime(merged_events.iloc[:, 0]) >= start_date) & 
                        (pd.to_datetime(merged_events.iloc[:, 0]) <= end_date)].sort_values(by=date, ascending=False) 
    return filtered_events


In [54]:
def enrich_grocery(merged_transactions):
    for keyword in grocery_keywords:
        contains_keyword = merged_transactions[retailer].str.contains(keyword, case=False, na=False)
        not_fuel = ~merged_transactions[retailer].str.contains('FUEL', case=False, na=False)
        if contains_keyword.any() and not_fuel.any():
            merged_transactions.loc[contains_keyword & not_fuel, category] = 'Grocery'
    return merged_transactions

In [55]:
def summarize_transaction_categories(filtered_transactions):
    transaction_to_expenses_lookup = {
        "Gas/Automotive" : "Gas",
        "Health Care" : "Healthcare",
        "Entertainment" : "Other"
    }
    for lookup in transaction_to_expenses_lookup:
        filtered_transactions.loc[filtered_transactions[category] == lookup, category] = transaction_to_expenses_lookup[lookup]
    filtered_transactions.loc[filtered_transactions[category].str.startswith("Other"), category] = "Other"            
    categories_summary = filtered_transactions.groupby(category)[cost].sum().round(2).reset_index()
    return categories_summary

In [56]:
def summarize_banking_categories(filtered_banking):
    # Now the 'category' column in merged_banking should be filled with the corresponding values
    categories_summary = filtered_banking.groupby(category)[banking_cost].sum().round(2).reset_index()
    return categories_summary

In [57]:
def daily_totals(filtered_transactions):
    date_range_df = pd.DataFrame({date: pd.date_range(start=start_date, end=end_date, freq='D')})
    date_range_df[date] = date_range_df[date].dt.date
    filtered_transactions[cost] = filtered_transactions[cost].fillna(0)

    filtered_transactions['Count'] = 0
    date_range_df[cost] = float('nan')
    date_range_df['Count'] = 0

    for index, row in filtered_transactions.iterrows():
        for i, r in date_range_df.iterrows():
            if str(row[date]) == str(r[date]):
                date_range_df.at[i, 'Count'] += 1
                if pd.isna(date_range_df.at[i, cost]):
                    date_range_df.at[i, cost] = row[cost]
                else:
                    date_range_df.at[i, cost] += row[cost]
                    date_range_df.at[i, cost] = date_range_df.at[i, cost].round(decimals=2)
                break
    date_range_df[cost] = date_range_df[cost].fillna(0)   

    return date_range_df

In [58]:
def append_transactions(categories_summary):
    global timeline_transactions_df
    new_row = {"Month": end_date.strftime("%Y-%m-%d")} # One row for new dataframe
    timeline_transactions_df.loc[len(timeline_transactions_df)] = new_row # append new row to dataframe
    # Set each category cost to its category in the row corresponding to end_month
    for index, row in categories_summary.iterrows():
        timeline_transactions_df.loc[timeline_transactions_df['Month'] == end_date.strftime("%Y-%m-%d"), row[category]] = row[cost]
    timeline_transactions_df = sort_df_by_date(timeline_transactions_df, 'Month')
    return timeline_transactions_df

In [59]:
def append_banking(categorized_banking):
    global timeline_banking_df
    new_row = {"Month": end_date.strftime("%Y-%m-%d")} # One row for new dataframe
    timeline_banking_df.loc[len(timeline_banking_df)] = new_row # append new row to dataframe
    # Set each category cost to its category in the row corresponding to end_month
    for index, row in categorized_banking.iterrows():
        timeline_banking_df.loc[timeline_banking_df['Month'] == end_date.strftime("%Y-%m-%d"), row[category]] = row[cost]
    timeline_banking_df = sort_df_by_date(timeline_banking_df, 'Month')
    return timeline_banking_df

In [None]:
def append_net(filtered_transactions, filtered_banking):
    global net_df
    transaction_expenses = filtered_transactions[filtered_transactions[cost] > 0] # positive transaction events lose money
    banking_expenses = filtered_banking[filtered_banking[cost] < 0] # negative banking events lose money
    
    transaction_income = filtered_transactions[filtered_transactions[cost] < 0] # negative transaction events gain money
    banking_income = filtered_banking[filtered_banking[cost] > 0] # positive banking events gain money
            
    # Flip the negative to work with positive numbers
    transaction_income[cost] = transaction_income[cost].abs()
    banking_expenses[cost] = banking_expenses[cost].abs()
    
    transaction_expenses = transaction_expenses[[date, category, cost]]
    banking_expenses = banking_expenses[[date, category, cost]]
    transaction_income = transaction_income[[date, category, cost]]
    banking_income = banking_income[[date, category, cost]]
    
    # Keep transactions here to make date of expense/income accurate; don't apply "Credit Card" category to expenses
    expenses_df = pd.concat(
        [
            transaction_expenses, 
            banking_expenses
        ])
    income_df = pd.concat(
        [
            transaction_income,
            banking_income
        ])
    
    # Exceptions to not count in net
    net_exception_categories = ["HYSA Transfer", "Investments", "Credit Card"]
    expenses_df = expenses_df[~expenses_df[category].isin(net_exception_categories)]
    income_df = income_df[~income_df[category].isin(net_exception_categories)]
    
    # Append Month totals to Net DF
    new_row = {"Month": end_date.strftime("%Y-%m-%d")} # One row for new dataframe
    net_df.loc[len(net_df)] = new_row # append new row to dataframe
    net_df.loc[net_df['Month'] == end_date.strftime("%Y-%m-%d"), "Income"] = income_df[cost].sum()
    net_df.loc[net_df['Month'] == end_date.strftime("%Y-%m-%d"), "Expenses"] = expenses_df[cost].sum()
    net_df.loc[net_df['Month'] == end_date.strftime("%Y-%m-%d"), "Net"] = (income_df[cost].sum() - expenses_df[cost].sum())
    net_df.loc[net_df['Month'] == end_date.strftime("%Y-%m-%d"), "Difference"] = ((expenses_df[cost].sum() / income_df[cost].sum())).round(2)
    net_df = sort_df_by_date(net_df, 'Month')
    return net_df

In [None]:
def analyze_finances(net_df, timeline_banking_df, timeline_transactions_df):
    analysis_df = pd.DataFrame()
    return analysis_df

In [61]:
def export_to_excel(dataframe_sheets):
    with pd.ExcelWriter(excel_filename) as writer:
        keys_list = list(dataframe_sheets.keys())
        for sheet in dataframe_sheets:
            dataframe_sheets[sheet].to_excel(writer, sheet_name=sheet, index=keys_list.index(sheet))

In [None]:
merged_transactions = merge_events(transaction_path, [transaction_cost, retailer, transaction_date])
merged_banking = merge_events(banking_path, [banking_cost, retailer, banking_date])
merged_transactions = enrich_grocery(merged_transactions)

timeline_transactions_df = pd.DataFrame(columns=expenses_df.columns)
timeline_banking_df = pd.DataFrame(columns=income_df.columns)

# For loop to go back X iterations of past Y where past Y was the unit of time to include in this report (likely past month so go back X months, month by month)
def iterate_months(iterations):
    global end_date, filtered_transactions, timeline_transactions_df, daily_transactions, merged_banking
    for i in range(iterations):
        # Get time period (length of unit of time minus 1 day)
        start_date = end_date - relativedelta(months=1) + relativedelta(days=1) # get latest date and go back as far as the size of the unit (likely a month)
        filtered_transactions = filter_events_by_date(start_date, end_date, merged_transactions)
        filtered_banking = filter_events_by_date(start_date, end_date, merged_banking)
        # daily_transactions = daily_totals(filtered_transactions)
        # Expenses
        categorized_transactions = summarize_transaction_categories(filtered_transactions)
        timeline_transactions_df = append_transactions(categorized_transactions)
        # Income
        categorized_banking = summarize_banking_categories(filtered_banking)
        timeline_banking_df = append_banking(categorized_banking)
        # Net
        net_df = append_net(filtered_transactions, filtered_banking)
        end_date = start_date - relativedelta(days=1)

iterate_months(18)
analysis_df = analyze_finances(net_df, timeline_banking_df, timeline_transactions_df)

dataframe_sheets = {
    "Net" : net_df,
    "Analysis" : analysis_df,
    "Transactions": timeline_transactions_df, 
    "Banking" : timeline_banking_df,
    "All Transactions" : merged_transactions,
    "All Banking" : merged_banking
}
if excel == True:
    try:
        export_to_excel(dataframe_sheets)
    except Exception as e:
        print("Failed to create Excel file:", e)
    else:
        print("Excel file created successfully.", excel_filename)
else:
    print()

Adding categories to merged_banking


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  banking_expenses[cost] = banking_expenses[cost].abs()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  banking_expenses[cost] = banking_expenses[cost].abs()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  banking_expenses[cost] = banking_expenses[cost].abs()
A value is trying to be set on a copy of a 

Excel file created successfully. budget.xlsx
