In [1]:
from datetime import datetime as dt  #to manipulate dates

import numpy as np  #to cleanse data
import openpyxl
import pandas as pd
import plotly.express as px  #to create interactive charts
import plotly.graph_objects as go  #to create interactive charts
from datetime import datetime

In [2]:
dtype_dict = {
    'Description': 'string',
    'Amount': 'float',
    'Type': 'string',
    'Current balance': 'float',
    'Status': 'string'
}

ytd_data = pd.read_csv(
    'D:\\Sean\\Documents\\Personal\\Budget\\YTD_downloads\\2024\\SOFI-Savings.csv', dtype=dtype_dict, parse_dates=['Date']
)


ytd_data['Account'] = 'sofi_savings'
ytd_data.rename(columns={'Date':'Trans. Date','Current balance':'CumAmount'}, inplace=True)

ytd_data['Date'] = pd.to_datetime(ytd_data['Trans. Date'].dt.strftime('%m/%Y'), format='%m/%Y')

In [3]:
ytd_data

Unnamed: 0,Trans. Date,Description,Type,Amount,CumAmount,Status,Account,Date
0,2024-12-31,Interest earned,Interest Earned,12.43,5075.54,Posted,sofi_savings,2024-12-01
1,2024-12-23,Nationwide,Direct Deposit,2989.48,5063.11,Posted,sofi_savings,2024-12-01
2,2024-12-16,DISCOVER,Direct Payment,-2386.27,2073.63,Posted,sofi_savings,2024-12-01
3,2024-12-13,To Checking - 8930,Withdrawal,-1132.00,4459.90,Posted,sofi_savings,2024-12-01
4,2024-12-13,To Checking - 8930,Withdrawal,-155.00,5591.90,Posted,sofi_savings,2024-12-01
...,...,...,...,...,...,...,...,...
123,2024-01-23,To Checking - 8930,Withdrawal,-155.00,2259.86,Posted,sofi_savings,2024-01-01
124,2024-01-16,BOARD OF REGENTS,Direct Payment,-932.00,2414.86,Posted,sofi_savings,2024-01-01
125,2024-01-11,DISCOVER,Direct Payment,-1382.28,3346.86,Posted,sofi_savings,2024-01-01
126,2024-01-09,Nationwide,Direct Deposit,2845.53,4729.14,Posted,sofi_savings,2024-01-01


In [4]:
# Discover Transactions
mapping = {
    # Direct Deposit
    'income': ['PAYROLL', "Nationwide",'Franchise Tax BD', 'IRS  TREAS 310'],
    'interest': ['INTEREST'],
    'school':['BOARD OF REGENTS'],
    'transfer' :['TRANSFER','To Checking - 8930','Discover','WELLS FARGO BANK','WELLS FARGO IFI'],
    'Don': ['Fortiva']

    
    
}
for k, v in mapping.items():
    ytd_data.loc[ytd_data.Description.str.contains('|'.join(v), case=False), 'Short_Desc'] = k
    ytd_data.loc[ytd_data.Description.str.contains("WELLS FARGO BANK", case=False) & (
            ytd_data['Amount'] == -2100), 'Short_Desc'] = 'family'


In [5]:
ytd_data[ytd_data['Short_Desc'].isnull()].sort_values(by='Amount', ascending=False)

Unnamed: 0,Trans. Date,Description,Type,Amount,CumAmount,Status,Account,Date,Short_Desc


In [6]:
ytd_data.loc[ytd_data['Short_Desc'].isnull(), 'Short_Desc'] = 'unsure'

In [7]:
reocurring_list = []
ytd_data.loc[ytd_data.Short_Desc.isin(reocurring_list), 'Reoccurring_Flag'] = 'Yes'
ytd_data.loc[~ytd_data.Short_Desc.isin(reocurring_list), 'Reoccurring_Flag'] = 'No'

mapping = {
    'Housing': [],
    'Transportation': [],
    'Food': [],
    'Insurance': [],
    'Utilities': [],
    'Medical': [],
    'Government': [],
    'Savings': ['income'],
    'Personal_Spending': ['school'],
    'Recreation_Entertainment': [],
    'Misc': [],
    'People': ['family'],
    'Payment_and_Interest': ['interest']
}

for k, v in mapping.items():
    ytd_data.loc[ytd_data.Short_Desc.isin(v), 'Category_2'] = k
    

In [8]:
ytd_data[ytd_data['Category_2'].isnull()].sort_values(by='Description', ascending=False).sort_values(by='Short_Desc',
                                                                                                     ascending=False)

Unnamed: 0,Trans. Date,Description,Type,Amount,CumAmount,Status,Account,Date,Short_Desc,Reoccurring_Flag,Category_2
42,2024-07-28,WELLS FARGO IFI,Deposit,3500.00,5454.07,Posted,sofi_savings,2024-07-01,transfer,No,
28,2024-09-13,To Checking - 8930,Withdrawal,-155.00,3881.65,Posted,sofi_savings,2024-09-01,transfer,No,
104,2024-03-01,To Checking - 8930,Withdrawal,-450.00,585.91,Posted,sofi_savings,2024-03-01,transfer,No,
52,2024-07-21,To Checking - 8930,Withdrawal,-2000.00,2446.82,Posted,sofi_savings,2024-07-01,transfer,No,
47,2024-07-23,To Checking - 8930,Withdrawal,-1600.00,3836.30,Posted,sofi_savings,2024-07-01,transfer,No,
...,...,...,...,...,...,...,...,...,...,...,...
79,2024-05-13,To Checking - 8930,Withdrawal,-155.00,2182.50,Posted,sofi_savings,2024-05-01,transfer,No,
77,2024-05-18,To Checking - 8930,Withdrawal,-1640.00,3531.98,Posted,sofi_savings,2024-05-01,transfer,No,
73,2024-06-01,To Checking - 8930,Withdrawal,-450.00,4718.64,Posted,sofi_savings,2024-06-01,transfer,No,
113,2024-02-08,To Checking - 8930,Withdrawal,-820.00,529.18,Posted,sofi_savings,2024-02-01,transfer,No,


In [9]:
x = ytd_data.groupby(['Reoccurring_Flag']).size().reset_index(name='count')

x.sort_values(by='count', ascending=False)

Unnamed: 0,Reoccurring_Flag,count
0,No,128


In [17]:
ytd_sorted = ytd_data.sort_values(by='Trans. Date').reset_index(drop=True)

# Ensure 'Sort_Date' is also in datetime format for the entire DataFrame
ytd_sorted['Trans. Date'] = pd.to_datetime(ytd_sorted['Trans. Date'])
ytd_sorted['Year'] = ytd_sorted['Trans. Date'].dt.year
ytd_sorted['Counter'] = ytd_sorted.groupby('Short_Desc').cumcount() + 1
ytd_sorted


Unnamed: 0,Trans. Date,Description,Type,Amount,CumAmount,Status,Account,Date,Short_Desc,Reoccurring_Flag,Category_2,Year,Counter
0,2024-01-02,DISCOVER,Direct Payment,-1568.14,1883.61,Posted,sofi_savings,2024-01-01,transfer,No,,2024,1
1,2024-01-09,Nationwide,Direct Deposit,2845.53,4729.14,Posted,sofi_savings,2024-01-01,income,No,Savings,2024,1
2,2024-01-11,DISCOVER,Direct Payment,-1382.28,3346.86,Posted,sofi_savings,2024-01-01,transfer,No,,2024,2
3,2024-01-16,BOARD OF REGENTS,Direct Payment,-932.00,2414.86,Posted,sofi_savings,2024-01-01,school,No,Personal_Spending,2024,1
4,2024-01-23,Nationwide,Direct Deposit,2845.53,5105.39,Posted,sofi_savings,2024-01-01,income,No,Savings,2024,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,2024-12-13,To Checking - 8930,Withdrawal,-155.00,5591.90,Posted,sofi_savings,2024-12-01,transfer,No,,2024,82
124,2024-12-13,To Checking - 8930,Withdrawal,-1132.00,4459.90,Posted,sofi_savings,2024-12-01,transfer,No,,2024,83
125,2024-12-16,DISCOVER,Direct Payment,-2386.27,2073.63,Posted,sofi_savings,2024-12-01,transfer,No,,2024,84
126,2024-12-23,Nationwide,Direct Deposit,2989.48,5063.11,Posted,sofi_savings,2024-12-01,income,No,Savings,2024,28


In [18]:
columns = ['Trans. Date', 'Description', 'Amount', 'Category', 'Short_Desc', 'Reoccurring_Flag',
                     'Category_2', 'Date', 'Year', 'Sort_Date', 'CumAmount', 'Counter', 'Account']
ytd_sorted['Category'] = 'NA'
ytd_sorted['Sort_Date'] = ytd_sorted['Trans. Date']

sofi_svaings = ytd_sorted[columns]

In [19]:
sofi_svaings.to_excel('D:\\Sean\\Documents\\Personal\\Budget\\2024_output\\sofi_savings_Curated_Bills.xlsx')