In [18]:
from datetime import datetime as dt  #to manipulate dates

import numpy as np  #to cleanse data
import openpyxl
import pandas as pd
import plotly.express as px  #to create interactive charts
import plotly.graph_objects as go  #to create interactive charts
from datetime import datetime

In [19]:
# Explicit dtypes for the CSV columns; 'Date' is parsed separately via parse_dates.
dtype_dict = {
    **dict.fromkeys(['Description', 'Type', 'Status'], 'string'),
    **dict.fromkeys(['Amount', 'Current balance'], 'float'),
}

# Load the export, tag every row with its account, rename the raw columns, and
# add 'Date' as the transaction date truncated to the first day of its month.
ytd_data = (
    pd.read_csv(
        'D:\\Sean\\Documents\\Personal\\Budget\\YTD_downloads\\2024\\SOFI-Checking.csv',
        dtype=dtype_dict,
        parse_dates=['Date'],
    )
    .assign(Account='sofi_checking')
    .rename(columns={'Date': 'Trans. Date', 'Current balance': 'CumAmount'})
    .assign(
        Date=lambda frame: pd.to_datetime(
            frame['Trans. Date'].dt.strftime('%m/%Y'), format='%m/%Y'
        )
    )
)

In [20]:
# Show the loaded frame for a visual check of the parsed columns.
ytd_data

Unnamed: 0,Trans. Date,Description,Type,Amount,CumAmount,Status,Account,Date
0,2024-12-31,Interest earned,Interest Earned,0.68,1202.43,Posted,sofi_checking,2024-12-01
1,2024-12-27,Zelle® Payment from Kenny Thai,Direct Payment,30.00,1201.75,Posted,sofi_checking,2024-12-01
2,2024-12-20,DISCOVER CARD,Bill Pay,-500.00,1171.75,Posted,sofi_checking,2024-12-01
3,2024-12-17,STATE FARM RO 27,Direct Payment,-108.56,1671.75,Posted,sofi_checking,2024-12-01
4,2024-12-16,Synchrony Bank,Direct Payment,-1131.94,1780.31,Posted,sofi_checking,2024-12-01
...,...,...,...,...,...,...,...,...
118,2024-01-31,Interest earned,Interest Earned,0.04,33.22,Posted,sofi_checking,2024-01-01
119,2024-01-25,VENMO,Direct Payment,-1803.40,33.18,Posted,sofi_checking,2024-01-01
120,2024-01-24,From Savings - 3552,Deposit,1804.00,1836.58,Posted,sofi_checking,2024-01-01
121,2024-01-24,BEST BUY,Direct Payment,-155.00,32.58,Posted,sofi_checking,2024-01-01


In [21]:
# Short descriptions for the SoFi checking transactions.
# Each label maps to keywords that are matched as literal, case-insensitive
# substrings of 'Description'. If a description matches several labels, the
# label listed last in the dict wins.
mapping = {
    'interest': ['INTEREST'],
    'transfer': ['TRANSFER', 'From Savings - 3552', 'Discover', 'WELLS FARGO BANK', 'Venmo', 'Sean Lewis'],
    'laptop': ['BEST BUY'],
    'student_loan': ['DEPT EDUCATION'],
    'state_farm': ['STATE FARM'],
    'clothing': ['SP RAISED BY SOCIETY'],
    'Jayelin': ['Jayelin'],
    'care_credit': ['Synchrony'],
    'Don': ['Credit One'],
    'Tahjei': ['Peak Living'],
    'Kenny': ['Zelle® Payment from Kenny Thai'],
    'Roundup': ['Roundup'],
    'parking': [r'CITY OF SM PARKPAYBYPH'],
    'fast_food': ['Subway', 'LIFE CAFE'],
    'groceries': ['Safeway'],
    'everything_store': ['Target'],
    'misc': ['AppFolio, Inc.']
}

# Venmo rows are told apart by their amount; these override the generic
# 'transfer' label that the 'Venmo' keyword assigns above.
venmo_labels_by_amount = {
    -816.87: 'rent',
    -1803.40: 'vincent_debt',
    -3606.80: 'vincent_debt',
    -1473.72: 'vincent_debt',
    -50: 'joshua_mitchell',
    -30: 'huong_nguyen',
}


def contains_any(text, keywords):
    """Return a boolean mask of the rows of `text` that contain any keyword.

    Keywords are treated as plain text (not regular expressions) and compared
    case-insensitively. Missing values never match, and an empty keyword list
    yields an all-False mask.
    """
    mask = pd.Series(False, index=text.index)
    for keyword in keywords:
        mask |= text.str.contains(keyword, case=False, regex=False, na=False).astype(bool)
    return mask


description = ytd_data['Description']

# Build the labels in a fresh Series so re-running the cell after editing the
# rules never leaves labels from a previous run behind.
short_desc = pd.Series(np.nan, index=ytd_data.index, dtype='object')
for label, keywords in mapping.items():
    short_desc[contains_any(description, keywords)] = label

# Amount-based Venmo overrides are applied once, after the keyword rules.
is_venmo = contains_any(description, ['venmo'])
amounts = ytd_data['Amount'].to_numpy()
for amount, label in venmo_labels_by_amount.items():
    # Half-cent tolerance instead of exact float equality.
    same_amount = np.isclose(amounts, amount, rtol=0, atol=0.005)
    short_desc[is_venmo & same_amount] = label

ytd_data['Short_Desc'] = short_desc
    


In [22]:
# Rows that no keyword rule labelled, largest amount first.
ytd_data.loc[ytd_data['Short_Desc'].isna()].sort_values('Amount', ascending=False)

Unnamed: 0,Trans. Date,Description,Type,Amount,CumAmount,Status,Account,Date,Short_Desc


In [23]:
# Anything still without a short description falls back to 'unsure'.
unlabelled = ytd_data['Short_Desc'].isna()
ytd_data.loc[unlabelled, 'Short_Desc'] = 'unsure'

In [24]:
# Short descriptions that are flagged as recurring; every other row gets 'No'.
reocurring_list = ['rent', 'student_loan', 'laptop', 'vincent_debt', 'interest']
ytd_data['Reoccurring_Flag'] = (
    ytd_data['Short_Desc'].isin(reocurring_list).map({True: 'Yes', False: 'No'})
)

# Budget category -> short descriptions that belong to it.
# Categories with an empty list are placeholders that currently match nothing.
mapping = {
    'Housing': ['rent'],
    'Transportation': [],
    'Food': [],
    'Insurance': [],
    'Utilities': [],
    'Medical': [],
    'Government': ['student_loan'],
    'Savings': ['income'],
    'Personal_Spending': ['laptop', 'clothing'],
    'Recreation_Entertainment': [],
    'Misc': [],
    'People': ['vincent_debt', 'joshua_mitchell', 'huong_nguyen'],
    'Payment_and_Interest': ['interest']
}

# Invert to short description -> category. If a short description were listed
# under two categories, the later category wins.
category_by_short_desc = {
    short_desc: category
    for category, short_descs in mapping.items()
    for short_desc in short_descs
}

# Recompute the whole column in one pass so a re-run after editing `mapping`
# cannot keep categories from an earlier run. Unmapped rows stay missing.
ytd_data['Category_2'] = ytd_data['Short_Desc'].map(category_by_short_desc)

In [25]:
# Rows without a budget category, ordered by Short_Desc and then Description
# (both descending). One multi-key sort is used because chaining two
# sort_values calls with the default algorithm does not reliably keep the
# ordering produced by the first call.
ytd_data[ytd_data['Category_2'].isna()].sort_values(
    by=['Short_Desc', 'Description'], ascending=False
)

Unnamed: 0,Trans. Date,Description,Type,Amount,CumAmount,Status,Account,Date,Short_Desc,Reoccurring_Flag,Category_2
55,2024-07-25,Zelle® Payment from Sean Lewis,Direct Payment,5.00,230.38,Posted,sofi_checking,2024-07-01,transfer,No,
108,2024-02-13,From Savings - 3552,Deposit,155.00,174.95,Posted,sofi_checking,2024-02-01,transfer,No,
66,2024-07-08,From Savings - 3552,Deposit,1633.74,2500.36,Posted,sofi_checking,2024-07-01,transfer,No,
67,2024-07-01,From Savings - 3552,Deposit,450.00,866.62,Posted,sofi_checking,2024-07-01,transfer,No,
50,2024-08-13,From Savings - 3552,Deposit,155.00,907.85,Posted,sofi_checking,2024-08-01,transfer,No,
...,...,...,...,...,...,...,...,...,...,...,...
48,2024-08-19,Synchrony Bank,Direct Payment,-460.00,497.85,Posted,sofi_checking,2024-08-01,care_credit,No,
60,2024-07-17,Peak Living Prop,Direct Payment,-833.75,225.38,Posted,sofi_checking,2024-07-01,Tahjei,No,
1,2024-12-27,Zelle® Payment from Kenny Thai,Direct Payment,30.00,1201.75,Posted,sofi_checking,2024-12-01,Kenny,No,
49,2024-08-13,Zelle® Payment from Jayelin Lewis,Direct Payment,50.00,957.85,Posted,sofi_checking,2024-08-01,Jayelin,No,


In [26]:
# Number of transactions per recurring flag, most frequent first.
x = (
    ytd_data
    .groupby('Reoccurring_Flag')
    .size()
    .reset_index(name='count')
)

x.sort_values('count', ascending=False)

Unnamed: 0,Reoccurring_Flag,count
0,No,91
1,Yes,32


In [27]:
# Chronological copy of the transactions.
# 'Trans. Date' is coerced to datetime before sorting. The rows shown from the
# export are newest-first, so the frame is reversed and then sorted with a
# stable algorithm: transactions on the same day keep oldest-first order
# instead of an arbitrary one, which makes 'Counter' reproducible.
ytd_sorted = (
    ytd_data
    .assign(**{'Trans. Date': pd.to_datetime(ytd_data['Trans. Date'])})
    .iloc[::-1]
    .sort_values(by='Trans. Date', kind='stable')
    .reset_index(drop=True)
)

ytd_sorted['Year'] = ytd_sorted['Trans. Date'].dt.year
# Running occurrence number (starting at 1) of each Short_Desc in date order.
ytd_sorted['Counter'] = ytd_sorted.groupby('Short_Desc').cumcount() + 1
ytd_sorted

Unnamed: 0,Trans. Date,Description,Type,Amount,CumAmount,Status,Account,Date,Short_Desc,Reoccurring_Flag,Category_2,Year,Counter
0,2024-01-23,From Savings - 3552,Deposit,155.00,187.58,Posted,sofi_checking,2024-01-01,transfer,No,,2024,1
1,2024-01-24,BEST BUY,Direct Payment,-155.00,32.58,Posted,sofi_checking,2024-01-01,laptop,Yes,Personal_Spending,2024,1
2,2024-01-24,From Savings - 3552,Deposit,1804.00,1836.58,Posted,sofi_checking,2024-01-01,transfer,No,,2024,2
3,2024-01-25,VENMO,Direct Payment,-1803.40,33.18,Posted,sofi_checking,2024-01-01,vincent_debt,Yes,People,2024,1
4,2024-01-31,Interest earned,Interest Earned,0.04,33.22,Posted,sofi_checking,2024-01-01,interest,Yes,Payment_and_Interest,2024,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,2024-12-16,Synchrony Bank,Direct Payment,-1131.94,1780.31,Posted,sofi_checking,2024-12-01,care_credit,No,,2024,7
119,2024-12-17,STATE FARM RO 27,Direct Payment,-108.56,1671.75,Posted,sofi_checking,2024-12-01,state_farm,No,,2024,3
120,2024-12-20,DISCOVER CARD,Bill Pay,-500.00,1171.75,Posted,sofi_checking,2024-12-01,transfer,No,,2024,59
121,2024-12-27,Zelle® Payment from Kenny Thai,Direct Payment,30.00,1201.75,Posted,sofi_checking,2024-12-01,Kenny,No,,2024,1


In [28]:
# Add the two columns the output layout expects: a constant 'NA' category and
# a copy of the transaction date under 'Sort_Date'.
ytd_sorted['Category'] = 'NA'
ytd_sorted['Sort_Date'] = ytd_sorted['Trans. Date']

# Output columns, in the order they are written.
columns = [
    'Trans. Date', 'Description', 'Amount', 'Category', 'Short_Desc',
    'Reoccurring_Flag', 'Category_2', 'Date', 'Year', 'Sort_Date',
    'CumAmount', 'Counter', 'Account',
]

sofi_checking = ytd_sorted.loc[:, columns]

In [29]:
# Write the curated transactions to the 2024 output workbook (the index is written too).
sofi_checking.to_excel(
    excel_writer='D:\\Sean\\Documents\\Personal\\Budget\\2024_output\\sofi_checking_Curated_Bills.xlsx'
)