In [1]:
import os
import pandas as pd

In [92]:
# Folder (relative to the notebook's working directory) holding the statement CSV exports.
folder_path = "statement"

# os.listdir returns entries in arbitrary order; sorting gives every downstream
# step (concatenation, index-based lookups) the same file order on every run.
csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))

# The first 7 characters of each file name are used as the statement/account key.
statement_set = {name[:7] for name in csv_files}

statement_set

{'CC-4253', 'DC-9084'}

In [191]:
def get_data(head, files=None, folder=None):
    """Load every statement CSV whose name starts with ``head`` into one frame.

    The first two characters of ``head`` select the parsing rules:

    * ``'CC'`` files are read as-is; their ``'Posted Date'`` and ``'Payee'``
      columns are renamed to ``'Date'`` and ``'Description'``.
    * ``'DC'`` files are read after skipping the first 6 lines (presumably a
      summary preamble -- confirm against the export format), and thousands
      separators are stripped from ``'Amount'`` before converting it to float.

    Parameters
    ----------
    head : str
        File-name prefix identifying the statement, e.g. ``'CC-4253'``.
    files : iterable of str, optional
        File names to consider. Defaults to the notebook-level ``csv_files``.
    folder : str, optional
        Directory containing ``files``. Defaults to the notebook-level
        ``folder_path``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` (datetime), ``Description`` and ``Amount``; rows with a
        missing ``Amount`` are dropped, rows are sorted by ``Date`` and the
        index is reset to 0..n-1.

    Raises
    ------
    ValueError
        If ``head`` does not start with ``'CC'`` or ``'DC'``, or if no file
        matches ``head``.
    """
    # Fall back to the notebook globals so existing get_data(head) calls keep working.
    if files is None:
        files = csv_files
    if folder is None:
        folder = folder_path

    account_type = head[:2]
    if account_type not in ('CC', 'DC'):
        raise ValueError(f"unsupported statement prefix {head!r}: expected 'CC...' or 'DC...'")

    wanted_columns = ['Date', 'Description', 'Amount']
    df_list = []
    # Sorted so the concatenation order does not depend on directory listing order.
    for file in sorted(name for name in files if name.startswith(head)):
        file_path = os.path.join(folder, file)

        if account_type == 'CC':
            df = pd.read_csv(file_path)
            df = df.rename(columns={'Posted Date': 'Date', 'Payee': 'Description'})
        else:
            df = pd.read_csv(file_path, skiprows=6)
            # Amounts such as "1,234.56" arrive as text; drop the separators first.
            df['Amount'] = (
                df['Amount'].astype(str).str.replace(',', '', regex=False).astype(float)
            )
        df_list.append(df[wanted_columns])

    if not df_list:
        raise ValueError(f"no statement files starting with {head!r} found in {folder!r}")

    combined_df = pd.concat(df_list, ignore_index=True)
    combined_df['Date'] = pd.to_datetime(combined_df['Date'], format='%m/%d/%Y')
    combined_df = combined_df[combined_df['Amount'].notna()]

    # A stable sort keeps same-day rows in file order, so row positions (which
    # later cells index into) are reproducible between runs.
    combined_df = combined_df.sort_values('Date', kind='stable').reset_index(drop=True)

    return combined_df

In [192]:
# Combined, date-sorted transactions from every statement file named 'CC-4253...'.
CC_Leo_df = get_data('CC-4253')

In [193]:
# Combined, date-sorted transactions from every statement file named 'DC-9084...'.
DC_Leo_df = get_data('DC-9084')

In [198]:
# Keyword -> category label used to fill the "Type" column.
# Keys are matched as case-insensitive literal substrings of "Description",
# and entries are applied in insertion order, so when several keys match the
# same row the LAST matching entry wins. Keep short keys (e.g. 'IRS', 'ARCO',
# 'APPLE') in mind: they also match inside longer words.
# NOTE: label spellings such as 'Maintainence' and 'Desert' are left as-is
# because the labels are written out to the exported workbooks.
Payee_map = {
    # --- Auto ---
    'GEICO': 'Auto: Insurance',
    'AAA CA MEMBERSHIP': 'Auto: Insurance',
    'FARMERS INS': 'Auto: Insurance',
    'CHEVRON': 'Auto: Gas',
    'MISSION FUEL': 'Auto: Gas',
    'COSTCO GAS': 'Auto: Gas',
    'CONSERV FUEL': 'Auto: Gas',
    'ARCO': 'Auto: Gas',
    'NEW CENTURY MAZDA': 'Auto: Maintainence',
    'BELLAGIO EXPRESS': 'Auto: Car Wash',
    'DMV': 'Auto: DMV fee',

    # --- Grocery ---
    'GOOD FORTUNE SUPERMARKET': 'Grocery: GFM',
    '99 RANCH': 'Grocery: 99 Ranch',
    '7-ELEVEN': 'Grocery: 7-ELEVEN',
    'TARGET': 'Grocery: Target',
    'CVS/PHARMACY': 'Grocery: CVS',
    'COSTCO WHSE': 'Grocery: Costco',
    'COSTCO *ANNUAL RENEWAL': 'Grocery: Costco',
    'H MART': 'Grocery: HMart',
    'LITTLE PEACH MEAT': 'Grocery: Meat Shop',
    'VONS': 'Grocery: others',
    'GINSENG': 'Grocery: others',

    # --- Study ---
    'LinkedInPre': 'Study: LinkedIn',
    'UDEMY': 'Study: Udemy',
    'OPENAI': 'Study: ChatGPT',
    'GITHUB': 'Study: GitHub',

    # --- Logistic ---
    'The UPS Store': 'Logistic: UPS',
    'USPS': 'Logistic: USPS',

    # --- Utility ---
    'CITY OF ARCADIA': 'Utility: Water',
    'Spectrum': 'Utility: Spectrum',
    'SO CAL EDISON': 'Utility: Edison',
    'SO CAL GAS': 'Utility: SoCal Gas',
    'LA Co TTC Paymnt': 'Utility: Property Tax',

    # --- Restaurant ---
    'Chun La Hao': 'Restaurant: Hotpot',
    'CHI HUO': 'Restaurant: Hotpot',
    '101 POT': 'Restaurant: Hotpot',
    'KUAN ZHAI ALLEY': 'Restaurant: Sichuan Dish',
    'IN-N-OUT': 'Restaurant: Western Dish',
    'HABIT': 'Restaurant: Western Dish',
    'RAMEN': 'Restaurant: Japanese Dish',
    'TOFU HOUSE': 'Restaurant: Korean Dish',
    'LADY M': 'Restaurant: Desert',
    '85C': 'Restaurant: Desert',
    'SHANGHAILANDER': 'Restaurant: Shanghai Dish',

    # --- Other ---
    'APPLE': 'Other: Apple',
    'HOSPITAL': 'Other: Hospital',
    'HOME DEPOT': 'Other: Home Depot',
    'VCN*LOSANGELESCODPH': 'Other: Baby Certificate',
    'CA SOS BPD LOS ANGELES': 'Other: Baby Certificate',
    'BELLA BABY PHOTOGRAPHY': 'Other: Baby Photo',
    'BKOFAMERICA MOBILE': 'Other: Mobile Check',

    # --- Credit-card payback (as seen on the credit-card statement) ---
    'Online payment': 'CC Payback: Money',
    'CASH REWARDS STATEMENT CREDIT': 'CC Payback: Credit',
    'CASHREWARD': 'CC Payback: Credit',

    # --- Payroll and tax ---
    'DES:PAYROLL ID:XXXXX716960': 'Payroll: Luminys',
    'C185529 LUMINYS': 'Payroll: Luminys',
    'CERTIFY- LUMINYS': 'Payroll: Reimbursement',
    'Money Network DES:': 'Payroll: EDD',
    'IRS': 'Tax: IRS',
    'FRANCHISE TAX BD DES:CASTTAXRFD': 'Tax: CA',
    "Zelle payment to ROGER'S TAX SERVICES LLC": 'Tax: Roger Service',

    # --- Internal movements ---
    # (the original listed 'Zelle payment from CHENWEI XU' twice; a dict keeps
    # only one entry per key, so the redundant line was dropped)
    'Zelle payment from CHENWEI XU': 'Internal: Transfer',
    'Zelle payment to CHENWEI XU': 'Internal: Transfer',
    'Online Banking transfer to SAV 7913': 'Internal: Investment',
    'Online Banking transfer from SAV 7913': 'Internal: Investment',
    'WIRE TYPE': 'Internal: Wire',
    'Wire Transfer Fee': 'Internal: Wire',

    # --- Credit-card payback (as seen on the debit/checking statement) ---
    'Online Banking payment to CRD 4253': 'CC Payback: Money',
    'DISCOVER DES': 'CC Payback: Discover',

    # --- Rent ---
    'Zelle payment to YUKAI GAO': 'Rent: CTHD',
    'Zelle payment to XUE SHIMING': 'Rent: Avlon'
}

In [199]:
def label_transactions(df, mapping):
    """Fill ``df["Type"]`` from ``mapping`` and return ``df`` (modified in place).

    Each key of ``mapping`` is searched for in ``df["Description"]`` as a
    case-insensitive literal substring; matching rows get the mapped value.
    Keys are applied in ``mapping`` order, so when several keys match a row
    the last one wins. Rows matching no key are left missing.
    """
    # Start from an empty column so labels from an earlier run cannot survive
    # after their keyword has been edited or removed from the mapping.
    df["Type"] = pd.Series(index=df.index, dtype=object)
    for keyword, mapped_value in mapping.items():
        hits = df["Description"].str.contains(keyword, case=False, regex=False, na=False)
        df.loc[hits, "Type"] = mapped_value
    return df


# Same labelling rules for the credit-card and the debit/checking frames.
for statement_df in (CC_Leo_df, DC_Leo_df):
    label_transactions(statement_df, Payee_map)

In [201]:
# Debit-side rows labelled as credit-card payments.
DC_Leo_df.loc[DC_Leo_df['Type'].eq('CC Payback: Money')]

Unnamed: 0,Date,Description,Amount,Type
3,2024-12-30,Online Banking payment to CRD 4253 Confirmatio...,-3702.36,CC Payback: Money
20,2025-02-03,Online Banking payment to CRD 4253 Confirmatio...,-1741.45,CC Payback: Money
21,2025-02-10,Online Banking payment to CRD 4253 Confirmatio...,-1545.82,CC Payback: Money
32,2025-03-03,Online Banking payment to CRD 4253 Confirmatio...,-728.22,CC Payback: Money
45,2025-03-21,Online Banking payment to CRD 4253 Confirmatio...,-38.56,CC Payback: Money
54,2025-05-12,Online Banking payment to CRD 4253 Confirmatio...,-109.47,CC Payback: Money
68,2025-06-23,Online Banking payment to CRD 4253 Confirmatio...,-1202.01,CC Payback: Money
75,2025-07-11,Online Banking payment to CRD 4253 Confirmatio...,-487.69,CC Payback: Money


In [202]:
# Credit-card-side rows labelled as payments received.
CC_Leo_df.loc[CC_Leo_df['Type'].eq('CC Payback: Money')]

Unnamed: 0,Date,Description,Amount,Type
9,2024-12-28,Online payment from CHK 9084,3702.36,CC Payback: Money
46,2025-02-03,Online payment from CHK 9084,1741.45,CC Payback: Money
54,2025-02-11,Online payment from CHK 9084,1545.82,CC Payback: Money
69,2025-03-03,Online payment from CHK 9084,728.22,CC Payback: Money
75,2025-03-21,Online payment from CHK 9084,38.56,CC Payback: Money
81,2025-05-12,Online payment from CHK 9084,109.47,CC Payback: Money
97,2025-06-23,Online payment from CHK 9084,1202.01,CC Payback: Money
107,2025-07-11,Online payment from CHK 9084,487.69,CC Payback: Money


In [207]:
# Index label of the last 'CC Payback: Money' row in the credit-card frame.
# NOTE(review): the original cell was an unfinished assignment (a SyntaxError
# that stops Run All); it is completed here with the expression the following
# cell computes inline -- confirm this is the intended value.
cc_max_i = max(CC_Leo_df[CC_Leo_df['Type'] == 'CC Payback: Money'].index)

In [219]:
# Find the first row label i for which the amounts from row i through the last
# 'CC Payback: Money' row sum to zero (rounded to cents).
payback_index = CC_Leo_df[CC_Leo_df['Type'] == 'CC Payback: Money'].index
if payback_index.empty:
    raise ValueError("CC_Leo_df has no 'CC Payback: Money' rows to balance against")

# Loop-invariant: compute the end of the window once instead of on every pass.
last_payback_i = payback_index.max()

i = 0
while i <= last_payback_i and round(CC_Leo_df.loc[i:last_payback_i, 'Amount'].sum(), 2) != 0:
    i += 1

# If i ends at last_payback_i + 1, no starting row produced a zero balance
# (the original loop stopped at the same value, because an empty slice sums to 0).

In [118]:
# Distinct descriptions that no Payee_map keyword matched.
CC_Leo_df.loc[CC_Leo_df['Type'].isna(), 'Description'].unique()

array(['COMP OF MD EST CORP 410-2607980 MD',
       'GOV SVC FEE EST CORP 703-8945000 VA',
       'WL *VUE*Testing Exam 953-6813000 MN'], dtype=object)

In [119]:
# Full rows whose Type is still missing.
CC_Leo_df.loc[CC_Leo_df['Type'].isna()]

Unnamed: 0,Date,Description,Amount,Type
0,2024-12-20,COMP OF MD EST CORP 410-2607980 MD,-200.0,
1,2024-12-20,GOV SVC FEE EST CORP 703-8945000 VA,-4.9,
74,2025-03-18,WL *VUE*Testing Exam 953-6813000 MN,-83.0,
76,2025-03-22,WL *VUE*Testing Exam 953-6813000 MN,83.0,


Unnamed: 0,Date,Description,Amount,Type
0,2024-12-23,Zelle payment from SHANSHAN ZHANG for return t...,1200.00,
1,2024-12-26,IRS TREAS 310 DES: TAX REF ID:XXXXXXXXXX00918 ...,1400.00,
3,2024-12-30,Online Banking payment to CRD 4253 Confirmatio...,-3702.36,
4,2025-01-02,Zelle payment from CHENG WANG Conf# k0lshzxq2,500.00,
6,2025-01-13,BKOFAMERICA MOBILE 01/13 XXXXX97559 DEPOSIT *M...,91.29,
...,...,...,...,...
76,2025-07-14,Zelle payment to Chenwei Xu Conf# cv00tvvtx,-2000.00,
77,2025-07-14,Zelle payment to Chenwei Xu Conf# ivfar1btj,-1000.00,
78,2025-07-14,DISCOVER DES:E-PAYMENT ID:5257 INDN:PENG NINGC...,-27.92,
79,2025-07-17,Online Banking transfer from SAV 7913 Confirma...,0.19,


In [100]:
# Write CC_Leo_df to CC_Leo.xlsx in the working directory, without the index column.
# NOTE(review): this cell's execution count (100) is lower than the labelling
# cell's (199), so the file on disk may predate the current labels -- re-run to refresh.
CC_Leo_df.to_excel("CC_Leo.xlsx", index=False)

In [102]:
# Write DC_Leo_df to DC_Leo.xlsx in the working directory, without the index column.
# NOTE(review): this cell's execution count (102) is lower than the labelling
# cell's (199), so the file on disk may predate the current labels -- re-run to refresh.
DC_Leo_df.to_excel("DC_Leo.xlsx", index=False)