In [221]:
import os
import pandas as pd

In [238]:
# Folder (relative to the notebook's working directory) that holds the statement CSVs.
folder_path = "statement"

# os.listdir() returns entries in arbitrary order; sorting keeps the order in
# which the files are later loaded reproducible across runs and machines.
csv_files = sorted(name for name in os.listdir(folder_path) if name.endswith('.csv'))

# The first seven characters of each file name identify the card, e.g. 'CC-4253'.
statement_set = {name[:7] for name in csv_files}

statement_set

{'CC-4253', 'DC-9084'}

In [255]:
def get_data(head, files=None, folder=None):
    """Load and combine every statement CSV that belongs to one card.

    Files are selected by name prefix. The first two characters of ``head``
    choose the parsing rules:

    * ``'CC'``: the file is read as-is and the columns ``'Posted Date'`` and
      ``'Payee'`` are renamed to ``'Date'`` and ``'Description'``.
    * ``'DC'``: the first six lines of the file are skipped before the header
      row (presumably a summary section preceding the table; verify against
      the bank export).

    Parameters
    ----------
    head : str
        Card identifier that the file names start with, e.g. ``'CC-4253'``.
    files : iterable of str, optional
        File names to consider. Defaults to the notebook-level ``csv_files``.
    folder : str, optional
        Directory containing ``files``. Defaults to the notebook-level
        ``folder_path``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` (datetime), ``Description``, ``Amount`` (float) and
        ``Card`` (set to ``head``). Rows without an amount are dropped, rows
        are sorted by ``Date`` and the index is reset to 0..n-1.

    Raises
    ------
    ValueError
        If ``head`` does not start with ``'CC'`` or ``'DC'``, if no file name
        starts with ``head``, or if an amount or date cannot be parsed.
    """
    if files is None:
        files = csv_files
    if folder is None:
        folder = folder_path

    card_kind = head[:2]
    if card_kind not in ('CC', 'DC'):
        raise ValueError(f"Unsupported card prefix in {head!r}; expected 'CC' or 'DC'")

    wanted_columns = ['Date', 'Description', 'Amount']
    df_list = []
    for file in files:
        if not file.startswith(head):
            continue

        file_path = os.path.join(folder, file)
        if card_kind == 'CC':
            df = pd.read_csv(file_path)
            df = df.rename(columns={'Posted Date': 'Date', 'Payee': 'Description'})
        else:
            df = pd.read_csv(file_path, skiprows=6)

        df = df[wanted_columns].copy()

        # Amounts such as "1,234.56" are read as text; strip the thousands
        # separator for both card types so the column is always float.
        amount = df['Amount']
        if not pd.api.types.is_numeric_dtype(amount):
            amount = amount.astype(str).str.replace(',', '', regex=False)
        df['Amount'] = amount.astype(float)

        df_list.append(df)

    if not df_list:
        raise ValueError(f"No statement files starting with {head!r} found in {folder!r}")

    combined_df = pd.concat(df_list, ignore_index=True)
    combined_df['Card'] = head
    combined_df['Date'] = pd.to_datetime(combined_df['Date'], format="%m/%d/%Y")
    combined_df = combined_df[combined_df['Amount'].notna()]

    # A stable sort keeps same-day transactions in the order they were loaded.
    combined_df = combined_df.sort_values('Date', kind='stable').reset_index(drop=True)

    return combined_df

In [264]:
# Load the full history of the credit card.
CC_Leo_df = get_data('CC-4253')

# Keep only debit-card activity dated 2025-01-01 or later. The explicit .copy()
# makes the filtered frame independent of the one returned by get_data, so
# assigning new columns to it later does not raise SettingWithCopyWarning.
DC_Leo_df = get_data('DC-9084')
DC_Leo_df = DC_Leo_df[DC_Leo_df['Date'] >= '2025-01-01'].copy()

In [265]:
# Keyword -> category label. Each keyword is searched for inside the statement
# "Description" text by the labelling loop that iterates over this dict.
# Entries are applied in insertion order and a later match overwrites an
# earlier one, so keep more specific keywords below more general ones.
# NOTE(review): some labels look misspelled ('Maintainence', 'Desert'); they
# are left unchanged because the label text ends up in the exported data.
Payee_map = {
    # --- Auto ---
    'GEICO': 'Auto: Insurance',
    'AAA CA MEMBERSHIP': 'Auto: Insurance',
    'FARMERS INS': 'Auto: Insurance',
    'CHEVRON': 'Auto: Gas',
    'MISSION FUEL': 'Auto: Gas',
    'COSTCO GAS': 'Auto: Gas',
    'CONSERV FUEL': 'Auto: Gas',
    'ARCO': 'Auto: Gas',
    'NEW CENTURY MAZDA': 'Auto: Maintainence',
    'BELLAGIO EXPRESS': 'Auto: Car Wash',
    'DMV': 'Auto: DMV fee',

    # --- Grocery ---
    'GOOD FORTUNE SUPERMARKET': 'Grocery: GFM',
    '99 RANCH': 'Grocery: 99 Ranch',
    '7-ELEVEN': 'Grocery: 7-ELEVEN',
    'TARGET': 'Grocery: Target',
    'CVS/PHARMACY': 'Grocery: CVS',
    'COSTCO WHSE': 'Grocery: Costco',
    'COSTCO *ANNUAL RENEWAL': 'Grocery: Costco',
    'H MART': 'Grocery: HMart',
    'LITTLE PEACH MEAT': 'Grocery: Meat Shop',
    'VONS': 'Grocery: others',
    'GINSENG': 'Grocery: others',

    # --- Study ---
    'LinkedInPre': 'Study: LinkedIn',
    'UDEMY': 'Study: Udemy',
    'OPENAI': 'Study: ChatGPT',
    'GITHUB': 'Study: GitHub',

    # --- Logistic ---
    'The UPS Store': 'Logistic: UPS',
    'USPS': 'Logistic: USPS',

    # --- Utility ---
    'CITY OF ARCADIA': 'Utility: Water',
    'Spectrum': 'Utility: Spectrum',
    'SO CAL EDISON': 'Utility: Edison',
    'SO CAL GAS': 'Utility: SoCal Gas',
    'LA Co TTC Paymnt': 'Utility: Property Tax',

    # --- Restaurant ---
    'Chun La Hao': 'Restaurant: Hotpot',
    'CHI HUO': 'Restaurant: Hotpot',
    '101 POT': 'Restaurant: Hotpot',
    'KUAN ZHAI ALLEY': 'Restaurant: Sichuan Dish',
    'IN-N-OUT': 'Restaurant: Fast Food',
    'HABIT': 'Restaurant: Fast Food',
    'RAMEN': 'Restaurant: Japanese Dish',
    'TOFU HOUSE': 'Restaurant: Korean Dish',
    'LADY M': 'Restaurant: Desert',
    '85C': 'Restaurant: Desert',
    'SHANGHAILANDER': 'Restaurant: Shanghai Dish',

    # --- Other ---
    'APPLE': 'Other: Apple',
    'HOSPITAL': 'Other: Hospital',
    'HOME DEPOT': 'Other: Home Depot',
    'VCN*LOSANGELESCODPH': 'Other: Baby Certificate',
    'CA SOS BPD LOS ANGELES': 'Other: Baby Certificate',
    'BELLA BABY PHOTOGRAPHY': 'Other: Baby Photo',
    'BKOFAMERICA MOBILE': 'Other: Mobile Check',

    # --- Credit-card payback (credit-card statement side) ---
    'Online payment': 'CC Payback: Money',
    'CASH REWARDS STATEMENT CREDIT': 'CC Payback: Credit',
    'CASHREWARD': 'CC Payback: Credit',

    # --- Payroll and tax ---
    'DES:PAYROLL ID:XXXXX716960': 'Payroll: Luminys',
    'C185529 LUMINYS': 'Payroll: Luminys',
    'CERTIFY- LUMINYS': 'Payroll: Reimbursement',
    'Money Network DES:': 'Payroll: EDD',
    'IRS': 'Tax: IRS',
    'FRANCHISE TAX BD DES:CASTTAXRFD': 'Tax: CA',
    "Zelle payment to ROGER'S TAX SERVICES LLC": 'Tax: Roger Service',

    # --- Internal movements ---
    # (the duplicated 'Zelle payment from CHENWEI XU' entry was removed; a
    # repeated key in a dict literal is silently collapsed into one anyway)
    'Zelle payment from CHENWEI XU': 'Internal: Transfer',
    'Zelle payment to CHENWEI XU': 'Internal: Transfer',
    'Online Banking transfer to SAV 7913': 'Internal: Investment',
    'Online Banking transfer from SAV 7913': 'Internal: Investment',
    'WIRE TYPE': 'Internal: Wire',
    'Wire Transfer Fee': 'Internal: Wire',

    # --- Credit-card payback (bank-account side) ---
    'Online Banking payment to CRD 4253': 'CC Payback: Money',
    'DISCOVER DES': 'CC Payback: Discover',

    # --- Rent ---
    'Zelle payment to YUKAI GAO': 'Rent: CTHD',
    'Zelle payment to XUE SHIMING': 'Rent: Avlon',
}

In [266]:
# Label every transaction in both statements with a category from Payee_map.
for statement_df in (CC_Leo_df, DC_Leo_df):
    # Start from an empty label column: "Type" then exists even when no keyword
    # matches, and re-running this cell after editing Payee_map cannot leave
    # stale labels behind. Unmatched rows stay NaN.
    statement_df["Type"] = pd.Series(index=statement_df.index, dtype=object)

    # Later entries of Payee_map overwrite earlier ones when several keywords
    # match the same description.
    # NOTE(review): this is a case-insensitive plain-substring test, so a short
    # keyword such as 'IRS' also matches inside longer words (e.g. 'FIRST');
    # confirm that is acceptable for the statements being processed.
    for keyword, mapped_value in Payee_map.items():
        is_match = statement_df["Description"].str.contains(
            keyword, case=False, regex=False, na=False
        )
        statement_df.loc[is_match, "Type"] = mapped_value

In [269]:
def CC_get(cc_dateset):
    """Return the run of credit-card rows that nets to zero at the last payback.

    The window always ends at the last row whose ``Type`` is
    ``'CC Payback: Money'``. Its start is the smallest index label ``i >= 0``
    for which the ``Amount`` values from ``i`` through that row sum to zero
    after rounding to two decimals (presumably the charges that have been
    fully paid off; verify against the intended bookkeeping).

    Parameters
    ----------
    cc_dateset : pandas.DataFrame
        Statement with ``Amount`` and ``Type`` columns and a 0..n-1 integer
        index, as produced by ``get_data``.

    Returns
    -------
    pandas.DataFrame
        The balanced slice of ``cc_dateset`` (original index labels kept), or
        an empty frame with the same columns when no start row balances.

    Raises
    ------
    ValueError
        If ``cc_dateset`` contains no ``'CC Payback: Money'`` row.
    """
    payback_index = cc_dateset.index[cc_dateset['Type'] == 'CC Payback: Money']
    if payback_index.empty:
        raise ValueError("no 'CC Payback: Money' rows found in the statement")
    cc_max_i = int(payback_index.max())

    for start in range(cc_max_i + 1):
        window = cc_dateset.loc[start:cc_max_i]
        # Round to cents so float noise such as 5.5e-17 still counts as zero.
        if round(window['Amount'].sum(), 2) == 0:
            return window

    # No starting row balances the payments: nothing to keep.
    return cc_dateset.iloc[0:0]

In [273]:
# Merge the debit-card rows with the balanced credit-card window, ordered by
# date. The stable sort keeps same-day rows in their concatenation order.
combined_Leo = (
    pd.concat([DC_Leo_df, CC_get(CC_Leo_df)])
    .sort_values('Date', kind='stable')
    .reset_index(drop=True)
)

# If all 'CC Payback: Money' rows cancel out, drop them (presumably they are
# the two sides of the same card payments; verify). The sum is rounded to
# cents, as in CC_get, because a float total is rarely exactly 0.
cc_payback_total = combined_Leo.loc[combined_Leo['Type'] == 'CC Payback: Money', 'Amount'].sum()
if round(cc_payback_total, 2) == 0:
    combined_Leo = combined_Leo[combined_Leo['Type'] != 'CC Payback: Money']

In [277]:
# Write the combined transactions to an Excel workbook, omitting the row index.
combined_Leo.to_excel(excel_writer="test_Leo.xlsx", index=False)