In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
import os
import re

In [56]:
def read_excel_files_from_folder(folder_path):
    
    # Check if the folder exists
    if not os.path.isdir(folder_path):
        print(f"Error: The folder at '{folder_path}' does not exist.")
        return {}

    excel_data = {}

    # Loop through all files in the specified folder
    for filename in os.listdir(folder_path):
        
        # Check if the file is an Excel file
        if filename.endswith('.xlsx') or filename.endswith('.xls'):
            
            # Construct the full file path
            file_path = os.path.join(folder_path, filename)
            
            try:
                # Read the Excel file into a pandas DataFrame
                df = pd.read_excel(file_path)
                excel_data[filename] = df
            except Exception as e:
                print(f"Could not read {filename}: {e}")
                
    return excel_data

data_folder = 'data'
all_excel_dfs = read_excel_files_from_folder(data_folder)

transactions = pd.concat(all_excel_dfs.values())
transactions['Date'] = pd.to_datetime(transactions['Date'], format='%d/%m/%y', errors='coerce')

transactions = transactions.sort_values('Date', ascending= False)


In [62]:
len(transactions)

1425

In [64]:
#transactions[transactions.Description.str.contains('DBSSSGSGBRT')]

In [None]:
# Create a dictionary of rules for withdrawals
withdrawal_rules = {
    'Investment': ['u6473439', 'dep42ff6', 'tiger brokers', 'dep1e098', 'iirgpcsg'],
    'Bank Transfer': ['fast payment / receipt', 'funds transfer', 'remittance', 'trfsb', 'fast'],
    'Cash Withdrawal': ['cash withdrawal'],
    'Aesthetics and Fitness': ['ezypaysgd*anytime', 'chew hwee ping', 'uniqlo', 'overtime athletes', 'myactivesg', 'decathlon', "d'beauty zone pte. ltd."],
    'Food': ['paylah!', "mcdonald's", 'boleh'],
    'Bills': ['singtel', 'mysingtelapp', 'axs', 'google one', 'stbill', 'sp digital', 'simba', 'd2p st bill'],
    'Entertainment': ['netflix.com', 'steam', 'steamgames.com', 'audible', 'google*sega', 'gv'],
    'Groceries': ['ntuc', '7-eleven', 'fairprice', 'cheers', 'donki'],
    'Health': ['medic', 'medical', 'clinic'],
    'Shopping': ['lazada', '2c2p-lazada(non 3d)-ec', 'tiktok shop', 'courts', 'apple.com/sg', 'shopeepay', 'addon systems'],
    'Transport': ['grab', 'transit'],
    'Education': ['nus', 'ntu', 'udemy', 'popular']
}

def categorize_withdrawal(text, rules):

    text_lower = text.lower()

    if 'funds transfer' in text_lower:
    # Check if any food and investment keywords are present.
        hawker_keywords = rules.get('Food', [])
        contains_hawker_keyword = any(keyword in text_lower for keyword in hawker_keywords)

        investment_keywords = rules.get('Investment', [])
        contains_investment_keyword = any(keyword in text_lower for keyword in investment_keywords)

        if contains_hawker_keyword:
            return 'Food'
        
        elif contains_investment_keyword:
            return 'Investment'
        
        else:
            return 'Bank Transfer'



    elif 'paynow' in text_lower:
        health_fitness = rules.get('Aesthetics and Fitness', [])
        contains_health_keyword = any(keyword in text_lower for keyword in health_fitness)

        shopping = rules.get('Shopping', [])
        contains_shopping_keyword = any(keyword in text_lower for keyword in shopping)

        bills = rules.get('Bills', [])
        contains_bills_keyword = any(keyword in text_lower for keyword in bills)

        if contains_health_keyword:
            return 'Aesthetics and Fitness'
        
        elif contains_shopping_keyword:
            return 'Shopping'
        

        elif contains_bills_keyword:
            return 'Bills'

        else:
            return 'Bank Transfer'
        
    
        # Rule 2: Check for all other defined rules.
    for category, keywords in rules.items():
        # Iterate through the list of keywords for each category.
        for keyword in keywords:
            # We use a regex word boundary (\b) to ensure we match whole words,
            # which can prevent accidental matches in other words.
            if re.search(r'\b' + re.escape(keyword) + r'\b', text_lower):
                return category

    # Fallback: If no rules match, return 'Other'.
    return 'Other'


# Create a dictionary of rules for deposits
deposit_rules = {
    'Salary': ['salary', 'talentvis'],
    'Cash Deposit': ['cash deposit', 'cash'],
    'Government': ['mindef', 'gov', 'saf', 'gst voucher', 'govt'],
    'Investment': ['u6473439', 'tiger brokers', 'dbsssgsgbrt', '2752b', '49006'],
    'Bank Transfer': ['fast payment / receipt', 'funds transfer', 'remittance', 'trfsb', 'fast'],
    'Interest': ['interest'],
    'Refund': ['refund', 'debit card transaction']
}

def categorize_deposit(text, rules):
    """
    Categorizes a deposit description based on a set of rules.

    Args:
        text (str): The deposit description to categorize.
        rules (dict): A dictionary of rules where keys are categories and
                      values are lists of keywords.

    Returns:
        str: The category of the deposit.
    """
    text_lower = text.lower()

    # Rule 1: Check for the 'Other GIRO Deposits' condition first, as it's a
    # special case with a negative constraint.
    if 'payments / collections' in text_lower:
        # Check if any government keywords are present.
        gov_keywords = rules.get('Government', [])
        contains_gov_keyword = any(keyword in text_lower for keyword in gov_keywords)

        # If it contains 'payments / collections' AND does NOT contain any
        # government keywords, it's 'Other GIRO Deposits'.
        if not contains_gov_keyword:
            return 'Other GIRO Deposits'
        
    elif 'fast payment / receipt' in text_lower:
        investment_keywords = rules.get('Investment', [])
        contains_investment = any(keyword in text_lower for keyword in investment_keywords)
        if contains_investment:
            return 'Investment'

    # Rule 2: Check for all other defined rules.
    for category, keywords in rules.items():
        # Iterate through the list of keywords for each category.
        for keyword in keywords:
            # We use a regex word boundary (\b) to ensure we match whole words,
            # which can prevent accidental matches in other words.
            if re.search(r'\b' + re.escape(keyword) + r'\b', text_lower):
                return category

    # Fallback: If no rules match, return 'Other'.
    return 'Other'



transactions['Category'] = np.where(
    transactions['Deposit'].notna(),
    transactions['Description'].apply(lambda x: categorize_deposit(x, deposit_rules)),
    np.where(
        transactions['Withdrawal'].notna(),
        transactions['Description'].apply(lambda x: categorize_withdrawal(x, withdrawal_rules)),
        np.nan # Use np.nan for rows that are neither a deposit nor a withdrawal

    )
)


bank_transfer_to_me_stanchart = transactions[(transactions.Description.str.contains('FAST Payment / Receipt SCL:0128119942:I-BANK TRANSFER')) | (transactions.Description.str.contains('0128119942'))]
bank_transfer_to_me_posb = transactions[(transactions.Description.str.contains('FAST Payment / Receipt SG')) | (transactions.Description.str.contains('0128119942') | transactions.Description.str.contains('FROM: HO KAH POH, MICHAEN'))]
combined_bank_transfer_all_me = pd.concat([bank_transfer_to_me_posb, bank_transfer_to_me_stanchart])
transactions = transactions.drop(combined_bank_transfer_all_me.index)

# Task
# Fix the bank transfer from and to me, how come other rows got removed unexpectedly (look into the str.contains() to match properly)

In [63]:
#combined_bank_transfer_all_me

In [6]:
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)

# pd.reset_option('all')

In [10]:
total = transactions[(transactions['Withdrawal'].isin([90, 100, 110, 120, 130, 140, 150, 160])) & (transactions.Category == 'Cash Withdrawal')]

print(total['Withdrawal'].sum())

5690.0
