In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
import os

In [4]:
def read_excel_files_from_folder(folder_path):
    """Load every Excel workbook found directly inside ``folder_path``.

    Files are selected by extension (``.xlsx`` / ``.xls``, matched
    case-insensitively) and read in sorted filename order, so the result does
    not depend on the order in which the operating system lists the directory.
    Excel's ``~$`` lock files, which exist while a workbook is open, are
    skipped because they are not real workbooks.

    Args:
        folder_path (str): Directory to scan. Sub-folders are not searched.

    Returns:
        dict: Maps each filename to the DataFrame produced by
        ``pd.read_excel`` with default options. The dict is empty when the
        folder does not exist or contains no readable Excel file. A file that
        fails to load is reported with ``print`` and left out, so one bad
        file does not abort the whole load.
    """
    # Check if the folder exists
    if not os.path.isdir(folder_path):
        print(f"Error: The folder at '{folder_path}' does not exist.")
        return {}

    excel_data = {}

    # sorted() makes the read order (and therefore any later concat order)
    # reproducible across machines and runs.
    for filename in sorted(os.listdir(folder_path)):

        # Lock files created by an open Excel session share the extension
        # but cannot be parsed.
        if filename.startswith('~$'):
            continue

        # Accept '.XLSX' / '.Xls' as well as the lower-case spellings.
        if not filename.lower().endswith(('.xlsx', '.xls')):
            continue

        file_path = os.path.join(folder_path, filename)

        try:
            excel_data[filename] = pd.read_excel(file_path)
        except Exception as e:
            # Best-effort load: report the failure and carry on.
            print(f"Could not read {filename}: {e}")

    return excel_data

data_folder = 'data'
all_excel_dfs = read_excel_files_from_folder(data_folder)

# pd.concat raises an opaque "No objects to concatenate" on empty input, so
# fail early with a message that points at the real cause.
if not all_excel_dfs:
    raise ValueError(f"No Excel files could be loaded from '{data_folder}'.")

# Stack all loaded files into one frame. Each file keeps its own row index,
# so index labels may repeat across files.
transactions = pd.concat(list(all_excel_dfs.values()))

# Dates are parsed as dd/mm/yy; errors='coerce' turns anything that does not
# match into NaT instead of raising.
transactions['Date'] = pd.to_datetime(transactions['Date'], format='%d/%m/%y', errors='coerce')

# Newest first. kind='stable' keeps rows that share a date in the order they
# were loaded instead of an arbitrary order.
transactions = transactions.sort_values('Date', ascending=False, kind='stable')


In [5]:
# Display the combined transactions frame (pandas truncates long frames in the rendered output).
transactions

Unnamed: 0,Date,Description,Withdrawal,Deposit,Balance
258,2025-07-31,Funds Transfer TOP-UP TO PAYLAH! : MICHAEN,20.00,,877.60
257,2025-07-31,Funds Transfer TOP-UP TO PAYLAH! : 83420415,3.60,,897.60
259,2025-07-31,Interest Earned,,0.07,877.67
256,2025-07-30,Debit Card Transaction MCDONALD'S (SLT) SI SGP...,10.15,,901.20
255,2025-07-30,Funds Transfer TOP-UP TO PAYLAH! : MICHAEN,1.70,,911.35
...,...,...,...,...,...
4,2019-01-31,Interest Earned,,0.01,500.03
3,2019-01-29,Debit Card Transaction GOOGLE*SEGA G. CO 27JAN...,9.98,,500.02
2,2019-01-21,Cash,,500.00,510.00
1,2019-01-19,Funds Transfer 068-78291-0 : I-BANK,40.00,,10.00


In [None]:
# Keyword rules for withdrawals: category name -> list of keywords.
# categorize_transaction lower-cases the description and tests each keyword
# with a plain substring check, so keywords here must be lower-case, and a
# short keyword also matches inside longer words (e.g. 'bus' in 'business').
# Categories are tried in the order written; the first one with a hit wins.
withdrawal_rules = {
    'Food & Groceries': ['supermarket', 'bakery', 'restaurant', 'cafe', 'food court'],
    'Transport': ['uber', 'grab', 'bus', 'mrt', 'petrol station'],
    'Bills': ['phone bill', 'electricity', 'internet'],
    'Shopping': ['shopee', 'lazada', 'amazon', 'department store'],
    'Health': ['pharmacy', 'clinic', 'hospital']
}

# Keyword rules for deposits, same structure and matching behaviour as above.
# NOTE(review): 'company name' looks like a placeholder for the real employer
# name as it appears in the statement — confirm and replace.
deposit_rules = {
    'Salary': ['salary', 'payroll', 'company name'],
    'Bonus': ['bonus', 'incentive'],
    'Investments': ['dividend', 'investment return']
}

In [None]:
def categorize_transaction(description, rules):
    """
    Categorizes a single transaction based on a set of keyword rules.

    Matching is a case-insensitive substring test: both the description and
    each keyword are lower-cased before comparison. Categories are tried in
    the iteration order of ``rules`` and the first category with a matching
    keyword wins.

    Args:
        description: The description of the transaction. Usually a string;
            missing values (None / NaN) are treated as uncategorized and any
            other non-string value is converted with ``str()`` first.
        rules (dict): A dictionary of categories and their associated keywords.

    Returns:
        str: The assigned category, or 'Uncategorized' if no match is found.
    """
    if pd.isna(description):
        return 'Uncategorized'

    # str() guards against non-text cells (e.g. a purely numeric description),
    # which would otherwise fail on .lower().
    desc_lower = str(description).lower()
    for category, keywords in rules.items():
        # Lower-case the keyword too, so a rule written as 'Grab' still matches.
        if any(str(keyword).lower() in desc_lower for keyword in keywords):
            return category

    return 'Uncategorized'

In [None]:
# Apply the categorization to create new columns.
# Column names must match the frame exactly: 'Description', 'Withdrawal' and
# 'Deposit' (capitalised, singular). A row with no amount in the relevant
# column holds NaN, and NaN > 0 is False, so it is labelled 'N/A'.
transactions['Withdrawal_Category'] = transactions.apply(
    lambda row: categorize_transaction(row['Description'], withdrawal_rules) if row['Withdrawal'] > 0 else 'N/A',
    axis=1
)

transactions['Deposit_Category'] = transactions.apply(
    lambda row: categorize_transaction(row['Description'], deposit_rules) if row['Deposit'] > 0 else 'N/A',
    axis=1
)

# Show the first few rows with the new categories to verify
# (bare expression so the notebook renders the frame as a table).
transactions.head()

In [6]:
# Withdrawal amounts of interest: 90 to 160 inclusive, in steps of 10
# (presumably round-figure cash withdrawals — TODO confirm intent).
amounts_of_interest = list(range(90, 161, 10))
matches_amount = transactions['Withdrawal'].isin(amounts_of_interest)

# Keep only the matching rows, ordered oldest first.
total = transactions[matches_amount].sort_values(by='Date')
total

Unnamed: 0,Date,Description,Withdrawal,Deposit,Balance
38,2019-06-08,"Cash Withdrawal 01816980,TG PAGAR BR C",100.0,,3113.16
44,2019-06-29,"Cash Withdrawal 01816980,TG PAGAR BR C",100.0,,3029.48
56,2019-09-07,"Cash Withdrawal 01816980,TG PAGAR BR B",100.0,,3954.82
62,2019-09-21,"Cash Withdrawal 01816980,TG PAGAR BR C",120.0,,3118.62
71,2019-10-18,"Cash Withdrawal 01816980,TG PAGAR BR C",100.0,,3971.82
75,2019-11-01,"Cash Withdrawal 01816980,TG PAGAR MRT 1",100.0,,3801.49
85,2019-12-07,"Cash Withdrawal 01816980,TG PAGAR BR C",100.0,,4604.58
88,2019-12-14,"Cash Withdrawal 01816980,TG PAGAR BR B",100.0,,4487.43
95,2019-12-30,"Cash Withdrawal 01816980,TG PAGAR MRT 2",100.0,,4349.5
96,2019-12-31,"Cash Withdrawal 01816980,TG PAGAR BR C",100.0,,4249.5


In [7]:
# Sum of the 'Withdrawal' amounts across the rows selected into `total`.
total['Withdrawal'].sum()

np.float64(6860.0)