In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Build a synthetic year-to-date table of daily household expenses.
# Each category gets a fixed share of the monthly income; every day's amount is
# that category's flat daily budget scaled by a random factor.

# Parameters
SEED = 42                                  # fixed seed so re-running the cell regenerates the same table
DAYS_PER_MONTH = 30                        # flat approximation used to turn a monthly budget into a daily one
VARIATION_LOW, VARIATION_HIGH = 0.8, 1.2   # daily amounts vary +/- 20% around the daily budget

family_size = 4
total_monthly_income = 6500
categories = ['Housing', 'Food & Dining', 'Transportation', 'Entertainment', 'Healthcare', 'Utilities', 'Others']
today = datetime.today()
start_date = datetime(today.year, 1, 1)
days = (today - start_date).days + 1       # Jan 1 through today, inclusive

# Typical monthly % allocation per category (approximate)
category_distribution = {
    'Housing': 0.30,           # 30%
    'Food & Dining': 0.20,     # 20%
    'Transportation': 0.10,    # 10%
    'Entertainment': 0.10,     # 10%
    'Healthcare': 0.10,        # 10%
    'Utilities': 0.10,         # 10%
    'Others': 0.10             # 10%
}

# The category list and the allocation table are maintained separately, so
# fail loudly if they drift apart or the shares stop adding up to 100%.
assert set(categories) == set(category_distribution), "categories and category_distribution disagree"
assert abs(sum(category_distribution.values()) - 1.0) < 1e-9, "category shares must sum to 1.0"

# Calculate monthly budget per category
monthly_budget_per_category = {
    cat: total_monthly_income * pct for cat, pct in category_distribution.items()
}

# Estimate daily budget per category (assuming uniform daily spend)
daily_budget_per_category = {
    cat: budget / DAYS_PER_MONTH for cat, budget in monthly_budget_per_category.items()
}

# Generate daily expenses with some random variation.
# A local generator is used (rather than reseeding the global `random` state)
# so this cell is reproducible without affecting other cells.
rng = random.Random(SEED)
data = [
    {
        'Date': start_date + timedelta(days=day),
        **{
            cat: round(daily_budget_per_category[cat] * rng.uniform(VARIATION_LOW, VARIATION_HIGH), 2)
            for cat in categories
        },
    }
    for day in range(days)
]

# Create DataFrame with 'Date' first, followed by the categories in list order
df_expenses = pd.DataFrame(data, columns=['Date'] + categories)
df_expenses.head(10)


Unnamed: 0,Date,Housing,Food & Dining,Transportation,Entertainment,Healthcare,Utilities,Others
0,2025-01-01,70.0,50.58,17.94,21.67,24.02,22.68,25.84
1,2025-01-02,60.44,42.49,19.63,19.65,24.12,19.82,20.66
2,2025-01-03,73.21,39.59,17.8,21.06,18.38,22.32,19.38
3,2025-01-04,61.57,50.69,23.91,18.85,25.95,21.29,24.1
4,2025-01-05,58.17,43.53,22.17,18.29,19.37,22.0,23.13
5,2025-01-06,73.33,40.9,22.81,24.67,25.36,21.63,18.88
6,2025-01-07,56.74,51.41,17.85,19.34,19.36,25.55,25.3
7,2025-01-08,77.99,51.52,19.56,17.8,18.17,22.15,23.65
8,2025-01-09,60.08,46.25,18.53,23.88,20.24,21.37,21.44
9,2025-01-10,59.98,40.58,20.94,20.84,23.19,22.25,21.51


In [9]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Build a synthetic year-to-date table of daily household expenses where the
# income share of each category is drawn at random, then tag every daily row
# with a randomly chosen family member.

# === PARAMETERS ===
SEED = 42                                  # fixed seed so re-running the cell regenerates the same table
DAYS_PER_MONTH = 30                        # flat approximation used to turn a monthly budget into a daily one
VARIATION_LOW, VARIATION_HIGH = 0.8, 1.2   # daily amounts vary +/- 20% around the daily budget
N_PREVIEW_ROWS = 15                        # number of rows shown in the sample at the end of the cell

family_size = 4
total_monthly_income = 6500  # in dollars
categories = ['Housing', 'Food & Dining', 'Transportation', 'Entertainment', 'Healthcare', 'Utilities', 'Others']
family_members = ['Sean (Father)', 'Tina (Mother)', 'Jack (Son)', 'Ruby (Daughter)']

# Local, seeded generators: reproducible output without touching the global
# `random` / `np.random` state that other cells may rely on.
np_rng = np.random.default_rng(SEED)
py_rng = random.Random(SEED)

# === DATE RANGE ===
today = datetime.today()
start_date = datetime(today.year, 1, 1)
days = (today - start_date).days + 1  # YTD, Jan 1 through today inclusive

# === RANDOM CATEGORY DISTRIBUTION ===
# Generate random weights and normalise them so they sum to 1.0
random_weights = np_rng.random(len(categories))
random_weights /= random_weights.sum()

# Map categories to random percentages
category_distribution_random = dict(zip(categories, random_weights))

# Show distribution
print("Random Category Allocation (%):")
for cat, pct in category_distribution_random.items():
    print(f"{cat}: {pct*100:.2f}%")

# === BUDGET CALCULATION ===
# Monthly and daily budgets
monthly_budget_per_category = {
    cat: total_monthly_income * pct for cat, pct in category_distribution_random.items()
}
daily_budget_per_category = {
    cat: budget / DAYS_PER_MONTH for cat, budget in monthly_budget_per_category.items()
}

# === GENERATE DAILY EXPENSES ===
# One record per calendar day; each category amount is the daily budget
# scaled by a random factor and rounded to cents.
data = [
    {
        'Date': start_date + timedelta(days=day),
        **{
            cat: round(daily_budget_per_category[cat] * py_rng.uniform(VARIATION_LOW, VARIATION_HIGH), 2)
            for cat in categories
        },
    }
    for day in range(days)
]

# === CREATE DATAFRAME ===
df_expenses = pd.DataFrame(data, columns=['Date'] + categories)

# Randomly assign a family member to each daily row, on a copy so that
# df_expenses itself stays free of the 'Name' column.
df_expenses_random = df_expenses.copy()
df_expenses_random['Name'] = [py_rng.choice(family_members) for _ in range(len(df_expenses_random))]

# Reorder columns
cols = ['Date', 'Name'] + categories
df_expenses_random = df_expenses_random[cols]

# === DISPLAY SAMPLE ===
# The label is derived from N_PREVIEW_ROWS so it always matches what is shown.
print(f"\nSample daily expenses (first {N_PREVIEW_ROWS} rows):")
df_expenses_random.head(N_PREVIEW_ROWS)

# === OPTIONAL: Save to CSV ===
# Exports the table that includes the 'Name' column.
#df_expenses_random.to_csv("family_expenses_YTD_random.csv", index=False)
#print("\nSaved as 'family_expenses_YTD_random.csv'.")


Random Category Allocation (%):
Housing: 1.42%
Food & Dining: 21.01%
Transportation: 19.32%
Entertainment: 19.88%
Healthcare: 0.55%
Utilities: 9.06%
Others: 28.75%

Sample daily expenses (first 5 rows):


Unnamed: 0,Date,Name,Housing,Food & Dining,Transportation,Entertainment,Healthcare,Utilities,Others
0,2025-01-01,Jack (Son),3.31,53.67,43.65,44.72,1.11,23.47,64.69
1,2025-01-02,Tina (Mother),3.32,39.71,50.23,35.42,1.38,19.89,68.61
2,2025-01-03,Jack (Son),2.71,44.33,40.39,43.6,1.32,23.33,73.48
3,2025-01-04,Jack (Son),2.86,46.14,43.53,43.41,1.33,20.08,59.32
4,2025-01-05,Tina (Mother),3.55,39.33,39.69,40.96,1.39,19.28,72.98
5,2025-01-06,Ruby (Daughter),3.38,47.54,43.5,36.06,1.4,16.6,69.71
6,2025-01-07,Sean (Father),3.08,51.39,49.56,46.32,1.37,19.62,55.29
7,2025-01-08,Jack (Son),2.95,43.4,37.79,37.04,1.02,21.06,63.51
8,2025-01-09,Tina (Mother),2.8,36.76,48.31,35.47,1.38,22.06,66.82
9,2025-01-10,Tina (Mother),3.41,41.32,45.1,47.67,1.23,22.07,54.39
