In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
# Ignore every warning issued through the `warnings` module from here on.
# NOTE(review): this is a blanket filter, so deprecation notices raised by
# pandas / seaborn / matplotlib are hidden too -- narrow it if that matters.
warnings.filterwarnings('ignore')

# Set style for better visualizations
# The matplotlib style is applied first and the seaborn palette second.
# NOTE(review): `style.use('default')` presumably resets style settings
# (including the colour cycle that `set_palette` configures), so swapping
# these two lines would likely discard the palette -- confirm before reordering.
plt.style.use('default')
sns.set_palette("husl")

print("Libraries loaded successfully!")

In [ ]:
# Load the budget data
# The path is relative to the current working directory.
df = pd.read_csv('data/budget.csv')

# Quick look at the raw frame: dimensions, a sample of rows, the column
# summary and descriptive statistics.
print("Dataset Shape:", df.shape)
print("\nFirst 5 rows:")
print(df.head())
print("\nDataset Info:")
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called on its own; wrapping it in print() would add a stray
# "None" line after the report.
df.info()
print("\nBasic Statistics:")
print(df.describe())

In [ ]:
# Data preprocessing and feature engineering
# Parse the `date` column once, store it back, then derive the calendar
# features (year, month, day, weekday name, hour) from the parsed values.
parsed_dates = pd.to_datetime(df['date'])
df['date'] = parsed_dates
for part in ('year', 'month', 'day'):
    df[part] = getattr(parsed_dates.dt, part)
df['weekday'] = parsed_dates.dt.day_name()
df['hour'] = parsed_dates.dt.hour

# Report the state of the frame after the new columns were added.
print("Data types after preprocessing:")
print(df.dtypes)

print("\nMissing values:")
missing_per_column = df.isnull().sum()
print(missing_per_column)

print("\nCategories in dataset:")
category_counts = df['category'].value_counts()
print(category_counts)

print("\nDate range:")
first_date, last_date = df['date'].min(), df['date'].max()
print(f"From: {first_date} To: {last_date}")

In [ ]:
# Comprehensive visualizations
# One 2x2 figure: category totals (pie), per-date totals (line),
# per-weekday means (bar) and per-hour means (bar).
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
(ax_category, ax_daily), (ax_weekday, ax_hourly) = axes

# 1. Spending by category
category_spending = (
    df.groupby('category')['amount']
    .sum()
    .sort_values(ascending=False)
)
ax_category.pie(
    category_spending.values,
    labels=category_spending.index,
    autopct='%1.1f%%',
)
ax_category.set_title('Total Spending by Category')

# 2. Daily spending trend
# Group on the calendar date only, so rows from the same day are summed.
calendar_day = df['date'].dt.date
daily_spending = df.groupby(calendar_day)['amount'].sum()
ax_daily.plot(daily_spending.index, daily_spending.values, marker='o')
ax_daily.set_title('Daily Spending Trend')
ax_daily.tick_params(axis='x', rotation=45)

# 3. Spending by weekday
# Reindex to Monday..Sunday so the bars follow the week rather than the
# alphabetical order produced by the groupby.
weekday_order = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]
weekday_spending = (
    df.groupby('weekday')['amount']
    .mean()
    .reindex(weekday_order)
)
ax_weekday.bar(weekday_spending.index, weekday_spending.values)
ax_weekday.set_title('Average Spending by Weekday')
ax_weekday.tick_params(axis='x', rotation=45)

# 4. Hourly spending pattern
hourly_spending = df.groupby('hour')['amount'].mean()
ax_hourly.bar(hourly_spending.index, hourly_spending.values)
ax_hourly.set_title('Average Spending by Hour')

plt.tight_layout()
plt.show()

# Additional analysis
# Headline figures computed over the whole `amount` column.
print("=== SUMMARY STATISTICS ===")
txn_amounts = df['amount']

total_spent = txn_amounts.sum()
print(f"Total spending: ${total_spent:.2f}")

mean_amount = txn_amounts.mean()
print(f"Average transaction: ${mean_amount:.2f}")

median_amount = txn_amounts.median()
print(f"Median transaction: ${median_amount:.2f}")

txn_count = len(df)
print(f"Number of transactions: {txn_count}")

largest_amount = txn_amounts.max()
print(f"Most expensive transaction: ${largest_amount:.2f}")

smallest_amount = txn_amounts.min()
print(f"Cheapest transaction: ${smallest_amount:.2f}")

print("\n=== CATEGORY ANALYSIS ===")
# Per-category row count, total and mean of `amount`, computed in a single
# grouped pass rather than filtering the whole frame once per category.
#   sort=False   -> categories are listed in order of first appearance.
#   dropna=False -> rows with a missing category form their own group; an
#                   equality filter against NaN never matches, which would
#                   report them as "0 transactions, Total: $0.00, Avg: $nan".
category_stats = df.groupby('category', sort=False, dropna=False)['amount'].agg(
    transactions='size',
    total='sum',
    average='mean',
)
# itertuples(name=None) yields plain (index, transactions, total, average) tuples.
for category, transactions, total, average in category_stats.itertuples(name=None):
    print(f"{category}: {transactions} transactions, Total: ${total:.2f}, Avg: ${average:.2f}")

print("\n=== TIME PATTERNS ===")
# value_counts() orders labels by descending frequency, so the first index
# entry is the most frequent weekday / hour.
weekday_counts = df['weekday'].value_counts()
print("Most active day:", weekday_counts.index[0])

hour_counts = df['hour'].value_counts()
print("Most active hour:", hour_counts.index[0])

# `daily_spending` holds the per-date totals built for the daily trend plot.
peak_day = daily_spending.idxmax()
peak_total = daily_spending.max()
print("Most expensive day:", peak_day, f"(${peak_total:.2f})")

lowest_day = daily_spending.idxmin()
lowest_total = daily_spending.min()
print("Cheapest day:", lowest_day, f"(${lowest_total:.2f})")