# Budget Analysis

Explore categorised transaction data from `data/budget.db`.

In [None]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt

# Load every row of the `transactions` table into a DataFrame, newest first.
# The connection is only needed for this one query, so it is closed as soon
# as the read finishes (or fails) rather than being left open for the rest
# of the session.
conn = sqlite3.connect("data/budget.db")
try:
    df = pd.read_sql_query("""
        SELECT date, raw_description, amount, currency, merchant_name,
               category, source, confidence, import_batch
        FROM transactions
        ORDER BY date DESC
    """, conn)
finally:
    conn.close()

# Convert the date column to datetimes and derive a monthly period that the
# later cells group on.
df["date"] = pd.to_datetime(df["date"])
df["month"] = df["date"].dt.to_period("M")
print(f"{len(df)} transactions loaded")
df.head(10)

## Spending by Category

In [None]:
# Spending rows are the ones with a negative amount; flip the sign so the
# totals below read as positive figures.
spending = df[df["amount"] < 0].copy()
spending["amount"] = spending["amount"].abs()

# dropna=False keeps rows whose category is NULL as their own (NaN) group.
# With the groupby default they would be silently left out of both the
# per-category breakdown and the total printed below.
by_cat = (
    spending.groupby("category", dropna=False)["amount"]
    .sum()
    .sort_values(ascending=False)
)
print(by_cat.to_string())
# NOTE(review): the total is labelled CHF but rows are not filtered on the
# `currency` column — this assumes every transaction is in CHF; confirm.
print(f"\nTotal spending: CHF {by_cat.sum():,.2f}")

In [None]:
# Horizontal bar chart of the per-category totals held in by_cat.
fig, ax = plt.subplots(figsize=(10, 6))
by_cat.plot(kind="barh", ax=ax)
# barh places the first entry at the bottom; flip the axis so the first
# entry of by_cat is drawn at the top instead.
ax.invert_yaxis()
ax.set(xlabel="CHF", title="Spending by Category")
fig.tight_layout()
plt.show()

## Monthly Spending Trend

In [None]:
# Sum the spending amounts per month and draw them as a bar chart.
monthly = spending["amount"].groupby(spending["month"]).sum()

fig, ax = plt.subplots(figsize=(12, 5))
monthly.plot(kind="bar", ax=ax)
ax.set(ylabel="CHF", title="Monthly Spending")
# Tilt the month labels so neighbouring ticks do not overlap.
ax.tick_params(axis="x", rotation=45)
fig.tight_layout()
plt.show()

## Top Merchants

In [None]:
# The 20 merchants with the highest summed spending, together with how many
# spending transactions each one has.
top = (
    spending.groupby("merchant_name")["amount"]
    .agg(total_chf="sum", transactions="count")
    .sort_values("total_chf", ascending=False)
    .head(20)
)
top

## Monthly Spending by Category

In [None]:
# Month x category table of summed spending; combinations with no
# transactions are filled with 0 so the bars stack cleanly.
pivot = pd.pivot_table(
    spending,
    values="amount",
    index="month",
    columns="category",
    aggfunc="sum",
    fill_value=0,
)

fig, ax = plt.subplots(figsize=(14, 7))
pivot.plot(kind="bar", stacked=True, ax=ax)
ax.set(ylabel="CHF", title="Monthly Spending by Category")
ax.tick_params(axis="x", rotation=45)
# Anchor the legend just outside the right edge of the axes so it does not
# cover the bars.
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1))
fig.tight_layout()
plt.show()

## Classification Quality

In [None]:
# Report how transactions were classified: row counts per source, the mean
# confidence, the number of rows with confidence below 0.8, and the number
# labelled 'Uncategorised'.
print("Source distribution:")
source_counts = df["source"].value_counts()
print(source_counts.to_string())

confidence = df["confidence"]
print(f"\nMean confidence: {confidence.mean():.2%}")

low_confidence_count = confidence.lt(0.8).sum()
print(f"Low confidence (<0.8): {low_confidence_count} transactions")

uncategorised_count = df["category"].eq("Uncategorised").sum()
print(f"Uncategorised: {uncategorised_count} transactions")

## Income vs Spending

In [None]:
# Monthly totals of positive amounts (income) and of negative amounts
# (spending, shown as a positive figure).
amounts = df["amount"]
income = df.loc[amounts > 0].groupby("month")["amount"].sum()
expenses = df.loc[amounts < 0].groupby("month")["amount"].sum().abs()

# A month present on only one side gets 0 for the other side.
summary = pd.DataFrame({"Income": income, "Spending": expenses}).fillna(0)
summary = summary.assign(Net=summary["Income"].sub(summary["Spending"]))

# Only Income and Spending are plotted; Net appears in the printed table.
fig, ax = plt.subplots(figsize=(12, 5))
summary[["Income", "Spending"]].plot(kind="bar", ax=ax)
ax.axhline(0, color="black", linewidth=0.5)
ax.set(ylabel="CHF", title="Income vs Spending")
ax.tick_params(axis="x", rotation=45)
fig.tight_layout()
plt.show()

print(summary.to_string())