In [1]:
from pathlib import Path

import pandas as pd

# Location of the exported expense ledger, relative to the working directory.
DATA_PATH = Path("data/Expenses-2025.11.csv")

# Exact set of column names (after header whitespace is stripped) that the
# file must contain; anything missing or extra is rejected below.
EXPECTED_COLUMNS = [
    "Posted Date",
    "Payee",
    "Amount",
    "ABS Amount",
    "Source",
    "Bas-Lux",
    "Category",
]

# Category labels that are dropped when building ``expenses_df``.
# NOTE(review): "0" is presumably a placeholder for uncategorized rows, and
# "Income, Investment" is matched as ONE label containing a comma -- confirm
# that two separate labels ("Income", "Investment") were not intended.
NON_EXPENSE_CATEGORIES = [
    "Transfer",
    "Credit Card Payment",
    "0",
    "Income, Investment",
]

df = pd.read_csv(
    DATA_PATH,
    encoding="utf-8-sig",  # tolerate a UTF-8 byte-order mark at the start of the file
    thousands=",",  # let values such as 1,234.56 parse as numbers
)

# Normalize the headers BEFORE touching any column by name: a header with
# stray whitespace would otherwise fail the "Posted Date" lookup below with a
# bare KeyError instead of the descriptive column-mismatch error.
df.columns = df.columns.str.strip()

missing_columns = sorted(set(EXPECTED_COLUMNS) - set(df.columns))
unexpected_columns = sorted(set(df.columns) - set(EXPECTED_COLUMNS))

if missing_columns or unexpected_columns:
    raise ValueError(
        "Column mismatch detected.\n"
        f"Missing: {missing_columns or 'None'}\n"
        f"Unexpected: {unexpected_columns or 'None'}"
    )

# The schema is now known to be correct, so the date column can be parsed.
# Dates are expected as two-digit-year US style strings (e.g. 11/05/25).
df["Posted Date"] = pd.to_datetime(df["Posted Date"].str.strip(), format="%m/%d/%y")

# Rows whose Category is missing (NaN) do not match any excluded label and
# are therefore kept. ``.copy()`` detaches the result from ``df``.
expenses_df = df.loc[~df["Category"].isin(NON_EXPENSE_CATEGORIES)].copy()

In [2]:
import matplotlib.pyplot as plt
from IPython.display import display

# Minimum total spend (summed over every month in the data) a category needs
# in order to stay in the monthly pivot.
MIN_CATEGORY_TOTAL = 100

# Pivot of summed "ABS Amount": one row per calendar month, one column per
# category, 0 where a category has no rows in a month.
monthly_category = (
    expenses_df.assign(
        # Derive the month from the frame being grouped rather than from the
        # unfiltered ``df``, so the result does not depend on the two frames
        # sharing an index.
        month=lambda frame: frame["Posted Date"].dt.to_period("M")
    )
    # dropna=False keeps rows whose Category (or month) is missing as their
    # own group instead of silently discarding them.
    .groupby(["month", "Category"], dropna=False)["ABS Amount"]
    .sum()
    .unstack(fill_value=0)
    .sort_index()
)

# Keep only categories whose total across ALL months reaches the threshold.
# NOTE(review): the earlier comment said "in a month"; if a per-month
# threshold was intended, this should test ``.max()`` rather than ``.sum()``.
monthly_category = monthly_category.loc[
    :, monthly_category.sum() >= MIN_CATEGORY_TOTAL
]


In [3]:
import numpy as np
import pandas as pd
import json
import pandas as pd
from IPython.display import HTML

# Quarters are only offered in the quarter selector from this month onwards;
# earlier months remain selectable individually.
TREEMAP_QUARTER_START = pd.Period("2025-01", freq="M")


def _to_script_json(value) -> str:
    """Serialize ``value`` as JSON that is safe to inline inside a ``<script>`` element.

    ``json.dumps`` leaves ``</`` untouched, so free text such as a payee
    containing ``</script>`` would end the script block early. ``<\\/`` is an
    equivalent JSON/JavaScript string escape that the HTML parser does not
    treat as a closing tag.
    """
    return json.dumps(value).replace("</", "<\\/")


# Make a working copy and ensure dates are datetime
treemap_df = expenses_df.copy()
if not pd.api.types.is_datetime64_any_dtype(treemap_df["Posted Date"]):
    treemap_df["Posted Date"] = pd.to_datetime(
        treemap_df["Posted Date"], errors="coerce"
    )

treemap_df = treemap_df.assign(
    MonthPeriod=treemap_df["Posted Date"].dt.to_period("M"),
)

# Distinct months present in the data, oldest first (undated rows ignored).
month_periods = sorted(treemap_df["MonthPeriod"].dropna().unique())
if not month_periods:
    raise ValueError("No expenses available to visualize.")

# Map Period -> "Jan 2025" style label
period_labels = {
    str(period): period.to_timestamp().strftime("%b %Y")
    for period in month_periods
}

# Build a quarter -> [monthPeriod] mapping (only from TREEMAP_QUARTER_START onwards)
month_periods_for_quarter = [p for p in month_periods if p >= TREEMAP_QUARTER_START]
quarter_map: dict[str, list[str]] = {}
for p in month_periods_for_quarter:
    q = p.asfreq("Q-DEC")  # calendar quarters (fiscal year ending in December)
    label = f"Q{q.quarter} {q.year}"
    quarter_map.setdefault(label, []).append(str(p))

# Flatten the expenses into a simple list of records for JS.
# Rows without a usable date are skipped. Columns are iterated directly
# instead of via DataFrame.iterrows(), which builds a Series per row.
dated_rows = treemap_df[treemap_df["MonthPeriod"].notna()]
records: list[dict] = [
    {
        "monthPeriod": str(month_period),
        "monthLabel": period_labels[str(month_period)],
        # Missing category -> JSON null; the script skips falsy categories.
        "category": None if pd.isna(category) else category,
        "amount": float(amount),
        # Missing payee -> "" so the script's "Unknown Payee" fallback applies
        # (str(NaN) would otherwise produce a literal "nan" label).
        "payee": "" if pd.isna(payee) else str(payee),
        "date": posted_date.strftime("%Y-%m-%d"),
    }
    for month_period, category, amount, payee, posted_date in zip(
        dated_rows["MonthPeriod"],
        dated_rows["Category"],
        dated_rows["ABS Amount"],
        dated_rows["Payee"],
        dated_rows["Posted Date"],
    )
]

# JSON blobs that JS will consume
js_expenses = _to_script_json(records)
js_month_labels = _to_script_json(period_labels)
js_quarter_map = _to_script_json(quarter_map)
js_all_months = _to_script_json(sorted([str(p) for p in month_periods]))

# HTML <option> lists for the month / quarter selectors
months_options_html = "\n".join(
    f'<option value="{str(p)}">{period_labels[str(p)]}</option>'
    for p in month_periods
)

quarters_options_html = "\n".join(
    f'<option value="{q}">{q}</option>'
    for q in quarter_map.keys()
)

# HTML + JS template for the interactive treemap widget.
#
# The string is rendered with str.format() below, so:
#   * single-brace fields are placeholders filled in from Python:
#       {months_options} / {quarters_options} -> <option> lists for the two selectors
#       {js_expenses}     -> array of expense records
#       {js_month_labels} -> map of month-period string -> display label
#       {js_quarter_map}  -> map of quarter label -> list of month-period strings
#       {js_all_months}   -> array of every month-period string
#   * every literal brace in the CSS/JS is doubled so that format() emits a
#     single brace.
#
# Behaviour of the embedded script:
#   * whenPlotlyReady() uses window.Plotly when present; otherwise it injects a
#     <script> tag for the CDN bundle and runs the callback once it has loaded.
#   * refreshOverview() draws per-category totals for the currently selected
#     months into #overview-treemap and attaches the click handler.
#   * Clicking a category draws #detail-treemap: per-month totals, or per-payee
#     totals when Month(s) mode resolves to exactly one month.
#   * Changing the mode radio buttons or either <select> redraws the overview
#     and purges the detail chart.
#
# NOTE(review): the CDN URL uses the unversioned "plotly-latest" alias --
# confirm which Plotly.js version it resolves to and consider pinning one.
# NOTE(review): the element ids and the radio group name are document-global,
# so showing this output more than once on a page would make the lookups
# ambiguous -- confirm it is only displayed once.
template = """
<div style="margin-bottom:0.5rem;">
  <strong>Select Entire Dataset, Month(s), or Quarter(s), then click a category to drill down:</strong><br/>
  <label><input type="radio" name="mode" value="all" checked> Entire dataset</label>
  <label style="margin-left:1rem;"><input type="radio" name="mode" value="months"> Month(s)</label>
  <label style="margin-left:1rem;"><input type="radio" name="mode" value="quarters"> Quarter(s)</label>
</div>

<div id="month-select-wrapper" style="margin-bottom:0.5rem; display:none;">
  <label>Months:<br/>
    <select id="month-select" multiple size="4" style="min-width: 12rem;">
      {months_options}
    </select>
  </label>
</div>

<div id="quarter-select-wrapper" style="margin-bottom:0.5rem; display:none;">
  <label>Quarters:<br/>
    <select id="quarter-select" multiple size="4" style="min-width: 12rem;">
      {quarters_options}
    </select>
  </label>
</div>

<div id="overview-treemap" style="height:1200px;"></div>
<div id="detail-treemap" style="height:1200px; margin-top:1rem;"></div>

<script type="text/javascript">
(function () {{
    // Load Plotly from CDN if it's not already available
    function whenPlotlyReady(cb) {{
        if (window.Plotly) return cb();
        var script = document.createElement("script");
        script.src = "https://cdn.plot.ly/plotly-latest.min.js";
        script.onload = cb;
        document.head.appendChild(script);
    }}

    var expenses = {js_expenses};
    var monthLabels = {js_month_labels};
    var quarterMap = {js_quarter_map};
    var allMonths = {js_all_months};

    var currentMode = "all";

    var overviewDiv = document.getElementById("overview-treemap");
    var detailDiv = document.getElementById("detail-treemap");

    var monthSelectWrapper = document.getElementById("month-select-wrapper");
    var monthSelect = document.getElementById("month-select");
    var quarterSelectWrapper = document.getElementById("quarter-select-wrapper");
    var quarterSelect = document.getElementById("quarter-select");

    function getSelectedMonths() {{
        if (currentMode === "all") {{
            return allMonths.slice();
        }} else if (currentMode === "months") {{
            var opts = Array.from(monthSelect.selectedOptions || []);
            if (!opts.length) {{
                // no explicit choice in Month(s) -> treat as all months
                return allMonths.slice();
            }}
            return opts.map(function (o) {{ return o.value; }});
        }} else if (currentMode === "quarters") {{
            var opts = Array.from(quarterSelect.selectedOptions || []);
            if (!opts.length) {{
                // no explicit choice in Quarter(s) -> treat as all months
                return allMonths.slice();
            }}
            var months = [];
            opts.forEach(function (o) {{
                (quarterMap[o.value] || []).forEach(function (m) {{ months.push(m); }});
            }});
            var uniq = Array.from(new Set(months));
            uniq.sort();
            return uniq;
        }}
        return [];
    }}

    function buildOverviewTrace() {{
        var months = new Set(getSelectedMonths());
        var totals = {{}};
        expenses.forEach(function (rec) {{
            if (!months.has(rec.monthPeriod)) return;
            if (!rec.category) return;
            var key = rec.category;
            var amt = Math.abs(rec.amount || 0);
            totals[key] = (totals[key] || 0) + amt;
        }});
        var labels = Object.keys(totals).sort(function (a, b) {{
            return totals[b] - totals[a];
        }});
        var values = labels.map(function (c) {{ return totals[c]; }});
        return {{
            type: "treemap",
            labels: labels,
            parents: labels.map(function () {{ return ""; }}),
            values: values,
            hovertemplate: "<b>%{{label}}</b><br>Total: $%{{value:.2f}}<extra></extra>",
            textinfo: "label+value"
        }};
    }}

    function refreshOverview() {{
        var trace = buildOverviewTrace();
        if (!trace.labels.length) {{
            window.Plotly.newPlot(overviewDiv, [], {{
                title: "No data for current selection"
            }});
            window.Plotly.purge(detailDiv);
            return;
        }}
        var layout = {{
            title: "Expenses by Category (click a category to drill down)",
            margin: {{t: 60, l: 10, r: 10, b: 10}},
            width: 1000
        }};
        window.Plotly.newPlot(overviewDiv, [trace], layout);
        attachClickHandler();
    }}

    function attachClickHandler() {{
        overviewDiv.on("plotly_click", function (event) {{
            if (!event || !event.points || !event.points.length) return;
            var pt = event.points[0];
            var category = pt.label;
            var selectedMonths = getSelectedMonths();
            var uniqueMonths = Array.from(new Set(selectedMonths));

            // ENTIRE DATASET or QUARTERS always behave like multi-month.
            // In "Month(s)" mode, a single month -> item-level drilldown.
            var multiMonth =
                (currentMode === "all") ||
                (currentMode === "quarters") ||
                (uniqueMonths.length !== 1);

            if (multiMonth) {{
                drilldownCategoryByMonth(category, uniqueMonths);
            }} else {{
                drilldownCategoryItems(category, uniqueMonths[0]);
            }}
        }});
    }}

    // Multi-month (or entire dataset / quarters): category -> months
    function drilldownCategoryByMonth(category, months) {{
        var totals = {{}};
        var monthSet = new Set(months);
        expenses.forEach(function (rec) {{
            if (!monthSet.has(rec.monthPeriod)) return;
            if (rec.category !== category) return;
            var amt = Math.abs(rec.amount || 0);
            totals[rec.monthPeriod] = (totals[rec.monthPeriod] || 0) + amt;
        }});
        var keys = Object.keys(totals).sort();
        if (!keys.length) {{
            window.Plotly.newPlot(detailDiv, [], {{title: "No data for " + category}});
            return;
        }}
        var labels = keys.map(function (mp) {{ return monthLabels[mp] || mp; }});
        var values = keys.map(function (mp) {{ return totals[mp]; }});
        var trace = {{
            type: "treemap",
            labels: labels,
            parents: labels.map(function () {{ return ""; }}),
            values: values,
            hovertemplate: "<b>%{{label}}</b><br>Total: $%{{value:.2f}}<extra></extra>",
            textinfo: "label+value"
        }};
        var layout = {{
            title: "'" + category + "' expenses by month",
            margin: {{t: 60, l: 10, r: 10, b: 10}},
            width: 1000
        }};
        window.Plotly.newPlot(detailDiv, [trace], layout);
    }}

    // Single-month (Month(s) mode): category -> group by payee
    function drilldownCategoryItems(category, monthPeriod) {{
        var filteredItems = expenses.filter(function (rec) {{
            return rec.category === category && rec.monthPeriod === monthPeriod;
        }});

        if (!filteredItems.length) {{
            window.Plotly.newPlot(detailDiv, [], {{title: "No data for " + category + " in " + (monthLabels[monthPeriod] || monthPeriod)}});
            return;
        }}

        var payeeTotals = {{}};
        filteredItems.forEach(function (rec) {{
            var payeeKey = String(rec.payee).trim() || "Unknown Payee"; // Added .trim() and String() for robustness
            var amt = Math.abs(rec.amount || 0);
            payeeTotals[payeeKey] = (payeeTotals[payeeKey] || 0) + amt;
        }});

        var labels = Object.keys(payeeTotals).sort(function(a, b) {{
            return payeeTotals[b] - payeeTotals[a]; // Sort by amount descending
        }});
        var values = labels.map(function (p) {{ return payeeTotals[p]; }});

        var trace = {{
            type: "treemap",
            labels: labels,
            parents: labels.map(function () {{ return ""; }}),
            values: values,
            hovertemplate: "<b>Payee:</b> %{{label}}<br><b>Total Amount:</b> $%{{value:.2f}}<extra></extra>",
            textinfo: "label+value"
        }};
        var label = monthLabels[monthPeriod] || monthPeriod;
        var layout = {{
            title: "'" + category + "' expenses in " + label + " by Payee",
            margin: {{t: 60, l: 10, r: 10, b: 10}},
            width: 1000
        }};
        window.Plotly.newPlot(detailDiv, [trace], layout);
    }}

    // Wire up the radio buttons and selects
    Array.from(document.querySelectorAll("input[name='mode']")).forEach(function (radio) {{
        radio.addEventListener("change", function () {{
            currentMode = this.value;
            if (currentMode === "all") {{
                monthSelectWrapper.style.display = "none";
                quarterSelectWrapper.style.display = "none";
            }} else if (currentMode === "months") {{
                monthSelectWrapper.style.display = "";
                quarterSelectWrapper.style.display = "none";
            }} else if (currentMode === "quarters") {{
                monthSelectWrapper.style.display = "none";
                quarterSelectWrapper.style.display = "";
            }}
            refreshOverview();
            window.Plotly.purge(detailDiv);
        }});
    }});

    monthSelect.addEventListener("change", function () {{
        if (currentMode === "months") {{
            refreshOverview();
            window.Plotly.purge(detailDiv);
        }}
    }});

    quarterSelect.addEventListener("change", function () {{
        if (currentMode === "quarters") {{
            refreshOverview();
            window.Plotly.purge(detailDiv);
        }}
    }});

    whenPlotlyReady(function () {{
        refreshOverview();
    }});
}})();
</script>
"""

# Values for the template's named placeholders, keyed by placeholder name.
template_fields = {
    "months_options": months_options_html,
    "quarters_options": quarters_options_html,
    "js_expenses": js_expenses,
    "js_month_labels": js_month_labels,
    "js_quarter_map": js_quarter_map,
    "js_all_months": js_all_months,
}

# Fill in the placeholders to get the final widget markup.
html = template.format(**template_fields)

# Last expression of the cell, so the notebook renders it as rich HTML output.
HTML(html)