In [None]:
from pathlib import Path

import pandas as pd

# Location of the expense export to analyse.
DATA_PATH = Path("data/expenses-2025.12.csv")

# Exact set of column names the rest of the notebook relies on
# (compared after surrounding whitespace is stripped from the header).
EXPECTED_COLUMNS = [
    "Date",
    "Payee",
    "Amount",
    "Category",
    "Bas-Lux",
    "source"
]

# "utf-8-sig" drops a leading byte-order mark if present; thousands=","
# lets pandas parse values such as 1,234.56 as numbers.
df = pd.read_csv(
    DATA_PATH,
    encoding="utf-8-sig",
    thousands=",",
)

# Normalise the header BEFORE any column is looked up by name; otherwise a
# padded header such as " Date" fails with a bare KeyError further down.
df.columns = df.columns.str.strip()

# Validate the schema before touching any column so that a wrong export is
# reported with the descriptive message below instead of a KeyError.
missing_columns = sorted(set(EXPECTED_COLUMNS) - set(df.columns))
unexpected_columns = sorted(set(df.columns) - set(EXPECTED_COLUMNS))

if missing_columns or unexpected_columns:
    raise ValueError(
        "Column mismatch detected.\n"
        f"Missing: {missing_columns or 'None'}\n"
        f"Unexpected: {unexpected_columns or 'None'}"
    )

# Dates are read as text in month/day/two-digit-year form; strip padding
# around each value before parsing it with that explicit format.
df["Date"] = pd.to_datetime(df["Date"].str.strip(), format="%m/%d/%y")

# Category labels that are left out of the expense analysis.
# NOTE(review): "Income, Investment" is matched as one literal label, and "0"
# is matched as a string — confirm both against the values in the export.
excluded_categories = [
    "Transfer",
    "Credit Card Payment",
    "0",
    "Income, Investment",
]

# Keep every row whose category is not excluded (rows with a missing category
# are kept as well) and copy so later edits never write back into df.
is_expense_row = ~df["Category"].isin(excluded_categories)
expenses_df = df.loc[is_expense_row].copy()

In [15]:
import matplotlib.pyplot as plt
from IPython.display import display

# Smallest absolute total, summed over ALL months, that a category needs in
# order to stay in the table.
MIN_CATEGORY_TOTAL = 100

# Month-by-category totals: one row per monthly period, one column per
# category (missing categories get their own column), absent cells become 0.
# The month key is derived from expenses_df's own dates, so the result does
# not depend on index alignment with the unfiltered frame.
monthly_category = (
    expenses_df.assign(month=lambda frame: frame["Date"].dt.to_period("M"))
    .groupby(["month", "Category"], dropna=False)["Amount"]
    .sum()
    .unstack(fill_value=0)
    .sort_index()
)

# Drop categories whose monthly totals add up to less than the threshold.
# abs() is applied to each monthly total so the filter works on magnitude
# regardless of the sign used for expenses.
monthly_category = monthly_category.loc[
    :, monthly_category.abs().sum() >= MIN_CATEGORY_TOTAL
]


In [None]:
import json
import plotly.graph_objects as go
from IPython.display import HTML

# First month that may appear in the quarter selector. Earlier months stay
# available in the month selector but are left out of quarter_map.
TREEMAP_QUARTER_START = pd.Period("2025-01", freq="M")


def _script_safe_json(value) -> str:
    """Serialize ``value`` as JSON that can be inlined inside a <script> tag.

    ``</`` is rewritten to ``<\\/`` (an equivalent JSON/JS escape) so that text
    such as a payee containing ``</script>`` cannot terminate the script block.
    """
    return json.dumps(value).replace("</", "<\\/")


# Work on a copy so expenses_df is left untouched, and make sure dates are
# real datetimes (unparseable values become NaT and are skipped below).
treemap_df = expenses_df.copy()
if not pd.api.types.is_datetime64_any_dtype(treemap_df["Date"]):
    treemap_df["Date"] = pd.to_datetime(
        treemap_df["Date"], errors="coerce"
    )

treemap_df = treemap_df.assign(
    MonthPeriod=treemap_df["Date"].dt.to_period("M"),
)

month_periods = sorted(treemap_df["MonthPeriod"].dropna().unique())
if not month_periods:
    raise ValueError("No expenses available to visualize.")

# Map period string ("2025-01") -> display label ("Jan 2025").
period_labels = {
    str(period): period.to_timestamp().strftime("%b %Y")
    for period in month_periods
}

# Build a quarter label -> [month period strings] mapping, restricted to
# months from TREEMAP_QUARTER_START onwards.
month_periods_for_quarter = [p for p in month_periods if p >= TREEMAP_QUARTER_START]
quarter_map: dict[str, list[str]] = {}
for p in month_periods_for_quarter:
    q = p.asfreq("Q-DEC")
    label = f"Q{q.quarter} {q.year}"
    quarter_map.setdefault(label, []).append(str(p))

# Flatten the expenses into plain records for the JavaScript side.
# Missing values are normalised here so both Python and JS see clean data:
#   - missing category -> "Unknown Category"
#   - missing payee    -> "Unknown Payee" (str(NaN) would yield the text "nan")
#   - missing amount   -> 0.0 (NaN would serialize as a non-standard JSON
#                         token and turn any Python-side sum into NaN)
records: list[dict] = []
for _, row in treemap_df.iterrows():
    mp = row["MonthPeriod"]
    if pd.isna(mp):
        continue  # no usable date -> cannot be placed in any month

    category_val = row["Category"]
    payee_val = row["Payee"]
    amount_val = float(row["Amount"])

    records.append(
        {
            "monthPeriod": str(mp),
            "monthLabel": period_labels[str(mp)],
            "category": "Unknown Category" if pd.isna(category_val) else str(category_val),
            "amount": 0.0 if pd.isna(amount_val) else amount_val,
            "payee": "Unknown Payee" if pd.isna(payee_val) else str(payee_val),
            "date": row["Date"].strftime("%Y-%m-%d"),
        }
    )

# JSON blobs that the embedded script consumes.
js_expenses = _script_safe_json(records)
js_month_labels = _script_safe_json(period_labels)
js_quarter_map = _script_safe_json(quarter_map)
js_all_months = _script_safe_json(sorted(str(p) for p in month_periods))


# Pre-render the "Entire dataset" overview in Python so the cell shows a
# treemap immediately, before any JavaScript interaction.
#
# Totals are sums of absolute amounts per category. A missing amount counts
# as zero, matching `Math.abs(rec.amount || 0)` in the embedded script. The
# check is explicit because NaN is truthy in Python, so `amount or 0` would
# let one missing value turn a whole category total into NaN.
initial_overview_totals = {}
for rec in records:
    amount = rec["amount"]
    if amount is None or pd.isna(amount):
        amount = 0
    key = rec["category"]
    initial_overview_totals[key] = initial_overview_totals.get(key, 0) + abs(amount)

# Largest category first; ties keep their first-seen order.
initial_overview_labels = sorted(
    initial_overview_totals.keys(),
    key=lambda k: initial_overview_totals[k],
    reverse=True,
)
initial_overview_values = [initial_overview_totals[c] for c in initial_overview_labels]

# Flat treemap: every category sits directly under the (empty) root.
initial_overview_trace_data = go.Treemap(
    labels=initial_overview_labels,
    parents=[""] * len(initial_overview_labels),
    values=initial_overview_values,
    hovertemplate="<b>%{label}</b><br>Total: $%{value:.2f}<extra></extra>",
    textinfo="label+value"
)

initial_overview_layout_data = go.Layout(
    title="Expenses by Category (click a category to drill down)",
    margin={"t": 60, "l": 10, "r": 10, "b": 10},
    width=None,
    height=1000
)

initial_fig = go.Figure(data=[initial_overview_trace_data], layout=initial_overview_layout_data)

# Emit only a <div> fragment (full_html=False) but embed plotly.js itself
# (include_plotlyjs=True) so the output is self-contained.
initial_overview_div_html = initial_fig.to_html(full_html=False, include_plotlyjs=True)


# Markup for the two multi-select boxes.
# Month options carry the period string as their value and the display label
# from period_labels as their text; quarter options use the quarter label for
# both value and text.
months_options_html = "\n".join(
    [
        f'<option value="{period_key}">{period_labels[period_key]}</option>'
        for period_key in map(str, month_periods)
    ]
)

quarters_options_html = "\n".join(
    [
        f'<option value="{quarter_label}">{quarter_label}</option>'
        for quarter_label in quarter_map
    ]
)

# HTML + JS for the interactive view: a mode selector (entire dataset /
# months / quarters), the overview treemap, and a detail treemap that is
# drawn when a category is clicked.
#
# This is a str.format template:
#   - placeholders: months_options, quarters_options, js_expenses,
#     js_month_labels, js_quarter_map, js_all_months,
#     initial_overview_treemap_content
#   - every literal JavaScript brace is doubled ({{ / }}) for that reason.
#
# The CDN fallback loads a pinned plotly.js release: the "plotly-latest"
# alias is frozen at v1.58.5 and no longer tracks current releases.
template = """
<div style="margin-bottom:0.5rem;">
  <strong>Select Entire Dataset, Month(s), or Quarter(s), then click a category to drill down:</strong><br/>
  <label><input type="radio" name="mode" value="all" checked> Entire dataset</label>
  <label style="margin-left:1rem;"><input type="radio" name="mode" value="months"> Month(s)</label>
  <label style="margin-left:1rem;"><input type="radio" name="mode" value="quarters"> Quarter(s)</label>
</div>

<div id="month-select-wrapper" style="margin-bottom:0.5rem; display:none;">
  <label>Months:<br/>
    <select id="month-select" multiple size="4" style="min-width: 12rem;">
      {months_options}
    </select>
  </label>
</div>

<div id="quarter-select-wrapper" style="margin-bottom:0.5rem; display:none;">
  <label>Quarters:<br/>
    <select id="quarter-select" multiple size="4" style="min-width: 12rem;">
      {quarters_options}
    </select>
  </label>
</div>

<div id="overview-treemap" style="height:1200px;">{initial_overview_treemap_content}</div> <!-- MODIFIED: Pre-filled with initial plot -->
<div id="detail-treemap" style="height:1200px; margin-top:1rem;"></div>

<script type="text/javascript">
(function () {{
    // Plotly.js normally arrives embedded with the pre-rendered overview figure.
    // This loader is only a fallback for the interactive updates and fetches a
    // pinned release rather than a CDN alias.
    function whenPlotlyReady(cb) {{
        if (window.Plotly) return cb();
        var script = document.createElement("script");
        script.src = "https://cdn.plot.ly/plotly-2.35.2.min.js";
        script.onload = cb;
        document.head.appendChild(script);
    }}

    var expenses = {js_expenses};
    var monthLabels = {js_month_labels};
    var quarterMap = {js_quarter_map};
    var allMonths = {js_all_months};

    var currentMode = "all";

    var overviewDivContainer = document.getElementById("overview-treemap");
    var detailDiv = document.getElementById("detail-treemap");

    var monthSelectWrapper = document.getElementById("month-select-wrapper");
    var monthSelect = document.getElementById("month-select");
    var quarterSelectWrapper = document.getElementById("quarter-select-wrapper");
    var quarterSelect = document.getElementById("quarter-select");

    // The pre-rendered figure lives in a child div with class
    // 'plotly-graph-div'; returns null when no such div exists.
    function getCurrentPlotlyGraphDiv(containerElement) {{
        return containerElement.querySelector('.plotly-graph-div');
    }}

    // Month period strings covered by the current mode and selection.
    // An empty selection in Month(s) or Quarter(s) mode means "all months".
    function getSelectedMonths() {{
        if (currentMode === "all") {{
            return allMonths.slice();
        }} else if (currentMode === "months") {{
            var monthOpts = Array.from(monthSelect.selectedOptions || []);
            if (!monthOpts.length) {{
                return allMonths.slice();
            }}
            return monthOpts.map(function (o) {{ return o.value; }});
        }} else if (currentMode === "quarters") {{
            var quarterOpts = Array.from(quarterSelect.selectedOptions || []);
            if (!quarterOpts.length) {{
                return allMonths.slice();
            }}
            var months = [];
            quarterOpts.forEach(function (o) {{
                (quarterMap[o.value] || []).forEach(function (m) {{ months.push(m); }});
            }});
            var uniq = Array.from(new Set(months));
            uniq.sort();
            return uniq;
        }}
        return [];
    }}

    // Category totals (sum of absolute amounts) for the selected months,
    // largest first, as a flat treemap trace.
    function buildOverviewTrace() {{
        var months = new Set(getSelectedMonths());
        var totals = {{}};
        expenses.forEach(function (rec) {{
            if (!months.has(rec.monthPeriod)) return;
            var key = rec.category;
            var amt = Math.abs(rec.amount || 0);
            totals[key] = (totals[key] || 0) + amt;
        }});
        var labels = Object.keys(totals).sort(function (a, b) {{
            return totals[b] - totals[a];
        }});
        var values = labels.map(function (c) {{ return totals[c]; }});
        return {{
            type: "treemap",
            labels: labels,
            parents: labels.map(function () {{ return ""; }}),
            values: values,
            hovertemplate: "<b>%{{label}}</b><br>Total: $%{{value:.2f}}<extra></extra>",
            textinfo: "label+value"
        }};
    }}

    function refreshOverview() {{
        var trace = buildOverviewTrace();
        var targetDiv = getCurrentPlotlyGraphDiv(overviewDivContainer);
        var plotTarget = targetDiv || overviewDivContainer;

        if (!trace.labels.length) {{
            window.Plotly.newPlot(plotTarget, [], {{
                title: "No data for current selection"
            }});
            // newPlot discards listeners registered on the div, so clear the
            // marker and let attachClickHandler() register the handler again
            // on the next refresh.
            plotTarget._plotly_click_handler_attached = false;
            window.Plotly.purge(detailDiv);
            return;
        }}
        var layout = {{
            title: "Expenses by Category (click a category to drill down)",
            margin: {{t: 60, l: 10, r: 10, b: 10}}
        }};
        // react updates the existing plot in place, or creates it if needed.
        window.Plotly.react(plotTarget, [trace], layout);
        attachClickHandler();
    }}

    // Click on an overview tile -> draw the matching detail treemap.
    var plotlyClickHandler = function (event) {{
        // Guard the payload before dereferencing it.
        if (event && event.event) event.event.stopPropagation();
        if (!event || !event.points || !event.points.length) return;

        var category = event.points[0].label;
        var uniqueMonths = Array.from(new Set(getSelectedMonths()));

        // ENTIRE DATASET or QUARTERS always behave like multi-month.
        // In "Month(s)" mode, a single month -> payee-level drilldown.
        var multiMonth =
            (currentMode === "all") ||
            (currentMode === "quarters") ||
            (uniqueMonths.length !== 1);

        if (multiMonth) {{
            drilldownCategoryByMonth(category, uniqueMonths);
        }} else {{
            drilldownCategoryItems(category, uniqueMonths[0]);
        }}
    }};

    // Register the click handler once per graph div; the marker property
    // prevents duplicate registrations across refreshes.
    function attachClickHandler() {{
        var plotDiv = getCurrentPlotlyGraphDiv(overviewDivContainer);
        if (!plotDiv) {{
            console.warn("Could not find the actual Plotly graph div to attach click handler.");
            return;
        }}
        if (!plotDiv._plotly_click_handler_attached) {{
            plotDiv.on("plotly_click", plotlyClickHandler);
            plotDiv._plotly_click_handler_attached = true;
        }}
    }}

    // Multi-month (or entire dataset / quarters): category -> months
    function drilldownCategoryByMonth(category, months) {{
        var totals = {{}};
        var monthSet = new Set(months);
        expenses.forEach(function (rec) {{
            if (!monthSet.has(rec.monthPeriod)) return;
            if (rec.category !== category) return;
            var amt = Math.abs(rec.amount || 0);
            totals[rec.monthPeriod] = (totals[rec.monthPeriod] || 0) + amt;
        }});
        var keys = Object.keys(totals).sort();
        if (!keys.length) {{
            window.Plotly.newPlot(detailDiv, [], {{title: "No data for " + category}});
            return;
        }}
        var labels = keys.map(function (mp) {{ return monthLabels[mp] || mp; }});
        var values = keys.map(function (mp) {{ return totals[mp]; }});
        var trace = {{
            type: "treemap",
            labels: labels,
            parents: labels.map(function () {{ return ""; }}),
            values: values,
            hovertemplate: "<b>%{{label}}</b><br>Total: $%{{value:.2f}}<extra></extra>",
            textinfo: "label+value"
        }};
        var layout = {{
            title: "'" + category + "' expenses by month",
            margin: {{t: 60, l: 10, r: 10, b: 10}}
        }};
        window.Plotly.newPlot(detailDiv, [trace], layout);
    }}

    // Single-month (Month(s) mode): category -> group by payee
    function drilldownCategoryItems(category, monthPeriod) {{
        var filteredItems = expenses.filter(function (rec) {{
            return rec.category === category && rec.monthPeriod === monthPeriod;
        }});

        if (!filteredItems.length) {{
            window.Plotly.newPlot(detailDiv, [], {{title: "No data for " + category + " in " + (monthLabels[monthPeriod] || monthPeriod)}});
            return;
        }}

        var payeeTotals = {{}};
        filteredItems.forEach(function (rec) {{
            // Blank payees are grouped under a single placeholder label.
            var payeeKey = String(rec.payee).trim() || "Unknown Payee";
            var amt = Math.abs(rec.amount || 0);
            payeeTotals[payeeKey] = (payeeTotals[payeeKey] || 0) + amt;
        }});

        var labels = Object.keys(payeeTotals).sort(function(a, b) {{
            return payeeTotals[b] - payeeTotals[a]; // Sort by amount descending
        }});
        var values = labels.map(function (p) {{ return payeeTotals[p]; }});

        var trace = {{
            type: "treemap",
            labels: labels,
            parents: labels.map(function () {{ return ""; }}),
            values: values,
            hovertemplate: "<b>Payee:</b> %{{label}}<br><b>Total Amount:</b> $%{{value:.2f}}<extra></extra>",
            textinfo: "label+value"
        }};
        var label = monthLabels[monthPeriod] || monthPeriod;
        var layout = {{
            title: "'" + category + "' expenses in " + label + " by Payee",
            margin: {{t: 60, l: 10, r: 10, b: 10}}
        }};
        window.Plotly.newPlot(detailDiv, [trace], layout);
    }}

    // Wire up the radio buttons and selects
    Array.from(document.querySelectorAll("input[name='mode']")).forEach(function (radio) {{
        radio.addEventListener("change", function () {{
            currentMode = this.value;
            if (currentMode === "all") {{
                monthSelectWrapper.style.display = "none";
                quarterSelectWrapper.style.display = "none";
            }} else if (currentMode === "months") {{
                monthSelectWrapper.style.display = "";
                quarterSelectWrapper.style.display = "none";
            }} else if (currentMode === "quarters") {{
                monthSelectWrapper.style.display = "none";
                quarterSelectWrapper.style.display = "";
            }}
            refreshOverview();
            window.Plotly.purge(detailDiv);
        }});
    }});

    monthSelect.addEventListener("change", function () {{
        if (currentMode === "months") {{
            refreshOverview();
            window.Plotly.purge(detailDiv);
        }}
    }});

    quarterSelect.addEventListener("change", function () {{
        if (currentMode === "quarters") {{
            refreshOverview();
            window.Plotly.purge(detailDiv);
        }}
    }});

    whenPlotlyReady(function () {{
        // The pre-rendered overview is already in the page at this point;
        // it only needs the click handler.
        attachClickHandler();
    }});
}})();
</script>
"""

# Values for each placeholder in the template, keyed by placeholder name.
template_fields = {
    "months_options": months_options_html,
    "quarters_options": quarters_options_html,
    "js_expenses": js_expenses,
    "js_month_labels": js_month_labels,
    "js_quarter_map": js_quarter_map,
    "js_all_months": js_all_months,
    # Pre-rendered overview figure, placed inside the overview container.
    "initial_overview_treemap_content": initial_overview_div_html,
}
html = template.format(**template_fields)

# Bare expression on the last line: the notebook renders it as the cell output.
HTML(html)

Output hidden; open in https://colab.research.google.com to view.