# Market Data Visualizer

### Step 0: Imports

In [1]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import re
import ipywidgets as widgets
from IPython.display import display, clear_output
from ipywidgets import interact

# make plots a bit larger
plt.rcParams["figure.figsize"] = (11, 4)

## Part 1: Loading Data

### Step 1.1: Data Location & Available Files

In [2]:
# Locate the raw .csv exports under ../data/raw and preview the first few.

raw_dir = Path("../data/raw")
raw_dir.mkdir(parents=True, exist_ok=True)  # create the folder on first run
csv_files = sorted(raw_dir.glob("*.csv"))
if len(csv_files) == 0:
    print("No CSV files found in ../data/raw yet. Save one with fetch_data_yf.py first.")
csv_files[:5]  # last expression of the cell -> rendered as its output

[PosixPath('../data/raw/aapl_1d_2010-01-01_2025-08-16_yf.csv'),
 PosixPath('../data/raw/spy_1d_2020-01-01_2025-08-16_yf.csv')]

### Step 1.2: Choose & Load Dataset

In [3]:
# --- Easy loader: pick 2 CSVs from ../data/raw with widgets (robust names) ---

import re, glob
from pathlib import Path
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output

# Folder that holds the raw CSV exports (relative path, same location cell 2 lists).
RAW_DIR = Path("../data/raw")

# 1) list candidates
# Only files whose name ends in "_yf.csv" are considered; sorting keeps the
# dropdown order stable between runs.
files = sorted(glob.glob(str(RAW_DIR / "*_yf.csv")))
if not files:
    # Fail early: the dropdowns further down are built from this list.
    raise FileNotFoundError(f"No *_yf.csv files found in {RAW_DIR}. Run your fetcher first.")

# 2) parse meta from filename
# Expected shape: <symbol>_<interval>[_<start>_<end>]_yf.csv  (dates as YYYY-MM-DD).
# - The pattern is anchored at both ends, so a symbol containing '.', '^' or '='
#   (e.g. brk.b, ^gspc, eurusd=x) is captured whole instead of being silently
#   truncated to whatever tail happens to match.
# - The interval accepts any "<number><unit>" with unit mo / wk / m / h / d / w,
#   which is a superset of the previous alternatives (now also 3mo, 2wk, ...).
pat = re.compile(
    r"^(?P<sym>[a-z0-9_.^=\-]+)_(?P<intv>[0-9]+(?:mo|wk|[mhdw]))(?:_(?P<start>\d{4}-\d{2}-\d{2})_(?P<end>\d{4}-\d{2}-\d{2}))?_yf\.csv$",
    re.I,
)

def parse_meta(path: str):
    """Extract symbol / interval / date-range metadata from a CSV file name.

    Only the final path component is matched against the module-level
    pattern ``pat``.

    Returns:
        A dict with keys ``symbol`` (upper-cased), ``interval``, ``start``,
        ``end`` (both ``None`` when the name carries no date range), ``name``
        (file name) and ``path`` (the argument as given), or ``None`` when the
        file name does not match the pattern.
    """
    file_name = Path(path).name
    match = pat.search(file_name)
    if match is None:
        return None

    parts = match.groupdict()
    return {
        "symbol": parts["sym"].upper(),
        "interval": parts["intv"],
        "start": parts["start"],
        "end": parts["end"],
        "name": file_name,
        "path": path,
    }

def label_for(path: str) -> str:
    """Build the human-readable dropdown label for one CSV path.

    Files whose name cannot be parsed get an "(unknown)" label that still
    shows the file name; missing start/end dates are rendered as "?".
    """
    meta = parse_meta(path)
    if not meta:
        return f"(unknown) | {Path(path).name}"

    start_txt = meta["start"] or "?"
    end_txt = meta["end"] or "?"
    return f"{meta['symbol']}  |  {meta['interval']}  |  {start_txt} → {end_txt}  |  {meta['name']}"

# One (label, path) pair per discovered CSV; the path is the dropdown's value.
options = list(zip(map(label_for, files), files))

# 3) widgets
# The benchmark dropdown gets an extra leading entry whose value is the empty
# string, meaning "no benchmark selected".
primary_dd = widgets.Dropdown(description="Primary", options=options)
bench_dd = widgets.Dropdown(
    description="Benchmark",
    options=[("None (no benchmark)", ""), *options],
)
load_btn = widgets.Button(button_style="primary", description="Load")
out = widgets.Output()

display(widgets.VBox([primary_dd, bench_dd, load_btn, out]))

# 4) loader helpers
def load_csv(path: str) -> pd.DataFrame:
    """Read one price CSV and return a cleaned frame.

    Steps:
      * parse the ``timestamp`` column and strip any timezone information,
      * sort by timestamp, drop rows with a repeated timestamp, renumber rows,
      * when an ``Adj Close`` column exists and is not entirely NaN, copy it
        over ``close`` so downstream cells work on adjusted prices.
    """
    frame = pd.read_csv(path, parse_dates=["timestamp"])
    naive_ts = pd.to_datetime(frame["timestamp"]).dt.tz_localize(None)

    frame = (
        frame.assign(timestamp=naive_ts)
        .sort_values("timestamp")
        .drop_duplicates("timestamp")
        .reset_index(drop=True)
    )

    has_adj_col = "Adj Close" in frame.columns
    if has_adj_col and frame["Adj Close"].notna().any():
        frame["close"] = frame["Adj Close"]
    return frame

# 5) click handler (uses unambiguous variable names)
def on_load(_):
    """Click handler for the Load button.

    Reads the CSV selected in ``primary_dd`` (and, when a non-empty value is
    selected, the one in ``bench_dd``) through ``load_csv`` and publishes the
    results as module-level globals:

      * primary_df / primary_symbol / primary_interval / primary_meta
      * bench_df / bench_symbol / bench_interval / bench_meta (all reset to
        None on every click, then filled only if a benchmark is selected)
      * df / symbol / interval, which mirror the primary values for cells
        that use the shorter names

    Everything printed here is emitted inside ``with out:``, i.e. while the
    ``out`` Output widget is the active context. The single argument (the
    object passed by ``on_click``) is ignored.
    """
    with out:
        clear_output()
        global primary_df, primary_symbol, primary_interval, primary_meta
        global bench_df, bench_symbol, bench_interval, bench_meta
        # (optional) compatibility mirrors:
        global df, symbol, interval

        # primary
        p_path = primary_dd.value
        # Fall back to placeholder metadata when the file name does not parse.
        primary_meta = parse_meta(p_path) or {"symbol":"UNKNOWN","interval":"(unknown)","name":Path(p_path).name}
        primary_symbol  = primary_meta["symbol"]
        primary_interval= primary_meta["interval"]
        primary_df      = load_csv(p_path)

        print(f"Primary: {primary_meta['name']}")
        print(f"  rows={len(primary_df)}  range={primary_df['timestamp'].min().date()} → {primary_df['timestamp'].max().date()}  interval={primary_interval}")

        # (compatibility) mirror to df/symbol/interval so older cells still work
        df = primary_df
        symbol = primary_symbol
        interval = primary_interval

        # benchmark (optional)
        # Reset first so a previously loaded benchmark does not linger when the
        # user switches back to "None (no benchmark)".
        bench_df = None
        bench_symbol = None
        bench_interval = None
        bench_meta = None

        # The "None" entry of the benchmark dropdown has the empty string as value.
        if bench_dd.value:
            b_path = bench_dd.value
            bench_meta = parse_meta(b_path) or {"symbol":"UNKNOWN","interval":"(unknown)","name":Path(b_path).name}
            bench_symbol   = bench_meta["symbol"]
            bench_interval = bench_meta["interval"]
            bench_df       = load_csv(b_path)

            print(f"Benchmark: {bench_meta['name']}")
            print(f"  rows={len(bench_df)}  range={bench_df['timestamp'].min().date()} → {bench_df['timestamp'].max().date()}  interval={bench_interval}")

            # Warn only when both intervals were actually parsed from the file names.
            if (primary_interval != bench_interval
                and "(unknown)" not in (primary_interval, bench_interval)):
                print(f"⚠️ Intervals differ (primary={primary_interval}, benchmark={bench_interval}). "
                      "Overlays will use date overlap only.")

            # report overlap
            # Inner join on exact timestamp equality: only bars present in both files count.
            overlap = primary_df[["timestamp"]].merge(bench_df[["timestamp"]], on="timestamp", how="inner")
            if overlap.empty:
                print("⚠️ No overlapping dates between primary and benchmark.")
            else:
                print(f"Overlap: {overlap['timestamp'].min().date()} → {overlap['timestamp'].max().date()}  ({len(overlap)} rows)")

        print("\n✅ Loaded. Variables:")
        print("   primary_df, primary_symbol, primary_interval")
        print("   bench_df,   bench_symbol,   bench_interval (None if not chosen)")
        print("   (compat) df, symbol, interval\n")

# Register the handler; nothing is loaded until the button is clicked.
load_btn.on_click(on_load)

VBox(children=(Dropdown(description='Primary', options=(('AAPL  |  1d  |  2010-01-01 → 2025-08-16  |  aapl_1d_…

## Part 2: Initial EDA

### Step 2.1: Sanity Check

In [4]:
# --- Robust OHLCV sanity check (order-safe & beginner-friendly) ---

import numpy as np
import pandas as pd

def sanity_check_prices(
    df: pd.DataFrame,
    name: str = "asset",
    *,
    interval: str | None = None,          # e.g., "1d", "1h" (optional; helps with heuristics)
    first_rows: int = 3,                   # scan first N rows for odd gaps
    open_close_pct_thresh: float = 0.08,   # 8% tolerance (higher by default for daily equities)
    adj_close_scale_tol: float = 0.10,     # 10% tolerance between Close and Adj Close scales
    echo_missing_breakdown: bool = True,
) -> None:
    """
    Prints human-readable QA for an OHLCV DataFrame.
    - Verifies required columns, monotonic timestamps, NaNs/dupes.
    - Checks 'high >= low', non-negative prices, non-zero-only volume.
    - Flags unusually large Open vs Close gaps in the first few rows.
    - Warns if 'Adj Close' is on a different scale than 'close' (bad pipeline).
    """
    msgs: list[str] = []

    # 0) Required columns present?
    required = {"timestamp", "open", "high", "low", "close"}
    missing = required - set(df.columns)
    if missing:
        msgs.append(f"Missing required columns: {sorted(missing)}")

    # 1) Timestamp sanity
    if "timestamp" in df.columns:
        if not np.issubdtype(df["timestamp"].dtype, np.datetime64):
            msgs.append("timestamp is not datetime64; consider parse_dates=['timestamp'] on read.")
        if df["timestamp"].isna().any():
            msgs.append("NaNs in timestamp.")
        dupes = int(df["timestamp"].duplicated().sum())
        if dupes > 0:
            msgs.append(f"{dupes} duplicated timestamps.")
        if not df["timestamp"].is_monotonic_increasing:
            msgs.append("timestamps not strictly increasing (unsorted and/or duplicates present).")

    # 2) NaNs in OHLCV
    ohlcv_cols = [c for c in ["open", "high", "low", "close", "volume"] if c in df.columns]
    nan_counts = df[ohlcv_cols].isna().sum()
    if nan_counts.sum() > 0:
        msgs.append(f"NaNs in OHLCV: {nan_counts.to_dict()}")

    # 3) Price ordering / sign checks
    if {"high", "low"} <= set(df.columns):
        bad = int((df["high"] < df["low"]).sum())
        if bad > 0:
            msgs.append(f"{bad} rows where high < low (corrupt).")
    for c in ["open", "high", "low", "close"]:
        if c in df.columns:
            neg = int((df[c] < 0).sum())
            if neg > 0:
                msgs.append(f"{neg} negative values in '{c}' (invalid for prices).")

    # 4) Large Open vs Close gaps on the first few rows
    #    Daily bars often have real overnight gaps. Use a *percentage* threshold and a higher default (8%).
    if {"open", "close"} <= set(df.columns) and len(df) > 0:
        n = min(first_rows, len(df))
        oc = (df["open"].head(n) - df["close"].head(n)).abs() / df["close"].head(n).replace(0, np.nan)
        if oc.dropna().gt(open_close_pct_thresh).any():
            pct = 100 * open_close_pct_thresh
            hint = "overnight gaps are common on daily bars" if interval == "1d" else "check file alignment"
            msgs.append(f"Large open/close gap (>~{pct:.1f}%) in first {n} row(s) — {hint}.")

    # 5) Volume sanity
    if "volume" in df.columns:
        if df["volume"].fillna(0).sum() == 0:
            msgs.append("All volumes are zero (suspicious for stocks; normal for some crypto sources).")
        if (df["volume"] < 0).any():
            msgs.append("Negative volume values present (invalid).")

    # 6) Adj Close vs Close scale check
    if "Adj Close" in df.columns and "close" in df.columns and not df["Adj Close"].isna().all():
        with np.errstate(divide="ignore", invalid="ignore"):
            ratio = (df["Adj Close"] / df["close"]).replace([np.inf, -np.inf], np.nan).median()
        if pd.notna(ratio) and abs(ratio - 1.0) > adj_close_scale_tol:
            msgs.append(f"'Adj Close' scale differs from 'close' (median ratio ≈ {ratio:.3f}). "
                        "You may be mixing adjusted and unadjusted prices.")

    # 7) Print results
    header = name.upper()
    if msgs:
        print(f"⚠️  {header}: sanity warnings:")
        for m in msgs:
            print("   -", m)
    else:
        print(f"✅ {header}: sanity check passed.")

    # Summary context
    cols = list(df.columns)
    print(f"columns: {cols}")
    if "timestamp" in df.columns:
        try:
            print(f"range:   {df['timestamp'].min().date()} → {df['timestamp'].max().date()}  | rows: {len(df)}")
        except Exception:
            print(f"rows: {len(df)}")
    if echo_missing_breakdown and ohlcv_cols:
        print("missing values per column:")
        print(df.reindex(columns=ohlcv_cols).isna().sum())
    print()

In [5]:
# Run the QA report on whatever the loader cell (Step 1.2) has put in memory.
# primary_df / bench_df only exist after the "Load" button has been clicked, so
# look them up defensively instead of failing with NameError on a fresh kernel.
_primary_for_qa = globals().get("primary_df")
if _primary_for_qa is None:
    print("⚠️ No primary dataset loaded yet — choose a file in Step 1.2, click Load, then re-run this cell.")
else:
    sanity_check_prices(_primary_for_qa, name=primary_symbol, interval=primary_interval)
    _bench_for_qa = globals().get("bench_df")
    if _bench_for_qa is not None:
        sanity_check_prices(_bench_for_qa, name=bench_symbol, interval=bench_interval)

   - Large open/close gap (>~8.0%) in first 3 row(s) — overnight gaps are common on daily bars.
columns: ['timestamp', 'open', 'high', 'low', 'close', 'volume']
range:   2010-01-04 → 2025-08-15  | rows: 3929
missing values per column:
open      0
high      0
low       0
close     0
volume    0
dtype: int64

   - Large open/close gap (>~8.0%) in first 3 row(s) — overnight gaps are common on daily bars.
columns: ['timestamp', 'open', 'high', 'low', 'close', 'volume']
range:   2020-01-02 → 2025-08-15  | rows: 1413
missing values per column:
open      0
high      0
low       0
close     0
volume    0
dtype: int64



### Step 2.2: Closing Price Chart

The closing price is the last traded price of the asset for each interval (e.g., daily close).
Plotting it over time gives an immediate sense of:

	• Whether the asset is trending upward, downward, or moving sideways.
	• Periods of sharp rises or crashes (e.g., during market events).
	• Long-term growth patterns versus short-term fluctuations.

In [6]:
# --- Closing price plot (primary + optional benchmark with fair comparison) ---

import ipywidgets as widgets
from ipywidgets import interact
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np

def _align_on_dates(df1, df2, name_a="a", name_b="b"):
    merged = (
        df1[["timestamp","close"]].rename(columns={"close": f"close_{name_a}"})
        .merge(
            df2[["timestamp","close"]].rename(columns={"close": f"close_{name_b}"}),
            on="timestamp", how="inner"
        )
        .sort_values("timestamp").reset_index(drop=True)
    )
    return merged.dropna()

# Snapshot taken when this cell runs; used ONLY as the initial state of the
# "Overlay benchmark" checkbox. The plotting code looks the benchmark up live.
_have_bench = ("bench_df" in globals()) and (bench_df is not None)


def _plot_close_solo(ax, frame, label, normalize):
    """Draw one close series on `ax` with total return and CAGR in its legend label.

    Gaps in `close` are forward- then backward-filled before plotting. With
    `normalize` the series is rebased so its first value is 100.
    """
    ts = frame["timestamp"]
    vals = frame["close"].astype(float).ffill().bfill()

    years = (ts.iloc[-1] - ts.iloc[0]).days / 365.25
    total_ret = vals.iloc[-1] / vals.iloc[0] - 1 if vals.iloc[0] != 0 else np.nan
    cagr = (1 + total_ret) ** (1 / years) - 1 if years > 0 and np.isfinite(total_ret) else np.nan

    if normalize:
        vals = 100.0 * vals / vals.iloc[0]
        ax.set_ylabel("index (100 = start)")
    else:
        ax.set_ylabel("price")

    ax.plot(ts, vals, lw=2, label=f"{label} ({total_ret:+.1%}, CAGR {cagr:.2%})")


@interact(
    show_bench = widgets.Checkbox(value=_have_bench, description="Overlay benchmark"),
    normalize  = widgets.Checkbox(value=False,       description="Normalize (rebase to 100)"),
    log_scale  = widgets.Checkbox(value=False,       description="Log scale (y)")
)
def plot_close(show_bench, normalize, log_scale):
    """Plot the primary close price, optionally overlaid with the benchmark.

    Reads the notebook globals `df`, `symbol`, `interval`, `bench_df` and
    `bench_symbol` on every call, so a benchmark loaded (or removed) after this
    cell was executed is picked up the next time a checkbox is toggled.

    Parameters
    ----------
    show_bench : overlay the benchmark on the dates both series share.
    normalize : rebase each plotted series to 100 at its first plotted point.
    log_scale : use a logarithmic y axis.
    """
    primary = globals().get("df")
    if primary is None or primary.empty:
        print("⚠️ Primary DataFrame `df` is missing or empty.")
        return

    sym = str(globals().get("symbol") or "ASSET").upper()
    iv_name = globals().get("interval")
    iv = f" ({iv_name})" if iv_name else ""

    # Live lookup: previously this was gated on the stale `_have_bench` flag,
    # so a benchmark loaded after the cell first ran could never be overlaid.
    bench_frame = globals().get("bench_df")
    bench = str(globals().get("bench_symbol") or "BENCHMARK").upper() if bench_frame is not None else None

    fig, ax = plt.subplots(figsize=(11, 5))
    title = f"{sym} price{iv}"

    merged = None
    if show_bench and bench_frame is not None:
        merged = _align_on_dates(primary, bench_frame, name_a="p", name_b="b")
        if merged.empty:
            print("⚠️ No overlapping dates with benchmark — cannot overlay.")
            merged = None

    if merged is None:
        # Primary only (still shows return & CAGR)
        _plot_close_solo(ax, primary, sym, normalize)
    else:
        ts = merged["timestamp"]
        p  = merged["close_p"].astype(float).to_numpy()
        b  = merged["close_b"].astype(float).to_numpy()

        # Returns & CAGR are measured on the overlap only, so both series
        # cover exactly the same period.
        p_ret = p[-1]/p[0] - 1 if p[0] != 0 else np.nan
        b_ret = b[-1]/b[0] - 1 if b[0] != 0 else np.nan
        years = (ts.iloc[-1] - ts.iloc[0]).days / 365.25
        p_cagr = (p[-1]/p[0])**(1/years) - 1 if years > 0 and p[0] > 0 else np.nan
        b_cagr = (b[-1]/b[0])**(1/years) - 1 if years > 0 and b[0] > 0 else np.nan

        if normalize:
            p = 100.0 * p / p[0]
            b = 100.0 * b / b[0]
            ax.set_ylabel("index (100 at start of overlap)")
        else:
            ax.set_ylabel("price")

        ax.plot(ts, p, lw=2, label=f"{sym} ({p_ret:+.1%}, CAGR {p_cagr:.2%})")
        ax.plot(ts, b, lw=1.6, alpha=0.95, label=f"{bench} ({b_ret:+.1%}, CAGR {b_cagr:.2%})")

        title += f" — vs {bench}"
        ax.text(0.01, 0.01,
                f"Overlap: {ts.iloc[0].date()} → {ts.iloc[-1].date()}",
                transform=ax.transAxes, fontsize=9, color="gray")

    # Cosmetics
    ax.set_title(title)
    ax.set_xlabel("date")
    if log_scale:
        ax.set_yscale("log")
    ax.grid(alpha=0.3, linestyle="--")
    ax.legend(loc="best")

    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
    fig.autofmt_xdate(rotation=0)
    plt.show()

interactive(children=(Checkbox(value=True, description='Overlay benchmark'), Checkbox(value=False, description…

### Step 2.3: Trading Volume

The volume chart shows how much of the asset is traded over time. This matters because large price moves on high volume indicate stronger conviction (more participants), while moves on low volume can be misleading.

What’s shown here:

	• Blue bars = raw trading volume per bar (one bar per row, e.g. one day on daily data).
	• Orange line = rolling average of volume (default = 20 bars ≈ one month for daily data).
	• “X-bar average” means the average over the last X data points. One bar = one row of your dataset (daily = 1 day, hourly = 1 hour, weekly = 1 week). So a 20-bar average on daily data ≈ 1 month of trading.
	• This helps you compare today’s activity to “typical” recent levels.
	• You can adjust the window interactively depending on your data frequency (daily, weekly, hourly).
	• Red circles = top-5 spike days (unusually high activity).

How to interpret:

	• Spikes far above the average = event days (earnings, splits, news).
	• Persistent rising average = growing participation.
	• Very low volume = quieter regimes, where signals may be less reliable.

Typical ranges for the rolling average:

	• Daily data → 10–30 bars (≈ 2–6 weeks)
	• Weekly data → 4–12 bars (≈ 1–3 months)
	• Hourly data → 24–168 bars (≈ 1–7 days)

In [7]:
# --- Trading Volume Visualization (bars + rolling averages + spikes, with overlay) ---

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import ipywidgets as widgets

# Sliders + checkbox
# The checkbox starts ticked only when a benchmark is currently loaded.
avg_slider = widgets.IntSlider(description="Avg window", value=20, min=5, max=100, step=5)
topn_slider = widgets.IntSlider(description="Mark top-N", value=5, min=3, max=20, step=1)
bench_check = widgets.Checkbox(
    description="Overlay benchmark (overlap only)",
    value=(bench_df is not None),
)

def plot_volume(avg_window, topn, show_bench):
    """Plot traded volume (in millions) for the primary asset, plus an optional overlay.

    Top panel: volume bars, an `avg_window`-bar rolling mean and the `topn`
    largest bars marked in red, all for the global `df`.
    Bottom panel (only when `show_bench` is set and `bench_df` is loaded): the
    same three layers for both primary and benchmark, restricted to the
    timestamps the two frames share.
    """
    with_bench = bool(show_bench and bench_df is not None)
    fig, axes = plt.subplots(
        2 if with_bench else 1,
        1,
        figsize=(12, 8 if with_bench else 5),
        sharex=False,
    )
    # plt.subplots returns a bare Axes for a 1x1 grid; normalise to a sequence.
    if not isinstance(axes, np.ndarray):
        axes = [axes]

    # --- Primary asset ---
    top_ax = axes[0]
    volume_m = df["volume"] / 1e6
    sym = symbol.upper()

    top_ax.bar(df["timestamp"], volume_m, width=1.0, alpha=0.3, label=f"{sym} volume")
    rolling_primary = volume_m.rolling(avg_window).mean()
    top_ax.plot(df["timestamp"], rolling_primary, lw=2, label=f"{sym} {avg_window}-bar avg")

    spikes = volume_m.nlargest(topn)
    spike_dates = df.loc[spikes.index, "timestamp"]
    top_ax.scatter(spike_dates, spikes.values,
                   color="red", s=50, zorder=5, label=f"{sym} top-{topn}")

    top_ax.set_title(f"{sym} — Trading volume ({interval})")
    top_ax.set_ylabel("millions of shares")
    top_ax.legend()
    top_ax.grid(alpha=0.3, linestyle="--")

    # --- Benchmark overlay (only if overlap exists) ---
    if with_bench:
        both = df[["timestamp", "volume"]].merge(
            bench_df[["timestamp", "volume"]],
            on="timestamp", how="inner", suffixes=("_p", "_b"),
        )
        if not both.empty:
            bottom_ax = axes[1]
            bsym = bench_symbol.upper()
            stamps = both["timestamp"]
            primary_m = both["volume_p"] / 1e6
            bench_m = both["volume_b"] / 1e6

            # Bars
            bottom_ax.bar(stamps, primary_m, width=1.0, alpha=0.3, label=f"{sym} volume")
            bottom_ax.bar(stamps, bench_m, width=1.0, alpha=0.3, label=f"{bsym} volume")

            # Rolling averages
            bottom_ax.plot(stamps, primary_m.rolling(avg_window).mean(),
                           lw=2, label=f"{sym} {avg_window}-bar avg")
            bottom_ax.plot(stamps, bench_m.rolling(avg_window).mean(),
                           lw=2, label=f"{bsym} {avg_window}-bar avg")

            # Top spikes (the merged frame has a fresh 0..n-1 index, so the
            # labels returned by nlargest double as positions for .iloc)
            primary_spikes = primary_m.nlargest(topn)
            bench_spikes = bench_m.nlargest(topn)
            bottom_ax.scatter(stamps.iloc[primary_spikes.index], primary_spikes.values,
                              color="red", s=40, label=f"{sym} top-{topn}")
            bottom_ax.scatter(stamps.iloc[bench_spikes.index], bench_spikes.values,
                              color="orange", s=40, label=f"{bsym} top-{topn}")

            bottom_ax.set_title(f"Volume overlay — {sym} vs {bsym}  |  "
                                f"Overlap: {stamps.min().date()} → {stamps.max().date()}")
            bottom_ax.set_ylabel("millions of shares")
            bottom_ax.set_xlabel("date")
            bottom_ax.legend()
            bottom_ax.grid(alpha=0.3, linestyle="--")
        else:
            print("⚠️ No overlapping dates with benchmark.")

    # Cosmetics for all axes
    for panel in axes:
        panel.xaxis.set_major_locator(mdates.YearLocator())
        panel.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))

    plt.tight_layout()
    plt.show()

# Interactive widget
# widgets.interact returns the wrapped function; the trailing semicolon keeps
# IPython from echoing "<function __main__.plot_volume ...>" under the plot.
widgets.interact(
    plot_volume,
    avg_window=avg_slider,
    topn=topn_slider,
    show_bench=bench_check
);

interactive(children=(IntSlider(value=20, description='Avg window', min=5, step=5), IntSlider(value=5, descrip…

<function __main__.plot_volume(avg_window, topn, show_bench)>

### Step 2.4: Cumulative Returns with Risk Metrics

This chart shows how an initial investment would have grown (or shrunk) over time if you bought and held the asset.  
It also adds risk insights like **drawdowns** (losses from peaks) and the **CAGR** (average annual growth).

What it does:
- Start with your chosen initial investment (e.g., $1,000).
- Each day, the price moves up or down. We calculate the daily return (% change from yesterday).
- Returns are **compounded** — every gain or loss builds on the prior day’s value.
- This creates a portfolio curve showing how your money evolves over time.
- On top of growth, we shade drawdown periods (the drops from prior peaks) and calculate key stats.

Key terms explained:
- **Compounded Growth**: Returns multiply over time. Example → +10% then –10% leaves you at –1%, not 0.
- **Drawdown**: How much you fall from the latest peak. Big drawdowns = rough ride for investors.
- **CAGR (Compound Annual Growth Rate)**: The “smoothed” yearly growth rate that would turn your start value into the final value, as if it grew steadily.
- **Max Drawdown**: The worst % drop from a peak during the period — a key risk measure.

How to read the chart:
- **Purple line** = portfolio growth (buy & hold performance).
- **Shaded areas** = drawdowns (losses from peaks).
- **Metrics box** (bottom-right) = Final $, total % return, CAGR, and Max Drawdown.
- **Steady upward slope** → consistent growth.  
- **Sharp drops** → crashes or corrections.  
- **Flat periods** → stagnation.  
- **Start date slider** → shows how entry timing changes your outcome.  
- **Log scale toggle** → makes exponential growth periods easier to compare.

Finance note ⚖️:
- Using **Close prices** = raw market performance (ignores dividends).  
- Using **Adjusted Close** = assumes dividends reinvested and stock splits accounted for → truer “investor returns.”  
- That’s why numbers here may differ from Google/Yahoo charts, which usually use Adjusted Close.

👉 Example:  
If you invested $1,000 in Apple in 2010 and it grew to ~$36,000 by 2025, that’s a +3500% return.  
The CAGR would be ~26% per year (36× over roughly 15.6 years) — meaning, on average, it’s like compounding 26% every year, even though the actual path had big ups and downs.

In [8]:
from ipywidgets import interact
import ipywidgets as widgets
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd

def _get_bench():
    """Return (bench_df, bench_label) if a usable benchmark is in memory; else (None, None)."""
    bdf = globals().get("bench_df", None)
    if isinstance(bdf, pd.DataFrame) and not bdf.empty:
        return bdf, globals().get("bench_symbol", "Benchmark")
    return None, None

# Date slider options (built from primary df)
# drop_duplicates() keeps the values as pandas Timestamps on every pandas
# version. Series.unique() returned raw numpy datetime64 values before
# pandas 2.0, and those have no .strftime().
date_options = [(ts.strftime("%Y-%m-%d"), ts) for ts in df["timestamp"].drop_duplicates()]
default_date = df["timestamp"].iloc[0]

@interact(
    invest=widgets.IntSlider(value=1000, min=100, max=20000, step=100, description="Invest $"),
    start=widgets.SelectionSlider(options=date_options, value=default_date,
                                  description="Start", continuous_update=True),
    log_scale=widgets.Checkbox(value=False, description="Log scale (y)"),
    show_stats=widgets.Checkbox(value=True, description="Show stats"),
    # default reflects what's in memory *right now*
    show_bench=widgets.Checkbox(value=(_get_bench()[0] is not None), description="Overlay benchmark")
)
def plot_cumulative(invest, start, log_scale, show_stats, show_bench):
    """Plot the buy-and-hold growth of `invest` dollars in the primary asset.

    Parameters
    ----------
    invest : amount invested on the first bar at or after `start`.
    start : first timestamp to include (rows of the global `df` before it are dropped).
    log_scale : use a logarithmic y axis.
    show_stats : draw the metrics box and highlight the worst drawdown window.
    show_bench : overlay the benchmark returned by `_get_bench()`, rebased to
        `invest` at the first timestamp both series share.

    Notes
    -----
    Benchmark CAGR and the "vs benchmark" delta are both measured on the
    overlap of the two series, so they always cover the same period even when
    the benchmark file starts later than the primary one.
    """
    # Slice
    sub = df[df["timestamp"] >= start].copy()
    if sub.empty:
        print("No data from this start date.")
        return

    # Returns & compounding
    sub["ret"] = sub["close"].pct_change().fillna(0.0)
    sub["cum"] = (1.0 + sub["ret"]).cumprod()
    sub["portfolio"] = invest * sub["cum"]

    # Audit & CAGR: the compounded product should equal last/first price.
    cum_ratio_end = sub["close"].iloc[-1] / sub["close"].iloc[0]
    if abs(sub["cum"].iloc[-1] - cum_ratio_end) > 1e-3:
        print("⚠️ Cumprod and price-ratio disagree. Check gaps/NaNs or whether 'close' is adjusted.")
    years = (sub["timestamp"].iloc[-1] - sub["timestamp"].iloc[0]).days / 365.25
    cagr  = cum_ratio_end**(1/years) - 1 if years > 0 else float("nan")

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(sub["timestamp"], sub["portfolio"], color="purple", lw=2, label=f"Growth of ${invest:,}")

    # Drawdown shading (area between the curve and its running maximum)
    runmax   = sub["portfolio"].cummax()
    drawdown = sub["portfolio"] / runmax - 1.0
    ax.fill_between(sub["timestamp"], sub["portfolio"], runmax,
                    where=sub["portfolio"] < runmax, alpha=0.15, label="Drawdown")

    # Worst DD band & label (only when stats on)
    worst_dd = float(drawdown.min()) if np.isfinite(drawdown.min()) else 0.0
    if show_stats and np.isfinite(worst_dd) and worst_dd < 0:
        dd_vals = drawdown.values
        trough_idx = int(np.argmin(dd_vals))
        runmax_vals = runmax.values
        # argmax returns the FIRST position of the maximum, i.e. the bar on
        # which the peak preceding the trough was set.
        peak_idx = int(np.argmax(runmax_vals[:trough_idx+1]))
        t0 = sub["timestamp"].iloc[peak_idx]
        t1 = sub["timestamp"].iloc[trough_idx]
        ax.axvspan(t0, t1, color="red", alpha=0.08, zorder=0)
        ax.text(sub["timestamp"].iloc[0], ax.get_ylim()[1]*0.98,
                f"Worst DD: {worst_dd:.1%}  ({t0.date()} → {t1.date()})",
                fontsize=9, color="#b04040", va="top")

    # Start / End
    start_val = float(sub["portfolio"].iloc[0])
    end_val   = float(sub["portfolio"].iloc[-1])
    pct_total = (end_val / start_val - 1.0) * 100.0
    ax.scatter(sub["timestamp"].iloc[0], start_val, color="green", s=60, zorder=3, label=f"Start: ${start_val:,.2f}")
    ax.scatter(sub["timestamp"].iloc[-1], end_val,   color="red",   s=60, zorder=3, label=f"End: ${end_val:,.2f}")
    ax.text(sub["timestamp"].iloc[-1], end_val, f"  Final: ${end_val:,.2f}  ({pct_total:+.1f}%)",
            va="center", fontsize=10, color="red")

    # Benchmark overlay (look up the live global every time)
    bench_cagr = None
    asset_overlap_cagr = None
    if show_bench:
        bdf, blabel = _get_bench()
        if bdf is None:
            # Gentle hint on the chart rather than a noisy print
            ax.text(0.99, 0.98, "No benchmark loaded", transform=ax.transAxes,
                    ha="right", va="top", fontsize=9, color="gray")
        else:
            bsub = bdf.loc[bdf["timestamp"] >= start, ["timestamp", "close"]]
            merged = sub[["timestamp","close"]].rename(columns={"close":"asset"}).merge(
                bsub.rename(columns={"close":"bench"}), on="timestamp", how="inner"
            )
            if not merged.empty:
                asset_ix = merged["asset"] / merged["asset"].iloc[0]
                bench_ix = merged["bench"] / merged["bench"].iloc[0]

                overlap_start = merged["timestamp"].iloc[0]
                bench_label = blabel
                if overlap_start > sub["timestamp"].iloc[0]:
                    # Make it visible that the benchmark line is rebased later
                    # than the primary curve.
                    bench_label = f"{blabel} (from {overlap_start.date()})"
                ax.plot(merged["timestamp"], invest * bench_ix, lw=1.8, alpha=0.9, label=bench_label)

                # Annualise over the OVERLAP span. Using the primary span here
                # misstated the benchmark CAGR whenever the benchmark history
                # was shorter than the primary one.
                overlap_years = (merged["timestamp"].iloc[-1] - overlap_start).days / 365.25
                if overlap_years > 0:
                    bench_cagr = bench_ix.iloc[-1]**(1/overlap_years) - 1
                    asset_overlap_cagr = asset_ix.iloc[-1]**(1/overlap_years) - 1

    # Metrics box
    if show_stats:
        lines = [
            f"Final: ${end_val:,.0f}",
            f"Total: {pct_total:+.1f}%",
            f"CAGR:  {cagr:.2%}",
            f"Max DD: {worst_dd:.1%}",
        ]
        if bench_cagr is not None and np.isfinite(bench_cagr):
            # Like-for-like delta: both CAGRs are measured on the shared dates.
            lines.append(f"vs {globals().get('bench_symbol','Bench')} CAGR: {(asset_overlap_cagr - bench_cagr):+.2%} (Δ)")
        ax.text(0.985, 0.02, "\n".join(lines),
                transform=ax.transAxes, ha="right", va="bottom",
                fontsize=9, bbox=dict(facecolor="white", alpha=0.85, boxstyle="round,pad=0.3"))

    # Cosmetics
    title_tag = "Adj Close" if ("Adj Close" in df.columns and df["close"].equals(df["Adj Close"])) else "Close"
    ttl = f"Cumulative Returns (Compounded Growth)\n{title_tag}; CAGR: {cagr:.2%}"
    if bench_cagr is not None and np.isfinite(bench_cagr):
        ttl += f" | {globals().get('bench_symbol','Bench')} CAGR: {bench_cagr:.2%}"
    ax.set_title(ttl)
    ax.set_xlabel("Date")
    ax.set_ylabel("Portfolio Value ($)")
    if log_scale:
        ax.set_yscale("log")
    ax.grid(alpha=0.3, linestyle="--")
    ax.legend(loc="best")
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
    ax.text(0.01, 0.01, f"Source: {title_tag} | Rebased from {sub['timestamp'].iloc[0].date()}",
            transform=ax.transAxes, fontsize=9, color="gray")
    plt.show()

interactive(children=(IntSlider(value=1000, description='Invest $', max=20000, min=100, step=100), SelectionSl…

### Cumulative + DCA

In [9]:
# --- Cumulative returns with DCA (lump sum + recurring contributions) ---
# What this cell does:
# - Lets you choose an initial investment AND a recurring contribution (e.g., $100/month)
# - Snaps each contribution to the next available trading day in your data
# - Buys fractional shares at those dates; portfolio value = shares * price over time
# - Shows Total Invested, Final Value, P/L, Multiple, and money-weighted return (XIRR)
# - Optional benchmark overlay runs the EXACT same contribution schedule on the benchmark
# - Works with your existing df (primary) and optional bench_df prepared earlier

from ipywidgets import interact
import ipywidgets as widgets
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# ---------- helpers ----------

def _get_bench():
    """Return (bench_df, bench_label) if a usable benchmark is in memory; else (None, None)."""
    bdf = globals().get("bench_df", None)
    if isinstance(bdf, pd.DataFrame) and not bdf.empty:
        return bdf, globals().get("bench_symbol", "Benchmark").upper()
    return None, None

def _next_trading_on_or_after(ts_series: pd.Series, d: pd.Timestamp):
    """Find the first index in ts_series where timestamp >= d. Returns index or None."""
    i = ts_series.searchsorted(d)
    if i >= len(ts_series):
        return None
    return int(i)

def _build_schedule(start_ts: pd.Timestamp,
                    end_ts:   pd.Timestamp,
                    freq:     str = "M",
                    day:      int = 1):
    """
    Build nominal contribution dates between start and end:
    - freq: 'M' (monthly), 'W' (weekly), 'Q' (quarterly)
    - day: day-of-month (1..28) used for monthly/quarterly anchors
    """
    if freq == "W":
        dates = pd.date_range(start_ts.normalize(), end_ts.normalize(), freq="W")
    else:
        # monthly/quarterly: use month starts then shift to desired day
        base = pd.date_range(start_ts.normalize(), end_ts.normalize(),
                             freq=("QS" if freq == "Q" else "MS"))
        # clamp day to [1,28] to avoid month-end issues
        day = int(np.clip(day, 1, 28))
        dates = []
        for d in base:
            dates.append(pd.Timestamp(year=d.year, month=d.month, day=day))
        dates = pd.to_datetime(dates)
    # ensure we don't include the very first date if it's exactly the start (we'll buy lump-sum at t0)
    return dates[dates > start_ts.normalize()]

def _simulate_dca(sub: pd.DataFrame,
                  initial: float,
                  contrib: float,
                  freq:    str = "M",
                  day:     int = 1):
    """
    Vectorized DCA on a single price series (sub with columns timestamp, close).
    Returns: portfolio Series, shares Series, contributions DataFrame, totals dict.
    """
    sub = sub.copy()
    ts  = sub["timestamp"].reset_index(drop=True)
    px  = sub["close"].astype(float).reset_index(drop=True)
    n   = len(sub)
    buys_shares = np.zeros(n, dtype=float)

    # Initial buy at first available bar
    if n == 0:
        raise ValueError("Empty sub-series for DCA simulation.")
    buys_shares[0] += (initial / px.iloc[0]) if px.iloc[0] > 0 else 0.0

    # Build nominal schedule and snap to trading days
    sched_nom = _build_schedule(ts.iloc[0], ts.iloc[-1], freq=freq, day=day)
    snapped = []
    for d in sched_nom:
        j = _next_trading_on_or_after(ts, d)
        if j is not None:
            snapped.append((j, ts.iloc[j]))
    snapped = list(dict.fromkeys(snapped))  # dedupe if multiple map to same bar

    # Add recurring buys
    for j, _dt in snapped:
        if px.iloc[j] > 0:
            buys_shares[j] += contrib / px.iloc[j]

    shares = pd.Series(buys_shares).cumsum()
    portfolio = shares * px

    # Cashflow table for XIRR (negative = cash out, positive = cash in at the *end*)
    cashflows = []
    cashflows.append((ts.iloc[0].to_pydatetime(), -float(initial)))
    for j, _dt in snapped:
        cashflows.append((ts.iloc[j].to_pydatetime(), -float(contrib)))
    cashflows.append((ts.iloc[-1].to_pydatetime(), float(portfolio.iloc[-1])))

    totals = {
        "total_invested": float(initial + contrib * len(snapped)),
        "final_value":    float(portfolio.iloc[-1]),
        "contrib_count":  int(len(snapped))
    }
    return portfolio, shares, pd.DataFrame({"timestamp":[d for _,d in snapped]}), totals, cashflows

def _xirr(cashflows, guess=0.15, max_iter=100, tol=1e-6):
    """
    Simple XIRR solver via Newton-Raphson.
    cashflows: list of (datetime, amount) with positives = inflows (final value), negatives = investments
    Returns annualized rate, or np.nan if it fails.
    """
    if len(cashflows) < 2:
        return np.nan
    t0 = cashflows[0][0]
    def npv(rate):
        v = 0.0
        for dt, amt in cashflows:
            y = (dt - t0).days / 365.25
            v += amt / ((1 + rate) ** y)
        return v
    def dnpv(rate):
        v = 0.0
        for dt, amt in cashflows:
            y = (dt - t0).days / 365.25
            if rate == -1:  # avoid blow-up
                return np.inf
            v += -y * amt / ((1 + rate) ** (y + 1))
        return v
    r = guess
    for _ in range(max_iter):
        f = npv(r)
        df = dnpv(r)
        if df == 0:
            return np.nan
        r_new = r - f/df
        if abs(r_new - r) < tol:
            return r_new
        r = r_new
    return np.nan

# ---------- interactive plot ----------

# Date slider from df.
# Iterate the de-duplicated Series rather than `.unique()`: depending on the pandas
# version, `.unique()` on a tz-naive datetime column can hand back raw numpy
# datetime64 values, which have no `.strftime`. Iterating a Series always yields
# pd.Timestamp, and first-appearance order is preserved either way.
date_options = [(ts.strftime("%Y-%m-%d"), ts) for ts in df["timestamp"].drop_duplicates()]
default_date = df["timestamp"].iloc[0]

@interact(
    start       = widgets.SelectionSlider(options=date_options, value=default_date, description="Start", continuous_update=True),
    initial_usd = widgets.IntSlider(value=1000, min=0, max=20000, step=100, description="Initial ($)"),
    contrib_usd = widgets.IntSlider(value=100,  min=0, max=5000,  step=25,  description="Contrib ($)"),
    freq        = widgets.Dropdown(options=[("Monthly","M"), ("Weekly","W"), ("Quarterly","Q")], value="M", description="Frequency"),
    day_of_mo   = widgets.IntSlider(value=1, min=1, max=28, step=1, description="Day (M/Q)"),
    log_scale   = widgets.Checkbox(value=False, description="Log scale (y)"),
    show_bench  = widgets.Checkbox(value=(_get_bench()[0] is not None), description="Overlay benchmark"),
    show_stats  = widgets.Checkbox(value=True, description="Show stats")
)
def plot_cumulative_dca(start, initial_usd, contrib_usd, freq, day_of_mo, log_scale, show_bench, show_stats):
    """
    Plot the value of a lump-sum + recurring-contribution (DCA) portfolio over time.

    Driven by the ipywidgets controls declared in the decorator above.

    Parameters
    ----------
    start       : first timestamp of the simulated window (rows of the global `df`
                  with timestamp >= start are used).
    initial_usd : dollars invested at the first bar.
    contrib_usd : dollars invested at every scheduled contribution.
    freq        : 'M', 'W' or 'Q' - contribution frequency passed to `_simulate_dca`.
    day_of_mo   : day-of-month anchor for monthly/quarterly schedules.
    log_scale   : use a logarithmic y axis.
    show_bench  : also simulate and draw the same plan on the benchmark from `_get_bench()`.
    show_stats  : draw the summary box (final value, invested, P/L, max drawdown).

    Reads the notebook global `df`. Prints a message and returns without
    plotting when no rows exist on or after `start`.
    """
    # Primary slice
    sub = df[df["timestamp"] >= start][["timestamp","close"]].copy()
    if sub.empty:
        print("No data from this start date.")
        return

    # Simulate DCA on primary
    # (`port` comes back with a fresh 0..n-1 index while `sub` keeps df's index;
    #  the plotting calls below pair them up by position.)
    port, shares, sched_df, totals, cashflows = _simulate_dca(
        sub, initial=initial_usd, contrib=contrib_usd, freq=freq, day=day_of_mo
    )
    # Price-only CAGR (for reference)
    years = (sub["timestamp"].iloc[-1] - sub["timestamp"].iloc[0]).days / 365.25
    price_mult = float(sub["close"].iloc[-1] / sub["close"].iloc[0])
    cagr_price = (price_mult**(1/years) - 1) if years > 0 else np.nan
    # Money-weighted return
    xirr = _xirr(cashflows)

    # Plot
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(sub["timestamp"], port, color="purple", lw=2, label=f"Portfolio (DCA)")

    # Drawdown shading: fill between the portfolio and its running maximum
    runmax   = port.cummax()
    drawdown = port / runmax - 1.0  # fraction below the running peak (<= 0); used in the stats box
    ax.fill_between(sub["timestamp"], port, runmax, where=(port < runmax), alpha=0.15, label="Drawdown")

    # Mark contribution dots (optional: big markers so users see schedule)
    if len(sched_df):
        # snap contributions to plotted values
        contrib_idx = sub["timestamp"].searchsorted(sched_df["timestamp"].values)
        contrib_idx = contrib_idx[contrib_idx < len(sub)]  # drop positions past the last bar
        ax.scatter(sub["timestamp"].iloc[contrib_idx], port.iloc[contrib_idx],
                   s=20, color="gray", alpha=0.6, label="Contrib points")

    # Start/End markers + final label
    ax.scatter(sub["timestamp"].iloc[0], port.iloc[0], color="green", s=60, zorder=3, label=f"Start: ${port.iloc[0]:,.2f}")
    ax.scatter(sub["timestamp"].iloc[-1], port.iloc[-1], color="red", s=60, zorder=3, label=f"End: ${port.iloc[-1]:,.2f}")
    ax.text(sub["timestamp"].iloc[-1], port.iloc[-1],
            f"  Final: ${port.iloc[-1]:,.2f}", va="center", fontsize=10, color="red")

    # Optional: run the same schedule on the benchmark
    # (the benchmark is simulated independently on its own bars from `start` onward)
    bench_xirr = None
    bench_label = None
    if show_bench:
        bdf, bench_label = _get_bench()
        if bdf is None:
            # Checkbox ticked but nothing loaded: say so on the chart instead of failing
            ax.text(0.99, 0.98, "No benchmark loaded", transform=ax.transAxes,
                    ha="right", va="top", fontsize=9, color="gray")
        else:
            bsub = bdf[bdf["timestamp"] >= start][["timestamp","close"]].copy()
            if not bsub.empty:
                # Only b_port and b_cashflows are used below; the other results are discarded
                b_port, b_shares, b_sched_df, b_totals, b_cashflows = _simulate_dca(
                    bsub, initial=initial_usd, contrib=contrib_usd, freq=freq, day=day_of_mo
                )
                ax.plot(bsub["timestamp"], b_port, lw=1.8, alpha=0.9, label=f"{bench_label} (DCA)")
                bench_xirr = _xirr(b_cashflows)

    # Cosmetics
    # Title says "Adj Close" only when df carries an "Adj Close" column identical to "close"
    title_tag = "Adj Close" if ("Adj Close" in df.columns and df["close"].equals(df.get("Adj Close", df["close"]))) else "Close"
    title = f"Cumulative Returns (DCA: lump + recurring)\n{title_tag}; Price CAGR: {cagr_price:.2%} | XIRR: {xirr:.2%}"
    if bench_xirr is not None and np.isfinite(bench_xirr):
        title += f" | {bench_label} XIRR: {bench_xirr:.2%}"
    ax.set_title(title)
    ax.set_xlabel("Date"); ax.set_ylabel("Portfolio Value ($)")
    if log_scale: ax.set_yscale("log")
    ax.grid(alpha=0.3, linestyle="--")
    ax.legend(loc="upper left", frameon=True)
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
    # Footer line; max(..., 1) keeps the multiple finite when nothing was invested
    ax.text(0.01, 0.01,
            f"Invested: ${totals['total_invested']:,.0f}  |  Contribs: {totals['contrib_count']}  |  Multiple: {totals['final_value']/max(totals['total_invested'],1):.2f}×",
            transform=ax.transAxes, fontsize=9, color="gray")

    # Optional stats box with drawdown and P/L
    if show_stats:
        # drawdown can be NaN where the running max is 0 (0/0); fall back to 0 if no finite minimum
        max_dd = float(drawdown.min()) if np.isfinite(drawdown.min()) else 0.0
        pnl = totals["final_value"] - totals["total_invested"]
        box = (f"Final: ${totals['final_value']:,.0f}\n"
               f"Total Invested: ${totals['total_invested']:,.0f}\n"
               f"P/L: {pnl:+,.0f}\n"
               f"Max DD: {max_dd:.1%}")
        ax.text(0.985, 0.02, box,
                transform=ax.transAxes, ha="right", va="bottom",
                fontsize=9, bbox=dict(facecolor="white", alpha=0.85, boxstyle="round,pad=0.3"))

    plt.show()

interactive(children=(SelectionSlider(description='Start', options=(('2010-01-04', Timestamp('2010-01-04 00:00…

### Step 2.5: Candlestick

In [19]:
# === Pro Candles: resample, x-unified hover, MA, range tools, separate benchmark ===

import pandas as pd, numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, clear_output

# ---------- helpers ----------
def _prep(_df: pd.DataFrame) -> pd.DataFrame:
    d = _df.copy()
    d["timestamp"] = pd.to_datetime(d["timestamp"]).dt.tz_localize(None)
    d = d.sort_values("timestamp").drop_duplicates("timestamp").reset_index(drop=True)
    if "Adj Close" in d.columns and not d["Adj Close"].isna().all():
        d["close"] = d["Adj Close"]
    return d

def _resample_ohlcv(d: pd.DataFrame, rule: str) -> pd.DataFrame:
    """rule in {'Original','W','M'}"""
    if rule in (None, "", "Original", "D"):
        return d.copy()
    agg = {"open":"first","high":"max","low":"min","close":"last"}
    if "volume" in d.columns:
        agg["volume"] = "sum"
    s = d.set_index("timestamp").sort_index().resample(rule).apply(agg)
    s = s.dropna(subset=["open","high","low","close"]).reset_index()
    return s

def _vol_scale_and_label(y: pd.Series | None):
    if y is None or len(y)==0: return 1.0, "units"
    m = float(np.nanmax(y))
    if m >= 1e9: return 1e9, "billions"
    if m >= 1e6: return 1e6, "millions"
    if m >= 1e3: return 1e3, "thousands"
    return 1.0, "units"

def _make_price_panel(d: pd.DataFrame, *, title:str, chart_type:str,
                      log_y:bool, show_ma:bool, ma_s:int, ma_l:int) -> go.Figure:
    """
    Build a two-row Plotly figure: price (candles or OHLC bars) on top, volume below.

    Parameters
    ----------
    d          : frame with `timestamp`, `open`, `high`, `low`, `close`; `volume` is optional.
    title      : figure title.
    chart_type : "ohlc" draws go.Ohlc; any other value draws go.Candlestick.
    log_y      : use a logarithmic price axis.
    show_ma    : overlay rolling-mean lines of `close`.
    ma_s, ma_l : window lengths for the short and long moving averages
                 (each is drawn only when its window is > 1).

    Returns
    -------
    The assembled go.Figure; the caller is responsible for showing it.
    """
    have_vol = "volume" in d.columns
    # Pick a divisor/unit so the volume axis shows small numbers (e.g. "millions")
    vol_scale, vol_label = _vol_scale_and_label(d["volume"] if have_vol else None)

    # Hover text with OHLC values for candlesticks (Plotly 6 has no hovertemplate on Candlestick)
    ohlc_text = [f"O {o:.2f} | H {h:.2f} | L {l:.2f} | C {c:.2f}"
                 for o,h,l,c in zip(d["open"], d["high"], d["low"], d["close"])]

    # Row 1 = price (70% of the height), row 2 = volume (30%), sharing one x axis
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                        row_heights=[0.70, 0.30], vertical_spacing=0.06)

    # Arguments common to both price trace types
    price_kw = dict(x=d["timestamp"], open=d["open"], high=d["high"],
                    low=d["low"], close=d["close"], name="OHLC",
                    increasing_line_color="#16a34a", decreasing_line_color="#ef4444")

    if chart_type == "ohlc":
        fig.add_trace(go.Ohlc(**price_kw), row=1, col=1)
    else:
        fig.add_trace(go.Candlestick(**price_kw, text=ohlc_text, hoverinfo="x+name+text"),
                      row=1, col=1)

    # Moving averages of close; the first (window-1) points are NaN and are not drawn
    if show_ma:
        if ma_s>1: fig.add_trace(go.Scatter(x=d["timestamp"], y=d["close"].rolling(ma_s).mean(),
                                            name=f"MA{ma_s}", mode="lines",
                                            line=dict(width=1.5, color="#3b82f6")), row=1, col=1)
        if ma_l>1: fig.add_trace(go.Scatter(x=d["timestamp"], y=d["close"].rolling(ma_l).mean(),
                                            name=f"MA{ma_l}", mode="lines",
                                            line=dict(width=1.5, color="#f59e0b")), row=1, col=1)

    # Volume bars, colored green when close >= open and red otherwise
    if have_vol:
        up = d["close"] >= d["open"]
        vol_colors = np.where(up, "rgba(34, 197, 94, 0.45)", "rgba(239, 68, 68, 0.45)")
        fig.add_trace(go.Bar(x=d["timestamp"], y=d["volume"]/vol_scale,
                             marker_color=vol_colors, name=f"Volume ({vol_label})"),
                      row=2, col=1)

    # Layout & interaction
    fig.update_layout(
        title=title,
        template="plotly_white",
        hovermode="x unified",                 # single vertical hover line
        legend=dict(orientation="h", y=-0.18, x=0),  # keep clear of range buttons
        margin=dict(l=50, r=30, t=60, b=80),
        height=500,   # total figure height in pixels; raise for a taller chart
        xaxis_rangeslider_visible=True,
        # Quick-zoom buttons on the first x axis
        xaxis_rangeselector=dict(
            buttons=[
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=3, label="3m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year",  stepmode="todate"),
                dict(count=1, label="1y",  step="year",  stepmode="backward"),
                dict(label="All", step="all"),
            ]
        ),
    )
    fig.update_yaxes(title_text="price", type=("log" if log_y else "linear"), row=1, col=1)
    fig.update_yaxes(title_text=f"volume ({vol_label})" if have_vol else "volume", row=2, col=1)

    # nice hover spike
    fig.update_xaxes(showspikes=True, spikemode="across", spikesnap="cursor",
                     spikedash="dot", spikethickness=1)
    return fig

# ---------- normalize inputs ----------
# NOTE: this rebinds the notebook-wide `df` / `bench_df` to their cleaned versions
# (tz-naive, sorted, de-duplicated, close := Adj Close when available), so any cell
# run after this one sees the cleaned frames.
df = _prep(df)
# bench_df becomes None when no benchmark DataFrame was loaded earlier
bench_df = _prep(bench_df) if ("bench_df" in globals() and isinstance(bench_df, pd.DataFrame)) else None
# Display labels; fall back to generic names when the loader cell did not define them
bench_symbol = globals().get("bench_symbol","Benchmark").upper()
asset_label  = globals().get("symbol","ASSET").upper()

# ---------- widgets ----------
# Candle interval: the dropdown value is passed straight to _resample_ohlcv as the rule
resample_dd = widgets.Dropdown(options=[("Original","Original"), ("Weekly","W"), ("Monthly","M")],
                               value="Original", description="Candles", layout=widgets.Layout(width="210px"))
# Price trace type: the value is passed to _make_price_panel as chart_type
type_dd     = widgets.Dropdown(options=[("Candlestick","candle"), ("OHLC","ohlc")],
                               value="candle", description="Type", layout=widgets.Layout(width="210px"))
log_cb      = widgets.Checkbox(value=False, description="Log scale (y)")
show_ma_cb  = widgets.Checkbox(value=True, description="Show MAs")
# Moving-average window lengths, counted in bars of the currently selected interval
ma_s_sl     = widgets.IntSlider(value=20, min=5, max=60, step=1, description="MA short")
ma_l_sl     = widgets.IntSlider(value=50, min=10, max=200, step=5, description="MA long")
# Benchmark controls: overlay on the main chart by default, or draw a separate figure
sep_bench_cb= widgets.Checkbox(value=False, description="Separate benchmark figure")
bench_on_cb = widgets.Checkbox(value=(bench_df is not None), description="Use benchmark")

def _keep_ma_sensible(*_):
    """Observer for the short-MA slider: when the short window reaches or passes
    the long window, raise the long window to stay above it."""
    short_win = ma_s_sl.value
    long_win = ma_l_sl.value
    if short_win < long_win:
        return
    ma_l_sl.value = max(short_win + 5, long_win)

# Only changes to the short slider trigger the adjustment
ma_s_sl.observe(_keep_ma_sensible, "value")

# Separate output areas so the main chart and the optional benchmark chart
# can be cleared and redrawn independently.
out_main = widgets.Output()
out_bench = widgets.Output()

def _render(*_):
    """
    Redraw both output areas from the current widget state.

    Used as a widget observer, so any positional change payload is accepted
    and ignored. The main area always shows the asset chart, with the
    benchmark rebased to 100 overlaid unless a separate benchmark figure was
    requested; the second area shows that separate figure, or nothing.
    """
    # Snapshot the controls once so both figures are built from the same settings
    rule = resample_dd.value
    separate = sep_bench_cb.value
    bench_wanted = bench_on_cb.value and bench_df is not None
    panel_kw = dict(
        chart_type=type_dd.value,
        log_y=log_cb.value,
        show_ma=show_ma_cb.value,
        ma_s=ma_s_sl.value,
        ma_l=ma_l_sl.value,
    )
    show_cfg = {"displaylogo": False, "toImageButtonOptions": {"format": "png"}}

    with out_main:
        clear_output(wait=True)
        base = _resample_ohlcv(df, rule)
        fig_main = _make_price_panel(
            base, title=f"{asset_label} — Candlestick ({rule})", **panel_kw
        )

        # overlay benchmark or not
        if bench_wanted and not separate:
            bench_bars = _resample_ohlcv(bench_df, rule)
            bench_close = bench_bars[["timestamp", "close"]].rename(columns={"close": "bench"})
            merged = base[["timestamp", "close"]].merge(bench_close, on="timestamp", how="inner")
            if not merged.empty:
                # rebase the benchmark to 100 at the first shared timestamp
                rebased = 100.0 * merged["bench"] / merged["bench"].iloc[0]
                overlay = go.Scatter(
                    x=merged["timestamp"], y=rebased,
                    name=bench_symbol, mode="lines",
                    line=dict(width=1.8, color="#0ea5e9"),
                )
                fig_main.add_trace(overlay, row=1, col=1)
                fig_main.update_yaxes(title_text="price / index (100)", row=1, col=1)
        fig_main.show(config=show_cfg)

    # separate benchmark figure (full chart) if requested
    with out_bench:
        clear_output(wait=True)
        if bench_wanted and separate:
            bench_bars = _resample_ohlcv(bench_df, rule)
            fig_b = _make_price_panel(
                bench_bars, title=f"{bench_symbol} — Candlestick ({rule})", **panel_kw
            )
            fig_b.show(config=show_cfg)

# Control panel layout: one row each for chart options, moving averages and benchmark
controls = widgets.VBox([
    widgets.HBox([resample_dd, type_dd, log_cb]),
    widgets.HBox([show_ma_cb, ma_s_sl, ma_l_sl]),
    widgets.HBox([bench_on_cb, sep_bench_cb]),
])

# Any change to any control triggers a full redraw
for w in [resample_dd, type_dd, log_cb, show_ma_cb, ma_s_sl, ma_l_sl, bench_on_cb, sep_bench_cb]:
    w.observe(_render, "value")

# Show the controls and both output areas, then draw the initial charts
display(controls, out_main, out_bench)
_render()

VBox(children=(HBox(children=(Dropdown(description='Candles', layout=Layout(width='210px'), options=(('Origina…

Output()

Output()