# Volatility Scaling & Portfolio Analysis

This notebook is organized into the following steps:
1. Imports, data loader, and risk-free (Rf) column detection
2. Fund selection (monthly-period coverage logic)
3. Weight preparation
4. Core statistics and `run_analysis`
5. Export
6. Widgets / UI
7. Output of in-sample and out-of-sample results to Excel with formatting

In [1]:
# Legacy helper and metric functions replaced by modules
# See trend_analysis.metrics and run_analysis.py


In [2]:
import inspect
import logging
from collections import namedtuple
from typing import Callable, Dict, Optional

import ipywidgets as widgets
import numpy as np
import pandas as pd
from ipyfilechooser import FileChooser
from IPython.display import clear_output, display

from trend_analysis.core.rank_selection import (
    FundSelectionConfig,
    RiskStatsConfig,
    select_funds,
    register_metric,
    METRIC_REGISTRY,
)
from trend_analysis.data import identify_risk_free_fund, load_csv
from trend_analysis.export import FORMATTERS_EXCEL, make_summary_formatter


## 2. Select Funds

In [3]:
# ===============================================================
# 2 · SELECT_FUNDS  (restored ≤ 3-missing-months rule)
# ===============================================================

# Thresholds handed to select_funds(). The select_funds() defined in this cell
# reads max_missing_months, max_consecutive_month_gap, outlier_threshold,
# zero_return_threshold, enforce_monotonic_index and allow_duplicate_dates
# (plus implausible_value_limit, which is not set here); the remaining fields
# are passed to FundSelectionConfig but not referenced in this cell.
cfg = FundSelectionConfig(
    max_missing_months=3,
    max_consecutive_month_gap=6,
    outlier_threshold=0.5,
    zero_return_threshold=0.2,
    enforce_monotonic_index=True,
    allow_duplicate_dates=False,
    max_missing_ratio=0.05,
    max_drawdown=0.3,
    min_volatility=0.05,
    max_volatility=1.0,
    min_avg_return=0.0,
    max_skewness=3.0,
    max_kurtosis=10.0,
    expected_freq="B",
    max_gap_days=3,
    min_aum_usd=1e7,
)

def _fund_rejection_reason(frame, fund, span, cfg, implausible_limit):
    """Return why ``fund`` fails the ``cfg`` checks, or ``None`` when it passes.

    ``frame`` is the Date-indexed data (including the derived "Month" column),
    ``span`` the monthly periods that must be covered, and
    ``implausible_limit`` the absolute value bound (``None`` disables it).
    """
    clean = frame[fund].dropna()

    # 1. Implausible value limits (skipped when the config defines no limit)
    if implausible_limit is not None and not clean.between(
        -implausible_limit, implausible_limit
    ).all():
        return f"Values outside ±{implausible_limit}"

    # 2. Extreme outlier threshold
    if (clean.abs() > cfg.outlier_threshold).any():
        return f"Outliers beyond ±{cfg.outlier_threshold}"

    # 3. Excessive zero-return rate
    if (clean == 0).mean() > cfg.zero_return_threshold:
        return f"Zero-return proportion > {cfg.zero_return_threshold}"

    # 4. Monotonic date index
    if cfg.enforce_monotonic_index and not clean.index.is_monotonic_increasing:
        return "Date index not monotonically increasing"

    # 5. Duplicate dates
    if not cfg.allow_duplicate_dates and clean.index.duplicated().any():
        return "Duplicate dates detected in index"

    # 6. Coverage: a month counts as present when it holds at least one value.
    month_has_data = frame.groupby("Month")[fund].apply(
        lambda col: col.notna().any()
    )
    # Force a boolean array so `~mask` is a logical NOT even when the grouped
    # result comes back with object dtype (e.g. an empty frame).
    mask = month_has_data.reindex(span, fill_value=False).to_numpy(dtype=bool)

    missing_count = int((~mask).sum())
    if missing_count > cfg.max_missing_months:
        return f"Missing-month count {missing_count} exceeds {cfg.max_missing_months}"

    # Longest run of consecutive missing months: pad both ends with True so
    # leading/trailing gaps are measured, then take the widest spacing
    # between present months.
    present = np.flatnonzero(np.r_[True, mask, True])
    gap = int(np.diff(present).max()) - 1
    if gap > cfg.max_consecutive_month_gap:
        return f"Consecutive-missing gap {gap} exceeds {cfg.max_consecutive_month_gap}"

    return None


def select_funds(
    df: pd.DataFrame,
    rf_col: str,
    fund_columns: list[str],
    in_sdate: str,
    in_edate: str,
    out_sdate: str,
    out_edate: str,
    cfg: FundSelectionConfig,
    selection_mode: str = "all",
    random_n: int | None = None
) -> list[str]:
    """
    Select eligible funds with data-validity and coverage checks driven by ``cfg``.

    Parameters
    ----------
    df : DataFrame with a datetime "Date" column and one column per fund.
    rf_col, in_edate, out_sdate : accepted for interface compatibility; not
        used by the checks below.
    fund_columns : candidate column names; names missing from ``df`` are skipped.
    in_sdate, out_edate : first and last month of the coverage window
        (anything ``pd.Period(..., "M")`` accepts).
    cfg : object exposing the threshold attributes read below. The
        ``implausible_value_limit`` check is skipped when that attribute is absent.
    selection_mode : "all" returns every eligible fund; "random" draws
        ``random_n`` of them without replacement.
    random_n : sample size for "random" mode; when ``None`` all eligible funds
        are returned regardless of ``selection_mode``.

    Returns
    -------
    list[str] : eligible fund names, in ``fund_columns`` order for mode "all".

    Raises
    ------
    ValueError : if ``random_n`` exceeds the eligible pool or the mode is unsupported.
    """
    # sort_values returns a new frame, so adding "Month" never touches the
    # caller's DataFrame.
    df = df.sort_values("Date")
    df["Month"] = df["Date"].dt.to_period("M")
    span = pd.period_range(
        pd.Period(in_sdate, "M"), pd.Period(out_edate, "M"), freq="M"
    )
    by_date = df.set_index("Date")

    # A config without this attribute used to raise AttributeError for every
    # fund, which the blanket except swallowed -> empty result. Treat it as
    # "no limit configured" instead.
    implausible_limit = getattr(cfg, "implausible_value_limit", None)

    eligible_funds: list[str] = []
    for f in fund_columns:
        if f not in by_date.columns:
            logging.warning("select_funds: column %r not found; skipped", f)
            continue
        try:
            reason = _fund_rejection_reason(by_date, f, span, cfg, implausible_limit)
        except Exception as exc:
            # Best-effort: one malformed column must not abort the whole
            # selection, but the failure is reported rather than hidden.
            logging.warning(
                "select_funds: skipped %r after unexpected %s: %s",
                f, type(exc).__name__, exc,
            )
            continue
        if reason is None:
            eligible_funds.append(f)
        else:
            logging.info("select_funds: rejected %r: %s", f, reason)

    # Final selection-mode logic
    if selection_mode == "all" or random_n is None:
        return eligible_funds
    if selection_mode == "random":
        if random_n > len(eligible_funds):
            raise ValueError(
                f"random_n exceeds eligible pool: {random_n} > {len(eligible_funds)}"
            )
        # .tolist() converts numpy string scalars back to plain str
        return np.random.choice(eligible_funds, random_n, replace=False).tolist()

    raise ValueError(f"Unsupported selection_mode '{selection_mode}'")



## 3. Weight Prep

In [4]:
# ───────────────────────────────────────────────────────────────
#  3 · WEIGHT PREP
# ───────────────────────────────────────────────────────────────
def prepare_weights(selected: list[str],
                    custom: Dict[str, int] | None) -> tuple[Dict[str, float], np.ndarray]:
    if not custom:
        w = {f: 1/len(selected) for f in selected}
    else:
        missing = [f for f in selected if f not in custom]
        if missing:
            raise ValueError(f"Missing weights for {missing}")
        w = {f: pct/100 for f, pct in custom.items()}
        if abs(sum(w.values()) - 1) > 1e-6:
            raise ValueError("Custom weights must sum to 100.")
    vec = np.array([w[f] for f in selected])
    return w, vec


## 4. Analysis (In-Sample & Out-of-Sample)
This cell defines the metric helper functions and the `run_analysis` function, which orchestrates the entire process:
- Validates date inputs.
- Converts 'Date' column.
- Identifies risk-free column.
- Fills short gaps.
- Selects funds.
- Computes in-sample scaling factors and applies them in- and out-of-sample.
- Computes individual fund stats and portfolio stats.

In [5]:
# ===============================================================
# 4 · CORE STATS  +  RUN_ANALYSIS  (helpers included, weight fix)
# ===============================================================

# NOTE(review): no code visible in this notebook section references this
# constant, and the metric helpers below default to periods_per_year=252 —
# confirm which annualisation factor is intended.
M_PER_YEAR = 12           # months per year

# ---------- helpers --------------------------------------------
def _ensure_dt(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy whose Date column is datetime64[ns]."""
    if pd.api.types.is_datetime64_any_dtype(df["Date"]):
        return df
    df = df.copy()
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
    df.dropna(subset=["Date"], inplace=True)
    return df

# 3. Metric function definitions
# === Metric Function Definitions with flexible annualization ===
@register_metric("AnnualReturn")
def compute_annual_return(
    returns: pd.Series,
    periods_per_year: int = 252,
) -> float:
    """
    Geometric annualised return.

    Compounds the non-missing returns and rescales the total growth to
    ``periods_per_year`` periods. Returns NaN when no observations remain
    after dropping missing values.
    """
    clean = returns.dropna()
    n_obs = len(clean)
    if n_obs == 0:
        return np.nan
    growth = (1 + clean).prod()
    exponent = periods_per_year / n_obs
    return growth ** exponent - 1

@register_metric("Volatility")
def compute_volatility(
    returns: pd.Series,
    periods_per_year: int = 252,
) -> float:
    """
    Annualised population standard deviation (ddof=0) of the returns.

    Missing values are dropped first; an empty series yields 0.0.
    """
    clean = returns.dropna()
    if len(clean) == 0:
        return 0.0
    per_period_sd = clean.std(ddof=0)
    return per_period_sd * np.sqrt(periods_per_year)

@register_metric("Sharpe")
def compute_sharpe(
    returns: pd.Series,
    risk_free: float = 0.0,
    periods_per_year: int = 252,
) -> float:
    """
    Sharpe ratio: (annualised return - ``risk_free``) / annualised volatility.

    ``risk_free`` is subtracted directly from the annualised return.
    Returns NaN when the volatility is zero.
    """
    ann_vol = compute_volatility(returns, periods_per_year=periods_per_year)
    if ann_vol == 0:
        return np.nan
    excess = compute_annual_return(returns, periods_per_year=periods_per_year) - risk_free
    return excess / ann_vol

@register_metric("Sortino")
def compute_sortino(
    returns: pd.Series,
    risk_free: float = 0.0,
    periods_per_year: int = 252,
) -> float:
    """
    Sortino ratio: (annualised return - ``risk_free``) / annualised downside deviation.

    ``risk_free`` is treated as an annual rate and divided by
    ``periods_per_year`` to obtain the per-period target. The downside
    deviation is the root-mean-square shortfall below that target taken over
    ALL observations (periods at or above the target contribute zero).
    Averaging only over the losing periods, as this function previously did,
    overstates downside risk and understates the ratio.

    Returns NaN when there are no observations or no period falls below the
    target.
    """
    r = returns.dropna()
    if r.empty:
        return np.nan
    ann_ret = compute_annual_return(returns, periods_per_year=periods_per_year)
    # Per-period target derived from the annual risk-free rate
    period_rf = risk_free / periods_per_year
    # Shortfall below target; non-negative excess returns count as 0
    shortfall = (r - period_rf).clip(upper=0)
    if not (shortfall < 0).any():
        return np.nan
    down_dev = np.sqrt((shortfall ** 2).mean()) * np.sqrt(periods_per_year)
    if down_dev == 0:  # guards against squared shortfalls underflowing to 0
        return np.nan
    return (ann_ret - risk_free) / down_dev

@register_metric("MaxDrawdown")
def compute_max_drawdown(
    returns: pd.Series,
) -> float:
    """
    Maximum drawdown (peak-to-trough) of the cumulative wealth path.

    The wealth path starts from an initial value of 1.0, so a loss in the
    very first period counts as a drawdown from that starting capital.
    Returns a non-positive float; 0.0 for an empty series.
    """
    r = returns.dropna()
    if r.empty:
        return 0.0
    cum = (1 + r).cumprod()
    # The running peak can never be below the starting wealth of 1.0; without
    # this floor, returns of [-10%, -10%] report -10% instead of -19%.
    peak = cum.cummax().clip(lower=1.0)
    drawdown = (cum / peak) - 1
    return float(drawdown.min())

# Alias for backward compatibility: `_ann_vol` is the same function object as
# compute_volatility (including its periods_per_year=252 default).
_ann_vol = compute_volatility

# === Aggregator with Centralized Error Handling ===

def _stats(
    returns: pd.Series,
    cfg: RiskStatsConfig,
    **metric_kwargs
) -> namedtuple:
    """
    Evaluate every metric named in ``cfg.metrics_to_run`` on ``returns``.

    Each metric is looked up in METRIC_REGISTRY and called with whichever of
    ``risk_free`` / ``periods_per_year`` (taken from ``cfg``) and
    ``metric_kwargs`` its signature accepts; ``metric_kwargs`` override the
    ``cfg`` values. An unregistered or failing metric is logged and recorded
    as NaN so the remaining metrics still run.

    Returns a ``Stat`` namedtuple whose fields follow ``cfg.metrics_to_run``.
    """
    metric_names = cfg.metrics_to_run
    Stat = namedtuple("Stat", metric_names)
    collected: list[float] = []
    for name in metric_names:
        metric_fn = METRIC_REGISTRY.get(name)
        if metric_fn is None:
            logging.error("Metric '%s' not registered", name)
            collected.append(np.nan)
            continue
        try:
            candidate_args = dict(
                risk_free=cfg.risk_free,
                periods_per_year=cfg.periods_per_year,
            )
            candidate_args.update(metric_kwargs)
            # Pass only the arguments this metric's signature declares
            accepted = inspect.signature(metric_fn).parameters
            call_args = {
                key: value
                for key, value in candidate_args.items()
                if key in accepted
            }
            outcome = metric_fn(returns, **call_args)
        except ZeroDivisionError:
            logging.warning("%s: division by zero, setting NaN", name)
            outcome = np.nan
        except (ValueError, TypeError) as e:
            logging.warning("%s: invalid input (%s), setting NaN", name, e)
            outcome = np.nan
        except Exception as e:
            logging.error("%s: unexpected error (%s), setting NaN", name, e)
            outcome = np.nan
        collected.append(outcome)
    return Stat(*collected)

# ---------- main ------------------------------------------------
def run_analysis(
    df,
    selected,
    w_vec,
    w_dict,
    rf_col,
    in_start,
    in_end,
    out_start,
    out_end,
    target_vol,
    monthly_cost,
    indices_list
):
    """
    Scale the selected funds to ``target_vol`` and compute in-/out-of-sample stats.

    Parameters
    ----------
    df : DataFrame with a "Date" column, the ``rf_col`` column and fund columns.
    selected : candidate fund columns; funds with any NaN in either window and
        any column listed in ``indices_list`` are removed.
    w_vec : ignored; the weight vector is rebuilt after funds are dropped
        (kept for interface compatibility).
    w_dict : mapping fund -> weight, re-normalised over the surviving funds,
        or ``None`` for equal weights.
    rf_col : name of the risk-free column.
    in_start, in_end, out_start, out_end : window bounds; each is snapped to
        its month end.
    target_vol : annualised volatility each fund is scaled to, using the
        in-sample volatility.
    monthly_cost : amount subtracted from every scaled return.
    indices_list : optional index columns reported separately (unscaled).

    Returns
    -------
    dict with keys ``selected_funds``, ``indices_list``, ``fund_weights``,
    ``ew_weights``, ``in_sample_stats``, ``out_sample_stats``,
    ``in_ew_stats``, ``out_ew_stats``, ``in_user_stats``, ``out_user_stats``,
    ``dropped`` and, when indices are given, ``index_stats``.

    Raises
    ------
    ValueError : if no fund survives the filters or the surviving manual
        weights sum to zero.
    """
    df = _ensure_dt(df)
    index_cols = list(indices_list or [])

    # ---- date masks --------------------------------------------------
    in_s = pd.to_datetime(in_start) + pd.offsets.MonthEnd(0)
    in_e = pd.to_datetime(in_end) + pd.offsets.MonthEnd(0)
    out_s = pd.to_datetime(out_start) + pd.offsets.MonthEnd(0)
    out_e = pd.to_datetime(out_end) + pd.offsets.MonthEnd(0)

    m_in = df["Date"].between(in_s, in_e)
    m_out = df["Date"].between(out_s, out_e)

    in_df, out_df = df.loc[m_in, selected], df.loc[m_out, selected]
    in_rf, out_rf = df.loc[m_in, rf_col], df.loc[m_out, rf_col]

    # ---- drop funds with any NaNs in either window ------------------
    good = [f for f in selected
            if in_df[f].notna().all() and out_df[f].notna().all()]
    # list comprehension (not a set difference) keeps the order deterministic
    dropped = [f for f in selected if f not in good]
    if dropped:
        logging.warning("Dropped funds: %s", dropped)

    # kick out any accidental index columns
    selected = [f for f in good if f not in index_cols]
    if not selected:
        raise ValueError(
            "No funds left to analyse after removing funds with missing data "
            "and index columns."
        )

    in_df, out_df = in_df[selected], out_df[selected]

    # ---- rebuild weights over the surviving funds --------------------
    if w_dict is None:                      # equal-weight path
        w_dict = {f: 1/len(selected) for f in selected}
    else:                                   # manual path -> rescale
        raw = {f: w_dict[f] for f in selected}
        total = sum(raw.values())
        if total == 0:
            raise ValueError("Weights of the remaining funds sum to zero.")
        w_dict = {f: v/total for f, v in raw.items()}
    w_vec = np.array([w_dict[f] for f in selected])

    # ---- stats configuration ----------------------------------------
    # One config per window: re-using a single mutated config made the
    # in-sample portfolio and index stats use the out-of-sample risk-free rate.
    # NOTE(review): these are per-period means of the RF column, while
    # compute_sharpe subtracts risk_free from an annualised return — confirm
    # the units of the RF column.
    in_cfg = RiskStatsConfig(risk_free=in_rf.mean())
    out_cfg = RiskStatsConfig(risk_free=out_rf.mean())

    # ---- scaling ----------------------------------------------------
    # Annualise with the same periods-per-year that the reported stats use,
    # so a scaled fund's reported in-sample volatility matches target_vol.
    vols = in_df.apply(compute_volatility, periods_per_year=in_cfg.periods_per_year)
    scale = np.where(vols > 0, target_vol / vols, 1.0)   # zero-vol funds stay unscaled
    # A return cannot be worse than -100 %
    in_sc = ((in_df * scale) - monthly_cost).clip(lower=-1)
    out_sc = ((out_df * scale) - monthly_cost).clip(lower=-1)

    # ---- stats ------------------------------------------------------
    in_stat = {f: _stats(in_sc[f], in_cfg) for f in selected}
    out_stat = {f: _stats(out_sc[f], out_cfg) for f in selected}

    ew_vec = np.full(len(selected), 1/len(selected))

    in_ew_stats = _stats(in_sc.dot(ew_vec), in_cfg)
    out_ew_stats = _stats(out_sc.dot(ew_vec), out_cfg)
    in_user_stats = _stats(in_sc.dot(w_vec), in_cfg)
    out_user_stats = _stats(out_sc.dot(w_vec), out_cfg)

    results = {
        "selected_funds": selected,
        "indices_list":   indices_list or [],
        "fund_weights":   w_dict,
        "ew_weights":     {f: 1/len(selected) for f in selected},
        "in_sample_stats":  in_stat,
        "out_sample_stats": out_stat,
        "in_ew_stats":     in_ew_stats,
        "out_ew_stats":    out_ew_stats,
        "in_user_stats":   in_user_stats,
        "out_user_stats":  out_user_stats,
        "dropped":         dropped,
    }

    # ---- optional index stats (unscaled, no cost) -------------------
    if indices_list:
        results["index_stats"] = {
            col: {
                "in_sample":  _stats(df.loc[m_in, col], in_cfg),
                "out_sample": _stats(df.loc[m_out, col], out_cfg),
            }
            for col in indices_list
        }

    return results


## 5. Excel Export
Creates an Excel workbook containing the in-sample and out-of-sample results for the equal-weight and user-weight portfolios.

In [6]:
# ───────────────────────────────────────────────────────────────
#  5 · EXPORT
# ───────────────────────────────────────────────────────────────

def export_to_excel(
    data: dict[str, pd.DataFrame],
    output_path: str,
    default_format: Optional[Callable] = None
) -> None:
    """
    Write every DataFrame in ``data`` to its own sheet of ``output_path``.

    The dict key is used as the sheet name and as the lookup key in
    FORMATTERS_EXCEL; when no formatter is registered for a sheet,
    ``default_format`` is used if given. A formatter is called as
    ``formatter(worksheet, workbook)`` after the sheet has been written.

    The "summary" sheet is written with ``startrow=5`` to leave room above
    the data for custom headers; all other sheets start at row 0.
    """
    first_row_for = {"summary": 5}
    with pd.ExcelWriter(output_path, engine="xlsxwriter") as writer:
        for sheet_name, frame in data.items():
            frame.to_excel(
                writer,
                sheet_name=sheet_name,
                index=False,
                startrow=first_row_for.get(sheet_name, 0),
                header=True,
            )
            formatter = FORMATTERS_EXCEL.get(sheet_name, default_format)
            if formatter:
                formatter(writer.sheets[sheet_name], writer.book)
    # Leaving the `with` block saves and closes the workbook


## 6. Run Parameters, Widgets & User Inputs
Here we define some IPython widgets for in-sample/out-of-sample dates, target volatility, monthly cost, etc. Also lets us use custom weights.

### Using This Notebook
1. Run all cells.
2. Call `demo_run()` in a new cell to see a quick example with dummy data.
3. To use your own data, load it into a DataFrame (make sure it has a 'Date' column and decimal returns in other columns), then call `run_analysis()` and `export_to_excel()`.
4. For interactive selection, do:
   ```python
   display(ui_inputs)
   ```
   Then wire the `apply_button` to a callback function that reads the widget values and runs `run_analysis()`.
5. For custom weights, call:
   ```python
   my_weights = get_custom_weights(selected_funds)
   ```
   Then pass `my_weights` into your logic.


In [7]:
# ===============================================================
#            STREAMLINED ANALYSIS UI  (phase-2 clean)
# ===============================================================

# ---------- session store ----------
# Shared state written by the load callback and read by the other callbacks.
session = dict(df=None, rf=None, sel=None, cweights=None)

# ---------- 1 · DATA LOAD ----------
src = widgets.ToggleButtons(
    description="Source:",
    options=[("Local", "local"), ("URL", "url")],
)

chooser = FileChooser()
url_box = widgets.Text(
    placeholder="https://…/file.csv",
    layout={"width": "70%"},
)
load_btn = widgets.Button(description="Load CSV", button_style="success")
load_out = widgets.Output()

def _toggle_src(change):
    """Show only the input widget that matches the selected data source."""
    chosen = change["new"]
    for widget, key in ((chooser, "local"), (url_box, "url")):
        widget.layout.display = "block" if chosen == key else "none"

src.observe(_toggle_src, names="value")
_toggle_src({"new": src.value})   # apply the initial state

def _load(_):
    """Load the chosen CSV, detect the risk-free column and reset the session.

    Messages are printed into ``load_out``. On any failure ``session["df"]``
    is set to ``None``.
    """
    with load_out:
        clear_output()
        try:
            if src.value == "local":
                path = chooser.selected
            else:
                path = url_box.value.strip()
            if not path:
                raise ValueError("choose file / URL")
            is_csv_url = path.lower().endswith(".csv")
            if src.value == "url" and not is_csv_url:
                raise ValueError("URL must end with .csv")

            df = load_csv(path)
            if df is None:
                print(f'❌ Failed to load data from {path}')
                session["df"] = None
                return

            rf = identify_risk_free_fund(df)
            # A fresh dataset invalidates any earlier selection / weights
            session.update(df=df, rf=rf, sel=None, cweights=None)
            print(f"✅ Loaded {len(df):,} rows × {df.shape[1]} cols | RF → {rf}")
        except Exception as e:
            print("❌", e)
            session["df"] = None

load_btn.on_click(_load)

# ---------- 2 · PARAMS ------------
index_cnt = widgets.BoundedIntText(0, min=0, max=10, description="# Indices:")
in_start = widgets.Text("2005-07", description="In Start:")
in_end = widgets.Text("2008-06", description="In End:")
out_start = widgets.Text("2008-07", description="Out Start:")
out_end = widgets.Text("2009-06", description="Out End:")
target_vol = widgets.FloatText(0.25, description="Target Vol:")
monthly_cost = widgets.FloatText(0.0033, description="Monthly Cost:")

# ---------- 3 · SELECTION ----------
mode_dd = widgets.Dropdown(
    description="Mode:",
    value="all",
    options=[("All", "all"), ("Random", "random"), ("Manual", "manual")],
)
rand_n = widgets.BoundedIntText(5, min=2, max=100, description="Sample N:")
fund_table = widgets.VBox([])
total_lbl = widgets.Label("Total = 0 %")

def _toggle_sel(_=None):
    """Show the sample-size box in random mode and the fund table in manual mode."""
    mode = mode_dd.value
    rand_n.layout.display = "block" if mode == "random" else "none"
    manual_display = "block" if mode == "manual" else "none"
    fund_table.layout.display = manual_display
    total_lbl.layout.display = manual_display

mode_dd.observe(_toggle_sel, names="value")
_toggle_sel()   # apply the initial state

# ---------- helpers ---------------
def _eligible_pool():
    """Return the funds that pass ``select_funds`` for the current widget settings.

    Reads the loaded frame and risk-free column from ``session`` and the
    window dates from the text widgets. Returns an empty list (after printing
    a message) when no data is loaded or a date cannot be parsed.
    """
    df, rf = session["df"], session["rf"]
    if df is None:
        print("⚠️ data not loaded")
        return []

    def _month_end(text):
        # Snap the parsed date to the end of its month
        return pd.to_datetime(text) + pd.offsets.MonthEnd(0)

    # ---- date parse guard -----------------------------------
    try:
        in_s = _month_end(in_start.value)
        in_e = _month_end(in_end.value)
        out_s = _month_end(out_start.value)
        out_e = _month_end(out_end.value)
    except Exception:
        print("❌ invalid dates")
        return []

    # ---- indices are the RIGHT-most idx_n data columns ------
    # data_cols already excludes the risk-free column, so no further
    # filtering is needed before slicing.
    idx_n = index_cnt.value
    data_cols = [c for c in df.columns if c not in ["Date", rf, "Month"]]
    indices = data_cols[-idx_n:] if idx_n else []
    cand = [c for c in data_cols if c not in indices]

    # ---- run select_funds ----------------------------------
    return select_funds(
        df=df,
        rf_col=rf,
        fund_columns=cand,
        in_sdate=in_s,
        in_edate=in_e,
        out_sdate=out_s,
        out_edate=out_e,
        cfg=cfg,
        selection_mode="all",
    )

def _build_manual(*_):
    """Rebuild the manual-selection table (one checkbox + weight box per eligible fund).

    Does nothing unless manual mode is active and data is loaded. When no
    fund is eligible the table is emptied, so rows from an earlier date
    window cannot linger.
    """
    if mode_dd.value != "manual" or session["df"] is None:
        return
    valid = _eligible_pool()
    if not valid:
        fund_table.children = ()
        total_lbl.value = "Total = 0 %"
        print("❌ No eligible funds")
        return

    def _update_total(*_):
        # Sum the weights of ticked funds only
        tot = sum(r.children[1].value for r in fund_table.children
                  if r.children[0].value)
        total_lbl.value = f"Total = {tot} %"

    rows = []
    for f in valid:
        cb = widgets.Checkbox(description=f, layout={"width": "200px"})
        wt = widgets.BoundedIntText(0, min=0, max=100,
                                    layout={"width": "60px"}, disabled=True)

        def _toggle(ch, box=wt):           # default arg binds this row's box
            box.disabled = not ch["new"]
            if box.disabled:
                box.value = 0
            _update_total()

        cb.observe(_toggle, names="value")
        wt.observe(_update_total, names="value")
        rows.append(widgets.HBox([cb, wt]))

    # Assign once: growing `children` row by row re-syncs the whole tuple
    # with the front end on every iteration.
    fund_table.children = tuple(rows)
    _update_total()

# _build_manual checks the mode itself, so it can observe the dropdown directly.
mode_dd.observe(_build_manual, names="value")
for w in (in_start, in_end, out_start, out_end):
    w.observe(_build_manual, names="value")

# ---------- 4 · RUN ---------------
run_btn = widgets.Button(button_style="success", description="Run Analysis")
run_out = widgets.Output(
    layout={
        "border": "1px solid #999",
        "height": "340px",
        "overflow_y": "auto",
    }
)

def _run(_):
    """Run the analysis for the current widget settings and export it to Excel.

    All messages are printed into ``run_out``. The workbook is written to
    ``IS_<in start>_<out start>.xlsx`` in the working directory.
    """
    with run_out:
        clear_output()
        df, rf = session["df"], session["rf"]
        if df is None:
            print("⚠️ Load data first")
            return

        # Index columns are the right-most `idx_n` data columns; data_cols
        # already excludes the risk-free column.
        idx_n = index_cnt.value
        data_cols = [c for c in df.columns if c not in ["Date", rf, "Month"]]
        indices = data_cols[-idx_n:] if idx_n else []

        # pool + selection
        pool = _eligible_pool()
        if not pool:
            print("❌ No eligible funds")
            return

        mode = mode_dd.value
        if mode == "all":
            sel, custom = pool, None
        elif mode == "random":
            if rand_n.value > len(pool):
                print("⚠️ Sample N too big")
                return
            # .tolist() yields plain str instead of numpy string scalars
            sel = np.random.choice(pool, rand_n.value, replace=False).tolist()
            custom = None
        else:
            sel, custom = [], {}
            if not fund_table.children:
                _build_manual()
            for row in fund_table.children:
                cb, wt = row.children
                if cb.value:
                    sel.append(cb.description)
                    custom[cb.description] = wt.value
            if sum(custom.values()) != 100:
                print("⚠️ Weights ≠ 100")
                return

        try:
            w_dict, w_vec = prepare_weights(sel, custom)
            res = run_analysis(df, sel, w_vec, w_dict, rf,
                               in_start.value, in_end.value,
                               out_start.value, out_end.value,
                               target_vol.value, monthly_cost.value,
                               indices)
        except (ValueError, ZeroDivisionError) as exc:
            # e.g. inconsistent weights, or every fund dropped for missing data
            print("❌", exc)
            return

        # Report the funds actually analysed: run_analysis may drop some of `sel`.
        print("✅ analysis complete |", len(res["selected_funds"]), "funds")
        if res["dropped"]:
            print("⚠️ Dropped:", res["dropped"])
        if indices:
            print("📊 Indices:", indices)

        fname = f"IS_{in_start.value}_{out_start.value}.xlsx"
        # register only the combined summary formatter
        make_summary_formatter(
            res,
            in_start.value,
            in_end.value,
            out_start.value,
            out_end.value
        )

        # Build a minimal data dict with just the 'summary' sheet.
        # The formatter will populate all rows (portfolio, funds, spacer, indices).
        data = {
            "summary": pd.DataFrame()
        }

        print("Sheets to write:", list(data.keys()))
        print("Formatters:", list(FORMATTERS_EXCEL.keys()))

        # Export — export_to_excel applies the formatter registered for 'summary'.
        export_to_excel(data, fname)
        print("Workbook saved as", fname)

run_btn.on_click(_run)

# ---------- DISPLAY --------------
# Stack the three input sections and the run controls top to bottom.
display(
    widgets.VBox(
        children=[
            widgets.HTML("<h4>1. Load data</h4>"),
            src,
            chooser,
            url_box,
            load_btn,
            load_out,
            widgets.HTML("<hr><h4>2. Parameters</h4>"),
            widgets.HBox([index_cnt]),
            widgets.HBox([in_start, in_end, out_start, out_end]),
            widgets.HBox([target_vol, monthly_cost]),
            widgets.HTML("<hr><h4>3. Fund selection</h4>"),
            widgets.HBox([mode_dd, rand_n]),
            fund_table,
            total_lbl,
            widgets.HTML("<hr>"),
            run_btn,
            run_out,
        ]
    )
)


VBox(children=(HTML(value='<h4>1. Load data</h4>'), ToggleButtons(description='Source:', options=(('Local', 'l…

In [8]:
# Inspect the raw CSV schema: list every column and save the list as a spec.
# NOTE(review): absolute, machine-specific path — point this at a configurable
# data directory before sharing the notebook.
CSV_PATH = "/Users/teacher/Library/CloudStorage/Dropbox/Learning/Code/Trend Modeling Project/hedge_fund_returns_with_indexes.csv"

# 1. Load the file (load_csv and pd come from the imports cell at the top)
dt = load_csv(CSV_PATH)
if dt is None:
    # The UI load callback treats a None result as a failed load; fail here
    # with a clear message instead of an AttributeError on `dt.columns`.
    raise RuntimeError(f"load_csv could not read {CSV_PATH}")

# 2. Capture all original fields (Date + fund columns)
col_list = dt.columns.tolist()
print(col_list)

# 3.  (Optional)  Save to CSV for your spec
pd.DataFrame({"ColumnName": col_list}).to_csv("variable_spec.csv", index=False)


['Date', 'Risk-Free Rate', 'Quantum Capital', 'Crescent Strategies', 'Echo Advisors', 'Quantum Group', 'Quantum LP', 'Meridian Capital', 'Adaptive Partners', 'Echo Strategies', 'Vista LP', 'Adaptive Group', 'Echo Group', 'Meridian Strategies', 'Axiom LP', 'Crescent Capital', 'Sentinel Management', 'Ascent Holdings', 'Sentinel Strategies', 'Ascent Partners', 'Quantum Management', 'Quantum Investments', 'Ascent LP', 'Axiom Group', 'Crescent LP', 'Echo Partners', 'Adaptive Strategies', 'Crescent Partners', 'Meridian Holdings', 'Axiom Investments', 'Sentinel Investments', 'Ascent Strategies', 'Forge Advisors', 'Vista Strategies', 'Sentinel Global', 'Ascent Capital', 'Echo Capital', 'Ascent Management', 'Echo LP', 'Axiom Management', 'Axiom Advisors', 'Crescent Advisors', 'Vista Holdings', 'Forge Management', 'Sentinel Group', 'Axiom Partners', 'Sentinel Advisors', 'Meridian LP', 'Crescent Group', 'Crescent Management', 'Adaptive Holdings', 'Vista Advisors', 'Vista Partners', 'Forge Strateg