In [1]:
# ===============================================================
#      VOL-ADJ TREND ANALYSIS  –  SINGLE-FILE VERSION
# ===============================================================

# ───────────────────────────────────────────────────────────────
#  0 · IMPORTS  (all in one place)
# ───────────────────────────────────────────────────────────────
import pandas as pd
import numpy as np
import xlsxwriter
import logging
from io import BytesIO
import ipywidgets as widgets
from IPython.display import display, clear_output
from ipyfilechooser import FileChooser
from typing import List, Dict, Optional

# ───────────────────────────────────────────────────────────────
#  1 · CSV LOADER + RF DETECTOR
# ───────────────────────────────────────────────────────────────
def load_csv(path: str) -> pd.DataFrame:
    """Read a CSV into a DataFrame and verify that it has a ``Date`` column.

    Parameters
    ----------
    path : str
        Location handed straight to ``pandas.read_csv``.

    Returns
    -------
    pd.DataFrame
        The parsed table, returned as-is (``Date`` is not converted here).

    Raises
    ------
    ValueError
        If the parsed table has no column named ``Date``.
    """
    frame = pd.read_csv(path)
    if "Date" in frame.columns:
        return frame
    raise ValueError("CSV must contain a 'Date' column.")

def identify_risk_free_fund(df: pd.DataFrame) -> str:
    """Return the name of the numeric column with the smallest volatility.

    The ``Date`` column (if present) and every non-numeric column are ignored,
    so text/period helper columns cannot break the standard-deviation call.
    Volatility is the population standard deviation (``ddof=0``), NaNs skipped.

    Parameters
    ----------
    df : pd.DataFrame
        Table of return series, one column per fund.

    Returns
    -------
    str
        Label of the column with the lowest standard deviation.

    Raises
    ------
    ValueError
        If no numeric column has a computable standard deviation.
    """
    returns = df.drop(columns="Date", errors="ignore").select_dtypes(include="number")
    # Columns that are entirely NaN yield a NaN std; drop them so idxmin
    # always has at least one real value to choose from.
    stdevs = returns.std(skipna=True, ddof=0).dropna()
    if stdevs.empty:
        raise ValueError("No numeric return columns available to detect the risk-free fund.")
    # NOTE(review): a column with a single observation has a population std
    # of 0 and would therefore win here — confirm inputs are dense enough.
    return stdevs.idxmin()

## 2. Select Funds

In [2]:
# ───────────────────────────────────────────────────────────────
#  2 · SELECT_FUNDS  (month-period logic)
# ───────────────────────────────────────────────────────────────
def select_funds(
        df: pd.DataFrame,
        rf_col: str,
        fund_columns: list[str],
        in_sdate, in_edate,
        out_sdate, out_edate,
        selection_mode: str = "all",
        random_n: int | None = None
) -> list[str]:

    if not pd.api.types.is_datetime64_any_dtype(df["Date"]):
        df = df.copy()
        df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
        df.dropna(subset=["Date"], inplace=True)

    df["__Month"] = df["Date"].dt.to_period("M")

    in_s,  in_e  = pd.Period(in_sdate,  "M"), pd.Period(in_edate,  "M")
    out_s, out_e = pd.Period(out_sdate, "M"), pd.Period(out_edate, "M")

    must_have = list(pd.period_range(in_s,  in_e,  "M")) + \
                list(pd.period_range(out_s, out_e, "M"))

    eligible = []
    for f in fund_columns:
        s = df[["__Month", f]]
        ok = s.groupby("__Month")[f].apply(lambda col: col.notna().any())

        if not all(ok.get(m, False) for m in must_have):
            continue

        full = ok.reindex(pd.period_range(ok.index.min(),
                                          ok.index.max(), "M"),
                          fill_value=False)
        gaps = full.astype(int).groupby((~full).cumsum()).cumsum().where(~full, 0)
        if gaps.max() > 3:
            continue

        eligible.append(f)

    if selection_mode == "all" or random_n is None:
        return eligible
    if selection_mode == "random":
        if random_n > len(eligible):
            raise ValueError("random_n exceeds eligible pool.")
        return list(np.random.choice(eligible, random_n, replace=False))
    raise ValueError(f"Unsupported mode {selection_mode}")


## 3. Weight Prep

In [3]:
# ───────────────────────────────────────────────────────────────
#  3 · WEIGHT PREP
# ───────────────────────────────────────────────────────────────
def prepare_weights(selected: list[str],
                    custom: Dict[str, int] | None) -> tuple[Dict[str, float], np.ndarray]:
    if not custom:
        w = {f: 1/len(selected) for f in selected}
    else:
        missing = [f for f in selected if f not in custom]
        if missing:
            raise ValueError(f"Missing weights for {missing}")
        w = {f: pct/100 for f, pct in custom.items()}
        if abs(sum(w.values()) - 1) > 1e-6:
            raise ValueError("Custom weights must sum to 100.")
    vec = np.array([w[f] for f in selected])
    return w, vec

## 4. Analysis (In-Sample & Out-of-Sample)
The `run_analysis` function orchestrates the entire process:
- Function definitions
- Validates date inputs.
- Converts 'Date' column.
- Identifies risk-free column.
- Fills short gaps.
- Selects funds.
- Computes in-sample scaling factors and applies them in- and out-of-sample.
- Computes individual fund stats and portfolio stats.

In [4]:
# ───────────────────────────────────────────────────────────────
#  4 · CORE STATS + RUN_ANALYSIS
# ───────────────────────────────────────────────────────────────
M_PER_YEAR = 12
def _ensure_dt(df):
    if pd.api.types.is_datetime64_any_dtype(df["Date"]):
        return df
    df = df.copy()
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
    df.dropna(subset=["Date"], inplace=True)
    return df

def _ann_ret(s): s=s.dropna(); return (1+s).prod()**(M_PER_YEAR/len(s))-1 if len(s) else np.nan
def _ann_vol(s): s=s.dropna(); return s.std(ddof=0)*np.sqrt(M_PER_YEAR) if len(s) else np.nan
def _sharpe(s,rf): ex=(s-rf).dropna(); v=_ann_vol(ex); return _ann_ret(ex)/v if v else np.nan
def _sortino(s,rf):
    ex=(s-rf).dropna(); neg=ex[ex<0]
    d=neg.std(ddof=0)*np.sqrt(M_PER_YEAR) if len(neg) else np.nan
    return _ann_ret(ex)/d if d else np.nan
def _mdd(s): nav=(1+s).dropna().cumprod(); return ((nav/nav.cummax())-1).min() if len(nav) else np.nan
def _stats(s,rf): return (_ann_ret(s),_ann_vol(s),_sharpe(s,rf),_sortino(s,rf),_mdd(s))

def run_analysis(df: pd.DataFrame,
                 selected: List[str],
                 w_vec: np.ndarray,
                 w_dict: Dict[str,float],
                 rf_col: str,
                 in_start,in_end,out_start,out_end,
                 target_vol=0.25, monthly_cost=0.0033,
                 indices_list: Optional[List[str]]=None):
    """Scale funds to a target volatility and compute in/out-of-sample statistics.

    Steps
    -----
    1. Ensure ``Date`` is datetime and slice the in-sample and out-of-sample
       windows by calendar month (inclusive on both ends).
    2. Drop selected funds that have any NaN inside either window and
       renormalise the remaining weights so they again sum to 1.
    3. Derive one scaling factor per fund from its in-sample volatility
       (``target_vol / vol``; 1.0 when the volatility is zero) and apply it to
       both windows, subtract ``monthly_cost`` and floor returns at -100 %.
    4. Compute statistics per fund, for the equal-weight portfolio, for the
       user-weight portfolio and (unscaled) for each requested index column.

    Parameters
    ----------
    df : pd.DataFrame
        Return table with a ``Date`` column, ``rf_col`` and the fund columns.
    selected : list[str]
        Fund columns to analyse.
    w_vec : np.ndarray
        User weights ordered like ``selected``.
    w_dict : dict[str, float]
        User weights (fractions) keyed by fund; ``None`` means equal weights
        if funds have to be dropped.
    rf_col : str
        Name of the risk-free column.
    in_start, in_end, out_start, out_end
        Window bounds, parsed with ``pd.to_datetime`` and reduced to months.
    target_vol : float
        Annualised volatility each fund is scaled to.
    monthly_cost : float
        Amount subtracted from every scaled periodic return.
    indices_list : list[str] | None
        Optional index columns to report alongside the funds.

    Returns
    -------
    dict
        Keys: ``selected_funds``, ``indices_list``, ``fund_weights``,
        ``ew_weights``, ``in_sample_stats``, ``out_sample_stats``,
        ``in_ew_stats``, ``out_ew_stats``, ``in_user_stats``,
        ``out_user_stats``, ``index_stats``, ``dropped``.

    Raises
    ------
    ValueError
        If either window contains no rows, or no fund with complete data
        remains.
    """
    df=_ensure_dt(df)

    # Month-period comparison: a row belongs to a window when its calendar
    # month does, whatever day of the month the observation is stamped with.
    month=df["Date"].dt.to_period("M")
    in_s,in_e=(pd.to_datetime(d).to_period("M") for d in (in_start,in_end))
    out_s,out_e=(pd.to_datetime(d).to_period("M") for d in (out_start,out_end))

    m_in=(month>=in_s)&(month<=in_e);  m_out=(month>=out_s)&(month<=out_e)
    if not (m_in.any() and m_out.any()):
        raise ValueError("No observations fall inside the in-sample and/or out-of-sample window.")

    in_df,out_df=df.loc[m_in,selected],df.loc[m_out,selected]
    in_rf,out_rf=df.loc[m_in,rf_col], df.loc[m_out,rf_col]

    good=[f for f in selected if in_df[f].notna().all() and out_df[f].notna().all()]
    dropped=[f for f in selected if f not in good]
    if dropped:
        logging.warning("Dropped funds: %s", dropped)
        selected=good; in_df,out_df=in_df[good],out_df[good]
        # Rescale the surviving weights so they sum to 1 again; fall back to
        # equal weights when there is nothing to rescale.
        remaining=None if w_dict is None else sum(w_dict[f] for f in selected)
        if remaining:
            w_dict={f:w_dict[f]/remaining for f in selected}
        elif selected:
            w_dict={f:1/len(selected) for f in selected}
        else:
            w_dict={}
        w_vec=np.array([w_dict[f] for f in selected])

    if not selected:
        raise ValueError("No selected fund has complete data in both windows.")

    vol={f:_ann_vol(in_df[f]) for f in selected}
    scl=pd.Series({f:target_vol/v if v else 1.0 for f,v in vol.items()})
    in_scaled  = in_df.mul(scl, axis="columns").sub(monthly_cost).clip(lower=-1)
    out_scaled = out_df.mul(scl, axis="columns").sub(monthly_cost).clip(lower=-1)

    in_stats  ={f:_stats(in_scaled[f], in_rf)  for f in selected}
    out_stats ={f:_stats(out_scaled[f],out_rf) for f in selected}

    # Raw (unscaled, cost-free) statistics for each index column present.
    index_stats={
        idx:{"in_sample":_stats(df.loc[m_in,idx],  in_rf),
             "out_sample":_stats(df.loc[m_out,idx], out_rf)}
        for idx in (indices_list or []) if idx in df.columns
    }

    ew_vec=np.full(len(selected),1/len(selected))
    res={
        "selected_funds":selected,
        "indices_list":indices_list or [],
        "fund_weights":w_dict,
        "ew_weights":{f:1/len(selected) for f in selected},
        "in_sample_stats":in_stats,
        "out_sample_stats":out_stats,
        "in_ew_stats":_stats(in_scaled.dot(ew_vec),  in_rf),
        "out_ew_stats":_stats(out_scaled.dot(ew_vec), out_rf),
        "in_user_stats":_stats(in_scaled.dot(w_vec),  in_rf),
        "out_user_stats":_stats(out_scaled.dot(w_vec), out_rf),
        "index_stats":index_stats,
        "dropped":dropped
    }
    return res


## 5. Excel Export
Creates an Excel workbook with in-sample and out-of-sample statistics for the equal-weight portfolio, the user-weight portfolio, and each selected fund.

In [5]:
# ───────────────────────────────────────────────────────────────
#  5 · EXPORT  (NaN-safe, weight-format fix)
# ───────────────────────────────────────────────────────────────
def export_to_excel(results, full_df, fname,
                    in_start, in_end, out_start, out_end):
    """Write the analysis results to a one-sheet Excel workbook.

    The "Summary" sheet holds a title, the two date windows, one row each for
    the equal-weight and user-weight portfolios, one row per selected fund
    and, when index statistics are available, one row per index.

    Parameters
    ----------
    results : dict
        Output of ``run_analysis``. ``index_stats`` is optional; when it is
        absent or empty the index section is skipped.
    full_df : pd.DataFrame
        Accepted for interface compatibility; not used.
    fname : str
        Destination path of the ``.xlsx`` file.
    in_start, in_end, out_start, out_end
        Window bounds, written verbatim into the header lines.
    """
    buf = BytesIO()
    wb  = xlsxwriter.Workbook(buf, {"in_memory":True})
    ws  = wb.add_worksheet("Summary")

    bold = wb.add_format({"bold":True})
    int0 = wb.add_format({"num_format":"0"})
    num2 = wb.add_format({"num_format":"0.00"})
    red  = wb.add_format({"num_format":"0.00","font_color":"red"})

    def safe(v):
        """Replace NaN / inf with an empty cell value."""
        return "" if (pd.isna(v) or not np.isfinite(v)) else v

    def pct(t):
        """Convert return, vol and drawdown to percent; ratios stay as-is."""
        r,v,s,so,m=t
        return [r*100,v*100,s,so,m*100]

    ws.write_row(0,0,["Vol-Adj Trend Analysis"],bold)
    ws.write_row(1,0,[f"In:  {in_start} → {in_end}"])
    ws.write_row(2,0,[f"Out: {out_start} → {out_end}"])

    hdr=["Name","Weight %","R (IN)%","V (IN)%","Sharpe","Sortino","MDD (IN)%",
         "R (OUT)%","V (OUT)%","Sharpe","Sortino","MDD (OUT)%"]
    row=4; ws.write_row(row,0,hdr,bold); row+=1
    uw=results["fund_weights"]

    def write_row(r,name,wt,tin,tout,b=False):
        ws.write(r,0,name,bold if b else None)
        # An empty or NaN weight is written as a blank cell.
        if wt=="" or pd.isna(wt): ws.write(r,1,"")
        else:
            # Values up to 1 are treated as fractions and shown in percent.
            pct_val=wt*100 if wt<=1 else wt
            ws.write(r,1,pct_val,int0 if pct_val>=1 else num2)
        vals=pct(tin)+pct(tout)
        fmts=[num2,num2,num2,num2,red]*2
        for c,(v,fm) in enumerate(zip(vals,fmts),start=2):
            ws.write(r,c,safe(v),fm)

    write_row(row,"Equal-Weight",1,
              results["in_ew_stats"],results["out_ew_stats"],True); row+=1
    write_row(row,"User-Weight",1,
              results["in_user_stats"],results["out_user_stats"],True); row+=2

    ws.write(row,0,"Funds",bold); row+=1
    for f in results["selected_funds"]:
        write_row(row,f,uw.get(f,""),
                  results["in_sample_stats"][f],
                  results["out_sample_stats"][f])
        row+=1

    # The index section is optional: older result dicts may list indices
    # without carrying their statistics.
    index_stats=results.get("index_stats") or {}
    if results.get("indices_list") and index_stats:
        ws.write(row,0,"INDEX",bold); row+=1
        for idx,sd in index_stats.items():
            write_row(row,idx,"",
                      sd["in_sample"],sd["out_sample"],True); row+=1

    wb.close()
    # Context manager guarantees the file handle is flushed and closed.
    with open(fname,"wb") as fh:
        fh.write(buf.getvalue())
    logging.info("Workbook saved → %s", fname)

## 6. Run Parameters, Widgets & User Inputs
Here we define some IPython widgets for in-sample/out-of-sample dates, target volatility, monthly cost, etc.

### Using This Notebook
1. Run all cells.
2. Call `demo_run()` in a new cell to see a quick example with dummy data.
3. To use your own data, load it into a DataFrame (make sure it has a 'Date' column and decimal returns in other columns), then call `run_analysis()` and `export_to_excel()`.
4. For interactive selection, do:
   ```python
   display(ui_inputs)
   ```
   Then wire the `apply_button` to a callback function that reads the widget values and runs `run_analysis()`.
5. For custom weights, call:
   ```python
   my_weights = get_custom_weights(selected_funds)
   ```
   Then pass `my_weights` into your logic.


In [6]:
# ===============================================================
#              ONE-STOP ANALYSIS UI  (eligibility-synced)
# ===============================================================

import pandas as pd, numpy as np, ipywidgets as widgets, logging
from IPython.display import display, clear_output
from ipyfilechooser import FileChooser

# ───────── session dict ─────────
# Shared state for the UI callbacks: the loaded frame, the detected
# risk-free column, and the last fund selection / custom weights.
session = {"df": None, "rf_col": None,
           "selected_funds": None, "custom_weights": None}

# ───────── 1 · DATA SOURCE ───────
# Toggle between picking a local file and entering a URL.
src_toggle = widgets.ToggleButtons(
    options=[("Local file", "local"), ("GitHub raw URL", "url")],
    description="Source:"
)
file_chooser = FileChooser(title="Pick .csv")
url_box  = widgets.Text(description="Raw-URL:", placeholder="https://…/file.csv")
load_btn = widgets.Button(description="Load CSV", button_style="success")
load_out = widgets.Output()   # receives the load status / error messages

def _show_src(ch):
    """Show the file chooser for "local" and the URL box for "url"; hide the other."""
    file_chooser.layout.display = "block" if ch["new"] == "local" else "none"
    url_box.layout.display      = "block" if ch["new"] == "url"   else "none"
src_toggle.observe(_show_src, names="value")
# Apply the visibility once for the toggle's initial value.
_show_src({"new": src_toggle.value})

def _load(_):
    """Click handler for "Load CSV".

    Reads the chosen file or URL, detects the risk-free column and stores
    both in ``session``. Any previous selection, custom weights and the
    manual-pick table are discarded so they cannot refer to columns of an
    older dataset. On failure the whole session is reset and the error is
    printed into ``load_out``.
    """
    with load_out:
        clear_output()
        try:
            path = file_chooser.selected if src_toggle.value=="local" else url_box.value.strip()
            if not path:  print("⚠️ choose file / URL"); return
            if src_toggle.value=="url" and not path.lower().endswith(".csv"):
                print("⚠️ URL must end with .csv"); return
            df = load_csv(path)                            # helper
            rf = identify_risk_free_fund(df)
            session.update(df=df, rf_col=rf,
                           selected_funds=None, custom_weights=None)
            # Rows built for the previous dataset are stale; the run handler
            # rebuilds the table when it finds it empty.
            fund_table.children = []
            print(f"✅ Loaded {len(df):,} rows × {df.shape[1]} cols | RF → {rf}")
        except Exception as e:
            # Broad catch is deliberate: every load problem is reported to the
            # user in the output widget instead of surfacing as a traceback.
            session.update(df=None, rf_col=None,
                           selected_funds=None, custom_weights=None)
            print("❌", e)
load_btn.on_click(_load)

# ───────── 2 · PARAMETERS ─────────
# Number of trailing columns the run handler treats as indices (0-10).
index_cnt   = widgets.BoundedIntText(0, min=0, max=10, description="# Indices:")
# Window bounds, entered as text; the run handler validates them as YYYY-MM.
in_start    = widgets.Text("2005-07", description="In Start:")
in_end      = widgets.Text("2008-06", description="In End:")
out_start   = widgets.Text("2008-07", description="Out Start:")
out_end     = widgets.Text("2009-06", description="Out End:")
# Passed to run_analysis as target_vol and monthly_cost.
target_vol   = widgets.FloatText(0.25,  description="Target Vol:")
monthly_cost = widgets.FloatText(0.0033, description="Monthly Cost:")

# ───────── 3 · FUND SELECTION ─────
mode_dd = widgets.Dropdown(
    options=[("All funds", "all"),
             ("Random sample", "random"),
             ("Manual pick",   "manual")],
    value="all", description="Mode:"
)
rand_n      = widgets.BoundedIntText(5, min=2, max=100, description="Sample N:")   # sample size for "random"
fund_table  = widgets.VBox([])                   # one checkbox + weight row per fund ("manual")
total_lbl   = widgets.Label("Total = 0 %")       # running sum of the ticked weights

def _toggle_vis(_=None):
    """Show the sample-size box only in "random" mode and the fund table
    plus total label only in "manual" mode."""
    rand_n.layout.display         = "block" if mode_dd.value == "random" else "none"
    show                           = "block" if mode_dd.value == "manual" else "none"
    fund_table.layout.display      = show
    total_lbl.layout.display       = show
mode_dd.observe(_toggle_vis, names="value")
# Apply the visibility once for the dropdown's initial value.
_toggle_vis()

def _eligible_pool() -> list[str]:
    """List the funds that pass ``select_funds`` for the dates currently typed.

    Every column of the loaded frame except ``Date`` and the risk-free column
    is offered as a candidate. Each date box value gets ``"-01"`` appended
    before it is handed on. Returns an empty list when no data is loaded.
    """
    frame = session["df"]
    risk_free = session["rf_col"]
    if frame is None:
        return []

    excluded = ("Date", risk_free)
    candidates = [col for col in frame.columns if col not in excluded]
    bounds = [box.value + "-01" for box in (in_start, in_end, out_start, out_end)]
    return select_funds(frame, risk_free, candidates, *bounds, "all")

def _build_manual(*_):
    """(Re)build the manual-pick table: one checkbox + weight box per eligible fund.

    Does nothing unless the mode is "manual" and data is loaded. The date
    boxes trigger this on every change, including half-typed values; when the
    current dates cannot be evaluated the existing table is left as it is.
    """
    if mode_dd.value != "manual" or session["df"] is None: return
    try:
        valid = _eligible_pool()
    except ValueError:
        # Dates are incomplete or invalid right now; wait for the next change.
        return
    rows, cbxs, wbx = [], [], []
    def _update_total(*_):
        # Sum only the weights whose checkbox is ticked.
        total_lbl.value = f"Total = {sum(w.value for c,w in zip(cbxs,wbx) if c.value)} %"
    fund_table.children = []
    for f in valid:
        cb = widgets.Checkbox(description=f, layout=widgets.Layout(width="200px"))
        wt = widgets.BoundedIntText(0, min=0, max=100,
                                    layout=widgets.Layout(width="60px"), disabled=True)
        # Ticking a fund enables its weight box; `box=wt` binds the current
        # widget so each lambda keeps its own row.
        cb.observe(lambda ch, box=wt: (setattr(box,"disabled",not ch["new"]), _update_total()),
                   names="value")
        wt.observe(_update_total, names="value")
        cbxs.append(cb); wbx.append(wt); rows.append(widgets.HBox([cb, wt]))
    fund_table.children = rows
    _update_total()

# rebuild table when mode toggles to manual or dates change
mode_dd.observe(lambda ch: _build_manual() if ch["new"]=="manual" else None,
                names="value")
for w in (in_start, in_end, out_start, out_end):
    w.observe(_build_manual, names="value")

# ───────── 4 · RUN ANALYSIS ───────
run_btn = widgets.Button(description="Run Analysis", button_style="success")
run_out = widgets.Output(layout={"border":"1px solid #999",
                                 "height":"340px", "overflow_y":"auto"})

def _run(_):
    """Click handler for "Run Analysis".

    Validates the inputs, resolves the fund selection for the chosen mode,
    runs the analysis and exports the workbook. Every user-facing problem is
    printed into ``run_out`` and aborts the run.
    """
    with run_out:
        clear_output()
        df, rf = session["df"], session["rf_col"]
        if df is None:
            print("⚠️ Load data first"); return

        # validate dates
        try:
            bounds = [pd.to_datetime(s+"-01", format="%Y-%m-%d", errors="raise")
                      for s in (in_start.value, in_end.value, out_start.value, out_end.value)]
        except Exception:
            print("❌ dates must be YYYY-MM"); return
        # A reversed window would contain no months at all.
        if bounds[0] > bounds[1] or bounds[2] > bounds[3]:
            print("❌ start month must not be after end month"); return

        # indices list
        # NOTE(review): these trailing columns are not excluded from the
        # eligible fund pool below — confirm whether that is intended.
        idx_n = index_cnt.value
        indices_list = df.columns.drop("Date").to_list()[-idx_n:] if idx_n else []

        # eligible pool for the current window
        try:
            valid = _eligible_pool()
        except ValueError as e:
            print("❌", e); return
        if not valid:
            print("❌ No eligible funds for this window."); return

        # selection
        if mode_dd.value == "all":
            sel, custom_w = valid, None

        elif mode_dd.value == "random":
            if rand_n.value > len(valid):
                print(f"❌ Only {len(valid)} eligible funds; Sample N exceeds that."); return
            sel = list(np.random.choice(valid, rand_n.value, replace=False))
            custom_w = None

        else:
            sel, custom_w = [], {}
            if not fund_table.children: _build_manual()
            for row in fund_table.children:
                cb, wt = row.children
                if cb.value:
                    sel.append(cb.description)
                    custom_w[cb.description] = wt.value
            if not sel:
                print("⚠️ Tick at least one fund."); return
            if sum(custom_w.values()) != 100:
                print("⚠️ Weights must sum to 100 %."); return

        session.update(selected_funds=sel, custom_weights=custom_w)

        # weights
        try:
            w_dict, w_vec = prepare_weights(sel, custom_w)
        except ValueError as e:
            print("❌", e); return

        # run analysis + export; report failures the same way as above
        fname = f"IS_{in_start.value}_{out_start.value}.xlsx"
        try:
            res = run_analysis(
                df, sel, w_vec, w_dict, rf,
                in_start.value, in_end.value,
                out_start.value, out_end.value,
                target_vol.value, monthly_cost.value,
                indices_list
            )
            export_to_excel(res, df, fname,
                            in_start.value, in_end.value,
                            out_start.value, out_end.value)
        except (ValueError, OSError) as e:
            print("❌", e); return

        # run_analysis may drop funds, so report the count it actually used
        print("✅ analysis complete |", len(res["selected_funds"]), "funds")
        print("Workbook saved as", fname)

run_btn.on_click(_run)

# ───────── DISPLAY PANEL ─────────
# Lay out the whole UI top to bottom: data loading, parameters,
# fund selection, then the run button with its output area.
display(widgets.VBox([
    widgets.HTML("<h4>1. Load data</h4>"),
    src_toggle, file_chooser, url_box, load_btn, load_out,
    widgets.HTML("<hr><h4>2. Parameters</h4>"),
    widgets.HBox([index_cnt]),
    widgets.HBox([in_start, in_end, out_start, out_end]),
    widgets.HBox([target_vol, monthly_cost]),
    widgets.HTML("<hr><h4>3. Fund selection</h4>"),
    widgets.HBox([mode_dd, rand_n]),
    fund_table, total_lbl,
    widgets.HTML("<hr>"),
    run_btn,
    run_out
]))


VBox(children=(HTML(value='<h4>1. Load data</h4>'), ToggleButtons(description='Source:', options=(('Local file…