In [1]:

import warnings, random
import pandas as pd

def fill_short_gaps_with_zero(series):
    """
    Zero-fill short runs of missing values in a pandas Series.

    A run of one or two consecutive NaNs is replaced with 0.0; a run of
    three or more consecutive NaNs is left untouched. The input Series is
    not modified; a filled copy is returned.
    """
    missing = series.isna()
    # Each observed value opens a new block, so the NaNs that follow it share
    # its block id; NaNs before the first observation fall into block 0.
    block_id = (~missing).cumsum()
    # Broadcast the NaN count of every block back onto its rows.
    gap_size = missing.astype(int).groupby(block_id).transform('sum')

    result = series.copy()
    result[missing & (gap_size <= 2)] = 0.0
    return result
    
import warnings
import random

def select_funds(
    df,                 # full DataFrame with a datetime64 'Date' column
    rf_col,             # name of risk-free column (accepted but not used here)
    fund_columns,       # iterable of candidate fund column names
    in_sdate, in_edate, # pd.Timestamps bounding the in-sample window (inclusive)
    out_sdate, out_edate,# pd.Timestamps bounding the out-sample window (inclusive)
    selection_mode='all',
    random_n=5
):
    """
    Filter candidate fund columns and return the selected names.

    Steps:
      1) Start from ``fund_columns``.
      2) Drop any name containing 'index' (case-insensitive).
      3) Keep only funds with no NaN at all in both windows.
      4) Keep only funds whose longest NaN run is < 3 in each window.
         (Given step 3 this check cannot reject anything; it is kept so the
         debug output and the pipeline shape stay the same.)
      5) Apply ``selection_mode``.

    Parameters
    ----------
    df : DataFrame containing 'Date' and every candidate column.
    rf_col : unused by this function.
    fund_columns : any iterable of column names (list, tuple, Index, ...).
    in_sdate, in_edate, out_sdate, out_edate : inclusive window bounds.
    selection_mode : 'all' returns every surviving fund; 'random' returns
        ``random_n`` of them sampled without replacement; any other value
        currently behaves like 'all'.
    random_n : sample size for 'random' mode.

    Returns
    -------
    list of selected fund names.

    Warns
    -----
    UserWarning when 'random' mode is requested with strictly fewer valid
    funds than ``random_n`` (all valid funds are returned in that case).
    """
    def _longest_nan_run(values):
        # Group each observation with the NaNs that follow it and count them.
        flags = values.isna().astype(int)
        runs = flags.groupby((flags == 0).cumsum()).sum()
        return runs.max() if not runs.empty else 0

    # Step 1: base list (list() accepts any iterable, not only lists)
    candidates = list(fund_columns)
    print(f"DEBUG [select_funds]: initial candidates (n={len(candidates)}): {candidates}")

    # Step 2: drop any with "index" in name (case-insensitive)
    no_index = [f for f in candidates if 'index' not in f.lower()]
    print(f"DEBUG [select_funds]: after dropping 'index' (n={len(no_index)}): {no_index}")

    # The two windows are identical for every fund, so slice them once.
    in_window = df[(df['Date'] >= in_sdate) & (df['Date'] <= in_edate)]
    out_window = df[(df['Date'] >= out_sdate) & (df['Date'] <= out_edate)]

    # Step 3: full-history check (no NaN at all in each window)
    full_hist = [
        f for f in no_index
        if in_window[f].notna().all() and out_window[f].notna().all()
    ]
    print(f"DEBUG [select_funds]: after full‐history check (n={len(full_hist)}): {full_hist}")

    # Step 4: no 3-consecutive-NaNs check
    after_run_check = [
        f for f in full_hist
        if _longest_nan_run(in_window[f]) < 3 and _longest_nan_run(out_window[f]) < 3
    ]
    print(f"DEBUG [select_funds]: after run‐length check (n={len(after_run_check)}): {after_run_check}")

    # Step 5: selection_mode
    if selection_mode == 'random':
        if len(after_run_check) <= random_n:
            # Only warn when the pool is genuinely smaller than requested;
            # an exact match is not a shortfall.
            if len(after_run_check) < random_n:
                warnings.warn(
                    f"Fewer valid funds ({len(after_run_check)}) than sample size ({random_n}). Returning all."
                )
            return after_run_check
        return random.sample(after_run_check, random_n)

    # 'all' and the (not yet implemented) manual mode both return everything.
    return after_run_check

# Confirmation message printed once this cell has defined the functions above.
print("select_funds (replaced) is ready.")


select_funds (replaced) is ready.


In [None]:


import warnings, random
import pandas as pd

def fill_short_gaps_with_zero(series, max_gap=2, fill_value=0.0):
    """
    Fill short runs of missing values in a pandas Series.

    Wherever there is a run of at most ``max_gap`` consecutive NaNs, the
    NaNs are replaced with ``fill_value``. Longer runs are left intact.
    With the defaults this fills runs of 1 or 2 NaNs with 0.0 and leaves
    runs of 3 or more untouched.

    Parameters
    ----------
    series : pandas Series to fill (not modified).
    max_gap : longest run of consecutive NaNs that is still filled.
    fill_value : value written into the filled positions.

    Returns
    -------
    A copy of ``series`` with the short gaps filled.
    """
    isnan = series.isna()
    # Every observed value starts a new group; the NaNs that follow belong to
    # it, so the per-group NaN count equals the length of that NaN run.
    run_lengths = isnan.astype(int).groupby((~isnan).cumsum()).transform('sum')
    filled = series.copy()
    filled[isnan & (run_lengths <= max_gap)] = fill_value
    return filled

def select_funds(
    df, rf_col, fund_columns,
    in_sdate, in_edate, out_sdate, out_edate,
    selection_mode='all', random_n=5
):
    """
    Select funds whose data has no long gaps in either analysis window.

    For every candidate, gaps of 1-2 consecutive NaNs inside each window are
    zero-filled via fill_short_gaps_with_zero; the fund is kept only when no
    NaN run of length >= 3 remains in the in-sample or out-sample window.

    Parameters
    ----------
    df             : DataFrame with a 'Date' column and every candidate column
    rf_col         : name of the risk-free column (not used by this function)
    fund_columns   : iterable of candidate fund column names
    in_sdate       : in-sample window start (inclusive)
    in_edate       : in-sample window end (inclusive)
    out_sdate      : out-sample window start (inclusive)
    out_edate      : out-sample window end (inclusive)
    selection_mode : 'all' / 'random' / 'manual' ('manual' currently
                     behaves like 'all')
    random_n       : sample size when selection_mode == 'random'

    Returns
    -------
    list of selected fund names.

    Warns
    -----
    UserWarning when 'random' mode is requested with strictly fewer valid
    funds than ``random_n`` (all valid funds are returned in that case).
    """
    def _max_unfilled_run(window_series):
        # Longest NaN run that survives short-gap filling (0 if none/empty).
        filled = fill_short_gaps_with_zero(window_series)
        flags = filled.isna().astype(int)
        runs = flags.groupby((flags == 0).cumsum()).sum()
        return runs.max() if not runs.empty else 0

    # 1) Initial fund candidates (list() accepts any iterable)
    all_fund_cols = list(fund_columns)
    print(f"DEBUG [select_funds]: initial candidates (n={len(all_fund_cols)}): {all_fund_cols}")

    # 2) Slice and Date-index both windows once; they do not depend on the fund
    in_by_date = df[(df['Date'] >= in_sdate) & (df['Date'] <= in_edate)].set_index('Date')
    out_by_date = df[(df['Date'] >= out_sdate) & (df['Date'] <= out_edate)].set_index('Date')

    # 3) Per fund: fill short gaps in both windows, then measure what is left
    funds_after_run_check = []
    for f in all_fund_cols:
        max_run_in = _max_unfilled_run(in_by_date[f])
        max_run_out = _max_unfilled_run(out_by_date[f])

        # Debug output so a rejected fund can be traced to the offending window
        print(f"DEBUG [select_funds]: '{f}' max consecutive NaNs in in-sample = {max_run_in}")
        print(f"DEBUG [select_funds]: '{f}' max consecutive NaNs in out-sample = {max_run_out}")

        # Keep the fund only if BOTH windows have max_run < 3
        if (max_run_in < 3) and (max_run_out < 3):
            funds_after_run_check.append(f)

    print(f"DEBUG [select_funds]: after run‐length check (n={len(funds_after_run_check)}): {funds_after_run_check}")

    # 4) Apply selection_mode
    if selection_mode == 'random':
        if len(funds_after_run_check) <= random_n:
            # An exact match is not a shortfall, so warn only when strictly fewer.
            if len(funds_after_run_check) < random_n:
                warnings.warn(
                    f"Fewer valid funds ({len(funds_after_run_check)}) than sample size ({random_n}). Returning all."
                )
            return funds_after_run_check
        return random.sample(funds_after_run_check, random_n)

    # 5) 'all' and the (not yet implemented) manual mode return everything.
    return funds_after_run_check

# Confirmation message printed once this cell has defined the functions above.
print("select_funds (gap‐filled, windowed) is defined.")


In [2]:
import numpy as np
import pandas as pd
import logging

def run_analysis(
    df, in_start, in_end, out_start, out_end,
    target_vol, monthly_cost,
    selection_mode='all', random_n=5
):
    """
    Run the volatility-scaling analysis for one in-sample / out-sample split.

    Steps:
      1) Parse the four month strings into window bounds and reject
         unparseable ones.
      2) Convert df['Date'] to datetime64 if needed (this mutates ``df``
         in place: rows with unparseable dates are dropped and the frame
         is re-sorted and re-indexed).
      3) Identify the risk-free column with identify_risk_free_fund.
      4) Slice the in-sample and out-sample frames.
      5) Call select_funds on the module-level ``fund_cols`` list.
      6) Compute per-fund scale factors from in-sample volatility, scale
         both windows (net of ``monthly_cost``, floored at -1.0) and compute
         per-fund and portfolio statistics.

    Parameters
    ----------
    df : DataFrame with a 'Date' column plus return columns.
    in_start, in_end, out_start, out_end : strings to which "-01" is
        appended before parsing (e.g. "2005-07"); end bounds are rolled
        to the month end.
    target_vol : volatility target used as the numerator of each scale factor.
    monthly_cost : amount subtracted from every scaled return.
    selection_mode, random_n : forwarded to select_funds.

    Returns
    -------
    dict of results, or None when the dates cannot be parsed, the frame is
    empty, no fund survives selection, or the stats block raises.

    NOTE(review): ``fund_cols`` is read from module scope rather than passed
    in, so the data-loading callback must have run first.
    """

    # (1) Parse input dates
    in_sdate  = pd.to_datetime(in_start + "-01", errors='coerce')
    in_edate  = pd.to_datetime(in_end   + "-01", errors='coerce') + pd.offsets.MonthEnd(0)
    out_sdate = pd.to_datetime(out_start + "-01", errors='coerce')
    out_edate = pd.to_datetime(out_end   + "-01", errors='coerce') + pd.offsets.MonthEnd(0)

    # checkpoint A
    print("CHECKPOINT A: Dates parsed:", in_sdate, in_edate, out_sdate, out_edate)

    # errors='coerce' turns bad input into NaT, which would silently produce
    # empty windows below; stop here with an explicit message instead.
    if any(pd.isna(bound) for bound in (in_sdate, in_edate, out_sdate, out_edate)):
        print("ERROR: one or more dates could not be parsed; expected strings like 'YYYY-MM'.")
        return None

    # (2) Ensure Date is datetime64 (in-place on the caller's frame)
    if not np.issubdtype(df['Date'].dtype, np.datetime64):
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df.dropna(subset=['Date'], inplace=True)
        df.sort_values(by='Date', inplace=True)
        df.reset_index(drop=True, inplace=True)

    if df.empty:
        # iloc[0] below would raise IndexError on an empty frame.
        print("ERROR: DataFrame has no rows with a valid Date. Exiting run_analysis.")
        return None

    # checkpoint B
    print("CHECKPOINT B: Date column is datetime64, first dates:", df['Date'].iloc[0], df['Date'].iloc[-1])

    # (3) Identify risk-free column
    rf_col = identify_risk_free_fund(df)
    print(f"INFO: Identified '{rf_col}' as the risk-free column (lowest stdev).")

    # (4) Build in-sample & out-sample slices
    in_sample_df  = df[(df['Date'] >= in_sdate)  & (df['Date'] <= in_edate)].copy()
    out_sample_df = df[(df['Date'] >= out_sdate) & (df['Date'] <= out_edate)].copy()
    in_sample_rf  = in_sample_df[rf_col]
    out_sample_rf = out_sample_df[rf_col]

    print(f"CHECKPOINT C: in_sample rows = {len(in_sample_df)}, out_sample rows = {len(out_sample_df)}")

    # (5) Assemble fund_cols and call select_funds
    all_fund_cols = fund_cols.copy()
    print(f"CHECKPOINT D: about to call select_funds with {len(all_fund_cols)} candidates")

    selected_funds = select_funds(
        df,
        rf_col,
        fund_columns=all_fund_cols,
        in_sdate=in_sdate,
        in_edate=in_edate,
        out_sdate=out_sdate,
        out_edate=out_edate,
        selection_mode=selection_mode,
        random_n=random_n
    )

    print(f"CHECKPOINT E: select_funds returned {len(selected_funds)} funds → {selected_funds}")

    if not selected_funds:
        print("No valid funds after select_funds. Exiting run_analysis.")
        return None

    # (6) Compute scale_factors BEFORE entering the stats try/except
    scale_factors = {}
    for fund in selected_funds:
        fund_in_rets = in_sample_df[fund].dropna()
        current_vol  = annualize_volatility(fund_in_rets)
        # A missing or zero volatility cannot be scaled; leave the fund as is.
        if pd.isna(current_vol) or current_vol == 0:
            scale_factors[fund] = 1.0
        else:
            scale_factors[fund] = target_vol / current_vol

    print("CHECKPOINT F: scale_factors computed (showing first 5):",
          {f: scale_factors[f] for f in selected_funds[:5]})

    # Pre-allocate DataFrames for scaled returns
    in_sample_scaled  = pd.DataFrame(index=in_sample_df.index, columns=selected_funds)
    out_sample_scaled = pd.DataFrame(index=out_sample_df.index, columns=selected_funds)

    # ────── TRY/EXCEPT AROUND ONLY THE "STATS" PORTION ──────
    try:
        # Scale returns (with cost); the in-sample factor is reused out-of-sample
        for fund in selected_funds:
            sf = scale_factors[fund]
            adj_in  = in_sample_df[fund] * sf - monthly_cost
            adj_in[adj_in < -1.0] = -1.0   # floor scaled returns at -1.0
            in_sample_scaled[fund] = adj_in

            if not out_sample_df.empty:
                adj_out = out_sample_df[fund] * sf - monthly_cost
                adj_out[adj_out < -1.0] = -1.0
                out_sample_scaled[fund] = adj_out

        # Helper for stats
        def compute_stats(series, rf_series):
            r   = annualize_return(series)
            v   = annualize_volatility(series)
            sr  = sharpe_ratio(series, rf_series)
            so  = sortino_ratio(series, rf_series)
            mdd = max_drawdown(series)
            return (r, v, sr, so, mdd)

        # Per-fund stats: in-sample scaled, out-sample scaled, out-sample raw
        in_sample_stats = {
            fund: compute_stats(in_sample_scaled[fund], in_sample_rf)
            for fund in selected_funds
        }
        out_sample_stats = {
            fund: compute_stats(out_sample_scaled[fund], out_sample_rf)
            for fund in selected_funds
        }
        out_sample_stats_raw = {
            fund: compute_stats(out_sample_df[fund], out_sample_rf)
            for fund in selected_funds
        }

        # Equal-weight portfolio
        ew_w = np.array([1.0/len(selected_funds)] * len(selected_funds))
        in_ew_port      = calc_portfolio_returns(ew_w, in_sample_scaled[selected_funds])
        out_ew_port     = calc_portfolio_returns(ew_w, out_sample_scaled[selected_funds])
        out_ew_port_raw = calc_portfolio_returns(ew_w, out_sample_df[selected_funds])

        in_ew_stats      = compute_stats(in_ew_port, in_sample_rf)
        out_ew_stats     = compute_stats(out_ew_port, out_sample_rf)
        out_ew_stats_raw = compute_stats(out_ew_port_raw, out_sample_rf)

        # User-weight placeholder (currently identical to equal weights)
        user_weight_dict = {f: 1.0/len(selected_funds) for f in selected_funds}
        custom_w = np.array([user_weight_dict[f] for f in selected_funds])

        in_user_port      = calc_portfolio_returns(custom_w, in_sample_scaled[selected_funds])
        out_user_port     = calc_portfolio_returns(custom_w, out_sample_scaled[selected_funds])
        out_user_port_raw = calc_portfolio_returns(custom_w, out_sample_df[selected_funds])

        in_user_stats      = compute_stats(in_user_port, in_sample_rf)
        out_user_stats     = compute_stats(out_user_port, out_sample_rf)
        out_user_stats_raw = compute_stats(out_user_port_raw, out_sample_rf)

        results = {
            'selected_funds':       selected_funds,
            'in_sample_scaled':     in_sample_scaled,
            'out_sample_scaled':    out_sample_scaled,
            'in_sample_stats':      in_sample_stats,
            'out_sample_stats':     out_sample_stats,
            'out_sample_stats_raw': out_sample_stats_raw,
            'in_ew_stats':          in_ew_stats,
            'out_ew_stats':         out_ew_stats,
            'out_ew_stats_raw':     out_ew_stats_raw,
            'in_user_stats':        in_user_stats,
            'out_user_stats':       out_user_stats,
            'out_user_stats_raw':   out_user_stats_raw
        }

    except Exception as e:
        # Deliberate best-effort: report the failure and signal it with None.
        print("ERROR inside stats-block:", e)
        return None

    # Everything succeeded
    return results

# Confirmation message printed once this cell has defined run_analysis.
print("run_analysis (with checkpoints) is defined.")


run_analysis (with checkpoints) is defined.


In [3]:
from IPython.display import clear_output
from IPython.utils.capture import capture_output

def on_apply_clicked(b):
    """
    Button callback: read the widget values, run the analysis, print a summary.

    Everything run_analysis prints is captured and then replayed into
    ``output_area``. If run_analysis raises, the captured text is still
    shown, followed by the error message, and the callback returns without
    exporting. On success the equal-weight summary is printed and the
    results are handed to export_to_excel.

    Parameters
    ----------
    b : the clicked button (unused).
    """
    with output_area:
        clear_output()
        # 1) Read widget values
        in_start_val     = in_sample_start.value.strip()
        in_end_val       = in_sample_end.value.strip()
        out_start_val    = out_sample_start.value.strip()
        out_end_val      = out_sample_end.value.strip()
        target_vol_val   = target_vol_widget.value
        monthly_cost_val = monthly_cost_widget.value
        mode_val         = selection_mode_widget.value
        rnd_n_val        = random_sample_size_widget.value

        # 2) Print parameter summary
        print("Running analysis with parameters:")
        print(f"  In-Sample:  {in_start_val} → {in_end_val}")
        print(f"  Out-Sample: {out_start_val} → {out_end_val}")
        print(f"  Target Volatility: {target_vol_val}")
        print(f"  Monthly Cost: {monthly_cost_val}")
        print(f"  Selection Mode: {mode_val}")
        if mode_val == 'random':
            print(f"  Random Sample Size: {rnd_n_val}")

        # 3) Capture everything printed by run_analysis (and select_funds).
        #    The exception is only recorded here: printing it inside the
        #    capture block would swallow the message.
        results = None
        failure = None
        with capture_output() as cap:
            try:
                results = run_analysis(
                    df,
                    in_start=in_start_val,
                    in_end=in_end_val,
                    out_start=out_start_val,
                    out_end=out_end_val,
                    target_vol=target_vol_val,
                    monthly_cost=monthly_cost_val,
                    selection_mode=mode_val,
                    random_n=rnd_n_val
                )
            except Exception as e:
                failure = e

        # 4) Replay the captured debug/info text; stderr carries warnings
        print(cap.stdout)
        if cap.stderr:
            print(cap.stderr)

        if failure is not None:
            print("Error inside run_analysis():", failure)
            return

        # 5) If no funds survived, warn
        if results is None or not results.get('selected_funds'):
            print("No valid funds remain after filtering. Check your date range or data.")
            return

        # 6) Summary
        print("Analysis complete. Summary:")
        sf = results['selected_funds']
        if 'in_ew_stats' in results:
            ir, iv, isr, _, _ = results['in_ew_stats']
            print(f"  In-Sample EW → Return: {ir*100:.2f}%, Vol: {iv*100:.2f}%, Sharpe: {isr:.2f}")
        if 'out_ew_stats' in results:
            or_, ov, osr, _, _ = results['out_ew_stats']
            print(f"  Out-Sample EW → Return: {or_*100:.2f}%, Vol: {ov*100:.2f}%, Sharpe: {osr:.2f}")
        print(f"  Funds selected: {len(sf)} → {sf}")

        export_to_excel(results, "InteractiveOutput.xlsx")



# Wire the button once (after all definitions are loaded)
# NOTE(review): no `.on_click(on_apply_clicked)` registration is visible in this
# cell, so the message below is printed without a button being connected here.
# Confirm the callback is registered elsewhere, or add the registration.

print("Run Analysis callback is now wired.")

Run Analysis callback is now wired.


In [None]:
    for fund in selected_funds:
        sf = scale_factors[fund]
        # In-sample
        adj_in = in_sample_df[fund] * sf - monthly_cost
        adj_in[adj_in < -1.0] = -1.0
        in_sample_scaled[fund] = adj_in
        
        # Out-of-sample
        if out_sample_df.shape[0] > 0:
            adj_out = out_sample_df[fund] * sf - monthly_cost
            adj_out[adj_out < -1.0] = -1.0
            out_sample_scaled[fund] = adj_out
    
    # Helper function for stats
    def compute_stats(series, rf_series):
        """
        Bundle the five metrics for one return series into a tuple, in the
        order: annualize_return, annualize_volatility, sharpe_ratio,
        sortino_ratio, max_drawdown. ``rf_series`` is passed only to the
        Sharpe and Sortino helpers.
        """
        return (
            annualize_return(series),
            annualize_volatility(series),
            sharpe_ratio(series, rf_series),
            sortino_ratio(series, rf_series),
            max_drawdown(series),
        )
    
    in_sample_stats = {}
    for fund in selected_funds:
        in_sample_stats[fund] = compute_stats(in_sample_scaled[fund], in_sample_rf)
    
    out_sample_stats = {}
    for fund in selected_funds:
        out_sample_stats[fund] = compute_stats(out_sample_scaled[fund], out_sample_rf)
    
    out_sample_stats_raw = {}
    for fund in selected_funds:
        out_sample_stats_raw[fund] = compute_stats(out_sample_df[fund], out_sample_rf)
    
    # Portfolio (equal-weight)
    ew_w = np.array([1.0/len(selected_funds)]*len(selected_funds))
    in_ew_port = calc_portfolio_returns(ew_w, in_sample_scaled[selected_funds])
    out_ew_port = calc_portfolio_returns(ew_w, out_sample_scaled[selected_funds])
    out_ew_port_raw = calc_portfolio_returns(ew_w, out_sample_df[selected_funds])
    
    in_ew_stats = compute_stats(in_ew_port, in_sample_rf)
    out_ew_stats = compute_stats(out_ew_port, out_sample_rf)
    out_ew_stats_raw = compute_stats(out_ew_port_raw, out_sample_rf)
    
    # Portfolio (user-weighted) - placeholder
    user_weight_dict = {f: 1.0/len(selected_funds) for f in selected_funds}
    custom_w = np.array([user_weight_dict[f] for f in selected_funds])
    in_user_port = calc_portfolio_returns(custom_w, in_sample_scaled[selected_funds])
    out_user_port = calc_portfolio_returns(custom_w, out_sample_scaled[selected_funds])
    out_user_port_raw = calc_portfolio_returns(custom_w, out_sample_df[selected_funds])
    
    in_user_stats = compute_stats(in_user_port, in_sample_rf)
    out_user_stats = compute_stats(out_user_port, out_sample_rf)
    out_user_stats_raw = compute_stats(out_user_port_raw, out_sample_rf)
    
    results = {
        'selected_funds': selected_funds,
        'in_sample_scaled': in_sample_scaled,
        'out_sample_scaled': out_sample_scaled,
        'in_sample_stats': in_sample_stats,
        'out_sample_stats': out_sample_stats,
        'out_sample_stats_raw': out_sample_stats_raw,
        'in_ew_stats': in_ew_stats,
        'out_ew_stats': out_ew_stats,
        'out_ew_stats_raw': out_ew_stats_raw,
        'in_user_stats': in_user_stats,
        'out_user_stats': out_user_stats,
        'out_user_stats_raw': out_user_stats_raw

# Volatility Scaling & Portfolio Analysis

This notebook demonstrates how to:
1. Load and validate data.
2. Handle missing data (short vs. long gaps).
3. Adjust returns to a target volatility in-sample, then apply the same scaling out-of-sample.
4. Compute Sharpe, Sortino, Max Drawdown.
5. Provide multiple fund selection modes (all, random sample, manual).
6. Calculate portfolio results (equal-weight and custom-weight).
7. Output in-sample and out-of-sample results to Excel with formatting.

**Note**: The manual fund selection and custom weights features are partially implemented. In a real interactive workflow, you would wire widget selections and weights into the final analysis.

In [4]:
# ============ 1. SETUP CELL ============

import logging
import sys
import numpy as np
import pandas as pd
import os
import math
import ipywidgets as widgets
from ipywidgets import interact, interactive, VBox, HBox
from IPython.display import display, clear_output
from ipyfilechooser import FileChooser
import datetime
import random
import warnings

# If you need to install these packages on your environment, uncomment:
!{sys.executable} -m pip install --quiet ipywidgets openpyxl xlsxwriter

# For exporting to Excel with styling
import xlsxwriter

# Set up logging to console
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(levelname)s: %(message)s"
)

logging.info("Logging started. Volatility Scaling & Portfolio Analysis Notebook initialized.")

# (Optional) If widgets aren't enabled, run:
# !jupyter nbextension enable --py widgetsnbextension --sys-prefix

print("Setup complete.")

INFO: Logging started. Volatility Scaling & Portfolio Analysis Notebook initialized.
Setup complete.


## 2. Data Loading
Here we create options to load a dataset from a local file or a GitHub repository.

In [5]:
def identify_risk_free_fund(df):
    """
    Identify the risk-free column as the numeric column with the smallest
    standard deviation.

    The first column is skipped (it is expected to be the Date column), and
    non-numeric columns are ignored. A column whose standard deviation is
    undefined (all values missing, or a single observation) is ranked last
    rather than being allowed to win the comparison.

    Parameters
    ----------
    df : DataFrame whose first column is the date and whose remaining
        columns are return series.

    Returns
    -------
    Name of the selected column.

    Raises
    ------
    ValueError if there is no numeric column to choose from.
    """
    numeric_cols = df.iloc[:, 1:].select_dtypes(include=[np.number]).columns
    stdevs = {}
    for col in numeric_cols:
        spread = df[col].std()  # NaNs are skipped by default
        # NaN never compares as smaller or larger, so it could otherwise be
        # picked by min() purely because of column order.
        stdevs[col] = np.inf if pd.isna(spread) else spread

    if not stdevs:
        raise ValueError("No numeric columns available to identify the risk-free column.")

    rf_col = min(stdevs, key=stdevs.get)
    logging.info(f"Identified '{rf_col}' as the risk-free column (lowest stdev).")
    return rf_col


# ------------------------------------------------------------------------------
# 1) Helper to read a local CSV robustly (handles BOMs and minor parsing issues)
# ------------------------------------------------------------------------------
def robust_read_csv(path):
    """
    Read ``path`` as CSV, falling back to progressively more lenient parsers.

    Attempts, in order:
    1. pandas defaults (C engine)
    2. Python engine with BOM-aware 'utf-8-sig' decoding
    3. Same as 2, additionally skipping blank and malformed lines

    A failure of attempt 1 or 2 is printed and the next attempt is tried;
    an exception from attempt 3 propagates to the caller.
    """
    lenient = dict(sep=",", encoding="utf-8-sig", engine="python")

    try:
        frame = pd.read_csv(path)
    except Exception as default_err:
        print("Default read_csv failed:", default_err)
    else:
        return frame

    try:
        frame = pd.read_csv(path, **lenient)
    except Exception as bom_err:
        print("utf-8-sig + python engine failed:", bom_err)
    else:
        return frame

    # Last resort: on_bad_lines requires pandas >= 1.3
    return pd.read_csv(path, skip_blank_lines=True, on_bad_lines="skip", **lenient)

# ------------------------------------------------------------------------------
# 2) Build the widgets
# ------------------------------------------------------------------------------
# Instruction banner shown above the data-source controls.
source_info = widgets.HTML(
    "<b>Step 1: Choose your CSV</b><br>"
    "<i>Remember:</i> If you included any index columns (e.g. S&P 500, MSCI World, SG Trend), they "
    "must appear to the right of all fund columns in your sheet."
)


# Chooses between a local file and a GitHub URL; 'Local' is the initial value.
source_dropdown = widgets.Dropdown(
    options=['Local', 'GitHub'],
    value='Local',
    description='Data Source:',
    style={'description_width': '120px'}
)

# FileChooser for Local mode, rooted at the current working directory
fc = FileChooser(os.getcwd())
fc.title = "<b>Select local CSV file</b>"

# Text box for GitHub raw URL (pre-filled with a default location)
github_text = widgets.Text(
    value=(
        "https://raw.githubusercontent.com/stranske/Trend_Model_Project/"
        "main/data/TrendData.csv"
    ),
    description="GitHub URL:",
    layout=widgets.Layout(width="80%"),
)

# Ask how many index columns are on the far right; the load callback reads
# this value as an int to split the columns into funds and indices.
n_indices_widget = widgets.BoundedIntText(
    value=0,
    min=0,
    max=10,  # adjust if you expect more than 10 indices
    description='# Index cols:',
    style={'description_width': '120px'},
    tooltip="Enter the number of index columns at the far right of your CSV"
)

# Load button
load_button = widgets.Button(description="Load Data", button_style="success")

# Output area for status and debug prints
output_area = widgets.Output()

# ------------------------------------------------------------------------------
# 3) Show/hide widgets depending on source selection
# ------------------------------------------------------------------------------
def on_source_change(change):
    """
    Show the input widget that matches the selected data source.

    ``change["new"]`` holds the new dropdown value: 'Local' shows the file
    chooser and hides the GitHub URL box; any other value does the reverse.
    """
    use_local = change["new"] == "Local"
    fc.layout.display = "block" if use_local else "none"
    github_text.layout.display = "none" if use_local else "block"

# Initially, GitHub textbox is hidden; FileChooser is visible
# (this matches the dropdown's initial value of 'Local').
github_text.layout.display = "none"
fc.layout.display = "block"

# Re-run on_source_change whenever the dropdown's value changes.
source_dropdown.observe(on_source_change, names="value")

# Stack all load controls vertically, in the order they should appear.
ui_load = widgets.VBox([
    source_info,
    source_dropdown,
    fc,
    github_text,
    n_indices_widget,   # ← new widget here
    load_button,
    output_area
])

# ------------------------------------------------------------------------------
# 4) Callback for the Load button (auto-detect date format)
# ------------------------------------------------------------------------------
def on_load_clicked(_):
    """
    Load-button callback: read the CSV, parse the dates, classify the columns.

    Steps:
      a) Read the CSV from the chosen source (local files go through
         robust_read_csv, GitHub URLs through pd.read_csv).
      b) Print the columns and show the first rows.
      c) Locate the date column ('Date' / 'DATE' / 'date'), rename it to
         'Date', and parse it, trying '%m/%d/%Y' first and generic parsing
         as a fallback. Rows whose date cannot be parsed are dropped.
      d) Identify the risk-free column.
      e) Split the remaining columns into fund columns and the
         ``n_indices`` right-most index columns.

    The module-level ``df``, ``rf_col``, ``fund_cols`` and ``indices_list``
    are assigned only after every step succeeded, so a failed load leaves
    any previously loaded data untouched. All messages are written to
    ``output_area``.
    """
    global df, fund_cols, indices_list, rf_col
    with output_area:
        output_area.clear_output()
        src_choice = source_dropdown.value
        n_indices = int(n_indices_widget.value)

        # 4a) Load into a local frame
        try:
            if src_choice == 'Local':
                local_path = fc.selected  # FileChooser's selected path
                if not local_path or not os.path.exists(local_path):
                    print(f"Error: Local file not found:\n  {local_path}")
                    return
                print(f"Loading from local file:\n  {local_path}")
                data = robust_read_csv(local_path)
            else:
                github_url = github_text.value.strip()
                if not github_url:
                    print("Error: Please enter a valid GitHub raw URL.")
                    return
                print(f"Loading from GitHub URL:\n  {github_url}")
                data = pd.read_csv(github_url)
        except Exception as e:
            print("Failed to load CSV:", e)
            return

        # 4b) Debug: print columns & a few rows
        print("Columns found in DataFrame:", data.columns.tolist())
        display(data.head(3))

        # 4c) Identify which column is the date
        date_col = next(
            (candidate for candidate in ("Date", "DATE", "date") if candidate in data.columns),
            None,
        )
        if date_col is None:
            print("Error: No column named 'Date' / 'DATE' / 'date' found.")
            print("Please check the column names above and adjust code accordingly.")
            return

        # The rest of the notebook addresses the column as 'Date', so
        # normalise the detected spelling instead of assuming it.
        if date_col != 'Date':
            data = data.rename(columns={date_col: 'Date'})
            date_col = 'Date'

        # Show the first few raw date strings (un-parsed) so they can be inspected
        raw_samples = data[date_col].dropna().astype(str).head(10).tolist()
        print(f"Raw {date_col} samples (first 10 non‐null): {raw_samples!r}")

        # Strip leading/trailing whitespace from every entry
        data[date_col] = data[date_col].astype(str).str.strip()

        # Attempt strict "%m/%d/%Y" parsing; non-matching rows become NaT
        parsed = pd.to_datetime(data[date_col], format="%m/%d/%Y", errors="coerce")
        num_valid = parsed.notna().sum()
        print(f"Number of rows matching '%m/%d/%Y' exactly: {num_valid} / {len(data)}")

        if num_valid > 0:
            data[date_col] = parsed
            print(f"Parsing with '%m/%d/%Y' succeeded for {num_valid} rows.")
        else:
            print(
                "Warning: No rows matched '%m/%d/%Y'. "
                "Falling back to generic pd.to_datetime(...)."
            )
            data[date_col] = pd.to_datetime(data[date_col], errors="coerce")

        # Drop any rows where parsing still failed
        before_drop = len(data)
        data = data.dropna(subset=[date_col])
        dropped = before_drop - len(data)
        if dropped:
            print(f"Dropped {dropped} rows where '{date_col}' could not be parsed.")

        # Sort and reset index
        data = data.sort_values(by=date_col).reset_index(drop=True)

        # Show the final parsed dates
        print(f"After parsing, first 3 {date_col} values:")
        print(data[[date_col]].head(3))
        print(f"Loaded {len(data)} rows successfully.")

        # 4d) Identify risk-free column
        try:
            rf_name = identify_risk_free_fund(data)
            print(f"Identified risk-free column as: '{rf_name}'")
        except Exception as e:
            print("Error identifying risk-free column:", e)
            return

        # 4f) Determine fund columns vs index columns based on n_indices
        all_cols = data.columns.tolist()
        # 'Date' and the risk-free column must be present; everything else is a candidate.
        if 'Date' not in all_cols or rf_name not in all_cols:
            print("Error: 'Date' or risk-free column not found in DataFrame columns.")
            return

        remaining = [c for c in all_cols if c not in ['Date', rf_name]]

        if n_indices > len(remaining):
            print(
                f"Error: You asked for {n_indices} index columns, but only "
                f"{len(remaining)} columns remain after 'Date' and '{rf_name}'."
            )
            return

        if n_indices > 0:
            # Index columns are the right-most n_indices of the remaining columns
            indices = remaining[-n_indices:]
            funds   = remaining[:-n_indices]
        else:
            indices = []
            funds   = remaining[:]

        # Publish to module scope only now that every step has succeeded
        df, rf_col, fund_cols, indices_list = data, rf_name, funds, indices

        print("\n>> Debug (post‐load): fund_cols =", fund_cols)
        print(">> Debug (post‐load): indices_list =", indices_list, "\n")

        print(f"Detected fund columns ({len(fund_cols)}): {fund_cols}")
        print(f"Detected index columns ({len(indices_list)}): {indices_list}")
        print(
            "Data loaded and classified successfully.\n"
            "Proceed to Step 2 (Run Analysis)."
        )

        # Confirm that df is now in global scope
        print(">> df defined with", len(df), "rows and columns:", df.columns.tolist())


# 5) Wire up and display the UI
# Register the load callback on the button; re-running this cell registers it
# again on whatever `load_button` object exists at that time.
load_button.on_click(on_load_clicked)


# Render the assembled load controls in the cell output.
display(ui_load)


VBox(children=(HTML(value='<b>Step 1: Choose your CSV</b><br><i>Remember:</i> If you included any index column…

In [6]:
import pandas as pd
import numpy as np

# ────────────── 1) Analysis windows ──────────────
# Window ends are rolled forward to the last day of their month via MonthEnd(0).
in_sdate  = pd.to_datetime("2005-07-01", errors="coerce")
in_edate  = pd.to_datetime("2008-06-01", errors="coerce") + pd.offsets.MonthEnd(0)
out_sdate = pd.to_datetime("2008-07-01", errors="coerce")
out_edate = pd.to_datetime("2009-06-01", errors="coerce") + pd.offsets.MonthEnd(0)

# Row masks for the two windows (inclusive on both ends); computed once and
# reused for every fund below.
in_window_rows  = (df['Date'] >= in_sdate)  & (df['Date'] <= in_edate)
out_window_rows = (df['Date'] >= out_sdate) & (df['Date'] <= out_edate)


def _longest_nan_run(values):
    """Return the length of the longest unbroken run of NaNs in `values` (0 if empty)."""
    nan_flags = values.isna()
    # Every non-NaN entry opens a new block, so summing the NaN flags inside a
    # block yields the length of the NaN run that follows that entry.
    block_ids = (~nan_flags).cumsum()
    run_lengths = nan_flags.astype(int).groupby(block_ids).sum()
    if run_lengths.empty:
        return 0
    return run_lengths.max()


# ────────────── 2) Funds with a complete history in both windows ──────────────
# Relies on 'rf_col' (risk-free column name) and 'indices_list' (index column
# names) having been defined by an earlier cell.
non_fund_columns = ["Date", rf_col] + indices_list
all_candidates = [col for col in df.columns if col not in non_fund_columns]

# Keep a fund only if it has no NaN in the in-sample AND the out-sample window.
post_full_history_funds = [
    fund
    for fund in all_candidates
    if df.loc[in_window_rows, fund].notna().all()
    and df.loc[out_window_rows, fund].notna().all()
]

print(f"(1) Funds with absolutely no NaNs over both windows (count = {len(post_full_history_funds)}):")
print(post_full_history_funds)


# ────────────── 3) Longest NaN run per fund and window, on that list only ──────────────
flagged_exact = []  # (fund_name, window_name, max_consecutive_nans)
for fund in post_full_history_funds:
    for label, rows in (("in-sample", in_window_rows), ("out-sample", out_window_rows)):
        longest = _longest_nan_run(df.loc[rows, fund])
        if longest >= 3:
            flagged_exact.append((fund, label, int(longest)))


# ────────────── 4) Report results ──────────────
if not flagged_exact:
    print("\nNone of the post‐full‐history funds has a run of ≥ 3 NaNs in either window.")
else:
    print("\nFunds that actually *do* have 3+ consecutive NaNs inside one of the windows:")
    for (fund_name, window_name, length) in flagged_exact:
        print(f"  • {fund_name!r} → {length} consecutive NaNs in {window_name}")


(1) Funds with absolutely no NaNs over both windows (count = 27):
['Quantum Capital', 'Quantum Group', 'Echo Strategies', 'Echo Group', 'Meridian Strategies', 'Axiom LP', 'Crescent Partners', 'Forge Advisors', 'Sentinel Global', 'Axiom Advisors', 'Vista Holdings', 'Sentinel Advisors', 'Crescent Group', 'Adaptive Holdings', 'Vista Capital', 'Forge Investments', 'Ascent Global', 'Vista Global', 'Forge Group', 'Axiom Holdings', 'Adaptive Global', 'Ascent Advisors', 'Quantum Advisors', 'Ascent Group', 'Quantum Holdings', 'Sentinel LP', 'Adaptive LP']

None of the post‐full‐history funds has a run of ≥ 3 NaNs in either window.


## 3. Utility Functions
Here we define date parsing, consecutive gap checks, data filling, risk-free identification, return calculations, etc.

In [None]:
import pandas as pd

# ─────────── Analysis window boundaries ───────────
# Window ends are rolled forward to the last day of their month via MonthEnd(0).
in_sdate  = pd.to_datetime("2005-07-01", errors="coerce")
in_edate  = pd.to_datetime("2008-06-01", errors="coerce") + pd.offsets.MonthEnd(0)
out_sdate = pd.to_datetime("2008-07-01", errors="coerce")
out_edate = pd.to_datetime("2009-06-01", errors="coerce") + pd.offsets.MonthEnd(0)

# 'df' and 'fund_cols' must already be defined by an earlier cell.

# (label, row mask) for each window, checked in this order for every fund.
windows = (
    ('in-sample',  (df['Date'] >= in_sdate)  & (df['Date'] <= in_edate)),
    ('out-sample', (df['Date'] >= out_sdate) & (df['Date'] <= out_edate)),
)

flagged = []  # (fund_name, window_name, max_consecutive_nans) tuples

for f in fund_cols:
    for label, rows in windows:
        nan_flags = df.loc[rows, f].isna()
        # Each non-NaN value starts a new group; the per-group sum of NaN flags
        # is therefore the length of the NaN block trailing that value.
        block_lengths = nan_flags.astype(int).groupby((~nan_flags).cumsum()).sum()
        longest = block_lengths.max() if not block_lengths.empty else 0
        if longest >= 3:
            flagged.append((f, label, int(longest)))

# Report any fund/window pair with a run of three or more NaNs.
if not flagged:
    print("All funds have at most 2 consecutive NaNs in both in‐sample and out‐sample windows.")
else:
    print("Funds with a 3+ consecutive‐NaN run inside one of the windows:")
    for (fund_name, window_name, length) in flagged:
        print(f"  • {fund_name!r} has {length} consecutive NaNs in the {window_name} window")

In [7]:

def consecutive_gaps(series, threshold=3):
    """
    Report whether `series` contains a run of at least `threshold` missing values.

    The values are scanned in iteration order, so the caller is expected to
    pass them already sorted chronologically. Returns True as soon as the
    current run of NaNs reaches `threshold`, otherwise False.
    """
    run_length = 0
    for value in series:
        # Extend the current run on a missing value, reset it on anything else.
        run_length = run_length + 1 if pd.isna(value) else 0
        if run_length >= threshold:
            return True
    return False

def fill_short_gaps_with_zero(series, max_short_gap=2):
    """
    Replace short runs of missing values with 0.0.

    Parameters
    ----------
    series : pandas.Series
        Values in chronological order. Any index is accepted (dates,
        non-contiguous integers, ...): the scan is purely positional.
    max_short_gap : int, default 2
        Longest run of consecutive NaNs that is still filled. Runs longer
        than this are left as NaN.

    Returns
    -------
    pandas.Series
        A copy of `series` with the short gaps filled; the input is not modified.

    Notes
    -----
    An earlier cell of this notebook defines a function with the same name
    (without the `max_short_gap` parameter); running this cell replaces it.
    """
    filled = series.copy()
    # Work on a plain boolean array so the scan never goes through the Series'
    # label-based `[]` lookup, which fails or picks the wrong row whenever the
    # index is not exactly 0..n-1.
    missing = filled.isna().to_numpy()
    n = len(missing)
    i = 0
    while i < n:
        if not missing[i]:
            i += 1
            continue
        run_start = i
        while i < n and missing[i]:
            i += 1
        # `i` is now the first position after the NaN run (or n).
        if i - run_start <= max_short_gap:
            filled.iloc[run_start:i] = 0.0
    return filled


def annualize_return(monthly_returns):
    """
    Geometric annualized return of a series of monthly decimal returns.

    NaNs are dropped first. Returns NaN when nothing is left, and -1.0 when
    the compounded growth factor is zero or negative.
    """
    observed = monthly_returns.dropna()
    n_months = len(observed)
    if n_months == 0:
        return np.nan
    compounded = (1 + observed).prod()
    if compounded <= 0:
        return -1.0
    # Scale the compounded growth to a 12-month horizon.
    return compounded ** (12.0 / n_months) - 1

def annualize_volatility(monthly_returns):
    """
    Annualized volatility: standard deviation of the monthly returns times sqrt(12).

    NaNs are dropped first; returns NaN when fewer than two observations remain.
    """
    observed = monthly_returns.dropna()
    if len(observed) >= 2:
        return observed.std() * np.sqrt(12)
    return np.nan

def sharpe_ratio(monthly_returns, rf_series):
    """
    Annualized Sharpe ratio: annualized excess return over annualized excess volatility.

    The two inputs are aligned in a DataFrame and rows with a NaN in either
    one are dropped. The excess return is annualized geometrically. Returns
    NaN when fewer than two paired observations exist, when the compounded
    excess growth is not positive, or when the excess volatility is zero.
    """
    paired = pd.DataFrame({'r': monthly_returns, 'rf': rf_series}).dropna()
    n_months = len(paired)
    if n_months < 2:
        return np.nan

    excess = paired['r'] - paired['rf']
    compounded = (1 + excess).prod()
    if compounded <= 0:
        return np.nan

    annual_vol = excess.std() * np.sqrt(12)
    if annual_vol == 0:
        return np.nan

    annual_excess = compounded ** (12.0 / n_months) - 1
    return annual_excess / annual_vol

def sortino_ratio(monthly_returns, rf_series):
    """
    Annualized Sortino ratio: annualized excess return over annualized downside deviation.

    The two inputs are aligned in a DataFrame and rows with a NaN in either
    one are dropped. The excess return is annualized geometrically; the
    downside deviation is the sample standard deviation of the negative
    excess returns times sqrt(12).

    Returns
    -------
    float
        * NaN  - fewer than two paired observations, non-positive compounded
                 excess growth, or a downside deviation that is zero or
                 undefined (e.g. only one negative month, or identical
                 negative months). This mirrors the zero-volatility guard in
                 `sharpe_ratio` instead of dividing by zero.
        * inf  - no negative excess return at all.
        * otherwise the ratio.

    NOTE(review): the denominator is the std of the negative excess returns,
    not the root-mean-square shortfall below zero that is often used for
    Sortino - confirm this is the intended definition.
    """
    df = pd.DataFrame({'r': monthly_returns, 'rf': rf_series}).dropna()
    if len(df) < 2:
        return np.nan
    excess = df['r'] - df['rf']

    growth_factor = (1 + excess).prod()
    n_months = len(excess)
    if growth_factor <= 0:
        return np.nan
    ann_excess_ret = growth_factor**(12.0 / n_months) - 1

    negative_returns = excess[excess < 0]
    if len(negative_returns) == 0:
        return np.inf  # no negative => infinite sortino
    downside_stdev = negative_returns.std() * np.sqrt(12)
    # `not (x > 0)` is True for both 0 and NaN, so neither reaches the division.
    if not (downside_stdev > 0):
        return np.nan
    return ann_excess_ret / downside_stdev

def max_drawdown(monthly_returns):
    """
    Maximum drawdown of a series of monthly decimal returns.

    NaNs are dropped, the remaining returns are compounded into a wealth
    index that starts from 1.0, and the largest fractional decline from the
    running peak is returned as a positive number (0.25 means -25%).
    Returns NaN when there are no valid returns.

    The running peak includes the starting wealth of 1.0, so a loss in the
    very first month(s) is counted as a drawdown rather than being reported
    as 0.
    """
    valid_rets = monthly_returns.dropna()
    if len(valid_rets) == 0:
        return np.nan
    wealth_index = (1 + valid_rets).cumprod()
    # The first wealth value is *after* the first return; flooring the running
    # peak at 1.0 puts the initial capital back into the peak history.
    running_peak = wealth_index.cummax().clip(lower=1.0)
    dd_series = 1 - (wealth_index / running_peak)
    return dd_series.max()

def calc_portfolio_returns(weights, df_returns):
    """
    Monthly portfolio returns as the weighted sum of the columns of `df_returns`.

    Each column is multiplied by its weight and the products are summed
    row by row; the result is a Series with one value per row.
    """
    weighted_returns = df_returns * weights
    portfolio_returns = weighted_returns.sum(axis=1)
    return portfolio_returns

print("Utility functions loaded.")


Utility functions loaded.


## 4. Widgets & User Inputs
Here we define some IPython widgets for in-sample/out-of-sample dates, target volatility, monthly cost, etc.

## 5. Fund Selection
Filters out columns that represent the risk-free rate or contain "index" in the name, then handles the selection mode (all, random, or manual).

In [8]:
# ─────────────── Widget Setup ───────────────
# This cell only constructs the input widgets and their container; nothing is
# displayed here and no click handler is attached to the button in this cell.

# (A) Build widgets
# Free-text fields for the window boundaries, pre-filled with 'YYYY-MM' strings.
in_sample_start = widgets.Text(value='2003-01', description='In-Sample Start:')
in_sample_end   = widgets.Text(value='2005-12', description='In-Sample End:')
out_sample_start= widgets.Text(value='2006-01', description='Out-Sample Start:')
out_sample_end  = widgets.Text(value='2010-12', description='Out-Sample End:')

# Numeric inputs; the defaults are decimals (0.10 and 0.002).
target_vol_widget   = widgets.FloatText(value=0.10,  description='Target Vol:')
monthly_cost_widget = widgets.FloatText(value=0.002, description='Monthly Cost:')

# Fund-selection mode; each option is a (label shown, value returned) pair.
selection_mode_widget = widgets.Dropdown(
    options=[('All Funds','all'), ('Random Sample','random'), ('Manual','manual')],
    value='all',
    description='Mode:'
)
# presumably only consulted when the mode is 'random' — confirm against the callback
random_sample_size_widget = widgets.IntText(value=5, description='Sample Size:')

apply_button = widgets.Button(description='Run Analysis', button_style='success')

# Vertical container holding all inputs in display order, button last.
ui_inputs = widgets.VBox([
    in_sample_start, in_sample_end,
    out_sample_start, out_sample_end,
    target_vol_widget, monthly_cost_widget,
    selection_mode_widget, random_sample_size_widget,
    apply_button
])

# Output widget created alongside the inputs so results can be rendered into it.
output_area = widgets.Output()



In [None]:

        # 6) Show a brief summary
        print("Analysis complete. Summary:")
        sf = results['selected_funds']
        print(f"  Funds selected: {len(sf)} → {sf}")
        if 'selected_funds' in results:
            print(f"  Funds selected: {len(results['selected_funds'])}")
        if 'in_ew_stats' in results:
            ir, iv, isr, _, _ = results['in_ew_stats']
            print(f"  In-Sample EW → Return: {ir*100:.2f}%, Vol: {iv*100:.2f}%, Sharpe: {isr:.2f}")
        if 'out_ew_stats' in results:
            or_, ov, osr, _, _ = results['out_ew_stats']
            print(f"  Out-Sample EW → Return: {or_*100:.2f}%, Vol: {ov*100:.2f}%, Sharpe: {osr:.2f}")
        

print("Widgets defined. Use 'display(ui_inputs)' in a cell to show them after other functions run")

## 6. Custom Weights
Displays an integer text widget for each fund, requiring the sum of weights to be 100.

In [9]:
def get_custom_weights(selected_funds):
    """
    Show one bounded integer input (0-100) per fund plus a confirm button.

    The function displays the widgets and returns immediately with an EMPTY
    dict. That same dict object is updated in place by the button handler:
    when the entered weights sum to exactly 100 it is filled with
    {fund: weight / 100.0}; when they do not, it is cleared and an error
    message is shown in the label under the button.
    """
    fund_inputs = {
        fund: widgets.BoundedIntText(
            value=0,
            min=0,
            max=100,
            description=f"{fund}",
            layout=widgets.Layout(width='250px'),
        )
        for fund in selected_funds
    }

    confirm_button = widgets.Button(description='Confirm Weights', button_style='success')
    # NOTE(review): `color` is passed to Layout here — confirm the label really renders red.
    error_label = widgets.Label(value='', layout=widgets.Layout(color='red'))

    display(VBox([*fund_inputs.values(), confirm_button, error_label]))

    weights_container = {}

    def on_confirm_clicked(_):
        total = sum(box.value for box in fund_inputs.values())
        if total == 100:
            # Percent inputs are converted to decimal weights.
            weights_container.update(
                {fund: box.value / 100.0 for fund, box in fund_inputs.items()}
            )
            error_label.value = "Weights confirmed!"
            return
        error_label.value = f"Error: Weights sum to {total}, must be 100."
        weights_container.clear()

    confirm_button.on_click(on_confirm_clicked)
    return weights_container

print("get_custom_weights function ready.")

get_custom_weights function ready.


## 7. Analysis (In-Sample & Out-of-Sample)
The `run_analysis` function orchestrates the entire process:
- Validates date inputs.
- Converts 'Date' column.
- Identifies risk-free column.
- Fills short gaps.
- Selects funds.
- Computes in-sample scaling factors and applies them in- and out-of-sample.
- Computes individual fund stats and portfolio stats.

## 8. Excel Export
Creates an Excel file with two sheets (In-Sample, Out-of-Sample) and two tables per sheet (Equal-weight and User-weight).

In [10]:
def export_to_excel(results_dict, output_filename="AnalysisOutput.xlsx"):
    """
    Write the analysis results to an Excel workbook with two sheets.

    Sheets
    ------
    'In-Sample'
        Two stacked tables (equal-weight, user-weighted). Each lists every
        selected fund followed by one portfolio row, with columns
        Return, Volatility, Sharpe, Sortino, MaxDD.
    'Out-of-Sample'
        Two stacked tables with the same layout, showing return/volatility
        before scaling (raw) and return/volatility/Sharpe/Sortino/MaxDD
        after scaling.

    Parameters
    ----------
    results_dict : dict
        Must contain 'selected_funds', 'in_sample_stats', 'out_sample_stats',
        'out_sample_stats_raw', 'in_ew_stats', 'out_ew_stats',
        'out_ew_stats_raw', 'in_user_stats', 'out_user_stats' and
        'out_user_stats_raw'. The per-fund entries are mappings
        fund -> stats; every stats value is indexed as
        (return, volatility, sharpe, sortino, max drawdown).
    output_filename : str
        Path of the workbook to create.

    Raises
    ------
    KeyError
        If a required key or fund is missing from `results_dict`.

    Notes
    -----
    Return, volatility and drawdown cells get the '0.0%' number format, so
    the values are expected as decimals. Sharpe and Sortino get a plain
    two-decimal format on BOTH sheets. The workbook is written inside a
    context manager so the file handle is released even if formatting fails.
    """
    selected_funds = results_dict['selected_funds']
    in_sample_stats = results_dict['in_sample_stats']
    out_sample_stats_scaled = results_dict['out_sample_stats']
    out_sample_stats_raw = results_dict['out_sample_stats_raw']

    in_ew_stats = results_dict['in_ew_stats']
    out_ew_stats_scaled = results_dict['out_ew_stats']
    out_ew_stats_raw = results_dict['out_ew_stats_raw']

    in_user_stats = results_dict['in_user_stats']
    out_user_stats_scaled = results_dict['out_user_stats']
    out_user_stats_raw = results_dict['out_user_stats_raw']

    in_columns = ['Fund', 'Return (%)', 'Volatility (%)', 'Sharpe', 'Sortino', 'MaxDD (%)']
    out_columns = [
        'Fund', 'RetBefore(%)', 'VolBefore(%)', 'RetAfter(%)', 'VolAfter(%)',
        'Sharpe(After)', 'Sortino(After)', 'MaxDD(After)(%)'
    ]

    def _in_row(label, stats):
        # One in-sample table row: label followed by the five statistics.
        return [label, stats[0], stats[1], stats[2], stats[3], stats[4]]

    def _out_row(label, raw_stats, scaled_stats):
        # One out-of-sample row: raw return/vol, then the five scaled statistics.
        return [
            label,
            raw_stats[0], raw_stats[1],
            scaled_stats[0], scaled_stats[1],
            scaled_stats[2], scaled_stats[3], scaled_stats[4],
        ]

    # --- In-Sample DataFrames ---
    # The per-fund rows are identical in both tables; only the portfolio row differs.
    in_fund_rows = [_in_row(fund, in_sample_stats[fund]) for fund in selected_funds]
    in_eq_df = pd.DataFrame(
        in_fund_rows + [_in_row('Equal-Weight Portfolio', in_ew_stats)],
        columns=in_columns
    )
    in_user_df = pd.DataFrame(
        in_fund_rows + [_in_row('User-Weighted Portfolio', in_user_stats)],
        columns=in_columns
    )

    # --- Out-of-Sample DataFrames ---
    out_fund_rows = [
        _out_row(fund, out_sample_stats_raw[fund], out_sample_stats_scaled[fund])
        for fund in selected_funds
    ]
    out_eq_df = pd.DataFrame(
        out_fund_rows
        + [_out_row('Equal-Weight Portfolio', out_ew_stats_raw, out_ew_stats_scaled)],
        columns=out_columns
    )
    out_user_df = pd.DataFrame(
        out_fund_rows
        + [_out_row('User-Weighted Portfolio', out_user_stats_raw, out_user_stats_scaled)],
        columns=out_columns
    )

    # Header row of the second table on each sheet: first table (header + rows)
    # followed by two blank rows.
    in_second_start = len(in_eq_df) + 3
    out_second_start = len(out_eq_df) + 3

    with pd.ExcelWriter(output_filename, engine='xlsxwriter') as writer:
        # In-Sample Sheet
        in_eq_df.to_excel(writer, sheet_name='In-Sample', startrow=0, index=False)
        in_user_df.to_excel(writer, sheet_name='In-Sample', startrow=in_second_start, index=False)

        # Out-of-Sample Sheet
        out_eq_df.to_excel(writer, sheet_name='Out-of-Sample', startrow=0, index=False)
        out_user_df.to_excel(writer, sheet_name='Out-of-Sample', startrow=out_second_start, index=False)

        workbook = writer.book
        pct_format = workbook.add_format({'num_format': '0.0%'})
        decimal_format = workbook.add_format({'num_format': '0.00'})  # for non-percent columns
        bold_format = workbook.add_format({'bold': True})

        # Format In-Sample
        in_sample_ws = writer.sheets['In-Sample']
        in_sample_ws.set_column(0, 0, 28)  # Fund column
        in_sample_ws.set_column(1, 2, 8, pct_format)      # Return, Volatility
        in_sample_ws.set_column(3, 4, 8, decimal_format)  # Sharpe, Sortino
        in_sample_ws.set_column(5, 5, 8, pct_format)      # MaxDD

        # Bold headers (rewritten over the ones pandas produced)
        for colx, header in enumerate(in_eq_df.columns):
            in_sample_ws.write(0, colx, header, bold_format)
        for colx, header in enumerate(in_user_df.columns):
            in_sample_ws.write(in_second_start, colx, header, bold_format)

        # Format Out-of-Sample
        out_sample_ws = writer.sheets['Out-of-Sample']
        out_sample_ws.set_column(0, 0, 28)
        out_sample_ws.set_column(1, 4, 15, pct_format)      # raw/scaled return and vol
        out_sample_ws.set_column(5, 6, 15, decimal_format)  # Sharpe, Sortino are ratios, not percentages
        out_sample_ws.set_column(7, 7, 15, pct_format)      # MaxDD
        for colx, header in enumerate(out_eq_df.columns):
            out_sample_ws.write(0, colx, header, bold_format)
        for colx, header in enumerate(out_user_df.columns):
            out_sample_ws.write(out_second_start, colx, header, bold_format)

    logging.info(f"Exported analysis to {output_filename} successfully.")
    print(f"Excel file created: {output_filename}")

print("export_to_excel function ready.")

export_to_excel function ready.


## 9. Demo Run
The `demo_run()` function creates a small dummy dataset, runs the analysis, and exports the results to an Excel file.

In [None]:
def demo_run():
    """
    End-to-end smoke test on synthetic data.

    Builds a month-start dated frame (2003-01 .. 2010-12) with an 'RF' column
    and five random funds, two of which get missing values, shuffles the
    rows, runs `run_analysis` on it and, if that returns a result, exports it
    to "DemoAnalysisOutput.xlsx". Seeds NumPy's global RNG with 42.
    """
    month_starts = pd.date_range(start='2003-01-01', end='2010-12-01', freq='MS')
    n_months = len(month_starts)
    df_demo = pd.DataFrame({'Date': month_starts})

    np.random.seed(42)
    df_demo['RF'] = np.random.normal(loc=0.002, scale=0.0001, size=n_months)

    # Five funds whose mean and volatility grow with the fund number.
    for fund_no in range(1, 6):
        mean_r = 0.01 * fund_no / 10.0
        stdev_r = 0.02 * (fund_no / 5.0)
        rets = np.random.normal(loc=mean_r, scale=stdev_r, size=n_months)

        if fund_no == 3:
            # Two isolated missing months at random positions.
            rets[np.random.choice(n_months, 2, replace=False)] = np.nan
        elif fund_no == 4:
            # Three consecutive missing months -> exclude
            rets[10:13] = np.nan

        df_demo[f"Fund_{fund_no}"] = rets

    # Shuffle rows to test sorting
    df_demo = df_demo.sample(frac=1).reset_index(drop=True)

    results = run_analysis(
        df_demo,
        in_start='2003-01', in_end='2005-12',
        out_start='2006-01', out_end='2010-12',
        target_vol=0.10,
        monthly_cost=0.002,
        selection_mode='all',
        random_n=2
    )

    if results is None:
        return
    export_to_excel(results, "DemoAnalysisOutput.xlsx")
    print("Demo run complete.")

print("demo_run function ready. Call 'demo_run()' to test.")

In [11]:
# (C) Wire the button and display
# on_apply_clicked must already exist in the kernel (defined by an earlier
# cell); it becomes the click handler of the 'Run Analysis' button.
apply_button.on_click(on_apply_clicked)
# Show the input form together with the output widget.
display(ui_inputs, output_area)

VBox(children=(Text(value='2003-01', description='In-Sample Start:'), Text(value='2005-12', description='In-Sa…

Output()

### Using This Notebook
1. Run all cells.
2. Call `demo_run()` in a new cell to see a quick example with dummy data.
3. To use your own data, load it into a DataFrame (make sure it has a 'Date' column and decimal returns in other columns), then call `run_analysis()` and `export_to_excel()`.
4. For interactive selection, do:
   ```python
   display(ui_inputs)
   ```
   Then wire the `apply_button` to a callback function that reads the widget values and runs `run_analysis()`.
5. For custom weights, call:
   ```python
   my_weights = get_custom_weights(selected_funds)
   ```
   Then pass `my_weights` into your logic.
