# Volatility Scaling & Portfolio Analysis

This notebook demonstrates how to:
1. Load and validate data.
2. Handle missing data (short vs. long gaps).
3. Adjust returns to a target volatility in-sample, then apply the same scaling out-of-sample.
4. Compute Sharpe, Sortino, Max Drawdown.
5. Provide multiple fund selection modes (all, random sample, manual).
6. Calculate portfolio results (equal-weight and custom-weight).
7. Output in-sample and out-of-sample results to Excel with formatting.

**Note**: The manual fund selection and custom weights features are partially implemented. In a real interactive workflow, you would wire widget selections and weights into the final analysis.

In [1]:
# ============ 1. SETUP CELL ============

import sys

# If you need to install these packages on your environment, uncomment:
!{sys.executable} -m pip install --quiet ipywidgets openpyxl xlsxwriter

import logging
import sys
import numpy as np
import pandas as pd
import math
import ipywidgets as widgets
from ipywidgets import interact, interactive, VBox, HBox
import datetime
import random
import warnings

# For exporting to Excel with styling
import xlsxwriter

# Set up logging to console
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(levelname)s: %(message)s"
)

logging.info("Logging started. Volatility Scaling & Portfolio Analysis Notebook initialized.")

# (Optional) If widgets aren't enabled, run:
# !jupyter nbextension enable --py widgetsnbextension --sys-prefix

print("Setup complete.")

INFO: Logging started. Volatility Scaling & Portfolio Analysis Notebook initialized.
Setup complete.


## 2. Utility Functions
Here we define date parsing, consecutive gap checks, data filling, risk-free identification, return calculations, etc.

In [3]:
def parse_dates(date_str):
    """
    Attempt to parse a date string into a Python datetime.date object.
    Return None if parsing fails.
    """
    import datetime
    possible_formats = ["%Y-%m", "%Y-%m-%d", "%b-%Y", "%m/%d/%Y", "%Y/%m/%d"]
    for fmt in possible_formats:
        try:
            return datetime.datetime.strptime(date_str, fmt).date()
        except ValueError:
            continue
    return None

def consecutive_gaps(series, threshold=3):
    """
    Check if a series (sorted chronologically) has >= threshold consecutive NaNs.
    Return True if such a gap exists, False otherwise.
    """
    consecutive = 0
    for val in series:
        if pd.isna(val):
            consecutive += 1
        else:
            consecutive = 0
        if consecutive >= threshold:
            return True
    return False

def fill_short_gaps_with_zero(series, max_short_gap=2):
    """
    Replace missing values (NaN) with 0 if they appear in runs of <= max_short_gap.
    Longer runs remain NaN.
    """
    filled = series.copy()
    n = len(series)
    i = 0
    while i < n:
        if pd.isna(filled[i]):
            run_start = i
            while i < n and pd.isna(filled[i]):
                i += 1
            run_end = i  # first non-NaN after run
            gap_length = run_end - run_start
            if gap_length <= max_short_gap:
                filled[run_start:run_end] = 0.0
        else:
            i += 1
    return filled

def identify_risk_free_fund(df):
    """
    Identify which column (after 'Date') is the risk-free rate by smallest stdev among columns.
    """
    numeric_cols = df.columns[1:]  # skip the Date column
    stdevs = {}
    for col in numeric_cols:
        vals = df[col].dropna()
        if len(vals) > 0:
            stdevs[col] = vals.std()
        else:
            stdevs[col] = np.inf

    rf_col = min(stdevs, key=stdevs.get)
    logging.info(f"Identified '{rf_col}' as the risk-free column (lowest stdev).")
    return rf_col

def annualize_return(monthly_returns):
    """
    Annualized (geometric) return from monthly returns in decimal form.
    """
    valid_rets = monthly_returns.dropna()
    if len(valid_rets) == 0:
        return np.nan
    growth_factor = (1 + valid_rets).prod()
    n_months = len(valid_rets)
    if growth_factor <= 0:
        return -1.0
    ann_ret = growth_factor**(12.0 / n_months) - 1
    return ann_ret

def annualize_volatility(monthly_returns):
    """
    Annualized stdev of monthly returns, i.e. stdev * sqrt(12).
    """
    valid_rets = monthly_returns.dropna()
    if len(valid_rets) < 2:
        return np.nan
    return valid_rets.std() * np.sqrt(12)

def sharpe_ratio(monthly_returns, rf_series):
    """
    Annualized Sharpe ratio = (annual_excess_return) / (annual_excess_vol).
    """
    df = pd.DataFrame({'r': monthly_returns, 'rf': rf_series}).dropna()
    if len(df) < 2:
        return np.nan
    excess = df['r'] - df['rf']
    growth_factor = (1 + excess).prod()
    n_months = len(excess)
    if growth_factor <= 0:
        return np.nan
    ann_excess_ret = growth_factor**(12.0 / n_months) - 1
    ann_excess_vol = excess.std() * np.sqrt(12)
    if ann_excess_vol == 0:
        return np.nan
    return ann_excess_ret / ann_excess_vol

def sortino_ratio(monthly_returns, rf_series):
    """
    Annualized Sortino ratio = (annual_excess_return) / (annual_downside_stdev).
    """
    df = pd.DataFrame({'r': monthly_returns, 'rf': rf_series}).dropna()
    if len(df) < 2:
        return np.nan
    excess = df['r'] - df['rf']

    growth_factor = (1 + excess).prod()
    n_months = len(excess)
    if growth_factor <= 0:
        return np.nan
    ann_excess_ret = growth_factor**(12.0 / n_months) - 1

    negative_mask = excess < 0
    negative_returns = excess[negative_mask]
    if len(negative_returns) == 0:
        return np.inf  # no negative => infinite sortino
    downside_stdev = negative_returns.std() * np.sqrt(12)
    return ann_excess_ret / downside_stdev

def max_drawdown(monthly_returns):
    """
    Compute max drawdown from monthly returns in decimal form.
    """
    valid_rets = monthly_returns.dropna()
    if len(valid_rets) == 0:
        return np.nan
    wealth_index = (1 + valid_rets).cumprod()
    rolling_max = wealth_index.cummax()
    dd_series = 1 - (wealth_index / rolling_max)
    return dd_series.max()

def calc_portfolio_returns(weights, df_returns):
    """
    Compute monthly portfolio returns (Series) as weighted sum of columns in df_returns.
    """
    return (df_returns * weights).sum(axis=1)

print("Utility functions loaded.")


Utility functions loaded.


## 3. Widgets & User Inputs
Here we define some IPython widgets for in-sample/out-of-sample dates, target volatility, monthly cost, etc.

In [4]:
in_sample_start = widgets.Text(
    value='2003-01',
    description='In-Sample Start (YYYY-MM):',
    layout=widgets.Layout(width='300px')
)
in_sample_end = widgets.Text(
    value='2008-12',
    description='In-Sample End (YYYY-MM):',
    layout=widgets.Layout(width='300px')
)
out_sample_start = widgets.Text(
    value='2009-01',
    description='Out-Sample Start (YYYY-MM):',
    layout=widgets.Layout(width='300px')
)
out_sample_end = widgets.Text(
    value='2010-12',
    description='Out-Sample End (YYYY-MM):',
    layout=widgets.Layout(width='300px')
)

target_vol_widget = widgets.FloatText(
    value=0.10,
    description='Target Vol (annual):',
    layout=widgets.Layout(width='250px')
)

monthly_cost_widget = widgets.FloatText(
    value=0.002,
    description='Monthly Cost:',
    layout=widgets.Layout(width='250px')
)

selection_mode_widget = widgets.Dropdown(
    options=[('All Funds', 'all'),
             ('Random Sample', 'random'),
             ('Manual Selection', 'manual')],
    value='all',
    description='Selection Mode:'
)

random_sample_size_widget = widgets.IntText(
    value=5,
    description='Sample Size:',
    layout=widgets.Layout(width='200px')
)

apply_button = widgets.Button(
    description='Run Analysis',
    button_style='success'
)

ui_inputs = VBox([
    in_sample_start, in_sample_end,
    out_sample_start, out_sample_end,
    target_vol_widget, monthly_cost_widget,
    selection_mode_widget, random_sample_size_widget,
    apply_button
])

print("Widgets defined. Use 'display(ui_inputs)' in a cell to show them.")

Widgets defined. Use 'display(ui_inputs)' in a cell to show them.


In [5]:
display(ui_inputs)

VBox(children=(Text(value='2003-01', description='In-Sample Start (YYYY-MM):', layout=Layout(width='300px')), …

## 4. Fund Selection
Filters out columns that represent the risk-free rate or contain "index" in the name, then handles the selection mode (all, random, or manual).

In [6]:
def select_funds(df, rf_col, selection_mode='all', random_n=5):
    """
    Exclude the risk-free col and any 'index' columns from the fund list.
    Then filter out funds that have >=3 consecutive missing months.
    Then apply the specified selection_mode.
    """
    possible_funds = [c for c in df.columns if c not in ['Date', rf_col]]
    
    funds_only = []
    for col in possible_funds:
        if 'index' in col.lower():
            continue
        funds_only.append(col)
    
    valid_funds = []
    for col in funds_only:
        if not consecutive_gaps(df[col], threshold=3):
            valid_funds.append(col)
    
    if selection_mode == 'all':
        return valid_funds
    elif selection_mode == 'random':
        if len(valid_funds) <= random_n:
            warnings.warn(f"Fewer valid funds ({len(valid_funds)}) than sample size ({random_n}). Returning all.")
            return valid_funds
        else:
            return random.sample(valid_funds, random_n)
    else:
        # manual selection widget placeholder
        # In a real notebook, you'd display a multi-select widget.
        # For simplicity, just return all valid funds here.
        return valid_funds

print("select_funds function ready.")

select_funds function ready.


## 5. Custom Weights
Displays an integer text widget for each fund, requiring the sum of weights to be 100.

In [7]:
def get_custom_weights(selected_funds):
    """
    Display widgets for each fund to enter weights. Validate sum=100.
    Returns dict {fund: weight_decimal}.
    """
    weight_widgets = {}
    for fund in selected_funds:
        w = widgets.BoundedIntText(
            value=0,
            min=0,
            max=100,
            description=f"{fund}",
            layout=widgets.Layout(width='250px')
        )
        weight_widgets[fund] = w
    
    confirm_button = widgets.Button(
        description='Confirm Weights',
        button_style='success'
    )
    error_label = widgets.Label(value='', layout=widgets.Layout(color='red'))
    
    box = VBox(list(weight_widgets.values()) + [confirm_button, error_label])
    display(box)
    
    weights_container = {}
    
    def on_confirm_clicked(_):
        total = sum(w.value for w in weight_widgets.values())
        if total != 100:
            error_label.value = f"Error: Weights sum to {total}, must be 100."
            weights_container.clear()
        else:
            for fund, wdg in weight_widgets.items():
                weights_container[fund] = wdg.value / 100.0
            error_label.value = "Weights confirmed!"
    
    confirm_button.on_click(on_confirm_clicked)
    return weights_container

print("get_custom_weights function ready.")

get_custom_weights function ready.


## 6. Analysis (In-Sample & Out-of-Sample)
The `run_analysis` function orchestrates the entire process:
- Validates date inputs.
- Converts 'Date' column.
- Identifies risk-free column.
- Fills short gaps.
- Selects funds.
- Computes in-sample scaling factors and applies them in- and out-of-sample.
- Computes individual fund stats and portfolio stats.

In [8]:
def run_analysis(df, 
                 in_start, in_end, out_start, out_end, 
                 target_vol, monthly_cost,
                 selection_mode='all', random_n=5):
    """
    Orchestrate the analysis:
    1) Validate date inputs
    2) Convert 'Date' col
    3) Identify risk-free
    4) Fill short gaps, filter by 3+ gap
    5) Select funds
    6) Scale returns in-sample, apply factor + monthly cost OOS
    7) Compute stats + portfolio metrics
    8) Return a dictionary of results
    """
    in_sdate = parse_dates(in_start)
    in_edate = parse_dates(in_end)
    out_sdate = parse_dates(out_start)
    out_edate = parse_dates(out_end)
    
    if None in [in_sdate, in_edate, out_sdate, out_edate]:
        logging.error("Invalid date format. Please use YYYY-MM or recognized format.")
        print("Please fix date inputs and try again.")
        return None
    
    if not (in_sdate < in_edate <= out_sdate < out_edate):
        logging.warning("Date ranges might be overlapping or out of order. Proceed with caution.")
        print("Warning: The date range might be incorrect. Please verify.")
    
    # Convert 'Date' to datetime
    if not np.issubdtype(df['Date'].dtype, np.datetime64):
        df['Date'] = df['Date'].apply(parse_dates)
    df.dropna(subset=['Date'], inplace=True)
    df.sort_values(by='Date', inplace=True)
    df.reset_index(drop=True, inplace=True)
    
    # Identify risk-free column
    rf_col = identify_risk_free_fund(df)
    
    # Fill short gaps
    for col in df.columns:
        if col != 'Date':
            df[col] = fill_short_gaps_with_zero(df[col])
    
    # Select funds
    selected_funds = select_funds(df, rf_col, selection_mode, random_n)
    if len(selected_funds) == 0:
        logging.warning("No valid funds remain after filtering.")
        print("Warning: No valid funds. Try adjusting your selection or data filters.")
        return None
    
    # Separate in-sample, out-of-sample
    in_sample_mask = (df['Date'] >= in_sdate) & (df['Date'] <= in_edate)
    out_sample_mask = (df['Date'] >= out_sdate) & (df['Date'] <= out_edate)
    
    in_sample_df = df.loc[in_sample_mask].copy()
    out_sample_df = df.loc[out_sample_mask].copy()
    
    in_sample_rf = in_sample_df[rf_col]
    out_sample_rf = out_sample_df[rf_col]
    
    # Compute scale factors in-sample
    scale_factors = {}
    in_sample_scaled = pd.DataFrame(index=in_sample_df.index, columns=selected_funds)
    out_sample_scaled = pd.DataFrame(index=out_sample_df.index, columns=selected_funds)
    
    for fund in selected_funds:
        fund_in_rets = in_sample_df[fund].dropna()
        current_vol = annualize_volatility(fund_in_rets)
        if pd.isna(current_vol) or current_vol == 0:
            scale_factors[fund] = 1.0
            continue
        scale_factors[fund] = target_vol / current_vol
    
    # Apply scaling in-sample & out-of-sample
    for fund in selected_funds:
        sf = scale_factors[fund]
        # In-sample
        adj_in = in_sample_df[fund] * sf - monthly_cost
        adj_in[adj_in < -1.0] = -1.0
        in_sample_scaled[fund] = adj_in
        
        # Out-of-sample
        if out_sample_df.shape[0] > 0:
            adj_out = out_sample_df[fund] * sf - monthly_cost
            adj_out[adj_out < -1.0] = -1.0
            out_sample_scaled[fund] = adj_out
    
    # Helper function for stats
    def compute_stats(series, rf_series):
        r = annualize_return(series)
        v = annualize_volatility(series)
        sr = sharpe_ratio(series, rf_series)
        so = sortino_ratio(series, rf_series)
        mdd = max_drawdown(series)
        return (r, v, sr, so, mdd)
    
    in_sample_stats = {}
    for fund in selected_funds:
        in_sample_stats[fund] = compute_stats(in_sample_scaled[fund], in_sample_rf)
    
    out_sample_stats = {}
    for fund in selected_funds:
        out_sample_stats[fund] = compute_stats(out_sample_scaled[fund], out_sample_rf)
    
    out_sample_stats_raw = {}
    for fund in selected_funds:
        out_sample_stats_raw[fund] = compute_stats(out_sample_df[fund], out_sample_rf)
    
    # Portfolio (equal-weight)
    ew_w = np.array([1.0/len(selected_funds)]*len(selected_funds))
    in_ew_port = calc_portfolio_returns(ew_w, in_sample_scaled[selected_funds])
    out_ew_port = calc_portfolio_returns(ew_w, out_sample_scaled[selected_funds])
    out_ew_port_raw = calc_portfolio_returns(ew_w, out_sample_df[selected_funds])
    
    in_ew_stats = compute_stats(in_ew_port, in_sample_rf)
    out_ew_stats = compute_stats(out_ew_port, out_sample_rf)
    out_ew_stats_raw = compute_stats(out_ew_port_raw, out_sample_rf)
    
    # Portfolio (user-weighted) - placeholder
    user_weight_dict = {f: 1.0/len(selected_funds) for f in selected_funds}
    custom_w = np.array([user_weight_dict[f] for f in selected_funds])
    in_user_port = calc_portfolio_returns(custom_w, in_sample_scaled[selected_funds])
    out_user_port = calc_portfolio_returns(custom_w, out_sample_scaled[selected_funds])
    out_user_port_raw = calc_portfolio_returns(custom_w, out_sample_df[selected_funds])
    
    in_user_stats = compute_stats(in_user_port, in_sample_rf)
    out_user_stats = compute_stats(out_user_port, out_sample_rf)
    out_user_stats_raw = compute_stats(out_user_port_raw, out_sample_rf)
    
    results = {
        'selected_funds': selected_funds,
        'in_sample_scaled': in_sample_scaled,
        'out_sample_scaled': out_sample_scaled,
        'in_sample_stats': in_sample_stats,
        'out_sample_stats': out_sample_stats,
        'out_sample_stats_raw': out_sample_stats_raw,
        'in_ew_stats': in_ew_stats,
        'out_ew_stats': out_ew_stats,
        'out_ew_stats_raw': out_ew_stats_raw,
        'in_user_stats': in_user_stats,
        'out_user_stats': out_user_stats,
        'out_user_stats_raw': out_user_stats_raw
    }
    logging.info("Analysis complete.")
    return results

print("run_analysis function ready.")

run_analysis function ready.


## 7. Excel Export
Creates an Excel file with two sheets (In-Sample, Out-of-Sample) and two tables per sheet (Equal-weight and User-weight).

In [None]:
def export_to_excel(results_dict, output_filename="AnalysisOutput.xlsx"):
    """
    Create an Excel file with two tabs: In-Sample, Out-of-Sample.
    Each has two tables: (1) Equal-Weight, (2) User-Weighted.
    Columns for Return(%), Vol(%), Sharpe, Sortino, MaxDD(%).
    For OOS, also show 'before scaling' vs. 'after scaling' returns/vol.
    """
    selected_funds = results_dict['selected_funds']
    in_sample_stats = results_dict['in_sample_stats']
    out_sample_stats_scaled = results_dict['out_sample_stats']
    out_sample_stats_raw = results_dict['out_sample_stats_raw']

    in_ew_stats = results_dict['in_ew_stats']
    out_ew_stats_scaled = results_dict['out_ew_stats']
    out_ew_stats_raw = results_dict['out_ew_stats_raw']

    in_user_stats = results_dict['in_user_stats']
    out_user_stats_scaled = results_dict['out_user_stats']
    out_user_stats_raw = results_dict['out_user_stats_raw']

    # --- In-Sample DataFrames ---
    in_eq_data = []
    in_user_data = []
    for fund in selected_funds:
        r, v, s, so, mdd = in_sample_stats[fund]
        in_eq_data.append([fund, r*100, v*100, s, so, mdd*100])
        in_user_data.append([fund, r*100, v*100, s, so, mdd*100])

    in_eq_data.append([
        'Equal-Weight Portfolio',
        in_ew_stats[0]*100,
        in_ew_stats[1]*100,
        in_ew_stats[2],
        in_ew_stats[3],
        in_ew_stats[4]*100
    ])
    in_user_data.append([
        'User-Weighted Portfolio',
        in_user_stats[0]*100,
        in_user_stats[1]*100,
        in_user_stats[2],
        in_user_stats[3],
        in_user_stats[4]*100
    ])

    in_eq_df = pd.DataFrame(
        in_eq_data,
        columns=['Fund', 'Return (%)', 'Volatility (%)', 'Sharpe', 'Sortino', 'MaxDD (%)']
    )
    in_user_df = pd.DataFrame(
        in_user_data,
        columns=['Fund', 'Return (%)', 'Volatility (%)', 'Sharpe', 'Sortino', 'MaxDD (%)']
    )

    # --- Out-of-Sample DataFrames ---
    # columns: [Fund, RetBefore(%), VolBefore(%), RetAfter(%), VolAfter(%), Sharpe(After), Sortino(After), MaxDD(After)(%)]
    out_eq_data = []
    out_user_data = []

    for fund in selected_funds:
        r_raw, v_raw, _, _, _ = out_sample_stats_raw[fund]
        r_scaled, v_scaled, s_scaled, so_scaled, mdd_scaled = out_sample_stats_scaled[fund]
        out_eq_data.append([
            fund,
            r_raw*100,
            v_raw*100,
            r_scaled*100,
            v_scaled*100,
            s_scaled,
            so_scaled,
            mdd_scaled*100
        ])
        out_user_data.append([
            fund,
            r_raw*100,
            v_raw*100,
            r_scaled*100,
            v_scaled*100,
            s_scaled,
            so_scaled,
            mdd_scaled*100
        ])

    r_ew_raw, v_ew_raw, _, _, _ = out_ew_stats_raw
    r_ew_scaled, v_ew_scaled, s_ew_scaled, so_ew_scaled, mdd_ew_scaled = out_ew_stats_scaled
    out_eq_data.append([
        'Equal-Weight Portfolio',
        r_ew_raw*100,
        v_ew_raw*100,
        r_ew_scaled*100,
        v_ew_scaled*100,
        s_ew_scaled,
        so_ew_scaled,
        mdd_ew_scaled*100
    ])

    r_user_raw, v_user_raw, _, _, _ = out_user_stats_raw
    r_user_scaled, v_user_scaled, s_user_scaled, so_user_scaled, mdd_user_scaled = out_user_stats_scaled
    out_user_data.append([
        'User-Weighted Portfolio',
        r_user_raw*100,
        v_user_raw*100,
        r_user_scaled*100,
        v_user_scaled*100,
        s_user_scaled,
        so_user_scaled,
        mdd_user_scaled*100
    ])

    out_eq_df = pd.DataFrame(
        out_eq_data,
        columns=['Fund', 'RetBefore(%)', 'VolBefore(%)', 'RetAfter(%)', 'VolAfter(%)', 'Sharpe(After)', 'Sortino(After)', 'MaxDD(After)(%)']
    )
    out_user_df = pd.DataFrame(
        out_user_data,
        columns=['Fund', 'RetBefore(%)', 'VolBefore(%)', 'RetAfter(%)', 'VolAfter(%)', 'Sharpe(After)', 'Sortino(After)', 'MaxDD(After)(%)']
    )

    writer = pd.ExcelWriter(output_filename, engine='xlsxwriter')

    # In-Sample Sheet
    in_eq_df.to_excel(writer, sheet_name='In-Sample', startrow=0, index=False)
    in_user_df.to_excel(writer, sheet_name='In-Sample', startrow=len(in_eq_df)+3, index=False)

    # Out-of-Sample Sheet
    out_eq_df.to_excel(writer, sheet_name='Out-of-Sample', startrow=0, index=False)
    out_user_df.to_excel(writer, sheet_name='Out-of-Sample', startrow=len(out_eq_df)+3, index=False)

    workbook = writer.book
    pct_format = workbook.add_format({'num_format': '0.00%'})
    bold_format = workbook.add_format({'bold': True})

    # Format In-Sample
    in_sample_ws = writer.sheets['In-Sample']
    in_sample_ws.set_column(0, 0, 28)  # Fund column
    in_sample_ws.set_column(1, 5, 15, pct_format)
    # Bold headers
    for colx in range(in_eq_df.shape[1]):
        in_sample_ws.write(0, colx, in_eq_df.columns[colx], bold_format)
    for colx in range(in_user_df.shape[1]):
        in_sample_ws.write(len(in_eq_df)+3, colx, in_user_df.columns[colx], bold_format)

    # Format Out-of-Sample
    out_sample_ws = writer.sheets['Out-of-Sample']
    out_sample_ws.set_column(0, 0, 28)
    out_sample_ws.set_column(1, 7, 15, pct_format)
    for colx in range(out_eq_df.shape[1]):
        out_sample_ws.write(0, colx, out_eq_df.columns[colx], bold_format)
    for colx in range(out_user_df.shape[1]):
        out_sample_ws.write(len(out_eq_df)+3, colx, out_user_df.columns[colx], bold_format)

    writer.save()
    logging.info(f"Exported analysis to {output_filename} successfully.")
    print(f"Excel file created: {output_filename}")

print("export_to_excel function ready.")

## 8. Demo Run
The `demo_run()` function creates a small dummy dataset, runs the analysis, and exports the results to an Excel file.

In [9]:
def demo_run():
    """
    Create a small dummy dataset, run analysis, export results.
    """
    # Create monthly date range
    rng = pd.date_range(start='2003-01-01', end='2010-12-01', freq='MS')
    df_demo = pd.DataFrame({'Date': rng})

    np.random.seed(42)
    rf_values = np.random.normal(loc=0.002, scale=0.0001, size=len(rng))
    df_demo['RF'] = rf_values

    # Random funds with missing data
    for i in range(1, 6):
        fund_name = f"Fund_{i}"
        mean_r = 0.01 * i / 10.0
        stdev_r = 0.02 * (i / 5.0)
        rets = np.random.normal(loc=mean_r, scale=stdev_r, size=len(rng))

        # Introduce random short or long gaps
        if i == 3:
            missing_idx = np.random.choice(len(rng), 2, replace=False)
            for idx in missing_idx:
                rets[idx] = np.nan
        if i == 4:
            rets[10:13] = np.nan  # 3 consecutive -> exclude

        df_demo[fund_name] = rets

    # Shuffle rows to test sorting
    df_demo = df_demo.sample(frac=1).reset_index(drop=True)

    results = run_analysis(
        df_demo,
        in_start='2003-01', in_end='2005-12',
        out_start='2006-01', out_end='2010-12',
        target_vol=0.10,
        monthly_cost=0.002,
        selection_mode='all',
        random_n=2
    )

    if results is not None:
        export_to_excel(results, "DemoAnalysisOutput.xlsx")
        print("Demo run complete.")

print("demo_run function ready. Call 'demo_run()' to test.")

demo_run function ready. Call 'demo_run()' to test.


In [10]:
demo_run()

INFO: Identified 'RF' as the risk-free column (lowest stdev).


TypeError: Invalid comparison between dtype=datetime64[ns] and date

### Using This Notebook
1. Run all cells.
2. Call `demo_run()` in a new cell to see a quick example with dummy data.
3. To use your own data, load it into a DataFrame (make sure it has a 'Date' column and decimal returns in other columns), then call `run_analysis()` and `export_to_excel()`.
4. For interactive selection, do:
   ```python
   display(ui_inputs)
   ```
   Then wire the `apply_button` to a callback function that reads the widget values and runs `run_analysis()`.
5. For custom weights, call:
   ```python
   my_weights = get_custom_weights(selected_funds)
   ```
   Then pass `my_weights` into your logic.
