### Backtest Results: Analysis and Visualization (v2)

This notebook loads the master results file and performs in-depth analysis and visualization, correctly distinguishing between different strategy parameter sets.

**Workflow:**
1.  **Setup:** Configure paths and define the columns that identify a unique strategy.
2.  **Load Data:** Load the master backtest results file.
3.  **Aggregate Analysis:** Calculate performance metrics by grouping on **unique strategy parameters** to ensure a correct, apples-to-apples comparison.
4.  **Visualize Evolving Sharpe:** Plot the evolving Sharpe ratio for the best-performing strategy run.
5.  **Visualize Equity Curve:** Plot the cumulative return (equity curve) for the best-performing strategy run.

### Setup and Configuration

In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np

# --- Project Path Setup ---
# The notebook can be started from the project root or from its 'notebooks'
# sub-folder; resolve the project root in both cases.
NOTEBOOK_DIR = Path.cwd()
if NOTEBOOK_DIR.name == 'notebooks':
    ROOT_DIR = NOTEBOOK_DIR.parent
else:
    ROOT_DIR = NOTEBOOK_DIR
SRC_DIR = ROOT_DIR.joinpath('src')

# Make the project root and 'src' importable; entries already present on
# sys.path are skipped so re-running the cell does not add duplicates.
sys.path.extend(
    [entry for entry in (str(ROOT_DIR), str(SRC_DIR)) if entry not in sys.path]
)

# --- Local Imports ---
import utils
# from stocks.notebooks.config_obsolete import ANNUAL_RISK_FREE_RATE, TRADING_DAYS_PER_YEAR
from config import ANNUAL_RISK_FREE_RATE, DAILY_RISK_FREE_RATE, TRADING_DAYS_PER_YEAR

# --- Analysis Parameters ---
# ANNUAL_RISK_FREE_RATE, DAILY_RISK_FREE_RATE and TRADING_DAYS_PER_YEAR are
# imported from `config` above and are not redefined here.

# Passed to utils.plot_evolving_annualized_sharpe as `min_periods_for_sharpe`.
MIN_PERIODS_FOR_SHARPE = 10

# Number of extra price rows kept after the last backtest date. The
# price-loading cells read this name, so it has to be defined here; leaving it
# commented out makes those cells fail with a NameError.
EXTRA_ROWS_FOR_DF_PRICES = 2

# Column of the price table that is used as the benchmark.
BENCHMARK_TICKER = "VGT"

# --- !! CRITICAL: Define columns that identify a unique strategy run !! ---
# Results are grouped on these columns (plus 'scheme') throughout the notebook.
STRATEGY_ID_COLS = [
    'n_select_requested',
    'filter_min_price',
    'filter_min_avg_volume_m',
    'score_weight_rsi',  # Add/remove any parameters you tune
]

# --- File Path Construction ---
BACKTEST_DIR = ROOT_DIR / 'output' / 'backtest_results'
SOURCE_PATH = BACKTEST_DIR / 'backtest_master_results.parquet'
DATA_DIR = ROOT_DIR / 'data'

# --- Notebook Setup ---
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
pd.set_option('display.width', 1500)
pd.set_option('display.float_format', '{:.4f}'.format)
%load_ext autoreload
%autoreload 2

# --- Verification ---
print(f"Source file for analysis: {SOURCE_PATH}")
print(f"Identifying unique strategies by: {STRATEGY_ID_COLS}")

### Step 1: Load Backtest Results

In [None]:
print(f"--- Step 1: Loading data from {SOURCE_PATH.name} ---")

# Only the file read is guarded; everything that needs a loaded frame lives in
# the `else` branch.
try:
    df_results = pd.read_parquet(SOURCE_PATH)
except FileNotFoundError:
    print(f"❌ ERROR: Source file not found at {SOURCE_PATH}. Halting execution.")
    # Later cells test `df_results is not None` before running their analysis.
    df_results = None
else:
    # Convert the selection-date column to datetime for the date logic below.
    df_results = df_results.assign(
        actual_selection_date_used=pd.to_datetime(df_results['actual_selection_date_used'])
    )
    print(f"✅ Successfully loaded and prepared data for {len(df_results)} backtest runs.")
    display(df_results)

### latest_date should be max + 5 more days

In [None]:
# Date window covered by the backtest: the first selection date through the
# last selection date moved forward by two business days.
_selection_dates = df_results['actual_selection_date_used']
earliest_date = _selection_dates.min()
latest_date = _selection_dates.max() + pd.offsets.BDay(2)

print(f"Earliest Date in backtest: {earliest_date}")
print(f"Latest Date in backtest:   {latest_date}")

### Step 2: Load Price Data

In [None]:
# Load the adjusted-close price table and put its rows in chronological order.
_df_prices = pd.read_parquet(DATA_DIR / 'df_adj_close.parquet').sort_index()

# Number of extra price rows to keep after `latest_date`. Falls back to 2 when
# the setup cell does not define EXTRA_ROWS_FOR_DF_PRICES, instead of failing
# with a NameError.
extra_rows = globals().get('EXTRA_ROWS_FOR_DF_PRICES', 2)

# Row position of the last price date on or before `latest_date`. searchsorted
# is used instead of an exact get_loc match so the lookup also works when
# `latest_date` itself has no price row (e.g. a market holiday).
latest_date_position = _df_prices.index.searchsorted(latest_date, side='right') - 1

if latest_date_position >= 0:
    # Extend the window by `extra_rows`, clamped to the last available row.
    new_end_position = min(latest_date_position + extra_rows, len(_df_prices) - 1)
    extended_latest_date = _df_prices.index[new_end_position]
else:
    # No price row exists on or before `latest_date`; keep the unextended end.
    print(f"Error: no price data on or before {latest_date}.")
    extended_latest_date = latest_date

# Backtest window plus up to `extra_rows` additional rows of prices.
df_prices = _df_prices.loc[earliest_date:extended_latest_date]

print(f"Loaded price data with {len(_df_prices)} rows and {len(_df_prices.columns)} columns.")
print(f"df_prices data:")
display(df_prices.head(3))
display(df_prices.tail(3))

# Benchmark price column, used for the comparisons further down.
benchmark_price_series = df_prices[BENCHMARK_TICKER]
print(f'benchmark_price_series {BENCHMARK_TICKER} prices:')
display(benchmark_price_series.head(3))
display(benchmark_price_series.tail(3))

In [None]:
# NOTE(review): this cell repeats the price-loading cell directly above it;
# consider deleting one of the two.

# Load the adjusted-close price table and put its rows in chronological order.
_df_prices = pd.read_parquet(DATA_DIR / 'df_adj_close.parquet').sort_index()

# Number of extra price rows to keep after `latest_date`. Falls back to 2 when
# the setup cell does not define EXTRA_ROWS_FOR_DF_PRICES, instead of failing
# with a NameError.
extra_rows = globals().get('EXTRA_ROWS_FOR_DF_PRICES', 2)

# Row position of the last price date on or before `latest_date`. searchsorted
# is used instead of an exact get_loc match so the lookup also works when
# `latest_date` itself has no price row (e.g. a market holiday).
latest_date_position = _df_prices.index.searchsorted(latest_date, side='right') - 1

if latest_date_position >= 0:
    # Extend the window by `extra_rows`, clamped to the last available row.
    new_end_position = min(latest_date_position + extra_rows, len(_df_prices) - 1)
    extended_latest_date = _df_prices.index[new_end_position]
else:
    # No price row exists on or before `latest_date`; keep the unextended end.
    print(f"Error: no price data on or before {latest_date}.")
    extended_latest_date = latest_date

# Backtest window plus up to `extra_rows` additional rows of prices.
df_prices = _df_prices.loc[earliest_date:extended_latest_date]

print(f"Loaded price data with {len(_df_prices)} rows and {len(_df_prices.columns)} columns.")
print(f"df_prices data:")
display(df_prices.head(3))
display(df_prices.tail(3))

# Benchmark price column, used for the comparisons further down.
benchmark_price_series = df_prices[BENCHMARK_TICKER]
print(f'benchmark_price_series {BENCHMARK_TICKER} prices:')
display(benchmark_price_series.head(3))
display(benchmark_price_series.tail(3))

In [None]:
# Remove the row limit so the whole benchmark series is printed.
# This changes the pandas display option for the rest of the session.
pd.options.display.max_rows = None
print(benchmark_price_series)

### Step 3: Aggregate Performance Analysis

In [None]:
if df_results is not None and df_prices is not None:
    print("\n--- Step 2: Aggregate Performance Analysis (Corrected for 2-Day Cycle & vs. Benchmark) ---")

    def _annualized_sharpe(daily_returns, daily_rf):
        """Return the annualized Sharpe ratio of a daily return series.

        Computes (mean - daily_rf) / std * sqrt(TRADING_DAYS_PER_YEAR).
        Returns NaN when the standard deviation is NaN or not above 1e-9, so a
        flat or single-point series does not yield inf.
        """
        std_dev = daily_returns.std()
        if not std_dev > 1e-9:  # also catches a NaN standard deviation
            return np.nan
        return (daily_returns.mean() - daily_rf) / std_dev * np.sqrt(TRADING_DAYS_PER_YEAR)

    # --- Part A: Calculate Benchmark Performance First ---
    print(f"Calculating benchmark performance for '{BENCHMARK_TICKER}'...")
    benchmark_prices_all = df_prices[BENCHMARK_TICKER]
    # Same daily risk-free rate the original annualized benchmark formula implied.
    benchmark_daily_rf = ANNUAL_RISK_FREE_RATE / TRADING_DAYS_PER_YEAR

    # One benchmark trade per distinct selection row. np.unique also collapses
    # selection dates that forward-fill onto the same price row, which would
    # otherwise create duplicate index labels and break the reindex below.
    unique_selection_dates = df_results['actual_selection_date_used'].unique()
    selection_indices = np.unique(
        df_prices.index.get_indexer(unique_selection_dates, method='ffill')
    )
    buy_prices_bench = benchmark_prices_all.iloc[selection_indices + 1]    # row after selection
    sell_prices_bench = benchmark_prices_all.iloc[selection_indices + 2]   # two rows after selection
    trade_returns_bench = (sell_prices_bench.values - buy_prices_bench.values) / buy_prices_bench.values
    sell_dates_bench = df_prices.index[selection_indices + 2]

    # The benchmark timeline runs from the first selection date to the LAST SELL
    # DATE. Ending it at "max selection date + 2 calendar days" cut the window
    # short whenever a weekend or holiday sat between selection and sale, and
    # the reindex below then silently dropped those final trades.
    min_date = df_results['actual_selection_date_used'].min()
    max_date = sell_dates_bench.max()
    benchmark_full_range = pd.date_range(start=min_date, end=max_date, freq='B')

    # Benchmark 1: Buy and Hold (100% invested)
    benchmark_returns_100 = benchmark_prices_all.pct_change().reindex(benchmark_full_range).fillna(0)
    benchmark_buy_and_hold_sharpe = _annualized_sharpe(benchmark_returns_100, benchmark_daily_rf)

    # Benchmark 2: Timed (in the market only on the strategy's trade days,
    # zero return on all other business days)
    timed_benchmark_series = (
        pd.Series(trade_returns_bench, index=sell_dates_bench)
        .reindex(benchmark_full_range)
        .fillna(0)
    )
    benchmark_timed_sharpe = _annualized_sharpe(timed_benchmark_series, benchmark_daily_rf)

    # --- Part B: Calculate Strategy Performance ---
    summary_records = []
    group_cols = STRATEGY_ID_COLS + ['scheme']
    grouped = df_results.groupby(group_cols)
    daily_risk_free_rate = DAILY_RISK_FREE_RATE

    print("Analyzing each unique strategy run...")
    for group_name, group_df in grouped:

        # Sell date = the price row two positions after the selection row.
        selection_indices_strat = df_prices.index.get_indexer(group_df['actual_selection_date_used'], method='ffill')
        sell_dates_strat = df_prices.index[selection_indices_strat + 2]
        trade_returns = pd.Series(group_df['portfolio_return'].values, index=sell_dates_strat)

        # Continuous business-day timeline; days without a trade count as 0 return.
        start_date = trade_returns.index.min()
        end_date = trade_returns.index.max()
        full_date_range = pd.date_range(start=start_date, end=end_date, freq='B')
        daily_return_series = trade_returns.reindex(full_date_range).fillna(0)

        annualized_sharpe = _annualized_sharpe(daily_return_series, daily_risk_free_rate)

        record = dict(zip(group_cols, group_name))
        record['Num Trade Days'] = len(group_df)
        record['Annualized Sharpe (adj. for cash)'] = annualized_sharpe
        summary_records.append(record)

    # --- Part C: Combine and Display Final Summary ---
    df_summary = pd.DataFrame(summary_records).set_index(group_cols)
    df_summary = df_summary.sort_values(by='Annualized Sharpe (adj. for cash)', ascending=False)

    # Benchmark rows reuse the summary's index levels; unused levels get 'N/A'.
    benchmark_data = {
        'Num Trade Days': ['N/A', 'N/A'],
        'Annualized Sharpe (adj. for cash)': [benchmark_buy_and_hold_sharpe, benchmark_timed_sharpe]
    }
    padding = ['N/A'] * (len(df_summary.index.names) - 3)
    benchmark_index_tuples = [
        tuple(['BENCHMARK', BENCHMARK_TICKER, 'Buy & Hold'] + padding),
        tuple(['BENCHMARK', BENCHMARK_TICKER, 'Timed'] + padding)
    ]
    benchmark_index = pd.MultiIndex.from_tuples(benchmark_index_tuples, names=df_summary.index.names)
    df_benchmark = pd.DataFrame(benchmark_data, index=benchmark_index)

    df_final_summary = pd.concat([df_summary, df_benchmark])

    print(f"\nAnalysis complete. Comparison vs. '{BENCHMARK_TICKER}'")
    display(df_final_summary)

else:
    print("Skipping analysis because data or price files failed to load.")

In [None]:
# Build the groupby here instead of relying on `grouped_for_demo` from a demo
# cell further down, so this cell also works on a fresh kernel with "Run All".
grouped_for_demo = df_results.groupby(STRATEGY_ID_COLS + ['scheme'])
group_names = list(grouped_for_demo.groups.keys())

# Inspect the second group when there is one, otherwise the first; a fixed
# index of 1 fails when the results contain a single strategy run.
demo_group_name = group_names[min(1, len(group_names) - 1)]

# Retrieve the rows that belong to just that group.
specific_group_df = grouped_for_demo.get_group(demo_group_name)

print(f"\nDataFrame for group: {demo_group_name}")
print(specific_group_df)

In [None]:
# Show the key of the first group; relies on `group_names` from the cell above.
group_names[0]

In [None]:
# Demonstrates the trading-day (T+2) sell-date lookup next to a naive
# calendar-day offset, and adds the benchmark return for each trade.

# Test the objects themselves rather than `'name' in locals()`: the load cell
# binds df_results to None when the results file is missing, and a None value
# passes a pure name test and then crashes on the first lookup below.
if globals().get('df_results') is not None and globals().get('df_prices') is not None:

    # --- 1. Isolate a sample group to demonstrate with ---
    grouped_for_demo = df_results.groupby(STRATEGY_ID_COLS + ['scheme'])
    group_name, sample_group_df = next(iter(grouped_for_demo))  # first group

    print(f"--- Demonstrating with a sample group: {group_name} ---")

    # --- 2. Row positions of the selection dates in the price table ---
    selection_dates = sample_group_df['actual_selection_date_used']
    selection_indices = df_prices.index.get_indexer(selection_dates, method='ffill')

    # --- 3. Benchmark return between the T+1 and T+2 price rows ---
    buy_prices_bench = df_prices[BENCHMARK_TICKER].iloc[selection_indices + 1]
    sell_prices_bench = df_prices[BENCHMARK_TICKER].iloc[selection_indices + 2]
    benchmark_trade_returns = (sell_prices_bench.values - buy_prices_bench.values) / buy_prices_bench.values

    # --- 4. Side-by-side table: calendar offset vs. trading-day lookup ---
    demo_df = pd.DataFrame({
        'A_Selection_Date': selection_dates.values,
        'B_Incorrect_Sell_Date (Calendar)': (selection_dates + pd.DateOffset(days=2)).values,
        'C_Correct_Sell_Date (Trading)': df_prices.index[selection_indices + 2],
        'D_Portfolio_Return': sample_group_df['portfolio_return'].values,
        'E_Benchmark_Return': benchmark_trade_returns
    })  # all rows are shown

    print("\n--- Side-by-Side Performance for Individual Trades ---")
    display(demo_df)

    print("\nEXPLANATION:")
    print(" - Column 'C' shows the correct sell date, skipping weekends (e.g., from 2025-06-06 to 2025-06-10).")
    # Column 'B' is the incorrect calendar date, so the period must not be
    # described in terms of it.
    print(" - Column 'D' is your strategy's return from the T+1 close to the T+2 close (the sell date in column 'C').")
    print(f" - Column 'E' is the '{BENCHMARK_TICKER}' return for the exact same period, allowing for a direct comparison.")

else:
    print("Please run the data loading cells first.")




In [None]:
# BM_df

In [None]:
# Benchmark prices on the selection rows themselves (offset 0), to compare with
# the T+1 buy prices and T+2 sell prices computed in an earlier cell.
selection_date_bench = df_prices[BENCHMARK_TICKER].iloc[selection_indices]

# Inspection window; .loc slicing on these labels is inclusive on both ends.
start_date, end_date = '2025-04-25', '2025-06-20'

# Price dates inside the window, as sorted 'YYYY-MM-DD' strings.
df_prices_index = (
    df_prices.loc[start_date:end_date]
    .index
    .sort_values()
    .strftime('%Y-%m-%d')
    .tolist()
)

print(f'\nselection_date_bench:\n{selection_date_bench}')
print(f'\nbuy_prices_bench:\n{buy_prices_bench}')
print(f'\nsell_prices_bench:\n{sell_prices_bench}')
print(f'\ndf_prices_index:\n')
for trading_day in df_prices_index:
    print(trading_day)



In [None]:
# Same demonstration as the earlier calendar-vs-trading-day cell, limited to
# the first five trades of the sample group.

# Test the objects themselves rather than `'name' in locals()`: the load cell
# binds df_results to None when the results file is missing, and a None value
# passes a pure name test and then crashes on the first lookup below.
if globals().get('df_results') is not None and globals().get('df_prices') is not None:

    # --- 1. Isolate a sample group to demonstrate with ---
    grouped_for_demo = df_results.groupby(STRATEGY_ID_COLS + ['scheme'])
    group_name, sample_group_df = next(iter(grouped_for_demo))  # first group

    print(f"--- Demonstrating with a sample group: {group_name} ---")

    # --- 2. Row positions of the selection dates in the price table ---
    selection_dates = sample_group_df['actual_selection_date_used']
    selection_indices = df_prices.index.get_indexer(selection_dates, method='ffill')

    # --- 3. Benchmark return between the T+1 and T+2 price rows ---
    buy_prices_bench = df_prices[BENCHMARK_TICKER].iloc[selection_indices + 1]
    sell_prices_bench = df_prices[BENCHMARK_TICKER].iloc[selection_indices + 2]
    benchmark_trade_returns = (sell_prices_bench.values - buy_prices_bench.values) / buy_prices_bench.values

    # --- 4. Side-by-side table: calendar offset vs. trading-day lookup ---
    demo_df = pd.DataFrame({
        'A_Selection_Date': selection_dates.values,
        'B_Incorrect_Sell_Date (Calendar)': (selection_dates + pd.DateOffset(days=2)).values,
        'C_Correct_Sell_Date (Trading)': df_prices.index[selection_indices + 2],
        'D_Portfolio_Return': sample_group_df['portfolio_return'].values,
        'E_Benchmark_Return': benchmark_trade_returns
    }).head()  # first 5 rows only

    print("\n--- Side-by-Side Performance for Individual Trades ---")
    display(demo_df)

    print("\nEXPLANATION:")
    print(" - Column 'C' shows the correct sell date, skipping weekends (e.g., from 2025-06-06 to 2025-06-10).")
    # Column 'B' is the incorrect calendar date, so the period must not be
    # described in terms of it.
    print(" - Column 'D' is your strategy's return from the T+1 close to the T+2 close (the sell date in column 'C').")
    print(f" - Column 'E' is the '{BENCHMARK_TICKER}' return for the exact same period, allowing for a direct comparison.")

else:
    print("Please run the data loading cells first.")

In [None]:
# df_prices_index = df_prices.index.sort_values().strftime('%Y-%m-%d').to_list()
# df_prices_index

# sell_dates_strat_index = sell_dates_strat.sort_values().strftime('%Y-%m-%d').to_list()
# sell_dates_strat_index

# trade_returns

# daily_return_series

# group_name

In [None]:
# Debug dump of the price index and of the intermediate objects left behind by
# the last iteration of the per-strategy loop in the aggregate-analysis cell.
debug_items = {
    'df_prices.index': df_prices.index,
    'selection_indices_strat': selection_indices_strat,
    'sell_dates_strat': sell_dates_strat,
    'trade_returns': trade_returns,
    'daily_return_series': daily_return_series,
}
for item_name, item_value in debug_items.items():
    print(f'{item_name}:\n{item_value}')

In [None]:
# Shows, for the first trades of a sample group, the T+1 buy date, the T+2
# sell date, the portfolio return and the benchmark return over the same rows.

# Test the objects themselves rather than `'name' in locals()`: the load cell
# binds df_results to None when the results file is missing, and a None value
# passes a pure name test and then crashes on the first lookup below.
if globals().get('df_results') is not None and globals().get('df_prices') is not None:

    # --- 1. Isolate a sample group to demonstrate with ---
    grouped_for_demo = df_results.groupby(STRATEGY_ID_COLS + ['scheme'])
    group_name, sample_group_df = next(iter(grouped_for_demo))  # first group

    print(f"--- Demonstrating with a sample group: {group_name} ---")

    # --- 2. Row positions of the selection dates in the price table ---
    selection_dates = sample_group_df['actual_selection_date_used']
    selection_indices = df_prices.index.get_indexer(selection_dates, method='ffill')

    # --- 3. Benchmark return between the T+1 and T+2 price rows ---
    buy_prices_bench = df_prices[BENCHMARK_TICKER].iloc[selection_indices + 1]
    sell_prices_bench = df_prices[BENCHMARK_TICKER].iloc[selection_indices + 2]
    benchmark_trade_returns = (sell_prices_bench.values - buy_prices_bench.values) / buy_prices_bench.values

    # --- 4. Demonstration table with explicit buy and sell dates ---
    demo_df = pd.DataFrame({
        'A_Selection_Date': selection_dates.values,
        'B_Correct_Buy_Date (T+1)': df_prices.index[selection_indices + 1],
        'C_Correct_Sell_Date (T+2)': df_prices.index[selection_indices + 2],
        'D_Portfolio_Return': sample_group_df['portfolio_return'].values,
        'E_Benchmark_Return (for same T+1->T+2 period)': benchmark_trade_returns
    }).head()  # first 5 rows only

    print("\n--- Side-by-Side Performance for Individual Trades ---")
    display(demo_df)

    print("\nEXPLANATION:")
    print(" - This table shows an apples-to-apples comparison for each individual trade cycle.")
    print(" - Column 'D' is your strategy's return from the close of the Buy Date to the close of the Sell Date.")
    print(f" - Column 'E' is the benchmark's return for that EXACT SAME one-day holding period.")
    print(f" - Therefore, Column 'E' IS the '100% holding' benchmark return for the specific duration of the trade.")

else:
    print("Please run the data loading cells first.")

In [None]:
# Remove the pandas row limit so the objects inspected below print in full.
pd.options.display.max_rows = None

# Scratch list of objects that can be inspected here:
# buy_prices_bench, sell_prices_bench, df_prices[BENCHMARK_TICKER],
# benchmark_trade_returns, benchmark_prices


In [None]:
# Debug peek at `strategy_map`, which is built by the interactive plotting
# cell further down. On a fresh kernel that cell has not run yet, so look the
# name up defensively: the cell shows nothing instead of raising a NameError
# during "Run All".
globals().get('strategy_map')

In [None]:
# This interactive cell allows the user to select and plot the equity curves
# for any combination of strategy runs against the benchmark.

import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

if 'df_summary' in locals() and not df_summary.empty and 'df_prices' in locals():

    # --- 1. Prepare the list of strategies for the widget ---
    # Each row of df_summary gets a readable label; strategy_map translates the
    # label back into the index tuple used to filter df_results.
    strategy_map = {}
    for strategy_params in df_summary.index:
        params_dict = dict(zip(df_summary.index.names, strategy_params))
        # Short "suffix=value" pairs for every parameter except scheme and n.
        params_list = [f"{k.split('_')[-1]}={v}" for k, v in params_dict.items() if k not in ['scheme', 'n_select_requested']]
        label_params = f"n={params_dict.get('n_select_requested', 'N/A')}, {', '.join(params_list)}"
        strategy_label = f"#{len(strategy_map)+1} - Scheme: {params_dict['scheme']} ({label_params})"
        strategy_map[strategy_label] = strategy_params

    # --- 2. Create the Interactive Selection Widget ---
    strategy_labels = list(strategy_map)
    strategy_selector = widgets.SelectMultiple(
        options=strategy_labels,
        value=[strategy_labels[0]],  # default: first row of df_summary
        description='Strategies',
        disabled=False,
        layout={'height': '250px', 'width': '95%'}
    )

    # --- 3. Create the function that will be called by the widget ---
    def plot_selected_strategies(selected_labels):
        """Plot equity curves of the selected strategy runs against the benchmark.

        Args:
            selected_labels: labels chosen in the widget; each must be a key
                of `strategy_map`.

        Prints a message and returns without plotting when nothing is selected
        or when no backtest rows match the selection.
        """
        if not selected_labels:
            print("Please select at least one strategy to plot.")
            return

        print(f"Plotting {len(selected_labels)} selected strategies vs. Benchmark...")

        # --- Determine the overall date range for the plot ---
        selected_indices = [strategy_map[label] for label in selected_labels]
        all_relevant_runs = df_results[df_results.set_index(df_summary.index.names).index.isin(selected_indices)]

        if all_relevant_runs.empty:
            print("No data found for the selected strategies.")
            return

        first_selection_date = all_relevant_runs['actual_selection_date_used'].min()
        last_selection_date = all_relevant_runs['actual_selection_date_used'].max()

        # Window: price row after the first selection (first buy) through the
        # row two positions after the last selection (last sell).
        start_loc = df_prices.index.get_indexer([first_selection_date], method='ffill')[0] + 1
        end_loc = df_prices.index.get_indexer([last_selection_date], method='ffill')[0] + 2

        correct_date_range = df_prices.loc[df_prices.index[start_loc]:df_prices.index[end_loc]].index

        # --- Setup the Plot ---
        plt.style.use('seaborn-v0_8-darkgrid')
        fig, ax = plt.subplots(figsize=(15, 8))

        # --- Loop Through Selected Strategies, Calculate, and Plot ---
        for label in selected_labels:
            strategy_params = strategy_map[label]

            # Rows of df_results whose identifying columns all match this run.
            mask = (df_results[df_summary.index.names] == strategy_params).all(axis=1)
            df_strategy = df_results[mask]

            # Book each trade's return on its sell date (two rows after the
            # selection row); every other day in the window counts as 0 return.
            selection_indices = df_prices.index.get_indexer(df_strategy['actual_selection_date_used'], method='ffill')
            sell_dates = df_prices.index[selection_indices + 2]
            trade_returns = pd.Series(df_strategy['portfolio_return'].values, index=sell_dates)
            strategy_daily_returns = trade_returns.reindex(correct_date_range).fillna(0)
            strategy_equity_curve = (1 + strategy_daily_returns).cumprod()

            ax.plot(strategy_equity_curve.index, strategy_equity_curve.values,
                    label=label, linewidth=2.0)

        # --- Plot the Benchmark Curve ---
        # Benchmark prices rescaled to 1 at the start of the window.
        benchmark_prices = df_prices.loc[correct_date_range, BENCHMARK_TICKER]
        benchmark_equity_curve = benchmark_prices / benchmark_prices.iloc[0]
        ax.plot(benchmark_equity_curve.index, benchmark_equity_curve.values,
                label=f"Buy & Hold '{BENCHMARK_TICKER}'", linewidth=1.5, color='black', linestyle='--', zorder=1)

        # --- Finalize the Plot ---
        ax.set_title(f"Selected Strategies vs. Benchmark: Growth of $1", fontsize=16)
        ax.set_xlabel("Date")
        ax.set_ylabel("Equity Curve (1 = breakeven)")
        ax.legend(fontsize=9, loc='upper left')
        ax.grid(True, which='both', linestyle=':', linewidth=0.6)
        fig.autofmt_xdate()

        plt.show()

    # --- 4. Display the widget and link it to the plotting function ---
    print("✅ Interactive Plotting Cell Ready")
    print("Use Ctrl+Click (or Cmd+Click on Mac) to select multiple strategies, or Shift+Click for a range.")

    widgets.interact(plot_selected_strategies, selected_labels=strategy_selector)

else:
    print("Please run the analysis in 'Step 2' first to generate the 'df_summary' table.")




# plot_selected_strategies('#1 - Scheme: IV (n=10, price=10.0, m=2.0, rsi=0.35)')
# plot_selected_strategies([1,2])
# plot_selected_strategies('Scheme: IV (n=10, price=10.0, m=2.0, rsi=0.35)')

In [None]:
# Per-strategy statistics of 'portfolio_return': restrict to that column
# first, aggregate, then rank the strategy runs by mean return.
# NOTE(review): this rebinds `df_summary`, replacing the Sharpe-ranked table
# built in the aggregate-analysis cell; cells that read `df_summary` afterwards
# see this table instead. Confirm that is intended.
df_summary = (
    grouped['portfolio_return']
    .agg(['mean', 'std', 'count'])
    .sort_values('mean', ascending=False)
)

print("Aggregate statistics for each unique strategy run:")
display(df_summary)

In [None]:
# Mean of every numeric column for each unique strategy run; non-numeric
# columns are skipped via numeric_only=True.
# The earlier mean/std/count aggregation assigned to this same variable was
# overwritten by this statement before it was ever used, so it is dropped.
df_summary_all_numeric = grouped.mean(numeric_only=True)

print("Mean of all numeric columns for each unique strategy run:")
display(df_summary_all_numeric)

### Step 4: Visualize Evolving Sharpe for Top Strategy

In [None]:
if 'df_summary' not in locals() or df_summary.empty:
    print("Skipping visualization.")
else:
    print("\n--- Step 3: Plotting Evolving Sharpe Ratio for the Top Strategy Run ---")

    # The first row of the summary table identifies the strategy run to plot.
    id_columns = STRATEGY_ID_COLS + ['scheme']
    top_strategy_params = df_summary.index[0]

    # Keep only the backtest rows whose identifying columns all match that run.
    strategy_filter_mask = (df_results[id_columns] == top_strategy_params).all(axis=1)
    df_top_strategy = df_results[strategy_filter_mask]

    # Hand the filtered rows to the plotting helper in utils.
    utils.plot_evolving_annualized_sharpe(
        df=df_top_strategy,
        date_col='actual_selection_date_used',
        return_col='portfolio_return',
        scheme_col='scheme',
        annual_risk_free_rate=ANNUAL_RISK_FREE_RATE,
        trading_days_per_year=TRADING_DAYS_PER_YEAR,
        min_periods_for_sharpe=MIN_PERIODS_FOR_SHARPE,
    )

### The Cells Below Are for Analysis

In [None]:
# Peek at the groups pandas created: `.groups` maps each group key to the row
# labels that belong to that group.
group_index_map = grouped.groups
print("Available group names:")
print(list(group_index_map)[:5])           # first 5 group keys
print(list(group_index_map.values())[:5])  # row labels of those 5 groups

# Pick one key to inspect; a key is a tuple, e.g. (10, 10.0, 2.0, 0.35, 'EW').
a_specific_group_name = list(group_index_map)[0]

print(f"\n--- Data for the specific group: {a_specific_group_name} ---")
display(grouped.get_group(a_specific_group_name))