### Backtest Results: Interactive Analysis

This notebook provides a powerful interactive tool for analyzing and visualizing the performance of all backtested strategies.

**Workflow:**
*   **Setup:** Configure paths and define parameters for the analysis.
*   **Step 1 – Load Data:** Load the master backtest results and the historical price data.
*   **Step 2 – Prepare for Interaction:** Generate a summary of all unique strategy runs to populate the interactive widget.
*   **Step 3 – Interactive Visualization:** Use a widget to select and plot the equity curves of one or more strategies against the benchmark. The computed equity curves are cached for efficiency.
*   **Step 4 – Analyze Underlying Data:** Display a detailed, day-by-day table of the returns for the currently selected strategies, pulling directly from the cached equity curves.

### Setup and Configuration

In [2]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

# --- Project Path Setup ---
# The project root is taken to be the parent of the current working directory
# (assumes the notebook is launched from a direct sub-folder of the project).
NOTEBOOK_DIR = Path.cwd()
ROOT_DIR = NOTEBOOK_DIR.parent
SRC_DIR = ROOT_DIR / 'src'

# Make the project root and `src/` importable; skip entries that are already
# present so re-running the cell does not grow sys.path.
for _import_root in map(str, (ROOT_DIR, SRC_DIR)):
    if _import_root not in sys.path:
        sys.path.append(_import_root)

# --- Local Imports ---
import utils

# --- Analysis Parameters ---
# NOTE(review): MIN_PERIODS_FOR_SHARPE is not referenced by any cell shown in
# this notebook — confirm it is still needed.
MIN_PERIODS_FOR_SHARPE = 10
# Column of the price table that is plotted as the buy-and-hold benchmark.
BENCHMARK_TICKER = "VGT"

# --- Columns that identify a unique strategy run ---
# Together with 'scheme', these columns form the grouping key for a strategy.
STRATEGY_ID_COLS = [
    'n_select_requested',
    'filter_min_price',
    'filter_min_avg_volume_m',
    'score_weight_rsi',
]

# --- File Path Construction ---
# Inputs: historical prices live under data/, backtest output under output/backtest_results/.
DATA_DIR = ROOT_DIR.joinpath('data')
BACKTEST_DIR = ROOT_DIR.joinpath('output', 'backtest_results')
BACKTEST_RESULTS_PATH = BACKTEST_DIR.joinpath('backtest_master_results.parquet')
HISTORICAL_PRICES_PATH = DATA_DIR.joinpath('df_adj_close.parquet')

# --- Notebook Setup ---
pd.set_option('display.max_columns', None); pd.set_option('display.width', 1500)
pd.set_option('display.float_format', '{:.4f}'.format)
%load_ext autoreload
%autoreload 2

print(f"✅ Setup complete. Benchmark: '{BENCHMARK_TICKER}'")

✅ Setup complete. Benchmark: 'VGT'


### Step 1: Load All Required Data

In [3]:
print("--- Step 1: Loading all required data ---")

# Flag consumed by the next step; it only becomes True when both files load.
data_loaded_ok = False
try:
    # Backtest results: parse the selection date column into datetimes.
    df_results = pd.read_parquet(BACKTEST_RESULTS_PATH)
    df_results = df_results.assign(
        actual_selection_date_used=pd.to_datetime(df_results['actual_selection_date_used'])
    )
    print(f"✅ Loaded {len(df_results)} backtest runs.")

    # Historical prices: one column per ticker, rows put in chronological order
    # (the positional date look-ups in Step 3 rely on a sorted index).
    df_prices = pd.read_parquet(HISTORICAL_PRICES_PATH)
    df_prices.index = pd.to_datetime(df_prices.index)
    df_prices = df_prices.sort_index()
    print(f"✅ Loaded price data for {len(df_prices.columns)} tickers.")
except FileNotFoundError as missing_file_error:
    print(f"❌ ERROR: A required file was not found. {missing_file_error}")
else:
    data_loaded_ok = True

--- Step 1: Loading all required data ---
✅ Loaded 216 backtest runs.
✅ Loaded price data for 1536 tickers.


### Step 2: Prepare for Interactive Analysis

In [4]:
if not data_loaded_ok:
    print("Skipping step due to data loading failure.")
else:
    print("\n--- Step 2: Preparing data for the interactive plot ---")

    # A strategy is identified by its parameter columns plus the weighting scheme.
    strategy_key_cols = STRATEGY_ID_COLS + ['scheme']

    # One summary record per distinct key combination found in the results.
    grouped = df_results.groupby(strategy_key_cols)
    summary_records = [
        dict(zip(strategy_key_cols, group_key))
        for group_key, _ in grouped
    ]
    df_summary = pd.DataFrame(summary_records).set_index(strategy_key_cols)

    # Map a human-readable label (shown in the widget) to the strategy's index tuple.
    strategy_map = {}
    for rank, strategy_params in enumerate(df_summary.index, start=1):
        params_dict = dict(zip(df_summary.index.names, strategy_params))
        n_requested = params_dict.get('n_select_requested', 'N/A')
        # Remaining parameters are abbreviated to the last word of the column name.
        other_params = ', '.join(
            f"{name.split('_')[-1]}={value}"
            for name, value in params_dict.items()
            if name not in ('scheme', 'n_select_requested')
        )
        strategy_label = f"#{rank} - Scheme: {params_dict['scheme']} (n={n_requested}, {other_params})"
        strategy_map[strategy_label] = strategy_params

    print(f"✅ Found {len(strategy_map)} unique strategy runs. Ready for visualization.")


--- Step 2: Preparing data for the interactive plot ---
✅ Found 3 unique strategy runs. Ready for visualization.


### Step 3: Interactive Visualization of Equity Curves

In [None]:
if 'strategy_map' in locals():

    # --- Cache: most recently computed equity curve per strategy label ---
    # Step 4 reads this dict directly, so it stays keyed by label and holds
    # pd.Series indexed by the date range that was plotted.
    equity_curves_cache = {}

    def _build_equity_curve(label, date_range):
        """Compute the growth-of-1 curve of one strategy over `date_range`.

        Each run's `portfolio_return` is booked on the price date two rows after
        the (forward-filled) selection date; every other date gets a 0 return.

        Args:
            label: Key of `strategy_map` identifying the strategy.
            date_range: DatetimeIndex the curve is expressed on.

        Returns:
            pd.Series indexed by `date_range`; a flat 1.0 line when the strategy
            has no run with a usable sell date.
        """
        id_cols = list(df_summary.index.names)
        params = strategy_map[label]
        mask = (df_results[id_cols] == pd.Series(params, index=id_cols)).all(axis=1)
        df_strat = df_results[mask]

        # Position of each selection date in the price index (ffill: last price date <= selection date).
        indices = df_prices.index.get_indexer(df_strat['actual_selection_date_used'], method='ffill')
        # Drop runs dated before the first price row or whose sell date would fall past the last one.
        valid_mask = (indices != -1) & (indices + 2 < len(df_prices))
        if not valid_mask.any():
            return pd.Series(1.0, index=date_range)

        sell_dates = df_prices.index[indices[valid_mask] + 2]
        returns = pd.Series(df_strat[valid_mask]['portfolio_return'].values, index=sell_dates)

        # reindex() raises on duplicate labels, so collapse runs that share a sell date.
        if returns.index.has_duplicates:
            print(f"⚠️ '{label}': several runs share a sell date; keeping the last run per date.")
            returns = returns[~returns.index.duplicated(keep='last')]

        daily_returns = returns.reindex(date_range).fillna(0)
        return (1 + daily_returns).cumprod()

    # --- 1. Define the plotting function FIRST ---
    # This function will be called by the widget whenever its value changes.
    def plot_selected(selected_labels):
        """Plot the equity curves of the selected strategies against the benchmark.

        Args:
            selected_labels: Iterable of `strategy_map` keys chosen in the widget.

        Side effects:
            Updates `equity_curves_cache[label]` for every selected label and
            renders a matplotlib figure (or prints a message when nothing can be drawn).
        """
        if not selected_labels:
            print("Please select at least one strategy.")
            return

        # Determine overall date range for all selected strategies
        selected_indices = [strategy_map[label] for label in selected_labels]
        relevant_runs = df_results[df_results.set_index(df_summary.index.names).index.isin(selected_indices)]
        if relevant_runs.empty:
            print("No backtest runs found for the selected strategies.")
            return

        first_selection = relevant_runs['actual_selection_date_used'].min()
        last_selection = relevant_runs['actual_selection_date_used'].max()
        # The range starts one price row after the earliest selection date and ends
        # two rows after the latest one, clipped to the last available price row.
        start_loc = df_prices.index.get_indexer([first_selection], method='ffill')[0] + 1
        end_loc = min(df_prices.index.get_indexer([last_selection], method='ffill')[0] + 2, len(df_prices) - 1)
        correct_date_range = df_prices.index[start_loc:end_loc + 1]
        if correct_date_range.empty:
            # Happens when the earliest selection date maps to the last price row.
            print("No price data is available after the selection dates of the chosen strategies.")
            return

        fig, ax = plt.subplots(figsize=(15, 8))

        # Loop through selections, using the cache for efficiency
        for label in selected_labels:
            cached_curve = equity_curves_cache.get(label)
            # A cached curve is only reusable if it was built on the same date range;
            # the range depends on the whole selection, not just on this strategy.
            if cached_curve is None or not cached_curve.index.equals(correct_date_range):
                equity_curves_cache[label] = _build_equity_curve(label, correct_date_range)

            # Always plot, including the flat curve of a strategy without valid runs.
            equity_curves_cache[label].plot(ax=ax, label=label, linewidth=2.0)

        # Plot the benchmark curve
        if BENCHMARK_TICKER in df_prices.columns:
            benchmark_prices = df_prices.loc[correct_date_range, BENCHMARK_TICKER]
            (benchmark_prices / benchmark_prices.iloc[0]).plot(
                ax=ax, label=f"Buy & Hold '{BENCHMARK_TICKER}'", color='black', linestyle='--', linewidth=1.5
            )
        else:
            print(f"⚠️ Benchmark '{BENCHMARK_TICKER}' not found in price data; plotting strategies only.")

        # NOTE(review): the title says "100% Invested" while the subtitle says
        # "50% Invested" — confirm which description matches the backtest.
        # Set the main, larger title for the entire figure
        fig.suptitle("Strategies Theoretical 100% Invested Result vs. Benchmark Buy-and-Hold: Growth of $1", fontsize=16, y=.94)

        # Set the smaller, secondary title (subtitle) for the axes
        ax.set_title("Strategies are 50% Invested from t0:buy-at-close, t1:sell-at-close, t2:buy-at-close, t3:sell-at-close", fontsize=12, color='brown')

        ax.set_xlabel("Date"); ax.set_ylabel("Equity Curve (1 = breakeven)")
        ax.legend(loc='upper left', fontsize=9); ax.grid(True, which='both', linestyle=':')
        fig.autofmt_xdate(); plt.show()

    # --- 2. Create the interactive UI elements NOW that the function is defined ---
    strategy_labels = list(strategy_map)
    strategy_selector = widgets.SelectMultiple(
        # Pre-select the first strategy; slicing keeps this safe if the map is empty.
        options=strategy_labels, value=strategy_labels[:1],
        description='Strategies', disabled=False, layout={'height': '250px', 'width': '95%'}
    )
    plot_output = widgets.interactive_output(
        plot_selected, # We can now refer to plot_selected directly
        {'selected_labels': strategy_selector}
    )

    # --- 3. Display the final interactive UI ---
    print("✅ Interactive Plotting Cell Ready")
    print("Use Ctrl+Click (or Cmd+Click on Mac) to select multiple strategies.")
    display(strategy_selector, plot_output)

else:
    print("Please run previous steps first.")

✅ Interactive Plotting Cell Ready
Use Ctrl+Click (or Cmd+Click on Mac) to select multiple strategies.


SelectMultiple(description='Strategies', index=(0,), layout=Layout(height='250px', width='95%'), options=('#1 …

Output()

### Step 4: Analyze Underlying Daily Returns

In [6]:
if 'equity_curves_cache' in locals():
    print("--- Step 4: Underlying Daily Returns for Currently Selected Strategies ---")

    # Get the strategy labels currently selected in the widget from Step 3
    selected_labels = strategy_selector.value

    # Only strategies whose curve Step 3 has actually computed can be tabulated;
    # indexing the cache blindly would raise KeyError for the others.
    cached_labels = [label for label in selected_labels if label in equity_curves_cache]
    missing_labels = [label for label in selected_labels if label not in equity_curves_cache]

    if not selected_labels:
        print("Please select one or more strategies in the widget above to see the data.")
    elif not cached_labels:
        print("No cached equity curve for the current selection. Re-run the interactive plotting cell (Step 3).")
    else:
        if missing_labels:
            print(f"⚠️ Skipping strategies without a cached equity curve: {', '.join(missing_labels)}")

        # --- Build the comparison table from the cache with controlled column order ---

        # 1. Take the date range from the first available cached equity curve so
        #    the table lines up with what is plotted.
        date_range = equity_curves_cache[cached_labels[0]].index

        # 2. Initialize the DataFrame with that index.
        df_comparison = pd.DataFrame(index=date_range)

        # 3. Add the Benchmark Price and Return columns FIRST so they are the
        #    two left-most columns.
        benchmark_price_col_name = f"Benchmark Price ({BENCHMARK_TICKER})"
        df_comparison[benchmark_price_col_name] = df_prices.loc[date_range, BENCHMARK_TICKER]
        df_comparison['Benchmark Return'] = df_comparison[benchmark_price_col_name].pct_change().fillna(0)

        # 4. Add each strategy's daily returns (derived from its cached equity
        #    curve) as a new column to the right.
        for label in cached_labels:
            equity_curve = equity_curves_cache[label]
            if not equity_curve.index.equals(date_range):
                # A curve cached under a different selection can span other dates;
                # dates it does not cover show up as NaN after alignment.
                print(f"⚠️ '{label}' was cached on a different date range; re-select it in Step 3 to refresh.")
            df_comparison[label] = equity_curve.pct_change().fillna(0).reindex(date_range)

        # 5. Display the final combined DataFrame.
        print("This table shows the day-by-day returns, with benchmark data on the left for easy comparison.")
        display(df_comparison)
else:
    print("Please run the interactive plotting cell (Step 3) first to populate the cache.")

--- Step 4: Underlying Daily Returns for Currently Selected Strategies ---
This table shows the day-by-day returns, with benchmark data on the left for easy comparison.


Unnamed: 0_level_0,Benchmark Price (VGT),Benchmark Return,"#1 - Scheme: EW (n=10, price=10.0, m=2.0, rsi=0.35)"
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-04-28,544.9220,0.0000,0.0000
2025-04-29,547.5590,0.0048,0.0085
2025-04-30,548.8580,0.0024,0.0114
2025-05-01,558.5470,0.0177,-0.0023
2025-05-02,566.9280,0.0150,0.0242
...,...,...,...
2025-08-05,683.5500,-0.0090,0.0033
2025-08-06,691.8500,0.0121,0.0171
2025-08-07,693.2000,0.0020,0.0087
2025-08-08,699.3500,0.0089,0.0039
