In [1]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np
import ipywidgets as widgets
import pprint 
from pathlib import Path
from datetime import datetime, date
from IPython.display import display, Markdown

pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 3000)

download_path = Path.home() / "Downloads"  
# OHLCV_file_path = r'c:\Users\ping\Files_win10\python\py311\stocks\data\df_OHLCV_clean_stocks_etfs.parquet'
OHLCV_file_path = r'c:\Users\ping\Files_win10\python\py311\stocks\data\df_OHLCV_stocks_etfs.parquet'

df_OHLCV = pd.read_parquet(OHLCV_file_path, engine='pyarrow')

In [2]:
df_OHLCV.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1094338 entries, ('AA', Timestamp('2025-09-25 00:00:00')) to ('ZWS', Timestamp('2023-01-03 00:00:00'))
Data columns (total 5 columns):
 #   Column     Non-Null Count    Dtype  
---  ------     --------------    -----  
 0   Adj Open   1094338 non-null  float64
 1   Adj High   1094338 non-null  float64
 2   Adj Low    1094338 non-null  float64
 3   Adj Close  1094338 non-null  float64
 4   Volume     1094338 non-null  int64  
dtypes: float64(4), int64(1)
memory usage: 46.0+ MB


In [3]:
df_OHLCV.loc['AA']

Unnamed: 0_level_0,Adj Open,Adj High,Adj Low,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-09-25,31.2100,31.8450,30.8100,31.4500,5318714
2025-09-24,31.7600,32.0300,31.1050,31.2300,7167700
2025-09-23,32.6700,32.9700,31.0300,31.5300,7983300
2025-09-22,32.0000,32.6400,31.7900,32.6300,4758300
2025-09-19,32.7700,32.9900,32.2800,32.4600,6383100
...,...,...,...,...,...
2023-01-09,46.0172,48.9760,45.9205,48.6086,6386678
2023-01-06,44.9825,46.1622,44.1703,44.8278,4803970
2023-01-05,43.8028,44.4700,42.6812,43.9866,3966801
2023-01-04,43.2904,44.3830,42.9045,43.6385,3643827


### The New "Walk-Forward" Analyzer Function

Here is the complete code. You can add this new function to your project; it does not replace or interfere with our previous functions.

In [4]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime, date
import numpy as np
import ipywidgets as widgets
from IPython.display import display, Markdown
import pprint

# --- Function 1: The Advanced Walk-Forward Analyzer (DEFINITIVE VERSION) ---
# --- Function 1: The Advanced Walk-Forward Analyzer (DEFINITIVE FINAL VERSION) ---
def plot_walk_forward_analyzer(df_ohlcv, 
                               default_start_date=None,
                               default_calc_period='3M', 
                               default_fwd_period='1M',
                               default_metric='Sharpe (ATR)',
                               default_rank_start=1,
                               default_rank_end=10,
                               default_benchmark_ticker='VOO'):
    """
    Creates a robust, interactive walk-forward analysis plot.
    This version includes correct Wilder's ATR, correct equal-weighted Sharpe derived
    directly from the plotted portfolio line, and a correctly populated return DataFrame.
    """
    # --- All setup and widget code is unchanged... ---
    print("Initializing Walk-Forward Analyzer...")
    if not isinstance(df_ohlcv.index, pd.MultiIndex):
        raise ValueError("Input DataFrame must have a (Ticker, Date) MultiIndex.")
    df_ohlcv = df_ohlcv.sort_index()
    print("Pre-processing data (unstacking)...")
    df_close_full = df_ohlcv['Adj Close'].unstack(level=0)
    df_high_full = df_ohlcv['Adj High'].unstack(level=0)
    df_low_full = df_ohlcv['Adj Low'].unstack(level=0)
    df_close_full.index = pd.to_datetime(df_close_full.index)
    df_high_full.index = pd.to_datetime(df_high_full.index)
    df_low_full.index = pd.to_datetime(df_low_full.index)
    if isinstance(default_start_date, str):
        default_start_date = pd.to_datetime(default_start_date).date()
    min_date_available = df_close_full.index.min()
    max_date_available = df_close_full.index.max()
    if default_start_date is None:
        default_start_date = max_date_available.date() - pd.DateOffset(years=1)
    start_date_picker = widgets.DatePicker(description='Start Date:', value=default_start_date, disabled=False)
    calc_period_options = {'1M': pd.DateOffset(months=1), '3M': pd.DateOffset(months=3), '6M': pd.DateOffset(months=6), '1Y': pd.DateOffset(years=1)}
    fwd_period_options = {'0D': pd.DateOffset(days=0), '1W': pd.DateOffset(weeks=1), '2W': pd.DateOffset(weeks=2), '1M': pd.DateOffset(months=1), '3M': pd.DateOffset(months=3)}
    calc_period_dropdown = widgets.Dropdown(options=calc_period_options.keys(), value=default_calc_period, description='Calc Period:')
    fwd_period_dropdown = widgets.Dropdown(options=fwd_period_options.keys(), value=default_fwd_period, description='Fwd Period:')
    metrics = ['Price', 'Sharpe', 'Sharpe (ATR)']
    metric_dropdown = widgets.Dropdown(options=metrics, value=default_metric, description='Metric:')
    rank_options = [1, 5, 10, 20, 30, 40, 50, 75, 100]
    rank_start_dropdown = widgets.Dropdown(options=rank_options, value=default_rank_start, description='Rank Start:')
    rank_end_dropdown = widgets.Dropdown(options=rank_options, value=default_rank_end, description='Rank End:')
    benchmark_ticker_input = widgets.Text(value=default_benchmark_ticker, description='Benchmark:', placeholder='Enter Ticker')
    update_button = widgets.Button(description="Update Chart", button_style='primary')
    ticker_list_output = widgets.Output()
    results_container = [None]
    fig = go.FigureWidget()
    max_traces = 50
    for i in range(max_traces):
        fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name=f'placeholder_{i}', visible=False, showlegend=False))
    fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name='Benchmark', visible=True, showlegend=True, line=dict(color='black', width=3, dash='dash')))
    fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name='Group Portfolio', visible=True, showlegend=True, line=dict(color='green', width=3)))

    def calculate_gain(price_series: pd.Series):
        if price_series.dropna().shape[0] < 2: return np.nan
        return (price_series.ffill().iloc[-1] / price_series.bfill().iloc[0]) - 1

    def calculate_sharpe(return_series: pd.Series):
        if return_series.dropna().shape[0] < 2: return np.nan
        std_dev = return_series.std()
        if std_dev > 0:
            return (return_series.mean() / std_dev) * np.sqrt(252)
        return np.nan

    def update_plot(button_click):
        ticker_list_output.clear_output()
        start_date = pd.to_datetime(start_date_picker.value)
        calc_period = calc_period_options[calc_period_dropdown.value]; fwd_period = fwd_period_options[fwd_period_dropdown.value]
        metric = metric_dropdown.value; rank_start, rank_end = rank_start_dropdown.value, rank_end_dropdown.value
        benchmark_ticker = benchmark_ticker_input.value.strip().upper()
        if rank_start > rank_end:
            with ticker_list_output: print("Error: 'Rank Start' must be <= 'Rank End'."); return
        safe_start_date = max(start_date, min_date_available)
        safe_calc_end_date = min(start_date + calc_period, max_date_available)
        safe_viz_end_date = min(safe_calc_end_date + fwd_period, max_date_available)
        if safe_start_date >= safe_calc_end_date:
            with ticker_list_output: print(f"Error: Invalid date range."); return
        calc_close_raw = df_close_full.loc[safe_start_date:safe_calc_end_date]
        calc_close = calc_close_raw.dropna(axis=1, how='all')
        if calc_close.shape[1] == 0 or len(calc_close) < 2:
            with ticker_list_output: print("Error: Not enough data in calc period."); return

        metric_values = {}
        first_prices = calc_close.bfill().iloc[0]; last_prices = calc_close.ffill().iloc[-1]
        metric_values['Price'] = (last_prices / first_prices).dropna()
        daily_returns = calc_close.bfill().ffill().pct_change()
        mean_returns, std_returns = daily_returns.mean(), daily_returns.std()
        metric_values['Sharpe'] = (mean_returns / std_returns * np.sqrt(252)).fillna(0)
        valid_tickers = calc_close.columns
        calc_high = df_high_full[valid_tickers].loc[safe_start_date:safe_calc_end_date]
        calc_low = df_low_full[valid_tickers].loc[safe_start_date:safe_calc_end_date]
        tr = np.maximum(calc_high - calc_low, abs(calc_high - df_close_full[valid_tickers].shift(1)), abs(calc_low - df_close_full[valid_tickers].shift(1)))
        atr = tr.ewm(alpha=1/14, adjust=False).mean()
        atrp = (atr / calc_close).mean()
        metric_values['Sharpe (ATR)'] = (mean_returns / atrp).fillna(0)
        
        sorted_tickers = metric_values[metric].sort_values(ascending=False)
        tickers_to_display = sorted_tickers.index[rank_start-1:rank_end].tolist()
        if not tickers_to_display:
            with ticker_list_output: print("Error: No tickers found."); return
        normalized_plot_data = df_close_full[tickers_to_display].loc[safe_start_date:safe_viz_end_date]
        normalized_plot_data = normalized_plot_data.div(normalized_plot_data.bfill().iloc[0])
        actual_calc_end_ts = calc_close.index.max()
        actual_viz_start_date = normalized_plot_data.index.min().date()
        actual_viz_end_date = normalized_plot_data.index.max().date()

        with fig.batch_update():
            for i in range(max_traces):
                trace = fig.data[i]
                if i < len(tickers_to_display):
                    ticker = tickers_to_display[i]
                    trace.x, trace.y, trace.name = normalized_plot_data.index, normalized_plot_data[ticker], ticker
                    trace.visible, trace.showlegend = True, True
                else: trace.visible, trace.showlegend = False, False
            benchmark_trace = fig.data[max_traces]
            if benchmark_ticker and benchmark_ticker in df_close_full.columns:
                benchmark_series = df_close_full[benchmark_ticker].loc[safe_start_date:safe_viz_end_date]
                if not benchmark_series.dropna().empty:
                    normalized_benchmark = benchmark_series / benchmark_series.bfill().iloc[0]
                    benchmark_trace.x, benchmark_trace.y = normalized_benchmark.index, normalized_benchmark
                    benchmark_trace.name = f"Benchmark ({benchmark_ticker})"; benchmark_trace.visible = True
                else: benchmark_trace.visible = False
            else: benchmark_trace.visible = False
            portfolio_trace = fig.data[max_traces + 1]
            portfolio_series = normalized_plot_data.mean(axis=1) # This is the data for the green line
            portfolio_trace.x, portfolio_trace.y = portfolio_series.index, portfolio_series
            portfolio_trace.name = 'Group Portfolio'; portfolio_trace.visible = True
            fig.layout.shapes = []
            fig.add_shape(type="line", x0=actual_calc_end_ts, y0=0, x1=actual_calc_end_ts, y1=1, xref='x', yref='paper', line=dict(color="grey", width=2, dash="dash"))

        # --- FIX 2: RESTORED THE RESULTS DATAFRAME BLOCK ---
        calc_start_prices = calc_close.bfill().iloc[0]; calc_end_prices = calc_close.ffill().iloc[-1]
        fwd_close_slice = df_close_full.loc[actual_calc_end_ts:safe_viz_end_date]
        viz_end_prices = fwd_close_slice.ffill().iloc[-1] if not fwd_close_slice.empty and len(fwd_close_slice) >= 2 else calc_end_prices
        calc_gains = (calc_end_prices / calc_start_prices) - 1
        fwd_gains = (viz_end_prices / calc_end_prices) - 1
        results_df = pd.DataFrame({'Rank': range(rank_start, rank_start + len(tickers_to_display)), 'Metric': metric, 'MetricValue': sorted_tickers.loc[tickers_to_display].values, 'CalcPrice': calc_end_prices.loc[tickers_to_display], 'CalcGain': calc_gains.loc[tickers_to_display], 'CalcPeriod': calc_period_dropdown.value, 'FwdPeriod': fwd_period_dropdown.value, 'FwdGain': fwd_gains.loc[tickers_to_display]}, index=pd.Index(tickers_to_display, name='Ticker'))
        if benchmark_trace.visible:
            bench_calc_prices = df_close_full[benchmark_ticker].loc[safe_start_date:actual_calc_end_ts]
            bench_fwd_prices = df_close_full[benchmark_ticker].loc[actual_calc_end_ts:safe_viz_end_date]
            bench_start_price = bench_calc_prices.bfill().iloc[0]
            bench_calc_end_price = bench_calc_prices.ffill().iloc[-1]
            bench_fwd_end_price = bench_fwd_prices.ffill().iloc[-1] if not bench_fwd_prices.empty and len(bench_fwd_prices) >= 2 else bench_calc_end_price
            bench_calc_gain = (bench_calc_end_price / bench_start_price) - 1
            bench_fwd_gain = (bench_fwd_end_price / bench_calc_end_price) - 1
            bench_metric_value = metric_values[metric].get(benchmark_ticker, np.nan)
            benchmark_df_row = pd.DataFrame({'Rank': np.nan, 'Metric': metric, 'MetricValue': bench_metric_value, 'CalcPrice': bench_calc_end_price, 'CalcGain': bench_calc_gain, 'CalcPeriod': calc_period_dropdown.value, 'FwdPeriod': fwd_period_dropdown.value, 'FwdGain': bench_fwd_gain}, index=pd.Index([f"{benchmark_ticker} (BM)"], name='Ticker'))
            results_df = pd.concat([results_df, benchmark_df_row])
        results_container[0] = results_df
        # --- END OF RESTORED BLOCK ---
        
        with ticker_list_output:
            print(f"Analyzing from {actual_viz_start_date} to {actual_viz_end_date}.")
            print(f"  - Ranking based on performance from {actual_viz_start_date} to {actual_calc_end_ts.date()}.")
            pprint.pprint(tickers_to_display, width=120, compact=True)
            
            # --- FIX 1: DERIVE SHARPE DIRECTLY FROM PLOTTED DATA ---
            # 'portfolio_series' is already defined above as `normalized_plot_data.mean(axis=1)`
            portfolio_return_series = portfolio_series.pct_change()
            
            benchmark_price_series = df_close_full.get(benchmark_ticker)
            benchmark_return_series = benchmark_price_series.loc[safe_start_date:safe_viz_end_date].bfill().ffill().pct_change() if benchmark_price_series is not None else pd.Series(dtype='float64')

            calc_p_gain = calculate_gain(portfolio_series.loc[:actual_calc_end_ts])
            fwd_p_gain = calculate_gain(portfolio_series.loc[actual_calc_end_ts:])
            full_p_gain = calculate_gain(portfolio_series)
            
            calc_b_gain = calculate_gain(benchmark_price_series.loc[safe_start_date:actual_calc_end_ts]) if benchmark_price_series is not None else np.nan
            fwd_b_gain = calculate_gain(benchmark_price_series.loc[actual_calc_end_ts:safe_viz_end_date]) if benchmark_price_series is not None else np.nan
            full_b_gain = calculate_gain(benchmark_price_series.loc[safe_start_date:safe_viz_end_date]) if benchmark_price_series is not None else np.nan
            
            calc_p_sharpe = calculate_sharpe(portfolio_return_series.loc[:actual_calc_end_ts])
            fwd_p_sharpe = calculate_sharpe(portfolio_return_series.loc[actual_calc_end_ts:])
            full_p_sharpe = calculate_sharpe(portfolio_return_series)

            calc_b_sharpe = calculate_sharpe(benchmark_return_series.loc[:actual_calc_end_ts])
            fwd_b_sharpe = calculate_sharpe(benchmark_return_series.loc[actual_calc_end_ts:])
            full_b_sharpe = calculate_sharpe(benchmark_return_series)
            
            rows = []
            rows.append({'Metric': 'Group Portfolio Gain', 'Full': full_p_gain, 'Calc': calc_p_gain, 'Fwd': fwd_p_gain})
            if not np.isnan(full_b_gain):
                rows.append({'Metric': f'Benchmark ({benchmark_ticker}) Gain', 'Full': full_b_gain, 'Calc': calc_b_gain, 'Fwd': fwd_b_gain})
                rows.append({'Metric': 'Gain Delta (vs Bm)', 'Full': full_p_gain - full_b_gain, 'Calc': calc_p_gain - calc_b_gain, 'Fwd': fwd_p_gain - fwd_b_gain})
            rows.append({'Metric': 'Group Portfolio Sharpe', 'Full': full_p_sharpe, 'Calc': calc_p_sharpe, 'Fwd': fwd_p_sharpe})
            if not np.isnan(full_b_sharpe):
                rows.append({'Metric': f'Benchmark ({benchmark_ticker}) Sharpe', 'Full': full_b_sharpe, 'Calc': calc_b_sharpe, 'Fwd': fwd_b_sharpe})
                rows.append({'Metric': 'Sharpe Delta (vs Bm)', 'Full': full_p_sharpe - full_b_sharpe, 'Calc': calc_p_sharpe - calc_b_sharpe, 'Fwd': fwd_p_sharpe - fwd_b_sharpe})
            report_df = pd.DataFrame(rows).set_index('Metric')
            gain_rows = [row for row in report_df.index if 'Gain' in row or 'Delta' in row]
            sharpe_rows = [row for row in report_df.index if 'Sharpe' in row]
            styled_df = report_df.style.format('{:+.2%}', na_rep='N/A', subset=(gain_rows, report_df.columns)).format('{:+.2f}', na_rep='N/A', subset=(sharpe_rows, report_df.columns)).set_properties(**{'text-align': 'right', 'width': '100px'}).set_table_styles([{'selector': 'th.col_heading', 'props': [('text-align', 'right')]}, {'selector': 'th.row_heading', 'props': [('text-align', 'left')]}])
            print("\n--- Strategy Performance Summary ---")
            display(styled_df)

    fig.update_layout(title_text='Walk-Forward Performance Analysis', xaxis_title='Date', yaxis_title='Normalized Price (Start = 1)', hovermode='x unified', legend_title_text='Tickers (Ranked)', height=700, margin=dict(t=50))
    fig.add_hline(y=1, line_width=1, line_dash="dash", line_color="grey")
    update_button.on_click(update_plot)
    controls_row1 = widgets.HBox([start_date_picker, calc_period_dropdown, fwd_period_dropdown])
    controls_row2 = widgets.HBox([metric_dropdown, rank_start_dropdown, rank_end_dropdown, benchmark_ticker_input, update_button])
    ui_container = widgets.VBox([controls_row1, controls_row2, ticker_list_output], layout=widgets.Layout(margin='10px 0 20px 0'))
    display(ui_container, fig)
    update_plot(None)
    return results_container



In [5]:
import pandas as pd
import numpy as np
from datetime import datetime
from IPython.display import display, Markdown

def verify_one_ticker_walk_forward_calculation(df_ohlcv, 
                                               ticker, 
                                               start_date, 
                                               calc_period, 
                                               fwd_period, 
                                               export_csv=False):
    """
    Provides a transparent, step-by-step breakdown of the calculations used in 
    the walk-forward analyzer for a SINGLE ticker and a specific time slice.
    If export_csv is True, it generates a file with all intermediate calculations.
    """
    # ... (setup part is unchanged) ...
    display(Markdown(f"## Verification Report for Ticker: `{ticker}`"))
    period_options = { '1M': pd.DateOffset(months=1), '3M': pd.DateOffset(months=3), '6M': pd.DateOffset(months=6), '1Y': pd.DateOffset(years=1), '0D': pd.DateOffset(days=0), '1W': pd.DateOffset(weeks=1), '2W': pd.DateOffset(weeks=2) }
    try:
        df_ticker_raw = df_ohlcv.loc[ticker].sort_index()
        df_ticker = df_ticker_raw.copy()
        df_ticker.columns = df_ticker.columns.str.strip()
    except KeyError:
        print(f"---! ERROR: Ticker '{ticker}' not found in the DataFrame. !---")
        return
    adj_close_col = 'Adj Close'; adj_high_col = 'Adj High'; adj_low_col = 'Adj Low'
    start_date_ts = pd.to_datetime(start_date)
    calc_offset = period_options[calc_period]; fwd_offset = period_options[fwd_period]
    calc_end_date_ts = start_date_ts + calc_offset; fwd_end_date_ts = calc_end_date_ts + fwd_offset
    display(Markdown(f"**Analysis Start Date:** `{start_date_ts.date()}`"))
    display(Markdown(f"**Requested Calculation Period:** `{start_date_ts.date()}` to `{calc_end_date_ts.date()}` ({calc_period})"))
    display(Markdown(f"**Requested Forward Period:**   `{calc_end_date_ts.date()}` to `{fwd_end_date_ts.date()}` ({fwd_period})"))

    display(Markdown("### A. Calculation Period Analysis ('In-Sample')"))
    calc_df = df_ticker.loc[start_date_ts:calc_end_date_ts].copy()
    
    # --- DEFINITIVE FIX: Check for at least 2 valid data points ---
    if calc_df[adj_close_col].notna().sum() < 2:
        print("\n---! ERROR: Not enough data points (less than 2) in the Calculation Period to proceed. !---")
        return
        
    actual_calc_start_date = calc_df.index.min().date()
    actual_calc_end_date = calc_df.index.max().date()
    display(Markdown(f"**Actual Dates Used:** `{actual_calc_start_date}` to `{actual_calc_end_date}` ({len(calc_df)} trading days)"))

    # ... (rest of the function is the same, but now safer) ...
    # The fix is mainly the check above. The pct_change call below is also updated for robustness.
    format_dict = {col: '{:,.2f}' for col in calc_df.columns if 'Adj' in col or 'Open' in col}; format_dict['Volume'] = '{:,}'
    display(pd.concat([calc_df.head(3), calc_df.tail(3)]).style.format(format_dict))
    calc_start_price = calc_df[adj_close_col].bfill().iloc[0]
    calc_end_price = calc_df[adj_close_col].ffill().iloc[-1]
    calc_gain = (calc_end_price / calc_start_price) - 1
    display(Markdown("#### `CalcPrice` and `CalcGain` Verification:"))
    print(f"  - Calc Start Price (on {calc_df[adj_close_col].first_valid_index().date()}): ${calc_start_price:.2f}")
    print(f"  - Calc End Price   (on {calc_df[adj_close_col].last_valid_index().date()}): ${calc_end_price:.2f}  <-- This is 'CalcPrice'")
    print(f"  - CalcGain = (${calc_end_price:.2f} / ${calc_start_price:.2f}) - 1 = {calc_gain:.2%}")
    display(Markdown("#### `MetricValue` Verification:"))
    price_metric = calc_end_price / calc_start_price
    print(f"\n1. Price Metric:")
    print(f"   - Formula: Last Price / First Price"); print(f"   - Value: ${calc_end_price:.2f} / ${calc_start_price:.2f} = {price_metric:.4f}")
    
    daily_returns = calc_df[adj_close_col].bfill().ffill().pct_change()

    mean_return = daily_returns.mean()
    std_return = daily_returns.std()
    sharpe_ratio = (mean_return / std_return * np.sqrt(252)) if std_return > 0 else 0
    print(f"\n2. Sharpe Ratio Metric:")
    print(f"   - Mean Daily Return: {mean_return:.6f}"); print(f"   - Std Dev Daily Return: {std_return:.6f}")
    print(f"   - Formula: (Mean / Std Dev) * sqrt(252)"); print(f"   - Value: ({mean_return:.6f} / {std_return:.6f}) * {np.sqrt(252):.2f} = {sharpe_ratio:.4f}")
    high_low = calc_df[adj_high_col] - calc_df[adj_low_col]
    high_prev_close = abs(calc_df[adj_high_col] - calc_df[adj_close_col].shift(1))
    low_prev_close = abs(calc_df[adj_low_col] - calc_df[adj_close_col].shift(1))
    tr = np.maximum(high_low, np.maximum(high_prev_close, low_prev_close))
    atr = tr.ewm(alpha=1/14, adjust=False).mean()
    atrp_series = atr / calc_df[adj_close_col]
    atrp_mean = atrp_series.mean()
    sharpe_atr = (mean_return / atrp_mean) if atrp_mean > 0 else 0
    print(f"\n3. Sharpe (ATR) Metric:")
    print(f"   - Mean Daily Return: {mean_return:.6f} (same as above)"); print(f"   - Average ATR Percent (ATRP): {atrp_mean:.6f}")
    print(f"   - Formula: Mean Daily Return / ATRP"); print(f"   - Value: {mean_return:.6f} / {atrp_mean:.6f} = {sharpe_atr:.4f}")
    # ... (rest of function is unchanged)
    display(Markdown("\n### B. Forward Period Analysis ('Out-of-Sample')"))
    fwd_df = df_ticker.loc[actual_calc_end_date:fwd_end_date_ts].copy()
    if fwd_df.empty or len(fwd_df) < 2:
        display(Markdown("**No valid data found in the Forward Period.**"))
        fwd_end_price = calc_end_price
        fwd_gain = 0
    else:
        actual_fwd_start_date = fwd_df.index.min().date()
        actual_fwd_end_date = fwd_df.index.max().date()
        display(Markdown(f"**Actual Dates Used:** `{actual_fwd_start_date}` to `{actual_fwd_end_date}` ({len(fwd_df)} trading days)"))
        fwd_end_price = fwd_df[adj_close_col].ffill().iloc[-1]
    fwd_start_price = calc_end_price
    fwd_gain = (fwd_end_price / fwd_start_price) - 1
    display(Markdown("#### `FwdGain` Verification:"))
    print(f"  - Fwd Start Price (same as Calc End Price): ${fwd_start_price:.2f}")
    if not fwd_df.empty and len(fwd_df) >= 2:
        print(f"  - Fwd End Price   (on {fwd_df[adj_close_col].last_valid_index().date()}): ${fwd_end_price:.2f}")
    else:
        print(f"  - Fwd End Price   (No fwd data, using start price): ${fwd_end_price:.2f}")
    print(f"  - FwdGain = (${fwd_end_price:.2f} / ${fwd_start_price:.2f}) - 1 = {fwd_gain:.2%}")
    display(Markdown("### C. Final Summary Table"))
    summary_data = { 'Metric': ['End of Calc Price', 'Calc Period Gain', 'Forward Period Gain','---','Metric: Price', 'Metric: Sharpe', 'Metric: Sharpe (ATR)'], 'Calculated Value': [f"${calc_end_price:.2f}", f"{calc_gain:.2%}", f"{fwd_gain:.2%}", '---', f"{price_metric:.4f}", f"{sharpe_ratio:.4f}", f"{sharpe_atr:.4f}"], 'Corresponds To DataFrame Column': ['`CalcPrice`', '`CalcGain`', '`FwdGain`', '---', '`MetricValue`', '`MetricValue`', '`MetricValue`'] }
    summary_df = pd.DataFrame(summary_data)
    display(summary_df)
    if export_csv:
        calc_df['Period'] = 'Calculation'; calc_df['Daily_Return'] = daily_returns; calc_df['High_Minus_Low'] = high_low; calc_df['Abs_High_vs_PrevClose'] = high_prev_close; calc_df['Abs_Low_vs_PrevClose'] = low_prev_close; calc_df['True_Range'] = tr; calc_df['ATR_14'] = atr; calc_df['ATRP'] = atrp_series
        fwd_df['Period'] = 'Forward'
        combined_df = pd.concat([calc_df, fwd_df.iloc[1:]])
        ordered_cols = [ 'Period', adj_close_col, 'Daily_Return', adj_high_col, adj_low_col, 'High_Minus_Low', 'Abs_High_vs_PrevClose', 'Abs_Low_vs_PrevClose', 'True_Range', 'ATR_14', 'ATRP', 'Volume' ]
        remaining_cols = [col for col in combined_df.columns if col not in ordered_cols]
        final_cols = ordered_cols + remaining_cols
        filename = f"verification_{ticker}_{start_date_ts.strftime('%Y%m%d')}.csv"
        combined_df[final_cols].to_csv(filename, float_format='%.8f')
        print(f"\n✅ Detailed data for '{ticker}' exported to '{filename}'")



In [6]:
# The plot_walk_forward_analyzer function remains unchanged from the last version.
# This is the newly named and corrected verification function.

# --- Function 2: The Detailed Verification Tool (FINAL VERSION) ---
def verify_group_tickers_walk_forward_calculation(df_ohlcv, 
                                                  tickers_to_verify, 
                                                  benchmark_ticker,
                                                  start_date, 
                                                  calc_period, 
                                                  fwd_period, 
                                                  export_csv=False):
    """
    Provides a transparent, step-by-step breakdown of the portfolio and benchmark
    performance calculations. The portfolio calculations are truly equal-weighted.
    """
    # --- 1. INITIAL SETUP AND DATA PREPARATION ---
    display(Markdown(f"## Verification Report for Portfolio vs. Benchmark"))
    display(Markdown(f"**Portfolio Tickers:** `{tickers_to_verify}`"))
    display(Markdown(f"**Benchmark Ticker:** `{benchmark_ticker}`"))

    period_options = { '1M': pd.DateOffset(months=1), '3M': pd.DateOffset(months=3), '6M': pd.DateOffset(months=6), '1Y': pd.DateOffset(years=1), '0D': pd.DateOffset(days=0), '1W': pd.DateOffset(weeks=1), '2W': pd.DateOffset(weeks=2) }
    
    df_close_full = df_ohlcv['Adj Close'].unstack(level=0)
    
    # Date setup
    start_date_ts = pd.to_datetime(start_date)
    calc_offset = period_options[calc_period]; fwd_offset = period_options[fwd_period]
    calc_end_date_ts_theoretical = start_date_ts + calc_offset
    fwd_end_date_ts_theoretical = calc_end_date_ts_theoretical + fwd_offset
    actual_calc_end_ts = df_close_full.loc[start_date_ts:calc_end_date_ts_theoretical].index.max()

    display(Markdown(f"**Analysis Start Date:** `{start_date_ts.date()}`"))
    display(Markdown(f"**Calculation Period End Date:** `{actual_calc_end_ts.date()}`"))
    display(Markdown(f"**Forward Period End Date:** `{fwd_end_date_ts_theoretical.date()}`"))
    
    # --- THE FIX IS HERE: Create the correct equal-weighted portfolio value series ---
    # 1. Get the raw price slice for the full period
    portfolio_prices_raw_slice = df_close_full[tickers_to_verify].loc[start_date_ts:fwd_end_date_ts_theoretical]
    # 2. Normalize each ticker to a start of 1.0
    normalized_portfolio_prices = portfolio_prices_raw_slice.div(portfolio_prices_raw_slice.bfill().iloc[0])
    # 3. Average the normalized series to get the true portfolio value series (this matches the green line)
    portfolio_value_series = normalized_portfolio_prices.mean(axis=1)
    # --- END OF FIX ---

    # Benchmark series remains the same (it's just a single ticker)
    try:
        benchmark_price_series = df_close_full[benchmark_ticker]
    except KeyError as e:
        print(f"---! ERROR: Ticker {e} not found in the DataFrame. !---")
        return

    def print_verification_steps(title, price_series):
        display(Markdown(f"#### Verification for: `{title}`"))
        if price_series.dropna().shape[0] < 2:
            print("  - Not enough data points to calculate performance.")
            return {'gain': np.nan, 'sharpe': np.nan}
        
        # Gain is calculated from the start/end values of the input series
        start_price = price_series.bfill().iloc[0]
        end_price = price_series.ffill().iloc[-1]
        gain = (end_price / start_price) - 1
        print(f"  - Start Value (on {price_series.first_valid_index().date()}): {start_price:,.4f}")
        print(f"  - End Value   (on {price_series.last_valid_index().date()}): {end_price:,.4f}")
        print(f"  - Gain = ({end_price:,.4f} / {start_price:,.4f}) - 1 = {gain:.2%}")

        # Sharpe is calculated from the returns of the input series
        returns = price_series.pct_change()
        mean_return = returns.mean()
        std_return = returns.std()
        sharpe = (mean_return / std_return * np.sqrt(252)) if std_return > 0 else np.nan
        print(f"\n  - Mean Daily Return: {mean_return:.6f}")
        print(f"  - Std Dev of Daily Return: {std_return:.6f}")
        print(f"  - Sharpe = ({mean_return:.6f} / {std_return:.6f}) * sqrt(252) = {sharpe:.2f}")
        return {'gain': gain, 'sharpe': sharpe}

    # --- 2. PERIOD-BY-PERIOD ANALYSIS (Now using the correct portfolio series) ---
    
    # A. Calculation Period
    display(Markdown("### A. Calculation Period Analysis ('In-Sample')"))
    calc_p = portfolio_value_series.loc[start_date_ts:actual_calc_end_ts]
    calc_b = benchmark_price_series.loc[start_date_ts:actual_calc_end_ts]
    perf_calc_p = print_verification_steps("Group Portfolio", calc_p)
    perf_calc_b = print_verification_steps(f"Benchmark ({benchmark_ticker})", calc_b)

    # B. Forward Period
    display(Markdown("\n### B. Forward Period Analysis ('Moment of Truth')"))
    fwd_p = portfolio_value_series.loc[actual_calc_end_ts:fwd_end_date_ts_theoretical]
    fwd_b = benchmark_price_series.loc[actual_calc_end_ts:fwd_end_date_ts_theoretical]
    perf_fwd_p = print_verification_steps("Group Portfolio", fwd_p)
    perf_fwd_b = print_verification_steps(f"Benchmark ({benchmark_ticker})", fwd_b)
    
    # C. Full Period
    display(Markdown("\n### C. Full Period Analysis (Total)"))
    full_p = portfolio_value_series # Already the full series
    full_b = benchmark_price_series.loc[start_date_ts:fwd_end_date_ts_theoretical]
    perf_full_p = print_verification_steps("Group Portfolio", full_p)
    perf_full_b = print_verification_steps(f"Benchmark ({benchmark_ticker})", full_b)

    # --- 3. FINAL SUMMARY TABLE (Logic unchanged, but now fed correct data) ---
    display(Markdown("\n### D. Final Summary Table (matches analyzer output)"))
    rows = []
    rows.append({'Metric': 'Group Portfolio Gain', 'Full': perf_full_p['gain'], 'Calc': perf_calc_p['gain'], 'Fwd': perf_fwd_p['gain']})
    rows.append({'Metric': f'Benchmark ({benchmark_ticker}) Gain', 'Full': perf_full_b['gain'], 'Calc': perf_calc_b['gain'], 'Fwd': perf_fwd_b['gain']})
    rows.append({'Metric': 'Gain Delta (vs Bm)', 'Full': perf_full_p['gain'] - perf_full_b['gain'], 'Calc': perf_calc_p['gain'] - perf_calc_b['gain'], 'Fwd': perf_fwd_p['gain'] - perf_fwd_b['gain']})
    rows.append({'Metric': 'Group Portfolio Sharpe', 'Full': perf_full_p['sharpe'], 'Calc': perf_calc_p['sharpe'], 'Fwd': perf_fwd_p['sharpe']})
    rows.append({'Metric': f'Benchmark ({benchmark_ticker}) Sharpe', 'Full': perf_full_b['sharpe'], 'Calc': perf_calc_b['sharpe'], 'Fwd': perf_fwd_b['sharpe']})
    rows.append({'Metric': 'Sharpe Delta (vs Bm)', 'Full': perf_full_p['sharpe'] - perf_full_b['sharpe'], 'Calc': perf_calc_p['sharpe'] - perf_calc_b['sharpe'], 'Fwd': perf_fwd_p['sharpe'] - perf_fwd_b['sharpe']})
    report_df = pd.DataFrame(rows).set_index('Metric')
    gain_rows = [row for row in report_df.index if 'Gain' in row or 'Delta' in row]
    sharpe_rows = [row for row in report_df.index if 'Sharpe' in row]
    styled_df = report_df.style.format('{:+.2%}', na_rep='N/A', subset=(gain_rows, report_df.columns)).format('{:+.2f}', na_rep='N/A', subset=(sharpe_rows, report_df.columns)).set_properties(**{'text-align': 'right', 'width': '100px'}).set_table_styles([{'selector': 'th.col_heading', 'props': [('text-align', 'right')]}, {'selector': 'th.row_heading', 'props': [('text-align', 'left')]}])
    display(styled_df)
    
    if export_csv:
        # Exporting the correct, normalized portfolio series
        export_df = pd.DataFrame({
            'Portfolio_Value_Normalized': portfolio_value_series,
            'Portfolio_Return': portfolio_value_series.pct_change(),
            f'Benchmark_Price_{benchmark_ticker}': benchmark_price_series
        })
        filename = f"verification_group_tickers_{start_date_ts.strftime('%Y%m%d')}.csv"
        export_df.to_csv(filename, float_format='%.6f')
        print(f"\n✅ Detailed group verification data exported to '{filename}'")
        

In [7]:
_start_date = '2023-05-01'
_calc_period = '3M'
_fwd_period = '1W'
_metric = 'Sharpe (ATR)'
_rank_start = 20
_rank_end = 30

# ```python
# Your original call that produced the results DataFrame
walk_forward_results = plot_walk_forward_analyzer(
    df_OHLCV,
    default_start_date=_start_date, # Using a specific date for reproducibility
    default_calc_period=_calc_period,
    default_fwd_period=_fwd_period,
    default_metric=_metric,
    default_rank_start=_rank_start,
    default_rank_end=_rank_end,
)


Initializing Walk-Forward Analyzer...
Pre-processing data (unstacking)...


VBox(children=(HBox(children=(DatePicker(value=datetime.date(2023, 5, 1), description='Start Date:', step=1), …

FigureWidget({
    'data': [{'mode': 'lines',
              'name': 'placeholder_0',
              'showlegend': False,
              'type': 'scatter',
              'uid': 'b72a3faf-d331-47f1-a3be-325e6f11f8ff',
              'visible': False,
              'x': [None],
              'y': [None]},
             {'mode': 'lines',
              'name': 'placeholder_1',
              'showlegend': False,
              'type': 'scatter',
              'uid': 'd5b3cb82-505d-4f04-852f-76319e994de2',
              'visible': False,
              'x': [None],
              'y': [None]},
             {'mode': 'lines',
              'name': 'placeholder_2',
              'showlegend': False,
              'type': 'scatter',
              'uid': '974036e9-4dbf-490b-b766-8f37b7cc5b2f',
              'visible': False,
              'x': [None],
              'y': [None]},
             {'mode': 'lines',
              'name': 'placeholder_3',
              'showlegend': False,
              'type': 

In [8]:
walk_forward_results

[          Rank        Metric  MetricValue  CalcPrice  CalcGain CalcPeriod FwdPeriod   FwdGain
 Ticker                                                                                       
 PLTR      20.0  Sharpe (ATR)     0.318964    19.9900  1.569409         3M        1W -0.147574
 DYNF      21.0  Sharpe (ATR)     0.315989    36.5875  0.138046         3M        1W -0.020001
 BOXX      22.0  Sharpe (ATR)     0.313616   102.4740  0.011470         3M        1W  0.000976
 IONQ      23.0  Sharpe (ATR)     0.312896    19.8600  2.617486         3M        1W -0.228097
 BBIO      24.0  Sharpe (ATR)     0.307938    34.2700  1.389819         3M        1W -0.058652
 VRT       25.0  Sharpe (ATR)     0.299978    26.4558  0.790155         3M        1W  0.365247
 QBTS      26.0  Sharpe (ATR)     0.298445     2.7000  4.590062         3M        1W -0.348148
 ICSH      27.0  Sharpe (ATR)     0.293728    45.1458  0.011879         3M        1W  0.001794
 ACHR      28.0  Sharpe (ATR)     0.293715     6.5

In [9]:
# plotted_tickers = walk_forward_results[0].index.to_list()[:-1]
plotted_tickers = walk_forward_results[0].index.to_list()
print(f'plotted tickers: {plotted_tickers}')

plotted_tickers = ['PLTR', 'DYNF', 'BOXX', 'IONQ', 'BBIO', 'VRT', 'QBTS', 'ICSH', 'ACHR', 'LI', 'TM', 'VOO']
print(f'removed (BM) from platted_tickers: {plotted_tickers}')

walk_forward_results[0]

plotted tickers: ['PLTR', 'DYNF', 'BOXX', 'IONQ', 'BBIO', 'VRT', 'QBTS', 'ICSH', 'ACHR', 'LI', 'TM', 'VOO (BM)']
removed (BM) from platted_tickers: ['PLTR', 'DYNF', 'BOXX', 'IONQ', 'BBIO', 'VRT', 'QBTS', 'ICSH', 'ACHR', 'LI', 'TM', 'VOO']


Unnamed: 0_level_0,Rank,Metric,MetricValue,CalcPrice,CalcGain,CalcPeriod,FwdPeriod,FwdGain
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
PLTR,20.0,Sharpe (ATR),0.318964,19.99,1.569409,3M,1W,-0.147574
DYNF,21.0,Sharpe (ATR),0.315989,36.5875,0.138046,3M,1W,-0.020001
BOXX,22.0,Sharpe (ATR),0.313616,102.474,0.01147,3M,1W,0.000976
IONQ,23.0,Sharpe (ATR),0.312896,19.86,2.617486,3M,1W,-0.228097
BBIO,24.0,Sharpe (ATR),0.307938,34.27,1.389819,3M,1W,-0.058652
VRT,25.0,Sharpe (ATR),0.299978,26.4558,0.790155,3M,1W,0.365247
QBTS,26.0,Sharpe (ATR),0.298445,2.7,4.590062,3M,1W,-0.348148
ICSH,27.0,Sharpe (ATR),0.293728,45.1458,0.011879,3M,1W,0.001794
ACHR,28.0,Sharpe (ATR),0.293715,6.55,2.226601,3M,1W,-0.128244
LI,29.0,Sharpe (ATR),0.291405,44.37,0.878493,3M,1W,-0.039216


In [10]:
# Example Usage:

# 1. Run the main analyzer
# walk_forward_results = plot_walk_forward_analyzer(df_OHLCV, ...)

# 2. Suppose the output showed these tickers were selected
selected_tickers = plotted_tickers[:-1]
benchmark = 'VOO'
start_date_to_verify = '2023-05-01'
calc_p = '3M'
fwd_p = '1W'

# 3. Run the new verifier with the portfolio details
verify_group_tickers_walk_forward_calculation(
    df_ohlcv=df_OHLCV,
    tickers_to_verify=selected_tickers,
    benchmark_ticker=benchmark,
    start_date=start_date_to_verify,
    calc_period=calc_p,
    fwd_period=fwd_p,
    export_csv=True # Optionally create the detailed CSV
)

## Verification Report for Portfolio vs. Benchmark

**Portfolio Tickers:** `['PLTR', 'DYNF', 'BOXX', 'IONQ', 'BBIO', 'VRT', 'QBTS', 'ICSH', 'ACHR', 'LI', 'TM']`

**Benchmark Ticker:** `VOO`

**Analysis Start Date:** `2023-05-01`

**Calculation Period End Date:** `2023-08-01`

**Forward Period End Date:** `2023-08-08`

### A. Calculation Period Analysis ('In-Sample')

#### Verification for: `Group Portfolio`

  - Start Value (on 2023-05-01): 1.0000
  - End Value   (on 2023-08-01): 2.3164
  - Gain = (2.3164 / 1.0000) - 1 = 131.64%

  - Mean Daily Return: 0.014143
  - Std Dev of Daily Return: 0.038774
  - Sharpe = (0.014143 / 0.038774) * sqrt(252) = 5.79


#### Verification for: `Benchmark (VOO)`

  - Start Value (on 2023-05-01): 369.9670
  - End Value   (on 2023-08-01): 407.9900
  - Gain = (407.9900 / 369.9670) - 1 = 10.28%

  - Mean Daily Return: 0.001577
  - Std Dev of Daily Return: 0.006774
  - Sharpe = (0.001577 / 0.006774) * sqrt(252) = 3.69



### B. Forward Period Analysis ('Moment of Truth')

#### Verification for: `Group Portfolio`

  - Start Value (on 2023-08-01): 2.3164
  - End Value   (on 2023-08-08): 2.0295
  - Gain = (2.0295 / 2.3164) - 1 = -12.38%

  - Mean Daily Return: -0.026057
  - Std Dev of Daily Return: 0.009647
  - Sharpe = (-0.026057 / 0.009647) * sqrt(252) = -42.88


#### Verification for: `Benchmark (VOO)`

  - Start Value (on 2023-08-01): 407.9900
  - End Value   (on 2023-08-08): 401.0930
  - Gain = (401.0930 / 407.9900) - 1 = -1.69%

  - Mean Daily Return: -0.003377
  - Std Dev of Daily Return: 0.008167
  - Sharpe = (-0.003377 / 0.008167) * sqrt(252) = -6.56



### C. Full Period Analysis (Total)

#### Verification for: `Group Portfolio`

  - Start Value (on 2023-05-01): 1.0000
  - End Value   (on 2023-08-08): 2.0295
  - Gain = (2.0295 / 1.0000) - 1 = 102.95%

  - Mean Daily Return: 0.011187
  - Std Dev of Daily Return: 0.038840
  - Sharpe = (0.011187 / 0.038840) * sqrt(252) = 4.57


#### Verification for: `Benchmark (VOO)`

  - Start Value (on 2023-05-01): 369.9670
  - End Value   (on 2023-08-08): 401.0930
  - Gain = (401.0930 / 369.9670) - 1 = 8.41%

  - Mean Daily Return: 0.001212
  - Std Dev of Daily Return: 0.006938
  - Sharpe = (0.001212 / 0.006938) * sqrt(252) = 2.77



### D. Final Summary Table (matches analyzer output)

Unnamed: 0_level_0,Full,Calc,Fwd
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Group Portfolio Gain,+102.95%,+131.64%,-12.38%
Benchmark (VOO) Gain,+8.41%,+10.28%,-1.69%
Gain Delta (vs Bm),+94.54%,+121.36%,-10.69%
Group Portfolio Sharpe,+4.57,+5.79,-42.88
Benchmark (VOO) Sharpe,+2.77,+3.69,-6.56
Sharpe Delta (vs Bm),+1.80,+2.10,-36.31



✅ Detailed group verification data exported to 'verification_group_tickers_20230501.csv'


In [11]:
# 1. Define your list of tickers and the date range
# _portfolio_tickers = ['PLTR', 'DYNF', 'BOXX', 'IONQ', 'BBIO', 'VRT', 'QBTS', 'ICSH', 'ACHR', 'LI', 'TM']
start_date = '2023-05-01'
end_date = '2023-08-08'

# 2. Ensure the DataFrame is sorted for efficient slicing
# (This is good practice, although it's likely already sorted from our previous work)
df_OHLCV = df_OHLCV.sort_index()

# 3. Use .loc to filter by both ticker list and date range simultaneously
# The first part of the tuple filters the 'Ticker' level of the index.
# The second part of the tuple slices the 'Date' level of the index.
# portfolio_data = df_OHLCV.loc[(_portfolio_tickers, start_date:end_date), :]
portfolio_data = df_OHLCV.loc[(plotted_tickers, slice(start_date, end_date)), :]

# 4. Display the results to verify
print(f"Filtered data for {len(plotted_tickers)} tickers from {start_date} to {end_date}:")
display(portfolio_data)

# You can also check the info to see the size of the resulting DataFrame
print("\nInfo of the filtered DataFrame:")
portfolio_data.info()

Filtered data for 12 tickers from 2023-05-01 to 2023-08-08:


Unnamed: 0_level_0,Unnamed: 1_level_0,Adj Open,Adj High,Adj Low,Adj Close,Volume
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
PLTR,2023-05-01,7.750,7.820,7.660,7.780,19134200
PLTR,2023-05-02,7.720,7.910,7.430,7.590,38008000
PLTR,2023-05-03,7.620,7.680,7.450,7.560,24322400
PLTR,2023-05-04,7.470,7.520,7.280,7.380,29989300
PLTR,2023-05-05,7.440,7.550,7.320,7.410,45848800
...,...,...,...,...,...,...
VOO,2023-08-02,405.150,405.383,401.667,402.338,5112548
VOO,2023-08-03,400.499,402.941,399.925,401.200,3106672
VOO,2023-08-04,402.902,404.829,398.894,399.235,5876906
VOO,2023-08-07,401.093,402.990,400.461,402.834,2670527



Info of the filtered DataFrame:
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 828 entries, ('PLTR', Timestamp('2023-05-01 00:00:00')) to ('VOO', Timestamp('2023-08-08 00:00:00'))
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Adj Open   828 non-null    float64
 1   Adj High   828 non-null    float64
 2   Adj Low    828 non-null    float64
 3   Adj Close  828 non-null    float64
 4   Volume     828 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 134.6+ KB


### Code to Calculate Verification Metrics for a Multi-Ticker DataFrame

In [12]:
import pandas as pd
import numpy as np

# Assume 'portfolio_data' is your DataFrame from the previous step,
# containing the OHLCV data for all portfolio tickers with a MultiIndex.

# --- Step 1: Ensure data is sorted correctly for time-series operations ---
# This is CRITICAL for .shift() to work correctly within groups.
portfolio_data = portfolio_data.sort_index(level=['Ticker', 'Date'])

# Create a copy to store the results without modifying the original
results_df = portfolio_data.copy()

# --- Step 2: Vectorized Calculations using .groupby('Ticker') ---

# Daily Return
results_df['Daily_Return'] = results_df.groupby('Ticker')['Adj Close'].pct_change()

# Previous Day's Close (we'll need this for True Range)
# Calculate it once and reuse for efficiency.
prev_close = results_df.groupby('Ticker')['Adj Close'].shift(1)

# True Range (TR) Components
results_df['High_Minus_Low'] = results_df['Adj High'] - results_df['Adj Low']
results_df['Abs_High_vs_PrevClose'] = abs(results_df['Adj High'] - prev_close)
results_df['Abs_Low_vs_PrevClose'] = abs(results_df['Adj Low'] - prev_close)

# True Range (the maximum of the three components)
results_df['True_Range'] = np.maximum(
    results_df['High_Minus_Low'],
    np.maximum(results_df['Abs_High_vs_PrevClose'], results_df['Abs_Low_vs_PrevClose'])
)

# ATR_14 (14-period Exponential Moving Average of True Range)
# We use .ewm() for the exponential moving average.
# `adjust=False` is standard for technical indicators like ATR.
# --- THE FIX IS HERE ---
# Instead of span=14, we explicitly use alpha=1/14 for Wilder's smoothing.
results_df['ATR_14'] = results_df.groupby('Ticker')['True_Range'].ewm(alpha=1/14, adjust=False).mean().values
# --- END OF FIX ---

# ATRP (Average True Range Percent)
results_df['ATRP'] = results_df['ATR_14'] / results_df['Adj Close']

# --- Step 3: Display the results for verification ---

# Reorder columns for better readability
ordered_cols = [
    'Adj Close', 
    'Daily_Return',
    'Adj High', 
    'Adj Low',
    'High_Minus_Low',
    'Abs_High_vs_PrevClose',
    'Abs_Low_vs_PrevClose',
    'True_Range',
    'ATR_14',
    'ATRP',
    'Volume'
]
# Add any original columns not in the list to the end
final_cols = ordered_cols + [col for col in results_df.columns if col not in ordered_cols]
results_df = results_df[final_cols]

print("DataFrame with all verification calculations:")
# Display the head and tail, or a larger sample, to see the results.
# Notice the NaNs on the first day for each ticker, which is correct.
display(results_df.head(15)) 

DataFrame with all verification calculations:


Unnamed: 0_level_0,Unnamed: 1_level_0,Adj Close,Daily_Return,Adj High,Adj Low,High_Minus_Low,Abs_High_vs_PrevClose,Abs_Low_vs_PrevClose,True_Range,ATR_14,ATRP,Volume,Adj Open
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
ACHR,2023-05-01,2.03,,2.06,1.951,0.109,,,,,,875400,1.98
ACHR,2023-05-02,1.95,-0.039409,2.07,1.93,0.14,0.04,0.1,0.14,0.14,0.071795,1291500,2.02
ACHR,2023-05-03,2.06,0.05641,2.115,1.97,0.145,0.165,0.02,0.165,0.141786,0.068828,1188400,1.97
ACHR,2023-05-04,2.03,-0.014563,2.129,2.01,0.119,0.069,0.05,0.119,0.140158,0.069043,970500,2.06
ACHR,2023-05-05,2.12,0.044335,2.12,2.05,0.07,0.09,0.02,0.09,0.136575,0.064422,999400,2.06
ACHR,2023-05-08,2.14,0.009434,2.23,2.11,0.12,0.11,0.01,0.12,0.135391,0.063267,1344000,2.12
ACHR,2023-05-09,2.25,0.051402,2.28,2.12,0.16,0.14,0.02,0.16,0.137149,0.060955,1495500,2.2
ACHR,2023-05-10,2.38,0.057778,2.42,2.28,0.14,0.17,0.03,0.17,0.139496,0.058612,2049300,2.32
ACHR,2023-05-11,2.37,-0.004202,2.42,2.3,0.12,0.04,0.08,0.12,0.138103,0.058271,1116400,2.38
ACHR,2023-05-12,2.0,-0.156118,2.33,1.91,0.42,0.04,0.46,0.46,0.161096,0.080548,2569400,2.27


In [13]:
print(f'results_df:\n{results_df}')

results_df:
                   Adj Close  Daily_Return  Adj High  Adj Low  High_Minus_Low  Abs_High_vs_PrevClose  Abs_Low_vs_PrevClose  True_Range    ATR_14      ATRP    Volume  Adj Open
Ticker Date                                                                                                                                                                   
ACHR   2023-05-01     2.0300           NaN    2.0600   1.9510          0.1090                    NaN                   NaN         NaN       NaN       NaN    875400    1.9800
       2023-05-02     1.9500     -0.039409    2.0700   1.9300          0.1400                 0.0400                0.1000      0.1400  0.140000  0.071795   1291500    2.0200
       2023-05-03     2.0600      0.056410    2.1150   1.9700          0.1450                 0.1650                0.0200      0.1650  0.141786  0.068828   1188400    1.9700
       2023-05-04     2.0300     -0.014563    2.1290   2.0100          0.1190                 0.0690             

In [14]:
# --- Define the filename ---
# It's good practice to make the filename descriptive.
filename = "verification_portfolio_calculations.csv"

# --- Write the DataFrame to a CSV file ---

# Option 1: Basic export
# results_df.to_csv(filename)

# Option 2: Recommended export with better formatting
#    - float_format='%.6f': Saves floating point numbers with 6 decimal places for precision.
#    - index=True: Includes the MultiIndex (Ticker, Date) in the CSV, which is important.
results_df.to_csv(filename, float_format='%.6f', index=True)

# --- Confirmation message ---
print(f"✅ Successfully exported the verification data to '{filename}'")

✅ Successfully exported the verification data to 'verification_portfolio_calculations.csv'


In [15]:
print(plotted_tickers)
print(sorted(plotted_tickers))

['PLTR', 'DYNF', 'BOXX', 'IONQ', 'BBIO', 'VRT', 'QBTS', 'ICSH', 'ACHR', 'LI', 'TM', 'VOO']
['ACHR', 'BBIO', 'BOXX', 'DYNF', 'ICSH', 'IONQ', 'LI', 'PLTR', 'QBTS', 'TM', 'VOO', 'VRT']


In [16]:
import pandas as pd

ticker   = 'AAPL'                       # <- symbol you want
start_dt = pd.Timestamp('2023-05-01')   # <- inclusive
end_dt   = pd.Timestamp('2023-08-08')   # <- inclusive

# 1. pull the single-ticker slice
ticker_df = df_OHLCV.loc[ticker]        # returns a plain DataFrame indexed by date

# 2. restrict to the date window
ticker_df = ticker_df.loc[start_dt:end_dt]

print(ticker_df.head())

            Adj Open  Adj High  Adj Low  Adj Close     Volume
Date                                                         
2023-05-01   167.181   168.336  166.549    167.487   53131812
2023-05-02   167.981   168.237  165.462    166.450   49033792
2023-05-03   167.398   168.800  165.087    165.373   65953927
2023-05-04   162.845   164.968  162.272    163.734   82255484
2023-05-05   168.860   172.138  168.642    171.417  114877856


In [17]:
start_dt = pd.Timestamp('2023-05-01')   # <- inclusive
end_dt   = pd.Timestamp('2023-08-08')   # <- inclusive
sorted_plotted_tickers = sorted(plotted_tickers)

for _ticker in sorted_plotted_tickers:
    # df_OHLCV.loc[_ticker][start_dt:end_dt]
    # print(_ticker)
    print(df_OHLCV.loc[(_ticker, slice(start_dt, end_dt)), :].head(1))
    print(df_OHLCV.loc[(_ticker, slice(start_dt, end_dt)), :].tail(1))
    print('============')

                   Adj Open  Adj High  Adj Low  Adj Close  Volume
Ticker Date                                                      
ACHR   2023-05-01      1.98      2.06    1.951       2.03  875400
                   Adj Open  Adj High  Adj Low  Adj Close   Volume
Ticker Date                                                       
ACHR   2023-08-08       5.5      5.73     5.25       5.71  3804000
                   Adj Open  Adj High  Adj Low  Adj Close   Volume
Ticker Date                                                       
BBIO   2023-05-01     14.51     14.97     14.2      14.34  1219700
                   Adj Open  Adj High  Adj Low  Adj Close   Volume
Ticker Date                                                       
BBIO   2023-08-08     30.82     32.38    30.67      32.26  1555900
                   Adj Open  Adj High  Adj Low  Adj Close  Volume
Ticker Date                                                      
BOXX   2023-05-01   101.312   101.319  101.307    101.312   28075
 

In [18]:
_export_csv = True

# Now, let's verify top-ranked ticker from the results
_verify_ticker = walk_forward_results[0].index[8] 

# Verify the calculation for 'BIL' using the same parameters
verify_one_ticker_walk_forward_calculation(
    df_ohlcv=df_OHLCV,
    ticker=_verify_ticker,
    start_date=_start_date,
    calc_period=_calc_period,
    fwd_period=_fwd_period,
    export_csv=_export_csv  # Set to True to get the data in Excel
)


## Verification Report for Ticker: `ACHR`

**Analysis Start Date:** `2023-05-01`

**Requested Calculation Period:** `2023-05-01` to `2023-08-01` (3M)

**Requested Forward Period:**   `2023-08-01` to `2023-08-08` (1W)

### A. Calculation Period Analysis ('In-Sample')

**Actual Dates Used:** `2023-05-01` to `2023-08-01` (64 trading days)

Unnamed: 0_level_0,Adj Open,Adj High,Adj Low,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-05-01 00:00:00,1.98,2.06,1.95,2.03,875400
2023-05-02 00:00:00,2.02,2.07,1.93,1.95,1291500
2023-05-03 00:00:00,1.97,2.12,1.97,2.06,1188400
2023-07-28 00:00:00,4.67,4.89,4.63,4.78,4674300
2023-07-31 00:00:00,5.28,6.87,5.23,6.73,41685500
2023-08-01 00:00:00,6.55,7.1,6.4,6.55,14109100


#### `CalcPrice` and `CalcGain` Verification:

  - Calc Start Price (on 2023-05-01): $2.03
  - Calc End Price   (on 2023-08-01): $6.55  <-- This is 'CalcPrice'
  - CalcGain = ($6.55 / $2.03) - 1 = 222.66%


#### `MetricValue` Verification:


1. Price Metric:
   - Formula: Last Price / First Price
   - Value: $6.55 / $2.03 = 3.2266

2. Sharpe Ratio Metric:
   - Mean Daily Return: 0.021992
   - Std Dev Daily Return: 0.084482
   - Formula: (Mean / Std Dev) * sqrt(252)
   - Value: (0.021992 / 0.084482) * 15.87 = 4.1325

3. Sharpe (ATR) Metric:
   - Mean Daily Return: 0.021992 (same as above)
   - Average ATR Percent (ATRP): 0.078656
   - Formula: Mean Daily Return / ATRP
   - Value: 0.021992 / 0.078656 = 0.2796



### B. Forward Period Analysis ('Out-of-Sample')

**Actual Dates Used:** `2023-08-01` to `2023-08-08` (6 trading days)

#### `FwdGain` Verification:

  - Fwd Start Price (same as Calc End Price): $6.55
  - Fwd End Price   (on 2023-08-08): $5.71
  - FwdGain = ($5.71 / $6.55) - 1 = -12.82%


### C. Final Summary Table

Unnamed: 0,Metric,Calculated Value,Corresponds To DataFrame Column
0,End of Calc Price,$6.55,`CalcPrice`
1,Calc Period Gain,222.66%,`CalcGain`
2,Forward Period Gain,-12.82%,`FwdGain`
3,---,---,---
4,Metric: Price,3.2266,`MetricValue`
5,Metric: Sharpe,4.1325,`MetricValue`
6,Metric: Sharpe (ATR),0.2796,`MetricValue`



✅ Detailed data for 'ACHR' exported to 'verification_ACHR_20230501.csv'


### **J. Welles Wilder Jr. definition ATR that captures overnight gaps.**

Let's be precise:

*   **We WILL calculate** the **True Range (TR)** for each day, which is the maximum of:
    1.  `(Today's High - Today's Low)`
    2.  `abs(Today's High - Yesterday's Close)`
    3.  `abs(Today's Low - Yesterday's Close)`
*   Then, we will typically calculate the **Average True Range (ATR)**, which is a **Exponential Moving Average** of this True Range value. This is the standard industry indicator for volatility that includes intraday and overnight movement.

### **Sharpe (ATR) Calculation**

You are correct that the ATR calculation should be tied to the `Period` dropdown, but here's how we'll do it in a standard, robust way:

**Step 1: Pre-computation (Done once for all stocks)**

First, for the entire historical dataset, we will calculate a standard **14-day Average True Range (ATR)**. This is the industry standard lookback period defined by Wilder. This step creates a new, continuous ATR data series for every single stock, just like we have an 'Adj Close' series.

**Step 2: On-the-Fly Calculation (When you click "Update Chart")**

This is where your interpretation comes in. When you make your selections in the dropdowns (e.g., `Metric: Return/ATR`, `Period: 3M`):

1.  We slice the **daily returns** data to get only the last **3 months**.
2.  We also slice the pre-computed **14-day ATR** data to get only the last **3 months**.
3.  We then calculate:
    *   **Numerator:** The `mean()` of the daily returns over those 3 months.
    *   **Denominator:** The `mean()` of the ATR values over those 3 months.
4.  The final score for the new metric is `Numerator / Denominator`.

**In short: The `Period` dropdown defines the window of data we analyze, not the lookback period of the ATR indicator itself.**

This is the best practice because it ensures we are always comparing apples to apples. We're using a consistent measure of volatility (14-day ATR) and seeing how it behaves over different analysis windows (1D, 3M, 1Y, etc.).

### ATR_14 Calculation  `pandas.ewm(..., adjust=False)`
You have hit upon the most subtle, confusing, and important aspect of how pandas calculates Exponential Moving Averages.

Your thinking is perfectly logical, and your calculation of the 14-period simple average (`4.33614...`) is **100% correct.** The reason it doesn't match the CSV is the key detail.

The discrepancy arises because the method we are using, `pandas.ewm(..., adjust=False)`, **does not use a simple average as a "seed" value.** This is a very common misconception because the "SMA seed" method is often taught for manual calculation.

Instead, `ewm(adjust=False)` applies the exponential smoothing formula **recursively from the very first data point.**

### How `ewm(adjust=False)` Actually Works
atr = tr.ewm(alpha=1/14, adjust=False).mean()

Let's trace it with your data.
`alpha = 1/14` and `1 - alpha = 13/14`.

1.  **First Valid `True_Range` (Jan 4):** `4.283`
    *   The `ewm` starts here. The first ATR is simply the first value.
    *   **ATR on Jan 4 = 4.283**. (Matches your CSV)

2.  **Second Valid `True_Range` (Jan 5):** `3.956`
    *   `ATR = (Current_TR * 1/14) + (Previous_ATR * 13/14)`
    *   `ATR = (3.956 * 1/14) + (4.283 * 13/14)`
    *   `ATR = 0.2825714 + 3.9774286`
    *   **ATR on Jan 5 = 4.25999...**. (Rounds to `4.25964...` in your CSV due to higher precision floating point math). This is a match.

### The correct formula to link the periods is:

`(1 + Full Gain) = (1 + Calc Gain) * (1 + Fwd Gain)`