In [21]:
import pprint
from datetime import date, datetime
from pathlib import Path

import ipywidgets as widgets  # provides the `widgets.*` controls used by the analyzer cells
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
from IPython.display import Markdown, display

# # Get the string name of the currently active default renderer
# default_renderer_name = pio.renderers.default

# # Use that name to access the renderer object and set its config
# # This is the line that fixes the AttributeError
# pio.renderers[default_renderer_name].config = {'doubleClickDelay': 300}

# print(f"Plotly environment configured for the '{default_renderer_name}' renderer.")
# print(f"Double-click delay is now set to {pio.renderers[default_renderer_name].config['doubleClickDelay']}ms.")

# Wide, untruncated pandas display so ranked tables are readable in cell output.
for _option_name, _option_value in (
    ('display.max_rows', 200),
    ('display.max_columns', None),
    ('display.width', 3000),
):
    pd.set_option(_option_name, _option_value)

# Folder that later cells export CSV worksheets into.
download_path = Path.home().joinpath("Downloads")

# Cleaned OHLCV history for stocks and ETFs.
OHLCV_file_path = r'c:\Users\ping\Files_win10\python\py311\stocks\data\df_OHLCV_clean_stocks_etfs.parquet'
df_OHLCV = pd.read_parquet(OHLCV_file_path, engine='pyarrow')

In [22]:
# Summarize the loaded frame: index range, column dtypes, non-null counts and memory usage.
df_OHLCV.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 366000 entries, ('A', Timestamp('2024-09-25 00:00:00')) to ('ZWS', Timestamp('2025-09-24 00:00:00'))
Data columns (total 5 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   Adj Open   366000 non-null  float64
 1   Adj High   366000 non-null  float64
 2   Adj Low    366000 non-null  float64
 3   Adj Close  366000 non-null  float64
 4   Volume     366000 non-null  int64  
dtypes: float64(4), int64(1)
memory usage: 15.4+ MB


### The New "Walk-Forward" Analyzer Function

The cell below defines `plot_walk_forward_analyzer`. It is a standalone addition: it does not replace or interfere with the other plotting functions used in this notebook.

In [46]:
def plot_walk_forward_analyzer(df_ohlcv,
                               default_start_date=None,
                               default_calc_period='3M',
                               default_fwd_period='1M',
                               default_metric='Sharpe (ATR)',
                               default_rank_start=1,
                               default_rank_end=10):
    """
    Build and display an interactive walk-forward analysis widget.

    Tickers are ranked by a metric computed over a calculation window
    (start date + calc period). The chart then plots their normalized prices
    across the calculation window and the forward window that follows it,
    with a dashed vertical line marking the end of the calculation window.

    Args:
        df_ohlcv (pd.DataFrame): OHLCV data with a (Ticker, Date) MultiIndex and
            'Adj High', 'Adj Low' and 'Adj Close' columns.
        default_start_date: Initial start date; anything ``pd.Timestamp`` accepts
            (``datetime.date``, ``Timestamp``, ISO string). Defaults to one year
            before the last available date.
        default_calc_period (str): '1M', '3M', '6M' or '1Y'.
        default_fwd_period (str): '0D', '1W', '2W', '1M' or '3M'.
        default_metric (str): 'Price', 'Sharpe' or 'Sharpe (ATR)'.
        default_rank_start (int): First rank to show; must be one of the rank
            dropdown options (1, 5, 10, 20, 30, 40, 50, 75, 100).
        default_rank_end (int): Last rank to show; same allowed values.

    Returns:
        list: One-element container. Element 0 is ``None`` until the first
        successful update and afterwards the latest results DataFrame, indexed
        by 'Ticker' with columns Rank, Metric, MetricValue, CalcPrice, CalcGain,
        CalcPeriod, FwdPeriod and FwdGain. It is replaced on every successful
        "Update Chart" click and left unchanged when an update reports an error.

    Raises:
        ValueError: If ``df_ohlcv`` does not have a MultiIndex.
    """
    # 1. Setup: validate the input and pivot to one wide (Date x Ticker) frame per field
    print("Initializing Walk-Forward Analyzer...")
    if not isinstance(df_ohlcv.index, pd.MultiIndex):
        raise ValueError("Input DataFrame must have a (Ticker, Date) MultiIndex.")
    df_ohlcv = df_ohlcv.sort_index()

    print("Pre-processing data (unstacking)...")
    df_close_full = df_ohlcv['Adj Close'].unstack(level=0)
    df_high_full = df_ohlcv['Adj High'].unstack(level=0)
    df_low_full = df_ohlcv['Adj Low'].unstack(level=0)
    for wide_frame in (df_close_full, df_high_full, df_low_full):
        wide_frame.index = pd.to_datetime(wide_frame.index)
    min_date_available = df_close_full.index.min()
    max_date_available = df_close_full.index.max()

    # 2. Widgets
    if default_start_date is None:
        default_start_date = max_date_available - pd.DateOffset(years=1)
    # DatePicker works with plain datetime.date values; normalise whatever was supplied.
    default_start_date = pd.Timestamp(default_start_date).date()
    start_date_picker = widgets.DatePicker(description='Start Date:', value=default_start_date, disabled=False)

    calc_period_options = {
        '1M': pd.DateOffset(months=1), '3M': pd.DateOffset(months=3),
        '6M': pd.DateOffset(months=6), '1Y': pd.DateOffset(years=1),
    }
    fwd_period_options = {
        '0D': pd.DateOffset(days=0), '1W': pd.DateOffset(weeks=1), '2W': pd.DateOffset(weeks=2),
        '1M': pd.DateOffset(months=1), '3M': pd.DateOffset(months=3),
    }
    calc_period_dropdown = widgets.Dropdown(options=calc_period_options.keys(), value=default_calc_period, description='Calc Period:')
    fwd_period_dropdown = widgets.Dropdown(options=fwd_period_options.keys(), value=default_fwd_period, description='Fwd Period:')
    metrics = ['Price', 'Sharpe', 'Sharpe (ATR)']
    metric_dropdown = widgets.Dropdown(options=metrics, value=default_metric, description='Metric:')
    rank_options = [1, 5, 10, 20, 30, 40, 50, 75, 100]
    rank_start_dropdown = widgets.Dropdown(options=rank_options, value=default_rank_start, description='Rank Start:')
    rank_end_dropdown = widgets.Dropdown(options=rank_options, value=default_rank_end, description='Rank End:')
    update_button = widgets.Button(description="Update Chart", button_style='primary')
    ticker_list_output = widgets.Output()
    results_container = [None]

    # Pre-allocate one hidden trace per ticker that can ever be requested, so the widest
    # selectable rank range (min..max of rank_options) is plotted in full instead of
    # being silently cut off while the results table still lists every ticker.
    fig = go.FigureWidget()
    max_traces = max(rank_options) - min(rank_options) + 1
    for i in range(max_traces):
        fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name=f'placeholder_{i}', visible=False, showlegend=False))

    # 3. Define the Update Logic
    def update_plot(button_click):
        """Recompute the ranking for the current widget values and refresh chart, table and printout."""
        ticker_list_output.clear_output()

        # A cleared DatePicker yields None, which cannot be offset by a period.
        if start_date_picker.value is None:
            with ticker_list_output:
                print("Error: Please select a start date.")
            return

        start_date = pd.to_datetime(start_date_picker.value)
        calc_period = calc_period_options[calc_period_dropdown.value]
        fwd_period = fwd_period_options[fwd_period_dropdown.value]
        metric = metric_dropdown.value
        rank_start, rank_end = rank_start_dropdown.value, rank_end_dropdown.value

        if rank_start > rank_end:
            with ticker_list_output:
                print("Error: 'Rank Start' must be <= 'Rank End'.")
            return

        # --- Define and cap ALL date boundaries before slicing ---
        calc_end_date_theoretical = start_date + calc_period
        viz_end_date_theoretical = calc_end_date_theoretical + fwd_period
        safe_start_date = max(start_date, min_date_available)
        safe_calc_end_date = min(calc_end_date_theoretical, max_date_available)
        safe_viz_end_date = min(viz_end_date_theoretical, max_date_available)

        if safe_start_date >= safe_calc_end_date:
            with ticker_list_output:
                print("Error: Invalid date range. The calculation period has no data.")
            return

        # --- Perform calculations on SAFE data slices ---
        calc_close = df_close_full.loc[safe_start_date:safe_calc_end_date]
        if len(calc_close) < 2:
            with ticker_list_output:
                print("Error: Not enough data in the calculation period to rank.")
            return

        # Per-ticker first/last valid close inside the calculation window.
        first_prices = calc_close.bfill().iloc[0]
        last_prices = calc_close.ffill().iloc[-1]

        metric_values = {}
        metric_values['Price'] = (last_prices / first_prices).dropna()

        daily_returns = calc_close.pct_change()
        mean_returns, std_returns = daily_returns.mean(), daily_returns.std()
        metric_values['Sharpe'] = (mean_returns / std_returns * np.sqrt(252)).fillna(0)

        # Wilder True Range. The previous close is shifted on the FULL history so the
        # first day of the window still sees the prior day's close (overnight gap).
        calc_high = df_high_full.loc[safe_start_date:safe_calc_end_date]
        calc_low = df_low_full.loc[safe_start_date:safe_calc_end_date]
        prev_close = df_close_full.shift(1).loc[safe_start_date:safe_calc_end_date]
        high_low = calc_high - calc_low
        high_prev_close = (calc_high - prev_close).abs()
        low_prev_close = (calc_low - prev_close).abs()
        tr = np.maximum(high_low, np.maximum(high_prev_close, low_prev_close))
        atr = tr.ewm(alpha=1/14, adjust=False).mean()
        atrp = (atr / calc_close).mean()  # ATR as a fraction of price, averaged over the window
        metric_values['Sharpe (ATR)'] = (mean_returns / atrp).fillna(0)

        sorted_tickers = metric_values[metric].sort_values(ascending=False)
        tickers_to_display = sorted_tickers.index[rank_start-1:rank_end].tolist()

        # --- Plot data: prices normalized to each ticker's first valid close in the window ---
        normalized_plot_data = df_close_full[tickers_to_display].loc[safe_start_date:safe_viz_end_date]
        normalized_plot_data = normalized_plot_data.div(normalized_plot_data.bfill().iloc[0])
        actual_calc_end_ts = calc_close.index.max()
        actual_viz_start_date = normalized_plot_data.index.min().date()
        actual_viz_end_date = normalized_plot_data.index.max().date()
        actual_calc_end_date = actual_calc_end_ts.date()

        with fig.batch_update():
            # Clearing the shapes also removes the y=1 baseline drawn by add_hline during
            # setup (it is stored as a layout shape), so redraw it on every refresh.
            fig.layout.shapes = []
            fig.add_shape(type="line", x0=0, y0=1, x1=1, y1=1, xref='paper', yref='y',
                          line=dict(color="grey", width=1, dash="dash"))
            fig.add_shape(type="line", x0=actual_calc_end_ts, y0=0, x1=actual_calc_end_ts, y1=1,
                          xref='x', yref='paper', line=dict(color="grey", width=2, dash="dash"))
            for i in range(max_traces):
                trace = fig.data[i]
                if i < len(tickers_to_display):
                    ticker = tickers_to_display[i]
                    trace.x, trace.y, trace.name = normalized_plot_data.index, normalized_plot_data[ticker], ticker
                    trace.visible, trace.showlegend = True, True
                else:
                    trace.visible, trace.showlegend = False, False

        # --- CalcGain / CalcPrice / FwdGain ---
        # Prices are taken per ticker from the same window that is ranked and plotted.
        # DataFrame.asof is deliberately avoided: on a DataFrame it skips every row in
        # which ANY column is NaN, so one sparse ticker would shift or blank the prices
        # of all tickers, and it could start CalcGain from a row before the window.
        calc_start_prices = first_prices
        calc_end_prices = last_prices
        viz_end_prices = df_close_full.loc[safe_start_date:safe_viz_end_date].ffill().iloc[-1]

        calc_gains = (calc_end_prices / calc_start_prices) - 1
        fwd_gains = (viz_end_prices / calc_end_prices) - 1

        # Dict insertion order defines the column order of the results table.
        results_df = pd.DataFrame({
            'Rank': range(rank_start, rank_start + len(tickers_to_display)),
            'Metric': metric,
            'MetricValue': sorted_tickers.loc[tickers_to_display].values,
            'CalcPrice': calc_end_prices.loc[tickers_to_display],
            'CalcGain': calc_gains.loc[tickers_to_display],
            'CalcPeriod': calc_period_dropdown.value,
            'FwdPeriod': fwd_period_dropdown.value,
            'FwdGain': fwd_gains.loc[tickers_to_display]
        }, index=pd.Index(tickers_to_display, name='Ticker'))

        results_container[0] = results_df

        with ticker_list_output:
            print(f"Analyzing from {actual_viz_start_date} to {actual_viz_end_date}.")
            print(f"  - Ranking based on performance from {actual_viz_start_date} to {actual_calc_end_date}.")
            pprint.pprint(tickers_to_display, width=120, compact=True)

    # 4. Final Layout and Display
    fig.update_layout(title_text='Walk-Forward Performance Analysis', xaxis_title='Date',
                      yaxis_title='Normalized Price (Start = 1)', hovermode='x unified',
                      legend_title_text='Tickers (Ranked)', height=700, margin=dict(t=50))
    # Baseline shown even if the very first update exits early with an error.
    fig.add_hline(y=1, line_width=1, line_dash="dash", line_color="grey")
    update_button.on_click(update_plot)
    controls_row1 = widgets.HBox([start_date_picker, calc_period_dropdown, fwd_period_dropdown])
    controls_row2 = widgets.HBox([metric_dropdown, rank_start_dropdown, rank_end_dropdown, update_button])
    ui_container = widgets.VBox([controls_row1, controls_row2, ticker_list_output], layout=widgets.Layout(margin='10px 0 20px 0'))
    display(ui_container, fig)
    update_plot(None)

    return results_container


In [None]:
# --- Example Usage of the Walk-Forward Analyzer ---

# `df_OHLCV` (loaded at the top of the notebook) is the master OHLCV DataFrame.

# Explicit starting point for the calculation window; when omitted, the analyzer
# defaults to one year before the last available date.
# NOTE(review): despite its name, `start_of_year` is 2024-09-25 (the first date in
# the loaded data), not January 1st — confirm whether the name or the date is intended.
start_of_year = date(2024, 9, 25)

# Build the widget. The returned one-element list is refreshed with the latest
# results DataFrame each time the chart updates successfully.
walk_forward_results = plot_walk_forward_analyzer(
    df_OHLCV,
    default_start_date=start_of_year,
    default_calc_period='6M',
    default_fwd_period='3M',
    default_metric='Sharpe',
    default_rank_start=1,
    default_rank_end=5
)

Initializing Walk-Forward Analyzer...
Pre-processing data (unstacking)...


VBox(children=(HBox(children=(DatePicker(value=datetime.date(2024, 9, 25), description='Start Date:', step=1),…

FigureWidget({
    'data': [{'mode': 'lines',
              'name': 'placeholder_0',
              'showlegend': False,
              'type': 'scatter',
              'uid': '59e83bbd-e0e6-41d9-b820-b74d14731f28',
              'visible': False,
              'x': [None],
              'y': [None]},
             {'mode': 'lines',
              'name': 'placeholder_1',
              'showlegend': False,
              'type': 'scatter',
              'uid': 'a5b776e1-6d67-4576-9a6a-1d7b40758e24',
              'visible': False,
              'x': [None],
              'y': [None]},
             {'mode': 'lines',
              'name': 'placeholder_2',
              'showlegend': False,
              'type': 'scatter',
              'uid': '8863b985-834c-4361-bf4a-edbcad5bbad3',
              'visible': False,
              'x': [None],
              'y': [None]},
             {'mode': 'lines',
              'name': 'placeholder_3',
              'showlegend': False,
              'type': 

In [48]:
# Column layout of the latest results DataFrame (element 0 of the container).
print(walk_forward_results[0].columns)
# Last expression: displays the container itself, i.e. a one-element list wrapping that DataFrame.
walk_forward_results

Index(['Rank', 'Metric', 'MetricValue', 'CalcPrice', 'CalcGain', 'CalcPeriod', 'FwdPeriod', 'FwdGain'], dtype='object')


[        Rank        Metric  MetricValue  CalcPrice  CalcGain CalcPeriod FwdPeriod   FwdGain
 Ticker                                                                                     
 MINT      10  Sharpe (ATR)     0.557859    96.2784  0.004288         1M        1W  0.001096
 BILS      11  Sharpe (ATR)     0.551168    95.6298  0.003106         1M        1W  0.001259
 SOFI      12  Sharpe (ATR)     0.534707    10.9900  0.421734         1M        1W  0.004550
 PULS      13  Sharpe (ATR)     0.519447    47.5487  0.004218         1M        1W  0.000707
 OKLO      14  Sharpe (ATR)     0.506498    19.1100  1.321993         1M        1W  0.133961
 FI        15  Sharpe (ATR)     0.462273   199.5200  0.128507         1M        1W  0.012680
 BAH       16  Sharpe (ATR)     0.450496   178.6130  0.137699         1M        1W -0.003572
 JAAA      17  Sharpe (ATR)     0.444306    48.3024  0.005000         1M        1W  0.002029
 SRLN      18  Sharpe (ATR)     0.361180    39.0536  0.012649         

### **J. Welles Wilder Jr.'s definition of ATR, which captures overnight gaps**

Let's be precise:

*   **We WILL calculate** the **True Range (TR)** for each day, which is the maximum of:
    1.  `(Today's High - Today's Low)`
    2.  `abs(Today's High - Yesterday's Close)`
    3.  `abs(Today's Low - Yesterday's Close)`
*   Then, we will typically calculate the **Average True Range (ATR)**, which is an **Exponential Moving Average** of this True Range value (the code uses Wilder's smoothing, `alpha = 1/14`). This is the standard industry indicator for volatility that includes intraday and overnight movement.

### **Return/ATR Calculation**

You are correct that the ATR calculation should be tied to the `Period` dropdown, but here's how we'll do it in a standard, robust way:

**Step 1: Pre-computation (Done once for all stocks)**

First, for the entire historical dataset, we will calculate a standard **14-day Average True Range (ATR)**. This is the industry standard lookback period defined by Wilder. This step creates a new, continuous ATR data series for every single stock, just like we have an 'Adj Close' series.

**Step 2: On-the-Fly Calculation (When you click "Update Chart")**

This is where your interpretation comes in. When you make your selections in the dropdowns (e.g., `Metric: Return/ATR`, `Period: 3M`):

1.  We slice the **daily returns** data to get only the last **3 months**.
2.  We also slice the pre-computed **14-day ATR** data to get only the last **3 months**.
3.  We then calculate:
    *   **Numerator:** The `mean()` of the daily returns over those 3 months.
    *   **Denominator:** The `mean()` of the ATR values over those 3 months.
4.  The final score for the new metric is `Numerator / Denominator`.

**In short: The `Period` dropdown defines the window of data we analyze, not the lookback period of the ATR indicator itself.**

This is the best practice because it ensures we are always comparing apples to apples. We're using a consistent measure of volatility (14-day ATR) and seeing how it behaves over different analysis windows (1D, 3M, 1Y, etc.).

In [None]:
# # --- NEW: Initialize the variable before the call ---
# results = [] 

# Call the widget function and capture what it returns.
# NOTE(review): `plot_interactive_performers_widget` is not defined in the visible part
# of this notebook. The cells below read `results[0]` as a DataFrame, so it presumably
# returns a one-element container like plot_walk_forward_analyzer — confirm. The older
# `plotted_tickers` list mentioned in the commented-out lines is not assigned here.
results = plot_interactive_performers_widget(
    df_OHLCV, 
    default_metric='Sharpe', 
    default_period='1Y', 
    default_rank_start=20, 
    default_rank_end=40
)

# print("\n--- After the plot, the 'plotted_tickers' variable holds the last displayed list: ---")
# print(plotted_tickers)

In [None]:
# Show the DataFrame currently held in the results container. The message names the
# object that is actually printed (`results[0]`), not the retired `plotted_tickers` list.
print("\n--- After the plot, 'results[0]' holds the results DataFrame for the last displayed selection: ---")
print(results[0])

In [None]:
# 2. Access the results DataFrame
# You can run this cell at any time to see the *current* results
# based on your selections in the widget.

current_results_df = results[0]

# Guard against both "no results yet" (None) and an empty ranking, where index[0] would fail.
if current_results_df is not None and not current_results_df.empty:
    print("Successfully retrieved the results DataFrame:")
    display(current_results_df)

    # Label the value with the metric that was actually selected in the widget
    # instead of assuming it is always the Sharpe ratio.
    if 'Metric' in current_results_df.columns:
        metric_label = current_results_df['Metric'].iloc[0]
    else:
        metric_label = 'metric'

    # You can now work with this DataFrame
    print("\nThe best performing ticker in this set is:", current_results_df.index[0])
    print(f"Its {metric_label} value was:", current_results_df['MetricValue'].iloc[0])
else:
    print("No results to display. This might be due to an error or lack of data for the selected period.")

### Plot Tickers

In [None]:
# Ticker symbols of the current results, in the order they appear in the table.
results_list = results[0].index.tolist()
print(results_list)

In [None]:
# Hand-picked watch list, shown here followed by the widget's current tickers.
my_tickers = ['SPY', 'VGT', 'QQQ', 'NVDA', 'GOOG', 'META', 'ORCL', 'APP', 'U', 'B']
print([*my_tickers, *results_list])

In [None]:
# --- 2. Define the list of tickers you want to keep ---
# dict.fromkeys removes duplicates while keeping first-seen order; a set of strings
# would come back in a different order after every kernel restart.
requested_tickers = list(dict.fromkeys(my_tickers + results_list))

# Only keep tickers that exist in the data. Selecting a missing label with .loc either
# raises KeyError or silently drops it, depending on the pandas version.
available_tickers = df_OHLCV.index.get_level_values('Ticker').unique()
tickers_to_plot = [ticker for ticker in requested_tickers if ticker in available_tickers]
missing_tickers = [ticker for ticker in requested_tickers if ticker not in available_tickers]
if missing_tickers:
    print("Warning: tickers not found in df_OHLCV and skipped:", missing_tickers)

# --- 3. The Efficient Filtering Code ---
# This single line performs a fast, index-based selection.
# The result preserves the original MultiIndex structure and format.
filtered_df = df_OHLCV.loc[tickers_to_plot]


# --- 4. Verification ---
print("Filtered DataFrame created. Shape:", filtered_df.shape)
print("Filtered DataFrame Index Levels:", filtered_df.index.names)

# Verify that only the desired tickers are present
remaining_tickers = filtered_df.index.get_level_values('Ticker').unique().tolist()
print("Tickers remaining in filtered_df:", remaining_tickers)
assert set(tickers_to_plot) == set(remaining_tickers)

print("\nAll checks passed. The format is preserved correctly.")
display(filtered_df.head())

### Load df_finviz

In [None]:
# Location of the merged Finviz snapshot (file name is dated 2025-09-23).
# NOTE(review): the next cell repeats this exact assignment, so this cell is redundant.
finviz_file_path = r'C:\Users\ping\Files_win10\python\py311\stocks\data\2025-09-23_df_finviz_merged_stocks_etfs.parquet'

In [None]:
# Load the merged Finviz snapshot (file name is dated 2025-09-23) with the pyarrow engine.
finviz_file_path = r'C:\Users\ping\Files_win10\python\py311\stocks\data\2025-09-23_df_finviz_merged_stocks_etfs.parquet'
df_finviz = pd.read_parquet(finviz_file_path, engine='pyarrow')

# Plain-text dump of the frame; how much is shown is governed by the pandas display options.
print(f'df_finviz :\n{df_finviz }')

In [None]:
# Look up the Finviz rows for the plotted tickers (label-based, so df_finviz is
# presumably indexed by ticker symbol — confirm).
# NOTE(review): `plotted_tickers` is never assigned in the visible cells (only mentioned
# in commented-out code); `results_list` from the "Plot Tickers" section may be the
# intended replacement — verify before running on a fresh kernel.
print(df_finviz.loc[plotted_tickers])

### Verification Workflow Example

In [None]:
# def inspect_ticker_data(df_ohlcv, ticker, period):
#     """
#     Provides a detailed, transparent breakdown of performance metric calculations for a single ticker over a specific period.
    
#     Returns a pandas DataFrame containing the original OHLCV data plus all intermediate columns
#     used to calculate Price, Sharpe, and Return/ATR metrics. This serves as a "calculation worksheet"
#     for easy verification.
#     """
#     # 1. Initial Data Slicing and Validation
#     if ticker not in df_ohlcv.index.get_level_values(0):
#         print(f"Error: Ticker '{ticker}' not found in the DataFrame.")
#         return
        
#     ticker_df = df_ohlcv.loc[ticker].copy()
#     ticker_df.index = pd.to_datetime(ticker_df.index)
#     end_date = ticker_df.index.max()
    
#     periods = {
#         '1D': end_date - pd.DateOffset(days=1), '5D': end_date - pd.DateOffset(days=5),
#         '3M': end_date - pd.DateOffset(months=3), '6M': end_date - pd.DateOffset(months=6),
#         'YTD': datetime(end_date.year, 1, 1), '1Y': end_date - pd.DateOffset(years=1),
#         '5Y': end_date - pd.DateOffset(years=5),
#     }
    
#     if period not in periods:
#         print(f"Error: Period '{period}' is not a valid option.")
#         return
        
#     start_date = periods[period]
#     period_df = ticker_df.loc[start_date:end_date].copy()
    
#     if len(period_df) < 2:
#         print(f"Not enough data for ticker '{ticker}' in period '{period}'.")
#         return

#     # 2. Add Calculation Columns to the DataFrame
#     # --- Price Performance Columns ---
#     start_price = period_df['Adj Close'].bfill().iloc[0]
#     period_df['Normalized Price'] = period_df['Adj Close'] / start_price

#     # --- Sharpe Ratio Columns ---
#     period_df['Daily Return'] = period_df['Adj Close'].pct_change()

#     # --- Return/ATR Columns (matches the main function's logic) ---
#     period_df['Prev Close'] = period_df['Adj Close'].shift(1)
#     period_df['High-Low'] = period_df['Adj High'] - period_df['Adj Low']
#     period_df['High-PrevClose'] = abs(period_df['Adj High'] - period_df['Prev Close'])
#     period_df['Low-PrevClose'] = abs(period_df['Adj Low'] - period_df['Prev Close'])
#     period_df['True Range'] = period_df[['High-Low', 'High-PrevClose', 'Low-PrevClose']].max(axis=1)
#     # Use the same ewm parameters as the main plotting function for consistency
#     period_df['ATR'] = period_df['True Range'].ewm(alpha=1/14, adjust=False).mean()

#     # 3. Calculate Final Metrics for Summary Printout
#     # Price
#     end_price = period_df['Adj Close'].ffill().iloc[-1]
#     normalized_price = end_price / start_price
    
#     # Sharpe
#     mean_daily_return = period_df['Daily Return'].mean()
#     std_daily_return = period_df['Daily Return'].std()
#     daily_sharpe = 0 if std_daily_return == 0 else mean_daily_return / std_daily_return
#     annualized_sharpe = daily_sharpe * np.sqrt(252)

#     # Return/ATR
#     mean_atr = period_df['ATR'].mean()
#     return_on_atr = 0 if mean_atr == 0 else mean_daily_return / mean_atr

#     # 4. Display Summary and DataFrame
#     display(Markdown(f"### Sanity Check for Ticker: `{ticker}` | Period: `{period}`"))
    
#     display(Markdown("**1. Price Performance Calculation:**"))
#     print(f"   - Start Date ({period_df.index.min().date()}): {start_price:,.2f}")
#     print(f"   - End Date   ({period_df.index.max().date()}): {end_price:,.2f}")
#     print(f"   - Final Normalized Price: ({end_price:,.2f} / {start_price:,.2f}) = {normalized_price:.4f}\n")

#     display(Markdown("**2. Sharpe Ratio Calculation:**"))
#     print(f"   - Number of Trading Days: {len(period_df)}")
#     print(f"   - Mean of Daily Returns: {mean_daily_return:.6f}")
#     print(f"   - Std Dev of Daily Returns: {std_daily_return:.6f}")
#     print(f"   - Annualized Sharpe Ratio: {annualized_sharpe:.4f}\n")
    
#     display(Markdown("**3. Return/ATR Calculation:**"))
#     print(f"   - Mean of Daily Returns: {mean_daily_return:.6f}")
#     print(f"   - Mean ATR (14-day EWM): {mean_atr:.6f}")
#     print(f"   - Return / Mean ATR: {return_on_atr:.6f}\n")

#     display(Markdown("**4. Underlying Data and Calculation Worksheet:**"))
#     # Define a clean column order for the final display
#     display_cols = [
#         'Adj Open', 'Adj High', 'Adj Low', 'Adj Close', # Original Data
#         'Normalized Price', 'Daily Return',             # Metric Columns
#         'Prev Close', 'True Range', 'ATR'               # ATR Intermediates
#     ]
#     # Filter for columns that actually exist to avoid errors if some are missing
#     display_cols = [col for col in display_cols if col in period_df.columns]
#     with pd.option_context('display.max_rows', 15):
#         display(period_df[display_cols])
    
#     return period_df

In [None]:
# --- Helper "Sanity Check" Function (MODIFIED for Sharpe (ATR)) ---
def inspect_ticker_data(df_ohlcv, ticker, period):
    """
    Print a transparent, step-by-step breakdown of the Price, Sharpe and
    Sharpe (ATR) calculations for one ticker over one look-back period.

    Args:
        df_ohlcv (pd.DataFrame): OHLCV data with a (Ticker, Date) MultiIndex and
            'Adj High', 'Adj Low' and 'Adj Close' columns.
        ticker (str): Ticker symbol to inspect; must be present in index level 0.
        period (str): Look-back window ending at the ticker's last available date.
            One of '1D', '5D', '3M', '6M', 'YTD', '1Y', '5Y'.

    Returns:
        pd.DataFrame | None: The period slice with the added worksheet columns
        ('Normalized Price', 'Daily Return', 'Prev Close', 'High-Low',
        'High-PrevClose', 'Low-PrevClose', 'True Range', 'ATR', 'ATRp').
        ``None`` (after printing a message) if the ticker is unknown, the period
        is not recognised, or the window holds fewer than two rows.
    """
    # 1. Initial data slicing and validation
    if ticker not in df_ohlcv.index.get_level_values(0):
        print(f"Error: Ticker '{ticker}' not found in the DataFrame.")
        return
    ticker_df = df_ohlcv.loc[ticker].copy()
    ticker_df.index = pd.to_datetime(ticker_df.index)
    end_date = ticker_df.index.max()
    periods = {
        '1D': end_date - pd.DateOffset(days=1), '5D': end_date - pd.DateOffset(days=5),
        '3M': end_date - pd.DateOffset(months=3), '6M': end_date - pd.DateOffset(months=6),
        # pd.Timestamp keeps this helper independent of a separate `datetime` import.
        'YTD': pd.Timestamp(year=end_date.year, month=1, day=1), '1Y': end_date - pd.DateOffset(years=1),
        '5Y': end_date - pd.DateOffset(years=5),
    }
    if period not in periods:
        print(f"Error: Period '{period}' is not a valid option.")
        return
    start_date = periods[period]
    period_df = ticker_df.loc[start_date:end_date].copy()
    if len(period_df) < 2:
        print(f"Not enough data for ticker '{ticker}' in period '{period}'.")
        return

    # 2. Add calculation columns to the DataFrame
    # Price and Sharpe inputs
    start_price = period_df['Adj Close'].bfill().iloc[0]
    period_df['Normalized Price'] = period_df['Adj Close'] / start_price
    period_df['Daily Return'] = period_df['Adj Close'].pct_change()

    # True Range / ATR / percentage ATR
    # NOTE(review): 'Prev Close' is shifted inside the sliced window, so the first row has
    # no previous close and its True Range falls back to High-Low (row-wise max skips NaN).
    # plot_walk_forward_analyzer shifts the full history instead — confirm which is intended.
    period_df['Prev Close'] = period_df['Adj Close'].shift(1)
    period_df['High-Low'] = period_df['Adj High'] - period_df['Adj Low']
    period_df['High-PrevClose'] = abs(period_df['Adj High'] - period_df['Prev Close'])
    period_df['Low-PrevClose'] = abs(period_df['Adj Low'] - period_df['Prev Close'])
    period_df['True Range'] = period_df[['High-Low', 'High-PrevClose', 'Low-PrevClose']].max(axis=1)
    period_df['ATR'] = period_df['True Range'].ewm(alpha=1/14, adjust=False).mean()
    period_df['ATRp'] = period_df['ATR'] / period_df['Adj Close']

    # 3. Calculate final metrics for the summary printout
    end_price = period_df['Adj Close'].ffill().iloc[-1]
    normalized_price = end_price / start_price
    mean_daily_return = period_df['Daily Return'].mean()
    std_daily_return = period_df['Daily Return'].std()

    # A two-row window has a single return, so its sample std is NaN; treat NaN like a
    # zero denominator and report 0, as the ranking code does via fillna(0).
    if pd.isna(std_daily_return) or std_daily_return == 0:
        daily_sharpe = 0
    else:
        daily_sharpe = mean_daily_return / std_daily_return
    annualized_sharpe = daily_sharpe * np.sqrt(252)

    mean_atrp = period_df['ATRp'].mean()
    if pd.isna(mean_atrp) or mean_atrp == 0:
        sharpe_atr_value = 0
    else:
        sharpe_atr_value = mean_daily_return / mean_atrp

    # 4. Display summary and DataFrame
    display(Markdown(f"### Sanity Check for Ticker: `{ticker}` | Period: `{period}`"))

    display(Markdown("**1. Price Performance Calculation:**"))
    print(f"   - Start Date ({period_df.index.min().date()}): {start_price:,.2f}")
    print(f"   - End Date   ({period_df.index.max().date()}): {end_price:,.2f}")
    print(f"   - Final Normalized Price: ({end_price:,.2f} / {start_price:,.2f}) = {normalized_price:.4f}\n")
    display(Markdown("**2. Sharpe Ratio Calculation:**"))
    print(f"   - Number of Trading Days: {len(period_df)}")
    print(f"   - Mean of Daily Returns: {mean_daily_return:.6f}")
    print(f"   - Std Dev of Daily Returns: {std_daily_return:.6f}")
    print(f"   - Annualized Sharpe Ratio: {annualized_sharpe:.4f}\n")

    display(Markdown("**3. Sharpe (ATR) Calculation:**"))
    print(f"   - Mean of Daily Returns: {mean_daily_return:.6f}")
    print(f"   - Mean %ATR (ATRp): {mean_atrp:.6f}")
    print(f"   - Sharpe (ATR) Value: {sharpe_atr_value:.6f}\n")

    display(Markdown("**4. Underlying Data and Calculation Worksheet:**"))
    display_cols = [
        'Adj Close', 'Normalized Price', 'Daily Return',
        'ATR', 'ATRp'
    ]
    # Only show columns that exist, so a missing input column does not break the display.
    display_cols = [col for col in display_cols if col in period_df.columns]
    with pd.option_context('display.max_rows', 15):
        display(period_df[display_cols])

    return period_df

In [None]:
# --- 1. Run the main plot to obtain the results container ---
_METRIC = 'Sharpe (ATR)'
_PERIOD = '3M'
_RANK_START = 20
_RANK_END = 30

print(f"--- Running main widget to get tickers ranked {_RANK_START}-{_RANK_END} for '{_METRIC}' over '{_PERIOD}' ---")

results_container = plot_interactive_performers_widget(
    df_OHLCV,
    default_metric=_METRIC,
    default_period=_PERIOD,
    default_rank_start=_RANK_START,
    default_rank_end=_RANK_END,
)

# --- 2. Verification: inspect the first and the fifth ticker of the ranked table ---
print("\n" + "="*80)
print("--- STARTING VERIFICATION PROCESS ---")
print("="*80 + "\n")

# Element 0 of the container is the results DataFrame (or None when nothing was produced).
results_df = results_container[0]

if results_df is None or len(results_df) < 5:
    # Nothing (or too little) to verify: report how many tickers came back.
    num_tickers = 0 if results_df is None else len(results_df)
    print(f"Could not perform verification check: The plotter returned fewer than 5 tickers ({num_tickers}).")
else:
    # Ticker symbols come from the index, their rank numbers from the 'Rank' column.
    ticker_to_check_1, ticker_to_check_2 = results_df.index[0], results_df.index[4]
    rank_1, rank_2 = results_df['Rank'].iloc[0], results_df['Rank'].iloc[4]

    print(f"\n--- Verifying Rank #{rank_1}: {ticker_to_check_1} ---")
    inspection_df_1 = inspect_ticker_data(df_OHLCV, ticker=ticker_to_check_1, period=_PERIOD)

    print(f"\n--- Verifying Rank #{rank_2}: {ticker_to_check_2} ---")
    inspection_df_2 = inspect_ticker_data(df_OHLCV, ticker=ticker_to_check_2, period=_PERIOD)

    print("\nVerification successful. You can now compare the calculated metric values above.")
    print("The 'Sharpe (ATR) Value' for the first ticker should be higher than for the second.")

print("\n" + "="*80)
print("--- VERIFICATION PROCESS COMPLETE ---")
print("="*80)

In [None]:
import os

# Export both verification worksheets to the Downloads folder, one CSV per ticker.
csv_path_1 = os.path.join(download_path, f'{ticker_to_check_1}.csv')
inspection_df_1.to_csv(csv_path_1)

csv_path_2 = os.path.join(download_path, f'{ticker_to_check_2}.csv')
inspection_df_2.to_csv(csv_path_2)

In [None]:
# Rows dated within the last three months, counted back from today.
cutoff = pd.Timestamp('today') - pd.DateOffset(months=3)

# df_OHLCV has a (Ticker, Date) MultiIndex, so the cutoff must be compared with the
# date level (level 1) only. Comparing the whole index would compare (ticker, date)
# tuples with a Timestamp instead of filtering by date.
row_dates = pd.to_datetime(df_OHLCV.index.get_level_values(1))
last_3m = df_OHLCV.loc[row_dates >= cutoff].copy()
last_3m

In [None]:
# Re-check the frame's index range, dtypes and non-null counts (same call as right after loading).
df_OHLCV.info()

In [None]:
# Sort the (Ticker, Date) index before the label-based date slice below.
# NOTE(review): inplace=True mutates the shared df_OHLCV that every other cell reads.
df_OHLCV.sort_index(inplace=True)
# All rows of the first verified ticker between 2025-06-23 and 2025-09-23 (hard-coded window).
df_OHLCV.loc[(ticker_to_check_1, slice('2025-06-23', '2025-09-23')), :]

In [None]:
# --- Example Usage after running the main plotting cell ---

# Period to inspect; it should match the period currently selected in the main plot.
_period = '3M' 

# Pick the first ticker from the list of plotted tickers.
# NOTE(review): `plotted_tickers` is never assigned in the visible cells (it only appears
# in commented-out code), so on a fresh kernel this raises NameError instead of reaching
# the else branch — confirm where it is meant to come from.
if plotted_tickers:
    _ticker = plotted_tickers[0] 

    # Call the inspection function
    inspection_df = inspect_ticker_data(
        df_OHLCV, # Use the same dataframe you passed to the plot
        ticker=_ticker, 
        period=_period
    )
else:
    print("Run the main plotting widget first to populate the 'plotted_tickers' list.")

In [None]:
# Manual spot check: 3-month calculation worksheet for a single, hand-picked ticker.
_ticker = 'IREN'
_period = '3M'
_period_df = inspect_ticker_data(df_OHLCV, ticker=_ticker, period=_period)

In [None]:
# NOTE(review): this cell is an exact duplicate of the previous one (same ticker and period).
_ticker = 'IREN'
_period = '3M'
_period_df = inspect_ticker_data(df_OHLCV, ticker=_ticker, period=_period)

In [None]:
import pandas as pd
import numpy as np
from IPython.display import display, Markdown

def inspect_ticker_data(df_ohlcv, ticker, period):
    """
    Performs a detailed calculation sanity check for a single ticker over a given period.

    The price-performance and Sharpe-ratio arithmetic is printed step by step so the
    figures can be compared by hand against the values used for ranking.

    Args:
        df_ohlcv (pd.DataFrame): The original 'long' format DataFrame with a 
                                 (Ticker, Date) MultiIndex and an 'Adj Close' column.
        ticker (str): The ticker symbol to inspect (e.g., 'NVDA').
        period (str): The period to analyze. One of '1D', '5D', '3M', '6M',
                      'YTD', '1Y', '5Y'; each is measured back from the ticker's
                      most recent date.

    Returns:
        pd.DataFrame | None: The 'Adj Close' and 'Daily Return' columns for the
        requested period, or None (after printing a message) when the ticker is
        unknown, the period label is invalid, or fewer than two rows fall inside
        the period.
    """
    # --- 1. Data Preparation ---

    # Check if the ticker exists
    if ticker not in df_ohlcv.index.get_level_values(0):
        print(f"Error: Ticker '{ticker}' not found in the DataFrame.")
        return None

    # Select all data for the specified ticker and create a clean DataFrame.
    # Sorting guarantees the date-label slice below works on a monotonic index.
    ticker_df = df_ohlcv.loc[ticker].copy()
    ticker_df.index = pd.to_datetime(ticker_df.index)
    ticker_df = ticker_df.sort_index()

    # Define the start date based on the period
    end_date = ticker_df.index.max()
    periods = {
        '1D': end_date - pd.DateOffset(days=1),
        '5D': end_date - pd.DateOffset(days=5),
        '3M': end_date - pd.DateOffset(months=3),
        '6M': end_date - pd.DateOffset(months=6),
        # January 1st of end_date's year, derived from the Timestamp itself so
        # the function does not rely on `datetime` having been imported elsewhere.
        'YTD': end_date.replace(month=1, day=1).normalize(),
        '1Y': end_date - pd.DateOffset(years=1),
        '5Y': end_date - pd.DateOffset(years=5),
    }

    if period not in periods:
        print(f"Error: Period '{period}' is not a valid option. Choose from {list(periods.keys())}")
        return None

    start_date = periods[period]

    # Slice the DataFrame for the requested period
    period_df = ticker_df.loc[start_date:end_date].copy()

    if len(period_df) < 2:
        print(f"Not enough data for ticker '{ticker}' in period '{period}' to perform calculations.")
        return None

    # --- 2. Perform and Display Calculations ---

    display(Markdown(f"### Sanity Check for Ticker: `{ticker}` | Period: `{period}`"))

    # Price Performance: first/last available close inside the window
    # (bfill/ffill skip over missing values at either edge).
    start_price = period_df['Adj Close'].bfill().iloc[0]
    end_price = period_df['Adj Close'].ffill().iloc[-1]
    normalized_price = end_price / start_price

    display(Markdown("**1. Price Performance Calculation:**"))
    print(f"   - Start Date ({period_df.index.min().date()}): {start_price:,.2f}")
    print(f"   - End Date   ({period_df.index.max().date()}): {end_price:,.2f}")
    print(f"   - Final Normalized Price: ({end_price:,.2f} / {start_price:,.2f}) = {normalized_price:.4f}\n")

    # Sharpe Ratio
    period_df['Daily Return'] = period_df['Adj Close'].pct_change()
    mean_daily_return = period_df['Daily Return'].mean()
    std_daily_return = period_df['Daily Return'].std()

    # A flat price gives a zero standard deviation and a single return gives NaN;
    # both are reported as a Sharpe of 0 instead of dividing by zero or
    # propagating NaN.
    if pd.isna(std_daily_return) or std_daily_return == 0:
        daily_sharpe = 0.0
    else:
        daily_sharpe = mean_daily_return / std_daily_return
    annualized_sharpe = daily_sharpe * np.sqrt(252)

    display(Markdown("**2. Sharpe Ratio Calculation:**"))
    print(f"   - Number of Trading Days: {len(period_df)}")
    print(f"   - Mean of Daily Returns: {mean_daily_return:.6f}")
    print(f"   - Std Dev of Daily Returns: {std_daily_return:.6f}")
    print(f"   - Daily Sharpe Ratio: ({mean_daily_return:.6f} / {std_daily_return:.6f}) = {daily_sharpe:.4f}")
    print(f"   - Annualized Sharpe Ratio: ({daily_sharpe:.4f} * sqrt(252)) = {annualized_sharpe:.4f}\n")

    # --- 3. Display Underlying Data ---
    display(Markdown("**3. Underlying Data Sample:**"))
    display(period_df[['Adj Close', 'Daily Return']].head())
    if len(period_df) > 10:
        print("   ...")
        display(period_df[['Adj Close', 'Daily Return']].tail())

    return period_df[['Adj Close', 'Daily Return']]



In [None]:
# Run the sanity check for 'IREN' over the '3M' window using the helper defined above.
_ticker, _period = 'IREN', '3M'
_period_df = inspect_ticker_data(df_OHLCV, ticker=_ticker, period=_period)

In [None]:
# Independent copy of every row for `_ticker`; selecting on the outer (ticker)
# index level leaves a frame indexed by the remaining level.
_ticker_df = df_OHLCV.loc[_ticker].copy()

##################

In [None]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
import numpy as np
import ipywidgets as widgets
from IPython.display import display
import pprint # Import the pretty-print library

def plot_interactive_performers_widget(df_ohlcv, 
                                       default_metric='Price',
                                       default_period='1Y', 
                                       default_rank_start=1,
                                       default_rank_end=10):
    """
    Creates a robust interactive plot using ipywidgets for state management.
    Allows for selecting tickers by a slice of their rank (e.g., 10th to 20th).
    Returns a list that is updated with the currently displayed tickers.

    Rankings for every metric/period pair are pre-computed once; the
    "Update Chart" button then only re-assigns data to a fixed pool of traces.

    Args:
        df_ohlcv (pd.DataFrame): 'Long' format frame with a (Ticker, Date)
            MultiIndex and an 'Adj Close' column.
        default_metric (str): Initial ranking metric, 'Price' or 'Sharpe'.
        default_period (str): Initial period label, one of '1D', '5D', '3M',
            '6M', 'YTD', '1Y', '5Y'.
        default_rank_start (int): Initial first rank to show (1-based, inclusive).
        default_rank_end (int): Initial last rank to show (inclusive).

    Returns:
        list: The tickers currently drawn on the chart. The same list object is
        cleared and refilled on every successful update, so the caller's
        reference stays current.

    Raises:
        ValueError: If `df_ohlcv` does not have a MultiIndex or lacks 'Adj Close'.
    """
    # --- [Section 1 & 2: Data Prep & Pre-computation] ---
    print("Reshaping and pre-computing all performance data...")
    if not isinstance(df_ohlcv.index, pd.MultiIndex) or 'Adj Close' not in df_ohlcv.columns:
        raise ValueError("Expected 'long' format with (Ticker, Date) MultiIndex and 'Adj Close' column.")

    # Wide layout: one row per date, one column per ticker.
    df_close = df_ohlcv['Adj Close'].unstack(level=0)
    df_close.index = pd.to_datetime(df_close.index)
    end_date = df_close.index.max()

    metrics = ['Price', 'Sharpe']
    periods = {
        '1D': end_date - pd.DateOffset(days=1), '5D': end_date - pd.DateOffset(days=5),
        '3M': end_date - pd.DateOffset(months=3), '6M': end_date - pd.DateOffset(months=6),
        'YTD': datetime(end_date.year, 1, 1), '1Y': end_date - pd.DateOffset(years=1),
        '5Y': end_date - pd.DateOffset(years=5),
    }
    all_rankings = {metric: {} for metric in metrics}
    period_normalized_data = {}
    for label, start_date in periods.items():
        period_df = df_close.loc[start_date:end_date]
        # At least two rows are needed to form a return.
        if len(period_df) < 2:
            continue

        # First/last available price per ticker inside the window.
        first_prices = period_df.bfill().iloc[0]
        last_prices = period_df.ffill().iloc[-1]
        all_rankings['Price'][label] = (
            (last_prices / first_prices).dropna().sort_values(ascending=False).index.tolist()
        )

        daily_returns = period_df.pct_change()
        mean_returns, std_returns = daily_returns.mean(), daily_returns.std()
        # Zero or undefined volatility yields +/-inf or NaN; treat both as a
        # Sharpe of 0, matching the zero-std handling in inspect_ticker_data.
        sharpe_ratio = (mean_returns / std_returns).replace([np.inf, -np.inf], np.nan).fillna(0)
        all_rankings['Sharpe'][label] = (
            (sharpe_ratio * np.sqrt(252)).sort_values(ascending=False).index.tolist()
        )

        period_normalized_data[label] = period_df.div(first_prices)
    print("Pre-computation finished.")

    # --- 3. Create ipywidgets Controls & Data Holder ---
    # Size of the fixed trace pool; at most this many tickers can be drawn.
    max_traces = 50
    # A Dropdown rejects a value that is not among its options, so make sure the
    # caller's defaults are always selectable.
    rank_options = sorted({1, 5, 10, 20, 30, 40, 50, 75, 100, default_rank_start, default_rank_end})
    metric_dropdown = widgets.Dropdown(options=metrics, value=default_metric, description='Metric:')
    period_dropdown = widgets.Dropdown(options=list(periods.keys()), value=default_period, description='Period:')
    rank_start_dropdown = widgets.Dropdown(options=rank_options, value=default_rank_start, description='Rank Start:')
    rank_end_dropdown = widgets.Dropdown(options=rank_options, value=default_rank_end, description='Rank End:')
    update_button = widgets.Button(description="Update Chart", button_style='primary')
    ticker_list_output = widgets.Output()

    # Returned to the caller and mutated in place by update_plot.
    _currently_displayed_tickers = []

    # --- 4. Create a FigureWidget ---
    # Pre-allocate hidden placeholder traces; updates only swap their data.
    fig = go.FigureWidget()
    for i in range(max_traces):
        fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name=f'placeholder_{i}', visible=False, showlegend=False))

    # --- 5. Define the Update Logic ---
    def update_plot(button_click):
        """Redraw the chart from the current dropdown values (button callback)."""
        metric, period = metric_dropdown.value, period_dropdown.value
        rank_start, rank_end = rank_start_dropdown.value, rank_end_dropdown.value

        ticker_list_output.clear_output()

        if rank_start > rank_end:
            with ticker_list_output: print("Error: 'Rank Start' must be less than or equal to 'Rank End'.")
            return

        ranked_tickers = all_rankings[metric].get(period, [])
        requested_tickers = ranked_tickers[rank_start-1:rank_end]
        norm_data = period_normalized_data.get(period)

        # Only max_traces traces exist, so cap the selection; otherwise the
        # printed/returned list would name tickers that are not on the chart.
        tickers_to_display = requested_tickers[:max_traces]
        omitted_count = len(requested_tickers) - len(tickers_to_display)

        with fig.batch_update():
            for i in range(max_traces):
                trace = fig.data[i]
                if i < len(tickers_to_display) and norm_data is not None:
                    ticker = tickers_to_display[i]
                    trace.x, trace.y, trace.name = norm_data[ticker].index, norm_data[ticker], ticker
                    trace.visible, trace.showlegend = True, True
                else:
                    trace.visible, trace.showlegend = False, False

        # Mutate (not rebind) so the list handed back to the caller stays in sync.
        _currently_displayed_tickers.clear()
        _currently_displayed_tickers.extend(tickers_to_display)

        with ticker_list_output:
            print("Currently Displayed Tickers:")
            # compact=True forces a more horizontal layout
            pprint.pprint(tickers_to_display, width=120, compact=True)
            if omitted_count:
                print(f"Note: the chart can draw at most {max_traces} tickers; "
                      f"{omitted_count} lower-ranked ticker(s) in the selected range were omitted.")

    # --- 6. Set up Initial View and Layout ---
    update_plot(None)
    fig.update_layout(
        title_text='Top Performing Tickers (Normalized to 1)',
        xaxis_title='Date', yaxis_title='Normalized Price (Start = 1)',
        hovermode='x unified', legend_title_text='Tickers',
        height=700, margin=dict(t=50)
    )
    fig.add_hline(y=1, line_width=1, line_dash="dash", line_color="grey")
    update_button.on_click(update_plot)

    # --- 7. Display the UI ---
    controls = widgets.HBox([metric_dropdown, period_dropdown, rank_start_dropdown, rank_end_dropdown, update_button])
    ui_container = widgets.VBox([controls, ticker_list_output], layout=widgets.Layout(margin='10px 0 20px 0'))

    display(ui_container, fig)

    return _currently_displayed_tickers

In [None]:
# Bind the name up front so it exists even if the call below raises.
plotted_tickers = []

# Build the widget and keep the list it returns. That list is cleared and
# refilled in place on each chart update, so this name tracks the chart.
plotted_tickers = plot_interactive_performers_widget(
    df_OHLCV,
    default_rank_start=20,
    default_rank_end=40,
    default_period='1Y',
    default_metric='Sharpe',
)

# Header line followed by the list itself.
print(
    "\n--- After the plot, the 'plotted_tickers' variable holds the last displayed list: ---",
    plotted_tickers,
    sep="\n",
)

In [None]:
# Show the current contents of the list returned by the plotting widget.
print(
    "\n--- After the plot, the 'plotted_tickers' variable holds the last displayed list: ---",
    plotted_tickers,
    sep="\n",
)