In [17]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np
import ipywidgets as widgets
import pprint 
from pathlib import Path
from datetime import datetime, date
from IPython.display import display, Markdown

# Relax pandas' display limits so long / wide frames print with little truncation.
pd.options.display.max_rows = 200
pd.options.display.max_columns = None
pd.options.display.width = 3000

# Per-user "Downloads" folder.
download_path = Path.home().joinpath("Downloads")

# Input parquet file. Alternative input (the "clean" variant of the same file name):
# OHLCV_file_path = r'c:\Users\ping\Files_win10\python\py311\stocks\data\df_OHLCV_clean_stocks_etfs.parquet'
OHLCV_file_path = r'c:\Users\ping\Files_win10\python\py311\stocks\data\df_OHLCV_stocks_etfs.parquet'

# Load the OHLCV table with the pyarrow engine.
df_OHLCV = pd.read_parquet(path=OHLCV_file_path, engine='pyarrow')

In [18]:
# Summarise the loaded frame: index type, per-column non-null counts and dtypes, memory usage.
df_OHLCV.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1094338 entries, ('AA', Timestamp('2025-09-25 00:00:00')) to ('ZWS', Timestamp('2023-01-03 00:00:00'))
Data columns (total 5 columns):
 #   Column     Non-Null Count    Dtype  
---  ------     --------------    -----  
 0   Adj Open   1094338 non-null  float64
 1   Adj High   1094338 non-null  float64
 2   Adj Low    1094338 non-null  float64
 3   Adj Close  1094338 non-null  float64
 4   Volume     1094338 non-null  int64  
dtypes: float64(4), int64(1)
memory usage: 46.0+ MB


In [None]:
# Select every row for one ticker ('AA') from the first level of the (Ticker, Date) index.
df_OHLCV.loc['AA']

### The New "Walk-Forward" Analyzer Function

Here is the complete code. You can add this new function to your project; it does not replace or interfere with our previous functions.

In [None]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime, date
import numpy as np
import ipywidgets as widgets
from IPython.display import display, Markdown
import pprint 

def plot_walk_forward_analyzer(df_ohlcv,
                               default_start_date=None,
                               default_calc_period='3M',
                               default_fwd_period='1M',
                               default_metric='Sharpe (ATR)',
                               default_rank_start=1,
                               default_rank_end=10):
    """
    Build and display an interactive walk-forward analysis chart.

    Tickers are ranked on a metric computed over a calculation window that
    begins at the selected start date. The tickers in the selected rank range
    are plotted (normalised to 1 at the start of the window) through the
    calculation window plus an additional forward window, and a results table
    with in-window and forward gains is produced.

    Args:
        df_ohlcv: DataFrame with a (Ticker, Date) MultiIndex and at least the
            columns 'Adj Close', 'Adj High' and 'Adj Low'.
        default_start_date: Initial value of the start-date picker. Anything
            ``pd.to_datetime`` accepts (ISO string, ``datetime.date``, ...).
            ``None`` means one year before the last available date.
        default_calc_period: Initial calculation period: '1M', '3M', '6M' or '1Y'.
        default_fwd_period: Initial forward period: '0D', '1W', '2W', '1M' or '3M'.
        default_metric: Initial metric: 'Price', 'Sharpe' or 'Sharpe (ATR)'.
        default_rank_start: Initial first rank to show (one of the rank options).
        default_rank_end: Initial last rank to show (one of the rank options).

    Returns:
        A one-element list. Element 0 is ``None`` until an update succeeds and
        is then the latest results DataFrame (indexed by 'Ticker') with columns
        'Rank', 'Metric', 'MetricValue', 'CalcPrice', 'CalcGain', 'CalcPeriod',
        'FwdPeriod' and 'FwdGain'. When an update is rejected (invalid input or
        not enough data) the previous element is left untouched.

    Raises:
        ValueError: If ``df_ohlcv`` is not indexed by a MultiIndex.
    """
    print("Initializing Walk-Forward Analyzer...")
    if not isinstance(df_ohlcv.index, pd.MultiIndex):
        raise ValueError("Input DataFrame must have a (Ticker, Date) MultiIndex.")
    df_ohlcv = df_ohlcv.sort_index()
    print("Pre-processing data (unstacking)...")
    # One wide frame per price field: rows are dates, columns are tickers.
    df_close_full = df_ohlcv['Adj Close'].unstack(level=0)
    df_high_full = df_ohlcv['Adj High'].unstack(level=0)
    df_low_full = df_ohlcv['Adj Low'].unstack(level=0)
    df_close_full.index = pd.to_datetime(df_close_full.index)
    df_high_full.index = pd.to_datetime(df_high_full.index)
    df_low_full.index = pd.to_datetime(df_low_full.index)

    min_date_available = df_close_full.index.min()
    max_date_available = df_close_full.index.max()

    # The DatePicker works with plain ``datetime.date`` values, so normalise
    # whatever the caller supplied (string, date, datetime, Timestamp).
    if default_start_date is None:
        default_start_date = (max_date_available - pd.DateOffset(years=1)).date()
    else:
        default_start_date = pd.to_datetime(default_start_date).date()

    # --- Widgets ---
    start_date_picker = widgets.DatePicker(description='Start Date:', value=default_start_date, disabled=False)
    calc_period_options = {
        '1M': pd.DateOffset(months=1),
        '3M': pd.DateOffset(months=3),
        '6M': pd.DateOffset(months=6),
        '1Y': pd.DateOffset(years=1),
    }
    fwd_period_options = {
        '0D': pd.DateOffset(days=0),
        '1W': pd.DateOffset(weeks=1),
        '2W': pd.DateOffset(weeks=2),
        '1M': pd.DateOffset(months=1),
        '3M': pd.DateOffset(months=3),
    }
    calc_period_dropdown = widgets.Dropdown(options=list(calc_period_options), value=default_calc_period, description='Calc Period:')
    fwd_period_dropdown = widgets.Dropdown(options=list(fwd_period_options), value=default_fwd_period, description='Fwd Period:')
    metrics = ['Price', 'Sharpe', 'Sharpe (ATR)']
    metric_dropdown = widgets.Dropdown(options=metrics, value=default_metric, description='Metric:')
    rank_options = [1, 5, 10, 20, 30, 40, 50, 75, 100]
    rank_start_dropdown = widgets.Dropdown(options=rank_options, value=default_rank_start, description='Rank Start:')
    rank_end_dropdown = widgets.Dropdown(options=rank_options, value=default_rank_end, description='Rank End:')
    update_button = widgets.Button(description="Update Chart", button_style='primary')
    ticker_list_output = widgets.Output()
    results_container = [None]

    # Pre-allocate one hidden trace per ticker that can ever be selected. The
    # widest selectable range is rank 1..max(rank_options); a smaller pool
    # would silently drop tickers from the chart.
    max_traces = max(rank_options)
    fig = go.FigureWidget(data=[
        go.Scatter(x=[None], y=[None], mode='lines', name=f'placeholder_{i}', visible=False, showlegend=False)
        for i in range(max_traces)
    ])

    # Horizontal reference line at normalised price 1. It is kept as a shape
    # definition and re-applied on every update, because update_plot replaces
    # the whole ``layout.shapes`` list.
    baseline_shape = dict(type="line", xref="paper", x0=0, x1=1, yref="y", y0=1, y1=1,
                          line=dict(color="grey", width=1, dash="dash"))

    def _finite_or_zero(values):
        """Map NaN and +/-inf (zero-volatility denominators) to 0 so they cannot top the ranking."""
        return values.replace([np.inf, -np.inf], np.nan).fillna(0)

    def update_plot(button_click):
        """Recompute the ranking from the current widget values and refresh chart and results."""
        ticker_list_output.clear_output()
        if start_date_picker.value is None:
            # A cleared picker would otherwise become NaT and fail in the slices below.
            with ticker_list_output:
                print("Error: Please select a start date.")
            return
        start_date = pd.to_datetime(start_date_picker.value)
        calc_period = calc_period_options[calc_period_dropdown.value]
        fwd_period = fwd_period_options[fwd_period_dropdown.value]
        metric = metric_dropdown.value
        rank_start, rank_end = rank_start_dropdown.value, rank_end_dropdown.value
        if rank_start > rank_end:
            with ticker_list_output:
                print("Error: 'Rank Start' must be <= 'Rank End'.")
            return

        # Requested windows, clamped to the dates that actually exist.
        calc_end_date_theoretical = start_date + calc_period
        viz_end_date_theoretical = calc_end_date_theoretical + fwd_period
        safe_start_date = max(start_date, min_date_available)
        safe_calc_end_date = min(calc_end_date_theoretical, max_date_available)
        safe_viz_end_date = min(viz_end_date_theoretical, max_date_available)
        if safe_start_date >= safe_calc_end_date:
            with ticker_list_output:
                print(f"Error: Invalid date range. The calculation period has no data.")
            return

        # Drop tickers with no close at all inside the calculation window.
        calc_close = df_close_full.loc[safe_start_date:safe_calc_end_date].dropna(axis=1, how='all')
        if calc_close.shape[1] == 0 or len(calc_close) < 2:
            with ticker_list_output:
                print("Error: Not enough data in the calculation period to rank.")
            return

        # --- Metrics over the calculation window ---
        calc_start_prices = calc_close.bfill().iloc[0]   # first available close per ticker
        calc_end_prices = calc_close.ffill().iloc[-1]    # last available close per ticker
        metric_values = {'Price': (calc_end_prices / calc_start_prices).dropna()}

        daily_returns = calc_close.bfill().ffill().pct_change()
        mean_returns, std_returns = daily_returns.mean(), daily_returns.std()
        metric_values['Sharpe'] = _finite_or_zero(mean_returns / std_returns * np.sqrt(252))

        valid_tickers = calc_close.columns
        calc_high = df_high_full[valid_tickers].loc[safe_start_date:safe_calc_end_date]
        calc_low = df_low_full[valid_tickers].loc[safe_start_date:safe_calc_end_date]
        # Shift BEFORE slicing so the first row of the window still sees the
        # close of the preceding row (when one exists).
        prev_close = df_close_full[valid_tickers].shift(1).loc[safe_start_date:safe_calc_end_date]

        high_low = calc_high - calc_low
        high_prev_close = (calc_high - prev_close).abs()
        low_prev_close = (calc_low - prev_close).abs()
        tr = np.maximum(high_low, np.maximum(high_prev_close, low_prev_close))
        atr = tr.ewm(alpha=1/14, adjust=False).mean()
        atrp = (atr / calc_close).mean()  # mean ATR as a fraction of the close
        metric_values['Sharpe (ATR)'] = _finite_or_zero(mean_returns / atrp)

        # --- Ranking and plot data ---
        sorted_tickers = metric_values[metric].sort_values(ascending=False)
        tickers_to_display = sorted_tickers.index[rank_start-1:rank_end].tolist()
        normalized_plot_data = df_close_full[tickers_to_display].loc[safe_start_date:safe_viz_end_date]
        normalized_plot_data = normalized_plot_data.div(normalized_plot_data.bfill().iloc[0])
        actual_calc_end_ts = calc_close.index.max()
        actual_viz_start_date = normalized_plot_data.index.min().date()
        actual_viz_end_date = normalized_plot_data.index.max().date()
        actual_calc_end_date = actual_calc_end_ts.date()

        # Vertical marker separating the calculation window from the forward window.
        calc_end_marker = dict(type="line", x0=actual_calc_end_ts, y0=0, x1=actual_calc_end_ts, y1=1,
                               xref='x', yref='paper', line=dict(color="grey", width=2, dash="dash"))
        with fig.batch_update():
            fig.layout.shapes = [baseline_shape, calc_end_marker]
            for i in range(max_traces):
                trace = fig.data[i]
                if i < len(tickers_to_display):
                    ticker = tickers_to_display[i]
                    trace.x, trace.y, trace.name = normalized_plot_data.index, normalized_plot_data[ticker], ticker
                    trace.visible, trace.showlegend = True, True
                else:
                    trace.visible, trace.showlegend = False, False

        # --- Results table ---
        fwd_close_slice = df_close_full.loc[actual_calc_end_ts:safe_viz_end_date]
        if fwd_close_slice.empty or len(fwd_close_slice) < 2:
            # No forward rows beyond the calculation end: forward gain becomes 0.
            viz_end_prices = calc_end_prices
        else:
            viz_end_prices = fwd_close_slice.ffill().iloc[-1]

        calc_gains = (calc_end_prices / calc_start_prices) - 1
        fwd_gains = (viz_end_prices / calc_end_prices) - 1
        results_df = pd.DataFrame(
            {
                'Rank': range(rank_start, rank_start + len(tickers_to_display)),
                'Metric': metric,
                'MetricValue': sorted_tickers.loc[tickers_to_display].values,
                'CalcPrice': calc_end_prices.loc[tickers_to_display],
                'CalcGain': calc_gains.loc[tickers_to_display],
                'CalcPeriod': calc_period_dropdown.value,
                'FwdPeriod': fwd_period_dropdown.value,
                'FwdGain': fwd_gains.loc[tickers_to_display],
            },
            index=pd.Index(tickers_to_display, name='Ticker'),
        )
        results_df = results_df[['Rank', 'Metric', 'MetricValue', 'CalcPrice', 'CalcGain', 'CalcPeriod', 'FwdPeriod', 'FwdGain']]
        results_container[0] = results_df
        with ticker_list_output:
            print(f"Analyzing from {actual_viz_start_date} to {actual_viz_end_date}.")
            print(f"  - Ranking based on performance from {actual_viz_start_date} to {actual_calc_end_date}.")
            pprint.pprint(tickers_to_display, width=120, compact=True)

    fig.update_layout(title_text='Walk-Forward Performance Analysis', xaxis_title='Date',
                      yaxis_title='Normalized Price (Start = 1)', hovermode='x unified',
                      legend_title_text='Tickers (Ranked)', height=700, margin=dict(t=50),
                      shapes=[baseline_shape])
    update_button.on_click(update_plot)
    controls_row1 = widgets.HBox([start_date_picker, calc_period_dropdown, fwd_period_dropdown])
    controls_row2 = widgets.HBox([metric_dropdown, rank_start_dropdown, rank_end_dropdown, update_button])
    ui_container = widgets.VBox([controls_row1, controls_row2, ticker_list_output], layout=widgets.Layout(margin='10px 0 20px 0'))
    display(ui_container, fig)
    update_plot(None)  # initial render with the default selections
    return results_container


In [None]:
# --- Example Usage of the Walk-Forward Analyzer ---
# The start date can be given as a date string (as here) or as a datetime.date,
# e.g. date(2024, 9, 25); leave it out to use the function's default.
start_of_year = '2023-01-01'

# Build the interactive chart. The return value is a one-element list whose
# first item is the results DataFrame of the most recent successful update.
walk_forward_results = plot_walk_forward_analyzer(
    df_OHLCV,
    default_start_date=start_of_year,
    default_calc_period='6M',
    default_fwd_period='1W',
    default_metric='Sharpe (ATR)',
    default_rank_start=1,
    default_rank_end=10
)

In [None]:
# Element 0 of the returned list is the results DataFrame; list its columns.
print(walk_forward_results[0].columns)
# Note: this displays the one-element list container itself, not just the DataFrame inside it.
walk_forward_results

In [None]:
# Analysis parameters, kept in variables so the verification cell further down
# can be called with exactly the same values.
_start_date = '2023-01-01'
_calc_period = '3M'
_fwd_period = '1W'
_metric = 'Sharpe (ATR)'
_rank_start = 20
_rank_end = 30

# Run the analyzer with the parameters above; element 0 of the returned list
# holds the results DataFrame.
walk_forward_results = plot_walk_forward_analyzer(
    df_OHLCV,
    default_start_date=_start_date, # Using a specific date for reproducibility
    default_calc_period=_calc_period,
    default_fwd_period=_fwd_period,
    default_metric=_metric,
    default_rank_start=_rank_start,
    default_rank_end=_rank_end,
)


In [None]:
# Show the results DataFrame produced by the most recent successful chart update.
walk_forward_results[0]

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
from IPython.display import display, Markdown

def verify_walk_forward_calculation(df_ohlcv,
                                    ticker,
                                    start_date,
                                    calc_period,
                                    fwd_period,
                                    export_csv=False):
    """
    Print a step-by-step breakdown of the walk-forward calculations for ONE ticker.

    The report reproduces, for a single ticker and time slice, the values the
    walk-forward analyzer reports: 'CalcPrice', 'CalcGain', the three metric
    values ('Price', 'Sharpe', 'Sharpe (ATR)') and 'FwdGain'.

    Args:
        df_ohlcv: DataFrame indexed by (Ticker, Date) with the columns
            'Adj Close', 'Adj High', 'Adj Low' and, for the CSV export, 'Volume'.
        ticker: Ticker symbol to verify.
        start_date: Start of the calculation period (anything ``pd.to_datetime`` accepts).
        calc_period: Calculation period key: '1M', '3M', '6M' or '1Y'.
        fwd_period: Forward period key: '0D', '1W', '2W', '1M' or '3M'.
        export_csv: When True, write all intermediate series to
            ``verification_<ticker>_<YYYYMMDD>.csv`` in the working directory.

    Returns:
        None. The report is displayed / printed. The function returns early
        (after printing an error) when the ticker is unknown or the calculation
        period holds fewer than two closing prices.

    Raises:
        KeyError: If ``calc_period`` or ``fwd_period`` is not a known period key.
    """
    display(Markdown(f"## Verification Report for Ticker: `{ticker}`"))
    period_options = {
        '1M': pd.DateOffset(months=1),
        '3M': pd.DateOffset(months=3),
        '6M': pd.DateOffset(months=6),
        '1Y': pd.DateOffset(years=1),
        '0D': pd.DateOffset(days=0),
        '1W': pd.DateOffset(weeks=1),
        '2W': pd.DateOffset(weeks=2),
    }
    try:
        df_ticker = df_ohlcv.loc[ticker].sort_index().copy()
        df_ticker.columns = df_ticker.columns.str.strip()
    except KeyError:
        print(f"---! ERROR: Ticker '{ticker}' not found in the DataFrame. !---")
        return

    adj_close_col = 'Adj Close'
    adj_high_col = 'Adj High'
    adj_low_col = 'Adj Low'

    for period_key in (calc_period, fwd_period):
        if period_key not in period_options:
            raise KeyError(f"Unknown period '{period_key}'; expected one of {list(period_options)}")

    start_date_ts = pd.to_datetime(start_date)
    calc_end_date_ts = start_date_ts + period_options[calc_period]
    fwd_end_date_ts = calc_end_date_ts + period_options[fwd_period]
    display(Markdown(f"**Analysis Start Date:** `{start_date_ts.date()}`"))
    display(Markdown(f"**Requested Calculation Period:** `{start_date_ts.date()}` to `{calc_end_date_ts.date()}` ({calc_period})"))
    display(Markdown(f"**Requested Forward Period:**   `{calc_end_date_ts.date()}` to `{fwd_end_date_ts.date()}` ({fwd_period})"))

    # ------------------------------------------------------------------
    # A. Calculation period
    # ------------------------------------------------------------------
    display(Markdown("### A. Calculation Period Analysis ('In-Sample')"))
    calc_df = df_ticker.loc[start_date_ts:calc_end_date_ts].copy()

    # At least two closing prices are needed to form a return.
    if calc_df[adj_close_col].notna().sum() < 2:
        print("\n---! ERROR: Not enough data points (less than 2) in the Calculation Period to proceed. !---")
        return

    calc_end_ts = calc_df.index.max()
    actual_calc_start_date = calc_df.index.min().date()
    actual_calc_end_date = calc_end_ts.date()
    display(Markdown(f"**Actual Dates Used:** `{actual_calc_start_date}` to `{actual_calc_end_date}` ({len(calc_df)} trading days)"))

    format_dict = {col: '{:,.2f}' for col in calc_df.columns if 'Adj' in col or 'Open' in col}
    format_dict['Volume'] = '{:,}'
    display(pd.concat([calc_df.head(3), calc_df.tail(3)]).style.format(format_dict))

    calc_start_price = calc_df[adj_close_col].bfill().iloc[0]
    calc_end_price = calc_df[adj_close_col].ffill().iloc[-1]
    calc_gain = (calc_end_price / calc_start_price) - 1
    display(Markdown("#### `CalcPrice` and `CalcGain` Verification:"))
    print(f"  - Calc Start Price (on {calc_df[adj_close_col].first_valid_index().date()}): ${calc_start_price:.2f}")
    print(f"  - Calc End Price   (on {calc_df[adj_close_col].last_valid_index().date()}): ${calc_end_price:.2f}  <-- This is 'CalcPrice'")
    print(f"  - CalcGain = (${calc_end_price:.2f} / ${calc_start_price:.2f}) - 1 = {calc_gain:.2%}")

    display(Markdown("#### `MetricValue` Verification:"))
    price_metric = calc_end_price / calc_start_price
    print("\n1. Price Metric:")
    print("   - Formula: Last Price / First Price")
    print(f"   - Value: ${calc_end_price:.2f} / ${calc_start_price:.2f} = {price_metric:.4f}")

    daily_returns = calc_df[adj_close_col].bfill().ffill().pct_change()
    mean_return = daily_returns.mean()
    std_return = daily_returns.std()
    sharpe_ratio = (mean_return / std_return * np.sqrt(252)) if std_return > 0 else 0
    print("\n2. Sharpe Ratio Metric:")
    print(f"   - Mean Daily Return: {mean_return:.6f}")
    print(f"   - Std Dev Daily Return: {std_return:.6f}")
    print("   - Formula: (Mean / Std Dev) * sqrt(252)")
    print(f"   - Value: ({mean_return:.6f} / {std_return:.6f}) * {np.sqrt(252):.2f} = {sharpe_ratio:.4f}")

    # Previous close is taken from the ticker's FULL history and only then cut
    # to the calculation window, so the first row of the window still has a
    # previous close whenever earlier data exists. Shifting inside the window
    # would leave the first True Range as NaN and change the ATR recursion.
    prev_close = df_ticker[adj_close_col].shift(1).loc[start_date_ts:calc_end_date_ts]
    high_low = calc_df[adj_high_col] - calc_df[adj_low_col]
    high_prev_close = (calc_df[adj_high_col] - prev_close).abs()
    low_prev_close = (calc_df[adj_low_col] - prev_close).abs()
    tr = np.maximum(high_low, np.maximum(high_prev_close, low_prev_close))
    atr = tr.ewm(alpha=1/14, adjust=False).mean()
    atrp_series = atr / calc_df[adj_close_col]
    atrp_mean = atrp_series.mean()
    sharpe_atr = (mean_return / atrp_mean) if atrp_mean > 0 else 0
    print("\n3. Sharpe (ATR) Metric:")
    print(f"   - Mean Daily Return: {mean_return:.6f} (same as above)")
    print(f"   - Average ATR Percent (ATRP): {atrp_mean:.6f}")
    print("   - Formula: Mean Daily Return / ATRP")
    print(f"   - Value: {mean_return:.6f} / {atrp_mean:.6f} = {sharpe_atr:.4f}")

    # ------------------------------------------------------------------
    # B. Forward period (starts on the last calculation day)
    # ------------------------------------------------------------------
    display(Markdown("\n### B. Forward Period Analysis ('Out-of-Sample')"))
    fwd_df = df_ticker.loc[calc_end_ts:fwd_end_date_ts].copy()
    # The first forward row is the calculation end day itself, so at least
    # two rows are needed for a real forward observation.
    has_fwd_data = len(fwd_df) >= 2
    if has_fwd_data:
        actual_fwd_start_date = fwd_df.index.min().date()
        actual_fwd_end_date = fwd_df.index.max().date()
        display(Markdown(f"**Actual Dates Used:** `{actual_fwd_start_date}` to `{actual_fwd_end_date}` ({len(fwd_df)} trading days)"))
        fwd_end_price = fwd_df[adj_close_col].ffill().iloc[-1]
    else:
        display(Markdown("**No valid data found in the Forward Period.**"))
        fwd_end_price = calc_end_price  # yields a forward gain of 0
    fwd_start_price = calc_end_price
    fwd_gain = (fwd_end_price / fwd_start_price) - 1

    display(Markdown("#### `FwdGain` Verification:"))
    print(f"  - Fwd Start Price (same as Calc End Price): ${fwd_start_price:.2f}")
    if has_fwd_data:
        print(f"  - Fwd End Price   (on {fwd_df[adj_close_col].last_valid_index().date()}): ${fwd_end_price:.2f}")
    else:
        print(f"  - Fwd End Price   (No fwd data, using start price): ${fwd_end_price:.2f}")
    print(f"  - FwdGain = (${fwd_end_price:.2f} / ${fwd_start_price:.2f}) - 1 = {fwd_gain:.2%}")

    # ------------------------------------------------------------------
    # C. Summary
    # ------------------------------------------------------------------
    display(Markdown("### C. Final Summary Table"))
    summary_data = {
        'Metric': ['End of Calc Price', 'Calc Period Gain', 'Forward Period Gain', '---',
                   'Metric: Price', 'Metric: Sharpe', 'Metric: Sharpe (ATR)'],
        'Calculated Value': [f"${calc_end_price:.2f}", f"{calc_gain:.2%}", f"{fwd_gain:.2%}", '---',
                             f"{price_metric:.4f}", f"{sharpe_ratio:.4f}", f"{sharpe_atr:.4f}"],
        'Corresponds To DataFrame Column': ['`CalcPrice`', '`CalcGain`', '`FwdGain`', '---',
                                            '`MetricValue`', '`MetricValue`', '`MetricValue`'],
    }
    summary_df = pd.DataFrame(summary_data)
    display(summary_df)

    if export_csv:
        # Attach every intermediate series to the calculation rows.
        calc_df['Period'] = 'Calculation'
        calc_df['Daily_Return'] = daily_returns
        calc_df['High_Minus_Low'] = high_low
        calc_df['Abs_High_vs_PrevClose'] = high_prev_close
        calc_df['Abs_Low_vs_PrevClose'] = low_prev_close
        calc_df['True_Range'] = tr
        calc_df['ATR_14'] = atr
        calc_df['ATRP'] = atrp_series
        fwd_df['Period'] = 'Forward'
        # Skip the first forward row: it is the calculation end day, already present in calc_df.
        combined_df = pd.concat([calc_df, fwd_df.iloc[1:]])
        ordered_cols = ['Period', adj_close_col, 'Daily_Return', adj_high_col, adj_low_col,
                        'High_Minus_Low', 'Abs_High_vs_PrevClose', 'Abs_Low_vs_PrevClose',
                        'True_Range', 'ATR_14', 'ATRP', 'Volume']
        remaining_cols = [col for col in combined_df.columns if col not in ordered_cols]
        final_cols = ordered_cols + remaining_cols
        filename = f"verification_{ticker}_{start_date_ts.strftime('%Y%m%d')}.csv"
        combined_df[final_cols].to_csv(filename, float_format='%.8f')
        print(f"\n✅ Detailed data for '{ticker}' exported to '{filename}'")


In [None]:
# When True, the verification also writes its intermediate series to a CSV file.
_export_csv = True

# Pick the ticker in the 10th row (position 9) of the results table.
# NOTE(review): this is not the top-ranked ticker; with the ranks 20-30 selected
# above it would be rank 29 - confirm that this is the intended row.
_verify_ticker = walk_forward_results[0].index[9] 

# Re-compute that ticker's numbers with the same parameters used for the analyzer run
verify_walk_forward_calculation(
    df_ohlcv=df_OHLCV,
    ticker=_verify_ticker,
    start_date=_start_date,
    calc_period=_calc_period,
    fwd_period=_fwd_period,
    export_csv=_export_csv  # True also exports the step-by-step data as a CSV file
)


In [None]:
# Show the last 50 stored rows for the verified ticker.
# NOTE(review): df_OHLCV is not re-sorted here; if dates are stored newest-first
# these are the OLDEST 50 rows - confirm which end is intended.
df_OHLCV.loc[_verify_ticker].tail(50)

### **J. Welles Wilder Jr.'s definition of ATR, which captures overnight gaps**

Let's be precise:

*   **We WILL calculate** the **True Range (TR)** for each day, which is the maximum of:
    1.  `(Today's High - Today's Low)`
    2.  `abs(Today's High - Yesterday's Close)`
    3.  `abs(Today's Low - Yesterday's Close)`
*   Then, we will typically calculate the **Average True Range (ATR)**, which is an **Exponential Moving Average** of this True Range value. This is the standard industry indicator for volatility that includes intraday and overnight movement.

### **Return/ATR Calculation**

You are correct that the ATR calculation should be tied to the `Period` dropdown, but here's how we'll do it in a standard, robust way:

**Step 1: Pre-computation (Done once for all stocks)**

First, for the entire historical dataset, we will calculate a standard **14-day Average True Range (ATR)**. This is the industry standard lookback period defined by Wilder. This step creates a new, continuous ATR data series for every single stock, just like we have an 'Adj Close' series.

**Step 2: On-the-Fly Calculation (When you click "Update Chart")**

This is where your interpretation comes in. When you make your selections in the dropdowns (e.g., `Metric: Return/ATR`, `Period: 3M`):

1.  We slice the **daily returns** data to get only the last **3 months**.
2.  We also slice the pre-computed **14-day ATR** data to get only the last **3 months**.
3.  We then calculate:
    *   **Numerator:** The `mean()` of the daily returns over those 3 months.
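    *   **Denominator:** The `mean()` of the ATR values over those 3 months. (In the notebook's code each ATR value is first divided by that day's close, so the denominator is the mean ATR *percent*, `ATRP`.)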
4.  The final score for the new metric is `Numerator / Denominator`.

**In short: The `Period` dropdown defines the window of data we analyze, not the lookback period of the ATR indicator itself.**

This is the best practice because it ensures we are always comparing apples to apples. We're using a consistent measure of volatility (14-day ATR) and seeing how it behaves over different analysis windows (1D, 3M, 1Y, etc.).

### ATR_14 Calculation  `pandas.ewm(..., adjust=False)`
You have hit upon the most subtle, confusing, and important aspect of how pandas calculates Exponential Moving Averages.

Your thinking is perfectly logical, and your calculation of the 14-period simple average (`4.33614...`) is **100% correct.** The reason it doesn't match the CSV is the key detail.

The discrepancy arises because the method we are using, `pandas.ewm(..., adjust=False)`, **does not use a simple average as a "seed" value.** This is a very common misconception because the "SMA seed" method is often taught for manual calculation.

Instead, `ewm(adjust=False)` applies the exponential smoothing formula **recursively from the very first data point.**

### How `ewm(adjust=False)` Actually Works

Let's trace it with your data.
`alpha = 1/14` and `1 - alpha = 13/14`.

1.  **First Valid `True_Range` (Jan 4):** `4.283`
    *   The `ewm` starts here. The first ATR is simply the first value.
    *   **ATR on Jan 4 = 4.283**. (Matches your CSV)

2.  **Second Valid `True_Range` (Jan 5):** `3.956`
    *   `ATR = (Current_TR * 1/14) + (Previous_ATR * 13/14)`
    *   `ATR = (3.956 * 1/14) + (4.283 * 13/14)`
    *   `ATR = 0.2825714 + 3.9770714`
    *   **ATR on Jan 5 = 4.2596428...** (agrees with `4.25964...` in your CSV). This is a match.