In [1]:
#!/usr/bin/env python3
import os
import pandas as pd
import numpy as np
from datetime import datetime

"""
Purpose:
    Compute ATR for multiple lookback windows (simple rolling mean of True Range)
    and save outputs into per-window subdirectories.

Outputs:
    OUTPUT_ROOT/atr_{W}D/{ticker}.parquet  with columns: date, tr, atr{W}
    VER_ROOT/atr_{W}D/atr_validation-<timestamp>.csv (per-window)
    VER_ROOT/atr_multi_validation-<timestamp>.csv (combined)
"""

# ============================================================
# CONFIG
# ============================================================

# Directory holding one adjusted-OHLC parquet file per ticker.
INPUT_DIR = "./3-adjusted_All_Prices_OHLC"
# Root for ATR outputs; per-window subdirectories are created beneath it.
OUTPUT_ROOT = "./4-ATR20_adjusted_All_Prices"  # keep existing dir as root
# Root for the validation CSV logs.
VER_ROOT = "./system_verification/4-ATR20_adjusted_All_Prices"

# Make sure both roots exist before anything is written.
for _root in (OUTPUT_ROOT, VER_ROOT):
    os.makedirs(_root, exist_ok=True)

# 5 reasonable ATR windows (trading days)
ATR_WINDOWS = [14, 20, 28, 42, 63]

# Columns every input file must provide for True Range to be computed.
REQUIRED = {"open_adj", "high_adj", "low_adj", "close_adj", "date"}

print(f"\nLoading adjusted price files from: {INPUT_DIR}")
# One parquet file per ticker; sorted so the processing order is deterministic.
files = sorted(name for name in os.listdir(INPUT_DIR) if name.endswith(".parquet"))
print(f"Found {len(files)} tickers.\n")

# ============================================================
# VALIDATION STORAGE
# ============================================================

# Every validation finding reported during this run, in reporting order.
validation_rows = []


def add_validation(ticker, window, issue, detail=""):
    """Record one validation finding and echo it to stdout.

    Args:
        ticker: Symbol the finding belongs to.
        window: ATR lookback the finding belongs to; stored as ``int(window)``.
        issue: Short identifier of the problem.
        detail: Optional free-text detail (defaults to an empty string).

    The finding is appended to the module-level ``validation_rows`` list as a
    dict with keys ``ticker``, ``window``, ``issue`` and ``detail``.
    """
    validation_rows.append(
        dict(ticker=ticker, window=int(window), issue=issue, detail=detail)
    )
    print(f"⚠ VALIDATION: {ticker} | W={window} | {issue} | {detail}")

# ============================================================
# ATR BASE COMPUTATION (TR only, once per ticker)
# ============================================================

def compute_true_range(df: pd.DataFrame, ticker: str) -> pd.DataFrame | None:
    """Return a date-ordered copy of ``df`` with ``prev_close`` and ``tr`` added.

    True Range is the largest of ``high - low``, ``|high - prev_close|`` and
    ``|low - prev_close|``. The first row has no previous close, so its TR is
    NaN (``np.maximum`` propagates NaN).

    Args:
        df: Price frame; must contain every column named in ``REQUIRED``.
        ticker: Symbol used to tag validation findings.

    Returns:
        The augmented copy with a fresh 0..n-1 index, or ``None`` when required
        columns are missing (a ``missing_columns`` finding is logged).
    """
    # --- Required columns ---
    absent = REQUIRED - set(df.columns)
    if absent:
        add_validation(ticker, -1, "missing_columns", str(absent))
        return None

    # --- Date validation / sorting (on a copy; the caller's frame is untouched) ---
    frame = df.copy()
    frame["date"] = pd.to_datetime(frame["date"])
    if not frame["date"].is_monotonic_increasing:
        add_validation(ticker, -1, "non_monotonic_dates", "sorting by date")
        frame = frame.sort_values("date")
    frame = frame.reset_index(drop=True)

    # --- Basic sanity checks (reported only; rows are kept) ---
    bad_close = frame["close_adj"] <= 0
    if bad_close.any():
        add_validation(ticker, -1, "nonpositive_close_adj", f"{int(bad_close.sum())} rows")

    # TRUE RANGE requires prev close
    frame["prev_close"] = frame["close_adj"].shift(1)
    high = frame["high_adj"]
    low = frame["low_adj"]
    prior = frame["prev_close"]

    bar_range = (high - low).astype(float)
    gap_from_high = (high - prior).abs().astype(float)
    gap_from_low = (low - prior).abs().astype(float)

    frame["tr"] = np.maximum(np.maximum(bar_range, gap_from_high), gap_from_low)

    negative_tr = frame["tr"] < 0
    if negative_tr.any():
        add_validation(ticker, -1, "negative_TR_detected", f"{int(negative_tr.sum())} rows")

    return frame

# ============================================================
# MAIN LOOP
# ============================================================

# per-window validation collection (so we can also write window-specific logs)
validation_by_window = {w: [] for w in ATR_WINDOWS}

# Create the per-window output / verification directories once, up front.
# They do not depend on the ticker, and the per-window validation CSVs are
# written into the verification directories even when no ticker reaches the
# window loop (empty input dir, or every file failing validation).
window_out_dirs = {}
for W in ATR_WINDOWS:
    out_dir = os.path.join(OUTPUT_ROOT, f"atr_{W}D")
    ver_dir = os.path.join(VER_ROOT, f"atr_{W}D")
    os.makedirs(out_dir, exist_ok=True)
    os.makedirs(ver_dir, exist_ok=True)
    window_out_dirs[W] = out_dir

for file in files:
    # Strip only the trailing extension; str.replace would also remove a
    # ".parquet" occurring in the middle of a file name.
    ticker = file.removesuffix(".parquet")
    in_path = os.path.join(INPUT_DIR, file)

    print(f"Processing {ticker}...")

    # Per-ticker boundary: any failure is logged as a validation row and the
    # run continues with the next ticker.
    try:
        raw = pd.read_parquet(in_path)
        base = compute_true_range(raw, ticker)

        if base is None:
            print(f"❌ {ticker}: TR not computed (validation issue).\n")
            continue

        n = len(base)
        n_valid_tr = int(base["tr"].notna().sum())

        # max() is NaT for an empty frame (or all-NaT dates); NaT has no usable
        # .date(), so guard before formatting.
        last_ts = base["date"].max()
        last_date = last_ts.date() if pd.notna(last_ts) else None

        saved = 0

        # Compute and save each ATR window into its own subdir
        for W in ATR_WINDOWS:
            atr_col = f"atr{W}"
            atr = base["tr"].rolling(window=W, min_periods=W).mean()

            # The first TR is NaN (no previous close), so W rows are NOT enough
            # for a single ATR value. Test the result itself: if no window held
            # W valid TR values, report it instead of writing an all-NaN file.
            if not atr.notna().any():
                add_validation(
                    ticker, W, "insufficient_history",
                    f"{n} rows ({n_valid_tr} with valid TR), "
                    f"no complete {W}-row window, last={last_date}"
                )
                validation_by_window[W].append(validation_rows[-1])
                continue

            # Optional: detect absurd ATR spikes (window-specific)
            if n > (W + 10):
                median_atr = float(np.nanmedian(atr.values))
                if median_atr > 0:
                    max_atr = float(np.nanmax(atr.values))
                    if max_atr > median_atr * 20:
                        add_validation(
                            ticker, W, "ATR_spike_detected",
                            f"max {atr_col}={max_atr:.4f}, median={median_atr:.4f}"
                        )
                        validation_by_window[W].append(validation_rows[-1])

            df_out = base[["date", "tr"]].copy()
            df_out[atr_col] = atr.values

            out_path = os.path.join(window_out_dirs[W], f"{ticker}.parquet")
            df_out.to_parquet(out_path, index=False)
            saved += 1

        # Report the number of windows actually written, not the number attempted.
        print(f"✔ {ticker}: saved {saved} ATR lookbacks\n")

    except Exception as e:
        add_validation(ticker, -1, "processing_error", str(e))
        print(f"❌ ERROR for {ticker}: {e}\n")

# ============================================================
# SAVE VALIDATION REPORTS
# ============================================================

# Timestamp shared by every report file written in this run.
ts = datetime.now().strftime("%Y%m%d-%H%M%S")

# Pass the columns explicitly: with zero validation rows, pd.DataFrame([]) has
# no columns at all and the per-window filter on "window" below would raise
# KeyError on an otherwise clean run.
validation_df = pd.DataFrame(
    validation_rows, columns=["ticker", "window", "issue", "detail"]
)

# Combined validation file
os.makedirs(VER_ROOT, exist_ok=True)
val_file_all = os.path.join(VER_ROOT, f"atr_multi_validation-{ts}.csv")
validation_df.to_csv(val_file_all, index=False)

# Per-window validation files (only rows for that window; ticker-level rows
# logged with window == -1 appear only in the combined file).
val_files_by_window = {}
for W in ATR_WINDOWS:
    df_w = validation_df[validation_df["window"] == W].copy()
    ver_dir_w = os.path.join(VER_ROOT, f"atr_{W}D")
    # Do not rely on an earlier step having created the directory.
    os.makedirs(ver_dir_w, exist_ok=True)
    val_file_w = os.path.join(ver_dir_w, f"atr_validation-{ts}.csv")
    df_w.to_csv(val_file_w, index=False)
    val_files_by_window[W] = val_file_w

print("\n====================================================")
print(" MULTI-ATR VALIDATION SUMMARY")
print("====================================================")
if validation_df.empty:
    print("No validation issues detected.")
else:
    print(validation_df)

print("\nValidation logs saved to:")
print(f"  → {val_file_all}")
for W in ATR_WINDOWS:
    print(f"  → {val_files_by_window[W]}")
print("====================================================\n")



Loading adjusted price files from: ./3-adjusted_All_Prices_OHLC
Found 1167 tickers.

Processing A...
✔ A: saved 5 ATR lookbacks

Processing AAL...
✔ AAL: saved 5 ATR lookbacks

Processing AAMRQ...
✔ AAMRQ: saved 5 ATR lookbacks

Processing AAP...
✔ AAP: saved 5 ATR lookbacks

Processing AAPL...
⚠ VALIDATION: AAPL | W=14 | ATR_spike_detected | max atr14=13.8029, median=0.2692
⚠ VALIDATION: AAPL | W=20 | ATR_spike_detected | max atr20=11.3977, median=0.2763
⚠ VALIDATION: AAPL | W=28 | ATR_spike_detected | max atr28=9.9184, median=0.2817
⚠ VALIDATION: AAPL | W=42 | ATR_spike_detected | max atr42=8.9032, median=0.2811
⚠ VALIDATION: AAPL | W=63 | ATR_spike_detected | max atr63=7.8908, median=0.2798
✔ AAPL: saved 5 ATR lookbacks

Processing ABBV...
✔ ABBV: saved 5 ATR lookbacks

Processing ABI1...
⚠ VALIDATION: ABI1 | W=14 | ATR_spike_detected | max atr14=16.5366, median=0.7155
⚠ VALIDATION: ABI1 | W=20 | ATR_spike_detected | max atr20=15.1383, median=0.7150
✔ ABI1: saved 5 ATR lookbacks

P