In [1]:
import pandas as pd
import numpy as np

# ============================================================
# CONFIG
# ============================================================
# Everything a reader may want to tune lives here.
FILE: str = "AAPL_raw_prices.csv"   # input CSV; must provide `date` and `close` columns
WINDOW: int = 90                    # number of consecutive rows in each regression window
TRADING_DAYS: int = 250             # days per year used when compounding the daily rate

# ============================================================
# LOAD PRICE DATA
# ============================================================
# Read the raw CSV, parse `date` into datetimes, order the rows
# chronologically with a fresh 0..n-1 index, and expose the `close`
# column under the name `price`.
df = (
    pd.read_csv(FILE)
    .assign(date=lambda raw: pd.to_datetime(raw["date"]))
    .sort_values("date")
    .reset_index(drop=True)
    .rename(columns={"close": "price"})
)

# ============================================================
# ROLLING EXPLICIT STEP-BY-STEP REGRESSION
# ============================================================
# For every row that closes a complete span of WINDOW rows, fit
#     ln(price) = beta0 + beta1 * x,    x = 0..WINDOW-1
# by ordinary least squares, and keep every intermediate quantity in
# `records` so each result can be audited by hand.
#
# Relies on `df` having a 0..n-1 index (rows are addressed by position and
# by label interchangeably) and `date` / `price` columns.
if WINDOW < 2:
    # With fewer than two points var_x is 0 and the slope is undefined.
    raise ValueError(f"WINDOW must be at least 2 to fit a regression line, got {WINDOW}")

# 1. x = 0..WINDOW-1
#    The day index is identical for every window, so x, its mean and its
#    variance are computed once here rather than on every iteration.
x = np.arange(WINDOW, dtype=float)
x_mean = x.mean()
var_x = np.mean((x - x_mean)**2)

records = []

# Index WINDOW-1 is the first row with WINDOW rows of history behind it;
# earlier rows do not have enough data and are not visited at all.
for end_idx in range(WINDOW - 1, len(df)):
    start_idx = end_idx - WINDOW + 1

    window = df.iloc[start_idx:end_idx+1]
    prices = window["price"].values

    # 2. ln(price)
    y = np.log(prices)

    # 3. Mean of ln(price)
    y_mean = y.mean()

    # 4. Covariance of x and ln(price) (population form, matching var_x)
    cov_xy = np.mean((x - x_mean) * (y - y_mean))

    # 5. Regression coefficients
    beta1 = cov_xy / var_x
    beta0 = y_mean - beta1 * x_mean

    # 6. Predicted ln(price)
    y_hat = beta0 + beta1 * x

    # 7. R²
    #    A perfectly flat window has no variance to explain and is scored 1.
    #    Flatness is also checked on y directly, because rounding in y_mean
    #    can leave ss_tot at a tiny non-zero value for identical prices, which
    #    would make the ratio below meaningless.
    ss_tot = np.sum((y - y_mean)**2)
    ss_res = np.sum((y - y_hat)**2)
    if ss_tot == 0 or y.min() == y.max():
        r2 = 1.0
    else:
        r2 = 1 - ss_res / ss_tot

    # 8. Convert slope to daily growth multiplier
    daily_mult = np.exp(beta1)

    # 9. Annualize by compounding over TRADING_DAYS
    annual_mult = daily_mult ** TRADING_DAYS
    annual_slope = annual_mult - 1

    # 10. Adjusted slope = annual_slope × r2 (per the book)
    adjusted_slope = annual_slope * r2

    # Save row.
    # .tolist() converts to built-in floats, so the stringified lists in the
    # CSV stay plain numbers regardless of how NumPy prints its scalar types.
    records.append({
        "date": df.loc[end_idx, "date"],
        "price": df.loc[end_idx, "price"],
        "start_date": window["date"].iloc[0],
        "end_date": df.loc[end_idx, "date"],
        "day_index": x.tolist(),
        "ln_price": y.tolist(),
        "x_mean": x_mean,
        "y_mean": y_mean,
        "cov_xy": cov_xy,
        "var_x": var_x,
        "beta1": beta1,
        "beta0": beta0,
        "daily_mult": daily_mult,
        "annual_mult": annual_mult,
        "annual_slope": annual_slope,
        "r2": r2,
        "adjusted_slope": adjusted_slope
    })

# ============================================================
# SAVE FULL EXPLICIT REGRESSION TABLE
# ============================================================
# One row per regression window; the positional index is not written.
out = pd.DataFrame(data=records)
out.to_csv("AAPL_explicit_regression_debug.csv", index=False)

# Short report: confirmation line, row count, and the first five rows.
print("\n\nSaved explicit full regression breakdown → AAPL_explicit_regression_debug.csv")
print("Rows:", out.shape[0])
print(out.iloc[:5])




Saved explicit full regression breakdown → AAPL_explicit_regression_debug.csv
Rows: 6930
        date  price start_date   end_date  \
0 1998-05-11  0.276 1997-12-31 1998-05-11   
1 1998-05-12  0.269 1998-01-02 1998-05-12   
2 1998-05-13  0.272 1998-01-05 1998-05-13   
3 1998-05-14  0.269 1998-01-06 1998-05-14   
4 1998-05-15  0.264 1998-01-07 1998-05-15   

                                           day_index  \
0  [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...   
1  [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...   
2  [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...   
3  [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...   
4  [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...   

                                            ln_price  x_mean    y_mean  \
0  [-2.145581344184381, -1.9310215365615626, -1.9...    44.5 -1.580076   
1  [-1.9310215365615626, -1.9519282213808764, -1....    44.5 -1.570825   
2  [-1.9519282213808764, -1.7778565640590636, -1....    44.5 -1.563836   
3  [-