In [2]:
import pandas as pd
import os

# === CONFIG ===
# `path` is the input price table; `out_dir` receives the generated CSVs.
path = r"C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\df_sp500.csv"
out_dir = r"C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\Alpha360"
os.makedirs(out_dir, exist_ok=True)

# === LOAD ===
# Order rows by ticker, then by date, so the cumulative and rolling windows
# below walk each ticker's rows in chronological order.
df = pd.read_csv(path, parse_dates=["date"]).sort_values(["stock_code", "date"])

# === Volume-weighted prices (close is used as the price) ===
ticker = df["stock_code"]
traded_value = df["close"] * df["volume"]

# Running VWAP per ticker: cumulative traded value over cumulative volume.
# NOTE(review): "vwap" is not read anywhere in the visible code — confirm
# whether a later cell needs it.
cum_value = traded_value.groupby(ticker).cumsum()
cum_volume = df["volume"].groupby(ticker).cumsum()
df["vwap"] = cum_value / cum_volume

# VWAP2: same ratio over a 2-row rolling window per ticker. The group level
# added by groupby().rolling() is dropped so the result aligns with df's index.
window_value = traded_value.groupby(ticker).rolling(2).sum().reset_index(level=0, drop=True)
window_volume = df["volume"].groupby(ticker).rolling(2).sum().reset_index(level=0, drop=True)
df["vwap2"] = window_value / window_volume

# === Pivot into wide panel ===
def pivot_feature(feature, frame=None):
    """Reshape one column of a long price table into a date x ticker panel.

    Args:
        feature: Name of the column whose values fill the panel.
        frame: Long-format DataFrame containing "date", "stock_code" and
            the ``feature`` column. Defaults to the module-level ``df``
            so existing one-argument calls keep working.

    Returns:
        DataFrame indexed by "date" (sorted ascending) with one column per
        "stock_code" value.

    Raises:
        ValueError: Propagated from ``DataFrame.pivot`` when a
            (date, stock_code) pair occurs more than once.
    """
    source = df if frame is None else frame
    wide = source.pivot(index="date", columns="stock_code", values=feature)
    return wide.sort_index()

# === Generate and save lag CSVs ===
def expand_and_save(feature_name, n_lags=60, output_dir=None):
    """Write one CSV per lag of a feature's date x ticker panel.

    For each ``lag`` in ``range(n_lags)`` the panel is shifted down by
    ``lag`` rows (lag 0 is the unshifted panel) and saved as
    ``<FEATURE><lag>.csv`` with the index column labelled "datetime".

    Args:
        feature_name: Column to pivot; its upper-cased name prefixes
            every output file.
        n_lags: Number of lagged files to write. Defaults to 60, the
            value previously hard-coded here.
        output_dir: Directory that receives the files. Defaults to the
            module-level ``out_dir``.
    """
    target_dir = out_dir if output_dir is None else output_dir
    panel = pivot_feature(feature_name)
    prefix = feature_name.upper()
    for lag in range(n_lags):
        out_path = os.path.join(target_dir, f"{prefix}{lag}.csv")
        # shift(lag) moves values down `lag` rows, so each date's row holds
        # the value from `lag` rows earlier; the leading rows become NaN.
        panel.shift(lag).to_csv(out_path, index_label="datetime")

# === Run for each feature ===
# Columns exported as lagged panels; "vwap2" is the derived column, the
# rest are read straight from the input table.
feature_columns = ("open", "high", "low", "close", "volume", "vwap2")
for col in feature_columns:
    expand_and_save(col)

print("Done. Files match Stockformer CSV layout.")


Done. Files match Stockformer CSV layout.
