In [11]:
import pandas as pd
import numpy as np

# -------------------------
# CONFIGURATION
# -------------------------
# Columns that identify one independent daily sales series.
GROUP_KEYS = ["store", "item"]
# How many days back each lag_<n> feature looks.
LAGS = (1, 7, 14)
# Window lengths (in rows) of the rolling_mean_<n> features.
# NOTE: 7 must stay in this tuple; inventory_level is built from rolling_mean_7.
ROLLING_WINDOWS = (7, 14)

# Multipliers of the inventory heuristic (rolling_mean_7 * LEAD_TIME * SAFETY_FACTOR).
LEAD_TIME = 7
SAFETY_FACTOR = 1.2


def add_features(df, lead_time=LEAD_TIME, safety_factor=SAFETY_FACTOR):
    """Return a feature-engineered copy of a store/item daily sales table.

    The input must contain the columns ``date``, ``store``, ``item`` and
    ``sales``. It is not modified.

    Added columns, in order: ``day_of_week``, ``is_weekend``, ``month``,
    ``lag_<n>`` for each value in ``LAGS``, ``rolling_mean_<n>`` for each
    value in ``ROLLING_WINDOWS``, and ``inventory_level``.

    Args:
        df: Raw sales rows; ``date`` may be strings or datetimes.
        lead_time: Multiplier applied to ``rolling_mean_7`` for the
            inventory heuristic.
        safety_factor: Second multiplier of the inventory heuristic.

    Returns:
        A new DataFrame sorted by store, item and date with a fresh
        0..n-1 index. Rows whose lag or rolling features are undefined
        (the first ``max(LAGS)`` rows of every series) are removed.
    """
    out = df.copy()
    out["date"] = pd.to_datetime(out["date"])

    # Sort (ABSOLUTELY REQUIRED): shift() and rolling() work on row order,
    # so every series has to be chronological before they are applied.
    out = out.sort_values(GROUP_KEYS + ["date"])

    # -------------------------
    # TIME FEATURES
    # -------------------------
    out["day_of_week"] = out["date"].dt.weekday
    out["is_weekend"] = out["day_of_week"].isin([5, 6]).astype(int)
    out["month"] = out["date"].dt.month

    sales_by_series = out.groupby(GROUP_KEYS)["sales"]

    # -------------------------
    # LAG FEATURES
    # -------------------------
    for lag in LAGS:
        out[f"lag_{lag}"] = sales_by_series.shift(lag)

    # -------------------------
    # ROLLING FEATURES
    # -------------------------
    # shift(1) keeps the current row's sales out of its own window, so the
    # rolling means use strictly past data, exactly like the lag features.
    # NOTE(review): this assumes `sales` is the value being predicted; an
    # unshifted window would then leak the target into the features.
    for window in ROLLING_WINDOWS:
        out[f"rolling_mean_{window}"] = sales_by_series.transform(
            lambda s, w=window: s.shift(1).rolling(w, min_periods=1).mean()
        )

    # -------------------------
    # DROP NA FROM LAGS
    # -------------------------
    # Only the history-based columns decide which rows go; a missing value
    # in an unrelated column must not silently remove a row.
    history_cols = [f"lag_{lag}" for lag in LAGS]
    history_cols += [f"rolling_mean_{window}" for window in ROLLING_WINDOWS]
    out = out.dropna(subset=history_cols).reset_index(drop=True)

    # -------------------------
    # INVENTORY FEATURE
    # -------------------------
    # Computed after the NaN rows are gone: astype(int) raises on NaN.
    # The cast truncates toward zero.
    out["inventory_level"] = (
        out["rolling_mean_7"] * lead_time * safety_factor
    ).astype(int)

    return out


# True both for `python <file>` and inside a notebook cell, so `train`
# is still defined at top level for any code that runs afterwards.
if __name__ == "__main__":
    # Load data
    train = pd.read_csv("train.csv")
    train = add_features(train)

    # Save
    train.to_csv("train_features.csv", index=False)

    print("✅ Feature engineering completed")
    print("Final shape:", train.shape)






✅ Feature engineering completed
Final shape: (906000, 13)
