In [11]:
import pandas as pd
import numpy as np
import joblib

# -------------------------
# PATHS
# -------------------------
TRAIN_PATH = "train.csv"
TEST_PATH = "test.csv"
OUT_PATH = "test_processed.csv"


# -------------------------
# LOAD DATA, SORT & IDS
# -------------------------
def _load_panel(csv_path):
    """Read one CSV and return it date-parsed, ordered, and tagged per series.

    The file must provide ``date``, ``store`` and ``item`` columns. Rows come
    back sorted by store, then item, then date, so each store/item series is
    contiguous and chronological. A ``product_id`` column of the form
    ``"<store>_<item>"`` is appended to identify the series.
    """
    frame = pd.read_csv(csv_path)
    frame["date"] = pd.to_datetime(frame["date"])
    frame = frame.sort_values(["store", "item", "date"])
    frame["product_id"] = (
        frame["store"].astype(str) + "_" + frame["item"].astype(str)
    )
    return frame


train = _load_panel(TRAIN_PATH)
test = _load_panel(TEST_PATH)

# -------------------------
# LAST KNOWN HISTORY
# -------------------------
# `train` is already ordered by store, item, date, so the tail of each
# product group is that product's most recent 14 training rows.
recent_rows = train.groupby("product_id").tail(14)

# One row per product_id, with a single "mean" column holding the average
# sales over those most recent rows.
last_history = (
    recent_rows.groupby("product_id")["sales"].mean().to_frame("mean")
)

# -------------------------
# TIME FEATURES
# -------------------------
# Calendar features derived purely from the test dates.
test_dates = test["date"].dt

test["day_of_week"] = test_dates.dayofweek  # Monday=0 ... Sunday=6
# Saturday (5) and Sunday (6) are the only values >= 5.
test["is_weekend"] = (test["day_of_week"] >= 5).astype(int)
test["month"] = test_dates.month

# -------------------------
# ROLLING FEATURES (FROM TRAIN)
# -------------------------
# The test period has no observed sales, so every test row of a product gets
# that product's trailing averages from the end of the training data.
#
# `last_history["mean"]` is the mean over each product's last 14 training
# rows, so it is only valid for the 14-row window. The 7-row window needs its
# own statistic; reusing the 14-row mean made both columns identical.
# `train` is sorted by store, item, date, so `tail(7)` picks the most recent
# 7 rows per product (assumes one row per product per day -- TODO confirm).
last_7_mean = (
    train.groupby("product_id")
         .tail(7)
         .groupby("product_id")["sales"]
         .mean()
)

# Products that never appear in `train` map to NaN in both columns.
test["rolling_mean_7"] = test["product_id"].map(last_7_mean)
test["rolling_mean_14"] = test["product_id"].map(last_history["mean"])

# -------------------------
# LAG FEATURES (APPROX)
# -------------------------
# True lagged sales are not available for the test rows, so each lag column
# is filled with a copy of a trailing-average column instead.
LAG_SOURCES = {
    "lag_1": "rolling_mean_7",
    "lag_7": "rolling_mean_7",
    "lag_14": "rolling_mean_14",
}

for lag_column, source_column in LAG_SOURCES.items():
    test[lag_column] = test[source_column]

# -------------------------
# INVENTORY FEATURE
# -------------------------
LEAD_TIME = 7
SAFETY_FACTOR = 1.2

# Average sales scaled by the lead time, then by the safety factor.
scaled_demand = test["rolling_mean_7"].mul(LEAD_TIME).mul(SAFETY_FACTOR)

# Rows without a rolling mean get 0; the integer cast truncates toward zero.
test["inventory_level"] = scaled_demand.fillna(0).astype(int)

# -------------------------
# FINAL DATASET
# -------------------------
# Feature columns, in the order they are written to the output file.
FEATURES = [
    "day_of_week", "is_weekend", "month",
    "lag_1", "lag_7", "lag_14",
    "rolling_mean_7", "rolling_mean_14",
    "inventory_level",
]

# The row identifier comes first, followed by the features.
output_columns = ["id", *FEATURES]
test_final = test[output_columns]

# -------------------------
# SAVE
# -------------------------
# Write the id + feature columns to OUT_PATH without the DataFrame index.
test_final.to_csv(OUT_PATH, index=False)

print("✅ Test data processed and saved")
print("Shape:", test_final.shape)


# NOTE(review): `model` is not defined anywhere in this cell; presumably it
# is the trained estimator left in the notebook session by an earlier cell.
# Running this code on its own would raise NameError here -- confirm.
# NOTE(review): the destination is an absolute /content/ path, which looks
# Colab-specific -- confirm before running elsewhere.
joblib.dump(model, "/content/demand_model.pkl")

print("✅ Model saved in Colab runtime")



✅ Test data processed and saved
Shape: (45000, 10)
✅ Model saved in Colab runtime
