In [1]:
import json
from pathlib import Path

import joblib
import numpy as np
import pandas as pd


In [2]:
# Directory holding the raw CSVs. The location is machine-specific, so it is
# defined once here; edit this single constant to run the notebook elsewhere.
DATA_DIR = Path(r"C:\Walmart-Demand-Forecasting-Supply-Chain-Optimization--Python--Power-BI\Data")

test = pd.read_csv(DATA_DIR / "test.csv")
features = pd.read_csv(DATA_DIR / "features.csv")
stores = pd.read_csv(DATA_DIR / "stores.csv")

# "Date" is a merge key further down; parse it in both frames so the keys
# compare as datetimes rather than as raw strings.
test["Date"] = pd.to_datetime(test["Date"])
features["Date"] = pd.to_datetime(features["Date"])

# Load saved model
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model artifacts you produced or otherwise trust.
model = joblib.load("model_gbr.pkl")

# Load feature column list (the column names, in order, fed to the model below)
with open("feature_cols.json", "r", encoding="utf-8") as f:
    feature_cols = json.load(f)

# Load lag starter table (per Store/Dept seed values for the lag features)
lag_table = pd.read_csv("last_sales_lags.csv")

print("Loaded: model, feature_cols, lag_table")


Loaded: model, feature_cols, lag_table


In [3]:
# Enrich every test row with its weekly store features and the static store
# metadata (both left joins, so no test row is dropped).
test_df = (
    test
    .merge(features, how="left", on=["Store", "Date"])
    .merge(stores, how="left", on="Store")
)

# Missing markdown values are replaced with 0.
markdown_cols = [f"MarkDown{k}" for k in range(1, 6)]
for name in markdown_cols:
    if name not in test_df.columns:
        continue
    test_df[name] = test_df[name].fillna(0)

# Any remaining gaps in int64/float64 columns are filled with that column's
# own median (computed on this test frame).
numeric_cols = test_df.select_dtypes(include=["int64", "float64"]).columns
for name in numeric_cols:
    column = test_df[name]
    test_df[name] = column.fillna(column.median())

# Calendar features derived from the row's date (Week is the ISO week number).
test_df = test_df.assign(
    Year=lambda d: d["Date"].dt.year,
    Month=lambda d: d["Date"].dt.month,
    Week=lambda d: d["Date"].dt.isocalendar().week.astype(int),
)

# One-hot encode Type with the first level dropped.
# NOTE(review): the original comment says "like before" -- presumably this
# mirrors the encoding used when the model was trained; confirm.
test_df = pd.get_dummies(test_df, columns=["Type"], drop_first=True)

test_df.head()


Unnamed: 0,Store,Dept,Date,IsHoliday_x,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,IsHoliday_y,Size,Year,Month,Week,Type_B,Type_C
0,1,1,2012-11-02,False,55.32,3.386,6766.44,5147.7,50.82,3639.9,2737.42,223.462779,6.573,False,151315,2012,11,44,False,False
1,1,1,2012-11-09,False,61.24,3.314,11421.32,3370.89,40.28,4646.79,6154.16,223.481307,6.573,False,151315,2012,11,45,False,False
2,1,1,2012-11-16,False,52.92,3.252,9696.28,292.1,103.78,1133.15,6612.69,223.512911,6.573,False,151315,2012,11,46,False,False
3,1,1,2012-11-23,True,56.23,3.211,883.59,4.17,74910.32,209.91,303.32,223.561947,6.573,True,151315,2012,11,47,False,False
4,1,1,2012-11-30,False,52.34,3.207,2460.03,0.0,3838.35,150.57,6966.34,223.610984,6.573,False,151315,2012,11,48,False,False


In [4]:
# Add any missing feature columns
for c in feature_cols:
    if c not in test_df.columns:
        test_df[c] = 0

# Keep only required columns
test_df = test_df.sort_values(["Store","Dept","Date"]).copy()
test_df.head()

Unnamed: 0,Store,Dept,Date,IsHoliday_x,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,...,Unemployment,IsHoliday_y,Size,Year,Month,Week,Type_B,Type_C,lag_1,lag_2
0,1,1,2012-11-02,False,55.32,3.386,6766.44,5147.7,50.82,3639.9,...,6.573,False,151315,2012,11,44,False,False,0,0
1,1,1,2012-11-09,False,61.24,3.314,11421.32,3370.89,40.28,4646.79,...,6.573,False,151315,2012,11,45,False,False,0,0
2,1,1,2012-11-16,False,52.92,3.252,9696.28,292.1,103.78,1133.15,...,6.573,False,151315,2012,11,46,False,False,0,0
3,1,1,2012-11-23,True,56.23,3.211,883.59,4.17,74910.32,209.91,...,6.573,True,151315,2012,11,47,False,False,0,0
4,1,1,2012-11-30,False,52.34,3.207,2460.03,0.0,3838.35,150.57,...,6.573,False,151315,2012,11,48,False,False,0,0


In [5]:
# Preview the lag starter table. Only the last expression of a cell is
# displayed, so a bare `lag_table.columns` above it showed nothing; the column
# names are visible as the header of this preview.
lag_table.head()


Unnamed: 0,Store,Dept,lag_1_start,lag_2_start
0,1,1,27390.81,24185.27
1,1,2,43134.88,42354.72
2,1,3,9350.9,8548.87
3,1,4,36292.6,35549.19
4,1,5,25846.94,20413.83


In [6]:
# Attach each Store-Dept pair's starting lag values to its test rows.
# `lag_table` is the frame read from last_sales_lags.csv in the loading cell.
lag_start_cols = ["lag_1_start", "lag_2_start"]

test_df = (
    test_df
    # Drop lag columns left over from a previous run of this cell; otherwise a
    # re-run merges them a second time, pandas suffixes the duplicates
    # (lag_1_start_x / lag_1_start_y) and the fillna below raises KeyError.
    .drop(columns=lag_start_cols, errors="ignore")
    .merge(
        lag_table[["Store", "Dept", *lag_start_cols]],
        on=["Store", "Dept"],
        how="left",
        # Each Store-Dept pair must appear at most once in lag_table; a
        # duplicate would silently multiply test rows, so fail loudly instead.
        validate="many_to_one",
    )
)

# Store-Dept pairs with no entry in lag_table start from 0.
test_df[lag_start_cols] = test_df[lag_start_cols].fillna(0)

test_df[["Store","Dept","Date","lag_1_start","lag_2_start"]].head()

Unnamed: 0,Store,Dept,Date,lag_1_start,lag_2_start
0,1,1,2012-11-02,27390.81,24185.27
1,1,1,2012-11-09,27390.81,24185.27
2,1,1,2012-11-16,27390.81,24185.27
3,1,1,2012-11-23,27390.81,24185.27
4,1,1,2012-11-30,27390.81,24185.27


In [11]:
import pandas as pd
import numpy as np

# Recursive (multi-step) forecast.
#
# Within each Store-Dept series the lag_1 / lag_2 features are seeded from
# lag_1_start / lag_2_start on the first row and then rolled forward using the
# model's own predictions: lag_1 becomes the previous row's prediction and
# lag_2 becomes the previous row's lag_1.
#
# Rows are processed one *time step* at a time rather than one *row* at a
# time: the k-th row of every Store-Dept series is predicted in a single
# model.predict call. Every row receives exactly the same lag inputs as in a
# row-by-row loop, but the model is invoked once per step instead of once per
# row, and no per-row DataFrame lookups are needed.
test_df = test_df.sort_values(["Store", "Dept", "Date"]).copy()

# base feature matrix -- forced to float so writing lag values can never be
# truncated by an integer dtype, and copied so the writes below do not alias
# test_df's own data.
X_base = test_df[feature_cols].to_numpy(dtype=float, copy=True)
col_index = {c: i for i, c in enumerate(feature_cols)}

i_lag1 = col_index["lag_1"]
i_lag2 = col_index["lag_2"]

lag1_start = test_df["lag_1_start"].to_numpy(dtype=float)
lag2_start = test_df["lag_2_start"].to_numpy(dtype=float)

# 0-based position of each row inside its Store-Dept series. Because the frame
# is sorted by (Store, Dept, Date), every series is a contiguous run of rows,
# so for any row at step k > 0 the previous week of the same series is simply
# the row directly above it.
step = test_df.groupby(["Store", "Dept"], sort=False).cumcount().to_numpy()
n_steps = int(step.max()) + 1 if len(step) else 0  # empty frame -> no steps

preds = np.zeros(len(test_df))

for k in range(n_steps):
    rows = np.flatnonzero(step == k)

    if k == 0:
        # start of each Store-Dept series: seed lags from the starter table
        X_base[rows, i_lag1] = lag1_start[rows]
        X_base[rows, i_lag2] = lag2_start[rows]
    else:
        prev = rows - 1
        X_base[rows, i_lag1] = preds[prev]            # last week's prediction
        X_base[rows, i_lag2] = X_base[prev, i_lag1]   # last week's lag_1

    # Pass a DataFrame with column names, as the original cell did, so the
    # model receives named features.
    preds[rows] = model.predict(pd.DataFrame(X_base[rows], columns=feature_cols))

test_df["Predicted_Weekly_Sales"] = preds

fact_test = test_df[["Store", "Dept", "Date", "Predicted_Weekly_Sales"]].copy()
fact_test.to_csv("fact_forecast_test.csv", index=False)

print("✅ Saved: fact_forecast_test.csv")
fact_test.head()


✅ Saved: fact_forecast_test.csv


Unnamed: 0,Store,Dept,Date,Predicted_Weekly_Sales
0,1,1,2012-11-02,23879.451458
1,1,1,2012-11-09,22256.683985
2,1,1,2012-11-16,22606.865411
3,1,1,2012-11-23,31322.974277
4,1,1,2012-11-30,33363.050914


In [12]:
# Sanity check: every forecast row must carry a prediction. Asserting (rather
# than only displaying the count) makes a full re-run stop here if it does not.
n_missing_preds = fact_test["Predicted_Weekly_Sales"].isna().sum()
assert n_missing_preds == 0, f"{n_missing_preds} forecast rows have no prediction"
n_missing_preds



np.int64(0)

In [13]:
# Sanity check: the left merges must neither duplicate nor drop rows, so the
# forecast table has exactly one row per row of the loaded test set.
assert len(fact_test) == len(test), (
    f"forecast has {len(fact_test)} rows but the test set has {len(test)}"
)
fact_test.shape


(115064, 4)