<a href="https://colab.research.google.com/github/lkhok22/ML-FinalProject-Walmart-Recruiting---Store-Sales-Forecasting/blob/main/model_experiment_DLinear.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required libraries
!pip install pandas numpy matplotlib seaborn scikit-learn torch torchvision wandb pyyaml darts --quiet
import wandb
wandb.login(key="eccf2c915699fc032ad678daf0fd4b5ac60bf87c")

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [75]:
# Mount Google Drive and extract data
from google.colab import drive
import zipfile
import os
drive.mount('/content/drive')
zip_path = '/content/drive/MyDrive/ML-FinalProject/data.zip'
extract_to = '/content/walmart_data/'
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)
for file_name in os.listdir(extract_to):
    if file_name.endswith('.zip'):
        with zipfile.ZipFile(os.path.join(extract_to, file_name), 'r') as zip_ref:
            zip_ref.extractall(extract_to)
print("✅ Extracted files:", os.listdir(extract_to))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Extracted files: ['test.csv.zip', 'features.csv', 'train.csv.zip', 'train.csv', 'features.csv.zip', 'test.csv', 'stores.csv', 'sampleSubmission.csv.zip', 'sampleSubmission.csv']


In [76]:
# Import libraries
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from datetime import timedelta

In [77]:
# Load and preprocess data
train = pd.read_csv('/content/walmart_data/train.csv')
features = pd.read_csv('/content/walmart_data/features.csv')
stores = pd.read_csv('/content/walmart_data/stores.csv')
test = pd.read_csv('/content/walmart_data/test.csv')

# Merge train with features and stores
df = pd.merge(train, features, on=['Store', 'Date'], how='left')
df = pd.merge(df, stores, on='Store', how='left')
df = df.drop(columns=['IsHoliday_x']).rename(columns={'IsHoliday_y': 'IsHoliday'})
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values(by=['Store', 'Dept', 'Date'])

# Add holiday-specific features
holiday_dates = {
    'SuperBowl': ['2010-02-12', '2011-02-11', '2012-02-10', '2013-02-08'],
    'LaborDay': ['2010-09-10', '2011-09-09', '2012-09-07', '2013-09-06'],
    'Thanksgiving': ['2010-11-26', '2011-11-25', '2012-11-23', '2013-11-29'],
    'Christmas': ['2010-12-31', '2011-12-30', '2012-12-28', '2013-12-27']
}
for holiday, dates in holiday_dates.items():
    df[holiday] = df['Date'].isin(pd.to_datetime(dates)).astype(int)

# Handle missing values
df['MarkDown1'].fillna(0, inplace=True)
df['MarkDown2'].fillna(0, inplace=True)
df['MarkDown3'].fillna(0, inplace=True)
df['MarkDown4'].fillna(0, inplace=True)
df['MarkDown5'].fillna(0, inplace=True)
df['CPI'].fillna(df['CPI'].mean(), inplace=True)
df['Unemployment'].fillna(df['Unemployment'].mean(), inplace=True)
df['Temperature'].fillna(df['Temperature'].mean(), inplace=True)
df['Fuel_Price'].fillna(df['Fuel_Price'].mean(), inplace=True)
df['IsHoliday'] = df['IsHoliday'].astype(int)
df['Type'] = df['Type'].map({'A': 0, 'B': 1, 'C': 2})
df['Size'].fillna(df['Size'].mean(), inplace=True)

# Check for NaN or inf in data
feature_cols = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment', 'MarkDown1', 'MarkDown2',
                'MarkDown3', 'MarkDown4', 'MarkDown5', 'Size', 'Type', 'IsHoliday',
                'SuperBowl', 'LaborDay', 'Thanksgiving', 'Christmas']
assert not df[['Weekly_Sales'] + feature_cols].isna().any().any(), "NaN values found in data"
assert not df[['Weekly_Sales'] + feature_cols].isin([np.inf, -np.inf]).any().any(), "Inf values found in data"

# Normalize features with robust scaling
scaler_sales = StandardScaler()
scaler_features = StandardScaler()
df['Weekly_Sales'] = scaler_sales.fit_transform(df[['Weekly_Sales']].clip(lower=-1e5, upper=1e5))
df[feature_cols] = scaler_features.fit_transform(df[feature_cols].clip(lower=-1e5, upper=1e5))

# Create time series dictionary with exogenous features
store_dept_pairs = df[['Store', 'Dept']].drop_duplicates()
time_series_dict = {}
for _, row in store_dept_pairs.iterrows():
    store, dept = row['Store'], row['Dept']
    sub_df = df[(df['Store'] == store) & (df['Dept'] == dept)].sort_values('Date')

    # Ensure continuous weekly data
    date_range = pd.date_range(start=sub_df['Date'].min(), end=sub_df['Date'].max(), freq='W-FRI')
    sub_df = sub_df.set_index('Date').reindex(date_range, method='ffill').reset_index()
    sub_df['Store'] = store
    sub_df['Dept'] = dept
    sub_df['Weekly_Sales'].fillna(0, inplace=True)
    sub_df['IsHoliday'].fillna(0, inplace=True)
    sub_df['Type'].fillna(df[df['Store'] == store]['Type'].iloc[0], inplace=True)
    sub_df['Size'].fillna(df[df['Store'] == store]['Size'].iloc[0], inplace=True)
    for holiday in ['SuperBowl', 'LaborDay', 'Thanksgiving', 'Christmas']:
        sub_df[holiday].fillna(0, inplace=True)
    sub_df[feature_cols] = sub_df[feature_cols].fillna(0)

    # Check for NaN or inf after reindexing
    assert not sub_df[['Weekly_Sales'] + feature_cols].isna().any().any(), f"NaN in sub_df for Store {store}, Dept {dept}"
    assert not sub_df[['Weekly_Sales'] + feature_cols].isin([np.inf, -np.inf]).any().any(), f"Inf in sub_df for Store {store}, Dept {dept}"

    time_series_dict[(store, dept)] = {
        'sales': sub_df['Weekly_Sales'].values.astype(np.float32),
        'features': sub_df[feature_cols].values.astype(np.float32),
        'dates': sub_df['index'].values,
        'is_holiday': sub_df['IsHoliday'].values.astype(np.float32)
    }
print(f"Created time series for {len(time_series_dict)} store-department pairs.")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

  sub_df['Type'].fillna(df[df['Store'] == store]['Type'].iloc[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  sub_df['Size'].fillna(df[df['Store'] == store]['Size'].iloc[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  sub_df[h

Created time series for 3331 store-department pairs.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  sub_df['Type'].fillna(df[df['Store'] == store]['Type'].iloc[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  sub_df['Size'].fillna(df[df['Store'] == store]['Size'].iloc[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work 

In [82]:
class WalmartSalesDataset(Dataset):
    def __init__(self, time_series_dict, seq_len=36, pred_len=6, train=True):
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.data = []
        self.holiday_weights = []

        for (store, dept), ts_data in time_series_dict.items():
            sales = ts_data['sales']
            features = ts_data['features']
            is_holiday = ts_data['is_holiday']
            n = len(sales)
            if n < seq_len + pred_len:
                continue
            split_idx = int(0.8 * (n - seq_len - pred_len + 1)) if train else 0
            start_idx = 0 if train else split_idx
            end_idx = split_idx if train else n - seq_len - pred_len + 1
            for i in range(start_idx, end_idx):
                x_sales = sales[i:i + seq_len]
                x_features = features[i:i + seq_len]
                y = sales[i + seq_len:i + seq_len + pred_len]
                w = is_holiday[i + seq_len:i + seq_len + pred_len] * 4 + 1
                # Validate data
                if np.any(np.isnan(x_sales)) or np.any(np.isnan(x_features)) or np.any(np.isnan(y)) or np.any(np.isnan(w)):
                    continue
                if np.any(np.isinf(x_sales)) or np.any(np.isinf(x_features)) or np.any(np.isinf(y)) or np.any(np.isinf(w)):
                    continue
                self.data.append((x_sales, x_features, y))
                self.holiday_weights.append(w)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x_sales, x_features, y = self.data[idx]
        w = self.holiday_weights[idx]
        return (torch.tensor(x_sales, dtype=torch.float32).unsqueeze(-1),
                torch.tensor(x_features, dtype=torch.float32),
                torch.tensor(y, dtype=torch.float32),
                torch.tensor(w, dtype=torch.float32))

In [83]:
# Create train and validation datasets
train_dataset = WalmartSalesDataset(time_series_dict, seq_len=36, pred_len=6, train=True)
val_dataset = WalmartSalesDataset(time_series_dict, seq_len=36, pred_len=6, train=False)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
print(f"Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}")

Train samples: 251133, Val samples: 316241


In [84]:
import torch
import torch.nn as nn

class DLinear(nn.Module):
    def __init__(self, seq_len, pred_len, n_features, dropout=0.1):
        super(DLinear, self).__init__()
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.n_features = n_features

        # Trend component with deeper MLP
        self.Trend_MLP = nn.Sequential(
            nn.Linear(seq_len, seq_len),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(seq_len, seq_len // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(seq_len // 2, pred_len)
        )

        # Seasonal component with deeper MLP
        self.Seasonal_MLP = nn.Sequential(
            nn.Linear(seq_len, seq_len),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(seq_len, seq_len // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(seq_len // 2, pred_len)
        )

        # Exogenous component with deeper MLP
        self.Exogenous_MLP = nn.Sequential(
            nn.Linear(seq_len * n_features, 512),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, pred_len)
        )

    def forward(self, x_sales, x_features):
        x_sales = x_sales.squeeze(-1)
        trend = self.Trend_MLP(x_sales)
        seasonal = self.Seasonal_MLP(x_sales)
        x_features = x_features.view(x_features.size(0), -1)
        exogenous = self.Exogenous_MLP(x_features)
        return trend + seasonal + exogenous

In [85]:
# Custom WMAE loss
def wmae_loss(preds, targets, weights):
    return torch.mean(weights * torch.abs(preds - targets))

In [86]:
# Training and validation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DLinear(seq_len=36, pred_len=6, n_features=16).to(device)  # Updated n_features for new holiday features
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)  # Lower learning rate
criterion_mse = nn.MSELoss()
criterion_wmae = wmae_loss

# Initialize WandB
wandb.init(project="walmart-dlinear", name="dlinear-enhanced-run-v2", config={
    "seq_len": 36, "pred_len": 6, "batch_size": 32, "epochs": 20,
    "learning_rate": 0.0001, "model": "DLinear-Enhanced", "n_features": 16
})

# Early stopping variables
best_val_wmae = float('inf')
best_model_state = None
patience = 5
counter = 0

# Training and validation loop
for epoch in range(20):
    model.train()
    train_loss_mse, train_loss_wmae, train_wmae_unscaled = 0.0, 0.0, 0.0
    train_batches = 0
    for xb_sales, xb_features, yb, wb in train_loader:
        xb_sales, xb_features, yb, wb = xb_sales.to(device), xb_features.to(device), yb.to(device), wb.to(device)
        optimizer.zero_grad()
        preds = model(xb_sales, xb_features)
        loss_mse = criterion_mse(preds, yb)
        loss_wmae = criterion_wmae(preds, yb, wb)
        loss = loss_mse + 2 * loss_wmae  # Increase WMAE weight
        if torch.isnan(loss) or torch.isinf(loss):
            continue
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        train_loss_mse += loss_mse.item() * xb_sales.size(0)
        train_loss_wmae += loss_wmae.item() * xb_sales.size(0)
        # Compute unscaled WMAE
        preds_unscaled = scaler_sales.inverse_transform(preds.detach().cpu().numpy())
        yb_unscaled = scaler_sales.inverse_transform(yb.detach().cpu().numpy())
        train_wmae_unscaled += np.mean(wb.cpu().numpy() * np.abs(preds_unscaled - yb_unscaled)) * xb_sales.size(0)
        train_batches += xb_sales.size(0)

    train_loss_mse = train_loss_mse / train_batches if train_batches > 0 else float('nan')
    train_loss_wmae = train_loss_wmae / train_batches if train_batches > 0 else float('nan')
    train_wmae_unscaled = train_wmae_unscaled / train_batches if train_batches > 0 else float('nan')

    # Validation
    model.eval()
    val_loss_mse, val_loss_wmae, val_wmae_unscaled = 0.0, 0.0, 0.0
    val_batches = 0
    with torch.no_grad():
        for xb_sales, xb_features, yb, wb in val_loader:
            xb_sales, xb_features, yb, wb = xb_sales.to(device), xb_features.to(device), yb.to(device), wb.to(device)
            preds = model(xb_sales, xb_features)
            loss_mse = criterion_mse(preds, yb)
            loss_wmae = criterion_wmae(preds, yb, wb)
            if torch.isnan(loss_mse) or torch.isinf(loss_mse) or torch.isnan(loss_wmae) or torch.isinf(loss_wmae):
                continue
            val_loss_mse += loss_mse.item() * xb_sales.size(0)
            val_loss_wmae += loss_wmae.item() * xb_sales.size(0)
            preds_unscaled = scaler_sales.inverse_transform(preds.cpu().numpy())
            yb_unscaled = scaler_sales.inverse_transform(yb.cpu().numpy())
            val_wmae_unscaled += np.mean(wb.cpu().numpy() * np.abs(preds_unscaled - yb_unscaled)) * xb_sales.size(0)
            val_batches += xb_sales.size(0)

    val_loss_mse = val_loss_mse / val_batches if val_batches > 0 else float('nan')
    val_loss_wmae = val_loss_wmae / val_batches if val_batches > 0 else float('nan')
    val_wmae_unscaled = val_wmae_unscaled / val_batches if val_batches > 0 else float('nan')

    print(f"Epoch {epoch+1}/20 — Train MSE: {train_loss_mse:.4f}, Train WMAE: {train_loss_wmae:.4f}, "
          f"Train WMAE Unscaled: {train_wmae_unscaled:.2f}, Val MSE: {val_loss_mse:.4f}, "
          f"Val WMAE: {val_loss_wmae:.4f}, Val WMAE Unscaled: {val_wmae_unscaled:.2f}")
    wandb.log({
        "train_mse": train_loss_mse, "train_wmae": train_loss_wmae, "train_wmae_unscaled": train_wmae_unscaled,
        "val_mse": val_loss_mse, "val_wmae": val_loss_wmae, "val_wmae_unscaled": val_wmae_unscaled, "epoch": epoch+1
    })

    # Early stopping
    if val_loss_wmae < best_val_wmae:
        best_val_wmae = val_loss_wmae
        best_model_state = model.state_dict()
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

# Save best model
if best_model_state is not None:
    torch.save(best_model_state, '/content/drive/MyDrive/ML-FinalProject/dlinear_model_best.pth')
wandb.finish()

Epoch 1/20 — Train MSE: 0.1260, Train WMAE: 0.2779, Train WMAE Unscaled: 5691.32, Val MSE: 0.0677, Val WMAE: 0.1803, Val WMAE Unscaled: 3692.64
Epoch 2/20 — Train MSE: 0.0888, Train WMAE: 0.2369, Train WMAE Unscaled: 4851.89, Val MSE: 0.0653, Val WMAE: 0.1767, Val WMAE Unscaled: 3617.76
Epoch 3/20 — Train MSE: 0.0839, Train WMAE: 0.2279, Train WMAE Unscaled: 4666.60, Val MSE: 0.0638, Val WMAE: 0.1725, Val WMAE Unscaled: 3531.99
Epoch 4/20 — Train MSE: 0.0817, Train WMAE: 0.2216, Train WMAE Unscaled: 4538.23, Val MSE: 0.0626, Val WMAE: 0.1703, Val WMAE Unscaled: 3487.90
Epoch 5/20 — Train MSE: 0.0800, Train WMAE: 0.2183, Train WMAE Unscaled: 4470.77, Val MSE: 0.0624, Val WMAE: 0.1697, Val WMAE Unscaled: 3473.98
Epoch 6/20 — Train MSE: 0.0789, Train WMAE: 0.2151, Train WMAE Unscaled: 4404.80, Val MSE: 0.0605, Val WMAE: 0.1628, Val WMAE Unscaled: 3333.84
Epoch 7/20 — Train MSE: 0.0785, Train WMAE: 0.2128, Train WMAE Unscaled: 4357.16, Val MSE: 0.0623, Val WMAE: 0.1654, Val WMAE Unscaled: 

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_mse,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_wmae,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
train_wmae_unscaled,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_mse,█▆▅▄▄▃▄▄▃▃▃▂▂▂▂▃▂▁▁▁
val_wmae,█▇▆▅▅▃▄▄▃▄▃▃▂▃▂▃▂▁▂▁
val_wmae_unscaled,█▇▆▅▅▃▄▄▃▄▃▃▂▃▂▃▂▁▂▁

0,1
epoch,20.0
train_mse,0.07527
train_wmae,0.2026
train_wmae_unscaled,4148.68262
val_mse,0.05852
val_wmae,0.15646
val_wmae_unscaled,3203.87842


In [87]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import os
import wandb

# Initialize WandB for submission tracking
wandb.init(project="walmart-dlinear", name="submission-run-v2", config={
    "seq_len": 36, "pred_len": 6, "model": "DLinear-Enhanced", "n_features": 16
})

# Step 1: Load and preprocess test data
test = pd.read_csv('/content/walmart_data/test.csv')
features = pd.read_csv('/content/walmart_data/features.csv')
stores = pd.read_csv('/content/walmart_data/stores.csv')

# Merge test with features and stores
test_df = pd.merge(test, features, on=['Store', 'Date'], how='left')
test_df = pd.merge(test_df, stores, on='Store', how='left')
test_df = test_df.drop(columns=['IsHoliday_x']).rename(columns={'IsHoliday_y': 'IsHoliday'})
test_df['Date'] = pd.to_datetime(test_df['Date'])
test_df = test_df.sort_values(by=['Store', 'Dept', 'Date'])

# Add holiday-specific features
holiday_dates = {
    'SuperBowl': ['2010-02-12', '2011-02-11', '2012-02-10', '2013-02-08'],
    'LaborDay': ['2010-09-10', '2011-09-09', '2012-09-07', '2013-09-06'],
    'Thanksgiving': ['2010-11-26', '2011-11-25', '2012-11-23', '2013-11-29'],
    'Christmas': ['2010-12-31', '2011-12-30', '2012-12-28', '2013-12-27']
}
for holiday, dates in holiday_dates.items():
    test_df[holiday] = test_df['Date'].isin(pd.to_datetime(dates)).astype(int)

# Handle missing values consistently with training
test_df['MarkDown1'].fillna(0, inplace=True)
test_df['MarkDown2'].fillna(0, inplace=True)
test_df['MarkDown3'].fillna(0, inplace=True)
test_df['MarkDown4'].fillna(0, inplace=True)
test_df['MarkDown5'].fillna(0, inplace=True)
test_df['CPI'].fillna(df['CPI'].mean(), inplace=True)  # Use training mean
test_df['Unemployment'].fillna(df['Unemployment'].mean(), inplace=True)
test_df['Temperature'].fillna(df['Temperature'].mean(), inplace=True)
test_df['Fuel_Price'].fillna(df['Fuel_Price'].mean(), inplace=True)
test_df['IsHoliday'] = test_df['IsHoliday'].astype(int)
test_df['Type'] = test_df['Type'].map({'A': 0, 'B': 1, 'C': 2})
test_df['Size'].fillna(df['Size'].mean(), inplace=True)

# Scale features using the same scaler as training
feature_cols = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment', 'MarkDown1', 'MarkDown2',
                'MarkDown3', 'MarkDown4', 'MarkDown5', 'Size', 'Type', 'IsHoliday',
                'SuperBowl', 'LaborDay', 'Thanksgiving', 'Christmas']
test_df[feature_cols] = scaler_features.transform(test_df[feature_cols].clip(lower=-1e5, upper=1e5))

# Check for NaN or inf
assert not test_df[feature_cols].isna().any().any(), "NaN values in test data"
assert not test_df[feature_cols].isin([np.inf, -np.inf]).any().any(), "Inf values in test data"

# Step 2: Prepare test sequences
def create_test_sequences(test_df, time_series_dict, seq_len=36):
    test_data = []
    test_ids = []
    store_dept_pairs = test_df[['Store', 'Dept']].drop_duplicates()

    for _, row in store_dept_pairs.iterrows():
        store, dept = row['Store'], row['Dept']
        test_sub_df = test_df[(test_df['Store'] == store) & (test_df['Dept'] == dept)].sort_values('Date')

        if len(test_sub_df) == 0:
            continue

        # Get historical data from training
        if (store, dept) in time_series_dict:
            train_sales = time_series_dict[(store, dept)]['sales']
            train_features = time_series_dict[(store, dept)]['features']
            train_dates = time_series_dict[(store, dept)]['dates']
        else:
            # Fallback for unseen store-dept pairs
            train_sales = np.zeros(seq_len, dtype=np.float32)
            train_features = np.zeros((seq_len, len(feature_cols)), dtype=np.float32)
            train_dates = test_sub_df['Date'].min() - pd.Timedelta(weeks=seq_len)
            train_dates = pd.date_range(end=train_dates, periods=seq_len, freq='W-FRI')

        # For each test date
        for date in test_sub_df['Date'].unique():
            test_row = test_sub_df[test_sub_df['Date'] == date]
            if len(test_row) == 0:
                continue

            # Get the last seq_len weeks from training data
            x_sales = train_sales[-seq_len:] if len(train_sales) >= seq_len else np.pad(train_sales, (seq_len - len(train_sales), 0), mode='constant')
            x_features = train_features[-seq_len:] if len(train_features) >= seq_len else np.pad(train_features, ((seq_len - len(train_features), 0), (0, 0)), mode='constant')

            # Replace the last week's features with test week's features
            test_features = test_row[feature_cols].values.astype(np.float32)
            if len(test_features) > 0:
                x_features[-1] = test_features[0]

            # Create Id for submission
            date_str = date.strftime('%Y-%m-%d')
            test_id = f"{int(store)}_{int(dept)}_{date_str}"

            test_data.append((x_sales, x_features))
            test_ids.append(test_id)

    return test_data, test_ids

# Create test sequences
test_data, test_ids = create_test_sequences(test_df, time_series_dict, seq_len=36)

# Define test dataset
class TestDataset(Dataset):
    def __init__(self, test_data):
        self.test_data = test_data

    def __len__(self):
        return len(self.test_data)

    def __getitem__(self, idx):
        x_sales, x_features = self.test_data[idx]
        return (torch.tensor(x_sales, dtype=torch.float32).unsqueeze(-1),
                torch.tensor(x_features, dtype=torch.float32))

test_dataset = TestDataset(test_data)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Step 3: Load the best model and generate predictions
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DLinear(seq_len=36, pred_len=6, n_features=16).to(device)
model.load_state_dict(torch.load('/content/drive/MyDrive/ML-FinalProject/dlinear_model_best.pth'))
model.eval()

predictions = []
with torch.no_grad():
    for xb_sales, xb_features in test_loader:
        xb_sales, xb_features = xb_sales.to(device), xb_features.to(device)
        preds = model(xb_sales, xb_features)  # Shape: (batch_size, pred_len)
        preds = preds[:, 0]  # Take first week prediction
        predictions.append(preds.cpu().numpy())

predictions = np.concatenate(predictions, axis=0)

# Step 4: Inverse transform predictions
predictions = scaler_sales.inverse_transform(predictions.reshape(-1, 1)).flatten()

# Step 5: Create submission file
submission = pd.DataFrame({
    'Id': test_ids,
    'Weekly_Sales': predictions
})

# Clip negative predictions
submission['Weekly_Sales'] = submission['Weekly_Sales'].clip(lower=0)

# Save submission file
submission_path = '/content/submission.csv'
submission.to_csv(submission_path, index=False)
print(f"✅ Submission file created: {submission_path}")

# Log submission to WandB
wandb.save(submission_path)

# Log summary statistics
wandb.log({
    "submission_mean_sales": submission['Weekly_Sales'].mean(),
    "submission_std_sales": submission['Weekly_Sales'].std(),
    "submission_min_sales": submission['Weekly_Sales'].min(),
    "submission_max_sales": submission['Weekly_Sales'].max()
})

wandb.finish()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df['MarkDown1'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df['MarkDown2'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

✅ Submission file created: /content/submission.csv


0,1
submission_max_sales,▁
submission_mean_sales,▁
submission_min_sales,▁
submission_std_sales,▁

0,1
submission_max_sales,101130.73438
submission_mean_sales,15886.2959
submission_min_sales,0.0
submission_std_sales,20232.91211


# public score 4906