In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [None]:
# Read the raw sales CSV and preview its first rows
sales_csv_path = 'data/walmart_sales/Walmart_Sales.csv'
df = pd.read_csv(sales_csv_path)
df.head()

#### Data Exploration

In [None]:
# Mean weekly sales per store, drawn on an explicit Axes so the figure carries
# its own title and axis labels and can be read without the surrounding code.
avg_sales_by_store = df.groupby('Store')['Weekly_Sales'].mean()
fig, ax = plt.subplots(figsize=(12, 6))
avg_sales_by_store.plot(kind='bar', ax=ax)
ax.set_title('Average Weekly Sales by Store')
ax.set_xlabel('Store')
ax.set_ylabel('Average Weekly Sales')
fig.tight_layout()
plt.show()

In [None]:
# Subset the DataFrame for Store 4 only; .copy() avoids SettingWithCopyWarning
# when columns are assigned on the subset below.
df_store_4 = df[df['Store'] == 4].copy()
df_store_4['Date'] = pd.to_datetime(df_store_4['Date'], dayfirst=True)
# Put the rows in chronological order. Later cells build lag features with
# shift() and carve out the holdout set with positional slicing, and both are
# only meaningful when the rows are ordered by time. A stable sort keeps the
# existing relative order of any rows that share a date.
df_store_4 = df_store_4.sort_values('Date', kind='mergesort')

plt.figure(figsize=(12, 6))
plt.plot(df_store_4['Date'], df_store_4['Weekly_Sales'])
plt.title('Weekly Sales Over Time, Store 4')
plt.xlabel('Date')
plt.ylabel('Weekly Sales')
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Mean weekly sales for each ISO week number, shown as a bar chart.
# The week number is stored on df_store_4 as the 'weekofyear' column.
df_store_4['weekofyear'] = df_store_4['Date'].dt.isocalendar().week
avg_sales_by_week = df_store_4.groupby('weekofyear')['Weekly_Sales'].mean()

fig, ax = plt.subplots(figsize=(14, 6))
ax.bar(avg_sales_by_week.index, avg_sales_by_week.values)
ax.set_xlabel('Week of Year')
ax.set_ylabel('Average Weekly Sales')
ax.set_title('Average Weekly Sales by Week of Year (Store 4)')
ax.set_xticks(range(1, 53))
fig.tight_layout()
plt.show()

#### Feature Engineering

In [None]:
# Build the engineered frame in one chain: ISO week number plus the previous
# one and two rows of Weekly_Sales as lag features. The first rows have no
# lagged value (NaN), so rows containing NaN are dropped and the index is reset.
store4_fe = (
    df_store_4
    .assign(
        weekofyear=lambda frame: frame['Date'].dt.isocalendar().week,
        lag1=lambda frame: frame['Weekly_Sales'].shift(1),
        lag2=lambda frame: frame['Weekly_Sales'].shift(2),
    )
    .dropna()
    .reset_index(drop=True)
)
store4_fe.head()

#### Pre-Processing

In [None]:
# Number of trailing weekly rows reserved as the holdout set
holdout_size = 10

In [None]:
# Prepare the feature matrix and target for the feature-engineered model.

def _build_fe_matrix(frame):
    """Return the engineered feature matrix for ``frame`` as a float array.

    Columns, in order: Date converted with ``toordinal()``, weekofyear,
    lag1, lag2. Used for both the training and holdout frames so the two
    matrices are guaranteed to share the same column layout.
    """
    stacked = np.column_stack([
        frame['Date'].map(lambda d: d.toordinal()).values,
        frame['weekofyear'].values,
        frame['lag1'].values,
        frame['lag2'].values,
    ])
    # 'weekofyear' comes from dt.isocalendar(), which yields a nullable
    # integer column; depending on the pandas version, stacking it can give an
    # object-dtype array. Cast explicitly so the matrix is always float.
    return stacked.astype(float)


# Positional split: the last `holdout_size` rows form the holdout set.
train_fe = store4_fe.iloc[:-holdout_size]
test_fe = store4_fe.iloc[-holdout_size:]

# Features: Date (ordinal), weekofyear, lag1, lag2
X_train_fe = _build_fe_matrix(train_fe)
X_test_fe = _build_fe_matrix(test_fe)
y_train_fe = train_fe['Weekly_Sales'].values.reshape(-1, 1)
y_test_fe = test_fe['Weekly_Sales'].values.reshape(-1, 1)

# Scale features and target. Scalers are fitted on the training rows only and
# then applied to the holdout rows, so no holdout statistics leak into training.
x_scaler_fe = MinMaxScaler()
y_scaler_fe = MinMaxScaler()
X_train_fe_scaled = x_scaler_fe.fit_transform(X_train_fe)
y_train_fe_scaled = y_scaler_fe.fit_transform(y_train_fe)
X_test_fe_scaled = x_scaler_fe.transform(X_test_fe)
y_test_fe_scaled = y_scaler_fe.transform(y_test_fe)

print('X_train_fe_scaled shape:', X_train_fe_scaled.shape)
print('y_train_fe_scaled shape:', y_train_fe_scaled.shape)

In [None]:
# Baseline data (no feature engineering): the date ordinal is the only input.
# The last `holdout_size` rows of df_store_4 are held out; the rest is training.
split_at = -holdout_size
train_df = df_store_4.iloc[:split_at]
test_df = df_store_4.iloc[split_at:]

# Column vectors for the single feature (date ordinal) and the target
X_train = train_df['Date'].map(lambda d: d.toordinal()).to_numpy().reshape(-1, 1)
X_test = test_df['Date'].map(lambda d: d.toordinal()).to_numpy().reshape(-1, 1)
y_train = train_df['Weekly_Sales'].to_numpy().reshape(-1, 1)
y_test = test_df['Weekly_Sales'].to_numpy().reshape(-1, 1)

# Fit the scalers on training data only, then apply them to both splits
x_scaler = MinMaxScaler().fit(X_train)
y_scaler = MinMaxScaler().fit(y_train)
X_train_scaled = x_scaler.transform(X_train)
X_test_scaled = x_scaler.transform(X_test)
y_train_scaled = y_scaler.transform(y_train)
y_test_scaled = y_scaler.transform(y_test)

print('X_train_scaled shape:', X_train_scaled.shape)
print('y_train_scaled shape:', y_train_scaled.shape)

print('X_train_scaled range:', X_train_scaled.min(), X_train_scaled.max())
print('y_train_scaled range:', y_train_scaled.min(), y_train_scaled.max())

In [None]:
# ReLU activation function
def relu(x):
    """Element-wise rectified linear unit: negative entries become 0."""
    rectified = np.maximum(0, x)
    return rectified

# Derivative of ReLU activation
def relu_deriv(x):
    """Element-wise ReLU gradient: 1.0 where x > 0, otherwise 0.0."""
    positive_mask = x > 0
    return positive_mask.astype(float)

# Mean Squared Error loss function
def mse_loss(y_true, y_pred):
    """Return the mean of the squared differences between y_true and y_pred."""
    residual = y_true - y_pred
    return np.mean(np.square(residual))

# Derivative of Mean Squared Error loss
def mse_loss_deriv(y_true, y_pred):
    """Gradient of the MSE loss with respect to y_pred.

    Each entry is 2 * (y_pred - y_true) divided by the number of elements
    in y_true.
    """
    n_elements = y_true.size
    doubled_error = 2 * (y_pred - y_true)
    return doubled_error / n_elements

# Simple feedforward neural network with one hidden layer and L2 regularization
class SimpleNeuralNetwork:
    """One-hidden-layer network with ReLU hidden units and a linear output.

    ``forward`` computes predictions and caches the intermediate activations;
    ``backward`` performs a single gradient-descent step on the MSE loss, adding
    ``lambda_l2 * W`` to each weight gradient (biases are not regularized).
    """

    def __init__(self, input_size, hidden_size, output_size, lambda_l2=0.0, seed=None):
        """Initialise weights from a standard normal and biases at zero.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of outputs.
            lambda_l2: L2 regularization strength applied to W1 and W2.
            seed: Optional seed. When given, the weights are drawn from a
                dedicated ``np.random.RandomState(seed)`` so initialisation is
                reproducible. When None, NumPy's global random state is used.
        """
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.W1 = rng.randn(input_size, hidden_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = rng.randn(hidden_size, output_size)
        self.b2 = np.zeros((1, output_size))
        self.lambda_l2 = lambda_l2

    def forward(self, X):
        """Return the network output for X.

        Stores z1, a1 and z2 on the instance; ``backward`` reads z1 and a1,
        so it must be called after ``forward`` on the same X.
        """
        self.z1 = X @ self.W1 + self.b1
        self.a1 = relu(self.z1)
        self.z2 = self.a1 @ self.W2 + self.b2
        return self.z2

    def backward(self, X, y, y_pred, lr=0.001):
        """Apply one gradient-descent update to all weights and biases.

        Args:
            X: Inputs that were passed to the preceding ``forward`` call.
            y: Targets matching ``y_pred``.
            y_pred: Output of the preceding ``forward`` call.
            lr: Learning rate (step size).
        """
        # Output layer is linear, so the loss gradient w.r.t. z2 is the MSE gradient.
        dz2 = mse_loss_deriv(y, y_pred)
        dW2 = self.a1.T @ dz2 + self.lambda_l2 * self.W2
        db2 = np.sum(dz2, axis=0, keepdims=True)
        # Back-propagate through W2 (using its pre-update value) and the ReLU.
        da1 = dz2 @ self.W2.T
        dz1 = da1 * relu_deriv(self.z1)
        dW1 = X.T @ dz1 + self.lambda_l2 * self.W1
        db1 = np.sum(dz1, axis=0, keepdims=True)
        # All gradients are computed before any parameter is modified.
        self.W2 -= lr * dW2
        self.b2 -= lr * db2
        self.W1 -= lr * dW1
        self.b1 -= lr * db1

In [None]:
# Training hyperparameters shared by both models

# L2 regularization strength, to prevent overfitting
l2_lambda = 0.01
# Number of hidden units (reduced to also prevent overfitting)
hidden_size = 6
# Number of training epochs
epochs = 1000
# Learning rate
lr = 0.01

In [None]:
# Train the 'no feature engineering' model on its single input feature.
# The network draws its initial weights from NumPy's global random state, so
# seed it first; otherwise the losses and holdout metrics change on every run.
np.random.seed(42)
nn = SimpleNeuralNetwork(
    input_size=X_train_scaled.shape[1],  # 1 feature: the scaled date ordinal
    hidden_size=hidden_size,
    output_size=1,
    lambda_l2=l2_lambda,
)

# Full-batch gradient descent: one forward/backward pass over all rows per epoch
for epoch in range(epochs):
    y_pred = nn.forward(X_train_scaled)
    loss = mse_loss(y_train_scaled, y_pred)
    nn.backward(X_train_scaled, y_train_scaled, y_pred, lr)
    if (epoch + 1) % 200 == 0:
        print(f"[No FE Model] Epoch {epoch+1}, Loss: {loss:.4f}")

# Generate predictions for the holdout set (no feature engineering) and map
# them from the scaled space back to the original sales units
y_test_pred_scaled = nn.forward(X_test_scaled)
y_test_pred = y_scaler.inverse_transform(y_test_pred_scaled)

In [None]:
# Train the feature-engineered model.
# The network draws its initial weights from NumPy's global random state, so
# seed it first; otherwise the losses and holdout metrics change on every run.
np.random.seed(42)
nn_fe = SimpleNeuralNetwork(
    input_size=X_train_fe_scaled.shape[1],  # one input per engineered feature column
    hidden_size=hidden_size,
    output_size=1,
    lambda_l2=l2_lambda,
)

# Full-batch gradient descent: one forward/backward pass over all rows per epoch
for epoch in range(epochs):
    y_pred = nn_fe.forward(X_train_fe_scaled)
    loss = mse_loss(y_train_fe_scaled, y_pred)
    nn_fe.backward(X_train_fe_scaled, y_train_fe_scaled, y_pred, lr)
    if (epoch + 1) % 200 == 0:
        print(f"[FE Model] Epoch {epoch+1}, Loss: {loss:.4f}")

# Predict on the holdout set and map the predictions from the scaled space
# back to the original sales units
y_test_pred_fe_scaled = nn_fe.forward(X_test_fe_scaled)
y_test_pred_fe = y_scaler_fe.inverse_transform(y_test_pred_fe_scaled)

In [None]:
# Calculate RMSE, RMSE/Mean, and MAPE for the holdout set predictions (no feature engineering)
# mean_squared_error returns the MSE (squared units), so take the square root
# to obtain an RMSE in the same units as Weekly_Sales.
rmse = np.sqrt(mean_squared_error(y_test.flatten(), y_test_pred.flatten()))
mean_actual = np.mean(y_test)
# RMSE relative to the average actual value on the holdout set
rmse_over_mean = rmse / mean_actual
# Mean absolute percentage error, expressed in percent
mape = np.mean(np.abs((y_test.flatten() - y_test_pred.flatten()) / y_test.flatten())) * 100
print(f"[No FE] RMSE: {rmse:.2f}")
print(f"[No FE] RMSE / Mean Actual: {rmse_over_mean:.4f}")
print(f"[No FE] MAPE: {mape:.2f}%")

In [None]:
# Calculate RMSE, RMSE/Mean, and MAPE for the holdout set predictions (with feature engineering)
# mean_squared_error returns the MSE (squared units), so take the square root
# to obtain an RMSE in the same units as Weekly_Sales.
rmse_fe = np.sqrt(mean_squared_error(y_test_fe.flatten(), y_test_pred_fe.flatten()))
mean_actual_fe = np.mean(y_test_fe)
# RMSE relative to the average actual value on the holdout set
rmse_over_mean_fe = rmse_fe / mean_actual_fe
# Mean absolute percentage error, expressed in percent
mape_fe = np.mean(np.abs((y_test_fe.flatten() - y_test_pred_fe.flatten()) / y_test_fe.flatten())) * 100
print(f"[FE Model] RMSE: {rmse_fe:.2f}")
print(f"[FE Model] RMSE / Mean Actual: {rmse_over_mean_fe:.4f}")
print(f"[FE Model] MAPE: {mape_fe:.2f}%")

### Model Comparison: With and Without Feature Engineering
This section compares the holdout-set performance of two neural network models:

- **Model 1:** Uses only the date as a feature (no feature engineering)
- **Model 2:** Uses engineered features (date, week of year, and sales lagged by one and two weeks)

Metrics and plots for both models are shown below.

In [None]:
# Side-by-side text summary of the holdout metrics (no FE vs. FE)
separator = '-' * 40
no_fe_summary = f"No Feature Engineering:\n  RMSE: {rmse:.2f}\n  RMSE/Mean: {rmse_over_mean:.4f}\n  MAPE: {mape:.2f}%\n"
fe_summary = f"With Feature Engineering:\n  RMSE: {rmse_fe:.2f}\n  RMSE/Mean: {rmse_over_mean_fe:.4f}\n  MAPE: {mape_fe:.2f}%\n"

print('Model Comparison (Holdout Set):')
print(separator)
print(no_fe_summary)
print(fe_summary)

In [None]:
# Overlay the actual holdout sales with each model's predictions
fig, ax = plt.subplots(figsize=(12, 6))

# (x values, y values, legend label, marker) for each line, in drawing order
holdout_series = [
    (test_df['Date'], y_test, 'Actual', 'o'),
    (test_df['Date'], y_test_pred, 'Predicted (No FE)', 'x'),
    (test_fe['Date'], y_test_pred_fe, 'Predicted (With FE)', 's'),
]
for dates, values, label, marker in holdout_series:
    ax.plot(dates, values.flatten(), label=label, marker=marker)

ax.set_title('Actual vs. Predicted Weekly Sales (Holdout Set), Store 4')
ax.set_xlabel('Date')
ax.set_ylabel('Weekly Sales')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()