## Load the Clean Data

In [1]:
import pandas as pd
import numpy as np

# Load the cleaned dataset written by the first notebook. 'Date' is parsed to
# datetime so the `.dt` accessors used later in this notebook work.
try:
    master_df = pd.read_csv('../data/cleaned_master_data.csv', parse_dates=['Date'])
except FileNotFoundError:
    print("ERROR: 'cleaned_master_data.csv' not found. Please run the first notebook to generate it.")
    # Stop here: every later cell needs `master_df`, so continuing would only
    # surface as a confusing NameError further down.
    raise
else:
    print("Clean master data loaded successfully.")



Clean master data loaded successfully.


## Feature Engineering

In [None]:
print("Starting feature engineering...")

# Each (Store, Dept) pair is treated as its own sales series; every
# history-based feature below is computed strictly within that series.
group_keys = ['Store', 'Dept']

# Sorting values to ensure chronological order for time-series features
master_df = master_df.sort_values(by=group_keys + ['Date'], ascending=True)

# Create/ensure basic time features exist
master_df['Month'] = master_df['Date'].dt.month
master_df['Year'] = master_df['Date'].dt.year
master_df['WeekOfYear'] = master_df['Date'].dt.isocalendar().week
master_df['day_of_year'] = master_df['Date'].dt.dayofyear

# One grouped view of the target, reused for all lag and rolling features.
sales_by_series = master_df.groupby(group_keys)['Weekly_Sales']

# Lag features (including a 52-week lag for yearly seasonality)
lags = [1, 2, 3, 4, 52]
for lag in lags:
    master_df[f'sales_lag_{lag}'] = sales_by_series.shift(lag)

# Rolling window features.
# The shift(1) excludes the current row's own sales from its window, so the
# feature only uses earlier rows. Shift AND rolling are both applied inside
# `transform`, i.e. per (Store, Dept) series. Rolling over the flat result of a
# grouped shift would slide the window across series boundaries and only stay
# correct by accident of the leading NaN and the default `min_periods`.
roll_window = 4
master_df[f'sales_roll_mean_{roll_window}'] = sales_by_series.transform(
    lambda s: s.shift(1).rolling(window=roll_window).mean()
)
master_df[f'sales_roll_std_{roll_window}'] = sales_by_series.transform(
    lambda s: s.shift(1).rolling(window=roll_window).std()
)

# Interaction feature: equals the month number on holiday rows and 0 otherwise
master_df['month_holiday_interaction'] = master_df['Month'] * master_df['IsHoliday']

# Dropping the initial rows that have NaN values from feature creation.
# NOTE: dropna() removes a row if ANY column is NaN, not only the engineered
# ones. With a 52-step lag, the first 52 rows of every (Store, Dept) series are
# always removed here.
master_df_featured = master_df.dropna()
print("Feature engineering complete. NaN values dropped.")


# --- 3. Saving the Featured Data (Crucial Step) ---
try:
    master_df_featured.to_csv('../data/featured_data.csv', index=False)
    print("\nSUCCESS: Featured data saved to '../data/featured_data.csv'")
    print(f"Final shape of the featured dataset: {master_df_featured.shape}")
except Exception as e:
    # A failed save is reported but does not stop the cell; the preview below
    # still renders from the in-memory frame.
    print(f"\nERROR: Could not save the file. Error: {e}")

display(master_df_featured.head())

Starting feature engineering...
Feature engineering complete. NaN values dropped.

SUCCESS: Featured data saved to '../data/featured_data.csv'
Final shape of the featured dataset: (260979, 32)


Unnamed: 0,Store,Dept,Date,Weekly_Sales,IsHoliday,Type,Size,Temperature,Fuel_Price,MarkDown1,...,CPI_Category,day_of_year,sales_lag_1,sales_lag_2,sales_lag_3,sales_lag_4,sales_lag_52,sales_roll_mean_4,sales_roll_std_4,month_holiday_interaction
52,1,1,2011-02-04,21665.76,False,A,151315,42.27,2.989,0.0,...,> 200,35,18461.18,17341.47,17359.7,15984.24,24924.5,17286.6475,1013.925386,0
53,1,1,2011-02-11,37887.17,True,A,151315,36.39,3.022,0.0,...,> 200,42,21665.76,18461.18,17341.47,17359.7,46039.49,18707.0275,2040.798674,2
54,1,1,2011-02-18,46845.87,False,A,151315,57.36,3.045,0.0,...,> 200,49,37887.17,21665.76,18461.18,17341.47,41595.55,23838.895,9543.110891,0
55,1,1,2011-02-25,19363.83,False,A,151315,62.9,3.065,0.0,...,> 200,56,46845.87,37887.17,21665.76,18461.18,19403.54,31214.995,13449.781394,0
56,1,1,2011-03-04,20327.61,False,A,151315,59.58,3.288,0.0,...,> 200,63,19363.83,46845.87,37887.17,21665.76,21827.9,31440.6575,13169.111324,0
