In [1]:
# feature_engineering.py

import pandas as pd

def _require_columns(frame: pd.DataFrame, required) -> None:
    """Raise ``KeyError`` naming every column in *required* missing from *frame*."""
    missing = [name for name in required if name not in frame.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")


def add_order_date_features(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with calendar and holiday features added.

    Columns appended, in this order: ``Order_Week``, ``Order_Year``,
    ``Order_Month``, ``Day_Of_Week``, ``Day_Name``, ``is_weekend``,
    ``is_holiday``.

    Args:
        frame: Data with a datetime ``Order Date`` column and a
            ``holiday_name`` column (null where the day is not a holiday).

    Returns:
        A new DataFrame; *frame* itself is not modified.

    Raises:
        KeyError: If ``Order Date`` or ``holiday_name`` is absent.
    """
    _require_columns(frame, ("Order Date", "holiday_name"))

    out = frame.copy()
    order_date = out["Order Date"]

    # NOTE: Order_Week is the ISO-8601 week number, while Order_Year is the
    # calendar year. Around New Year the two can disagree (2021-01-02 is ISO
    # week 53), so do not treat (Order_Year, Order_Week) as a unique week key.
    out["Order_Week"] = order_date.dt.isocalendar().week.astype(str)
    out["Order_Year"] = order_date.dt.year
    out["Order_Month"] = order_date.dt.month
    out["Day_Of_Week"] = order_date.dt.dayofweek  # Monday=0 ... Sunday=6
    out["Day_Name"] = order_date.dt.day_name()
    out["is_weekend"] = out["Day_Of_Week"].isin([5, 6]).astype(int)

    # A row is flagged as a holiday whenever a holiday name is present.
    out["is_holiday"] = out["holiday_name"].notna().astype(int)
    return out


def build_monthly_sales(frame: pd.DataFrame) -> pd.DataFrame:
    """Aggregate ``Sales`` per (``Order_Year``, ``Order_Month``) for forecasting.

    Args:
        frame: Data carrying ``Order_Year``, ``Order_Month`` and ``Sales``.

    Returns:
        One row per year/month with the summed ``Sales`` and a ``Date``
        column set to the first day of that month, sorted by ``Date``.

    Raises:
        KeyError: If any of the three required columns is absent.
    """
    _require_columns(frame, ("Order_Year", "Order_Month", "Sales"))

    monthly = (
        frame.groupby(["Order_Year", "Order_Month"])
        .agg({"Sales": "sum"})
        .reset_index()
    )

    # Assemble the date from numeric components instead of concatenating
    # strings. When any order date is missing, year/month are floats and the
    # string form becomes "2021.0-1.0-01", which cannot be parsed. groupby
    # has already dropped the NaN keys, so the integer cast is safe here.
    parts = pd.DataFrame(
        {
            "year": monthly["Order_Year"].astype("int64"),
            "month": monthly["Order_Month"].astype("int64"),
            "day": 1,
        }
    )
    monthly["Date"] = pd.to_datetime(parts)
    return monthly.sort_values("Date")


def run_feature_engineering(
    input_path="retail_data_final.csv",
    featured_path="retail_data_featured.csv",
    monthly_path="monthly_sales.csv",
):
    """Load the cleaned dataset, add features, and save both outputs.

    Args:
        input_path: CSV to read; ``Order Date`` and ``Ship Date`` are parsed
            as dates.
        featured_path: Destination CSV for the row-level featured data.
        monthly_path: Destination CSV for the monthly sales aggregate.

    Returns:
        A ``(featured, monthly)`` tuple of DataFrames.
    """
    raw = pd.read_csv(input_path, parse_dates=["Order Date", "Ship Date"])

    # Drop irrelevant or zero-value columns; errors="ignore" makes this a
    # no-op when the column is not present.
    raw = raw.drop(columns=["Deaths_per_million"], errors="ignore")

    featured = add_order_date_features(raw)
    monthly = build_monthly_sales(featured)

    # Save updated dataset and monthly aggregates
    featured.to_csv(featured_path, index=False)
    monthly.to_csv(monthly_path, index=False)

    print("✅ Feature engineering complete. Files saved:")
    print(f"- {featured_path}")
    print(f"- {monthly_path}")
    return featured, monthly


if __name__ == "__main__":
    # Keep the results bound at module level, as the original script did,
    # so code that runs after this (e.g. later notebook cells) can use them.
    df, df_monthly = run_feature_engineering()


✅ Feature engineering complete. Files saved:
- retail_data_featured.csv
- monthly_sales.csv
