# Sales Forecasting System - Feature Engineering
This notebook builds calendar, lag/rolling and one-hot encoded features from daily sales data for use in supervised learning models.

In [None]:
import pandas as pd
import numpy as np

# Synthetic stand-in for the real input. To run on real data, load it instead:
#   df = pd.read_csv('cleaned_sales_data.csv')
dates = pd.date_range('2020-01-01', '2023-12-31')  # one row per calendar day
n_days = len(dates)

# Seed the global NumPy RNG so the demo data is reproducible. The draws below
# must stay in this exact order, otherwise the generated values change.
np.random.seed(42)
sales_values = np.clip(np.random.normal(5000, 1500, n_days), 0, None)  # floor at zero
quantity_values = np.random.randint(50, 500, n_days)  # upper bound is exclusive
price_values = np.random.uniform(10, 100, n_days)
region_values = np.random.choice(['North', 'South', 'East', 'West'], n_days)
category_values = np.random.choice(['Electronics', 'Clothing', 'Food', 'Books'], n_days)

df = pd.DataFrame({
    'date': dates,
    'sales': sales_values,
    'quantity': quantity_values,
    'price': price_values,
    'region': region_values,
    'product_category': category_values,
})


In [None]:
# Calendar features derived from the `date` column.
df['date'] = pd.to_datetime(df['date'])  # make sure the .dt accessor is available
date_parts = df['date'].dt
df['year'] = date_parts.year
df['month'] = date_parts.month
df['dayofweek'] = date_parts.dayofweek  # pandas convention: Monday=0 ... Sunday=6

# Cyclical encoding of the month: map it to an angle on a 12-step circle so
# that December (12) and January (1) end up adjacent.
month_angle = 2 * np.pi * df['month'] / 12
df['sin_month'] = np.sin(month_angle)
df['cos_month'] = np.cos(month_angle)


In [None]:
# Lag and rolling features
# Sort chronologically first: shift() and rolling() work on row position,
# so the rows must be in time order for "lag" to mean "earlier in time".
df = df.sort_values('date')

# Sales value from 7 and 30 rows earlier.
df['sales_lag_7'] = df['sales'].shift(7)
df['sales_lag_30'] = df['sales'].shift(30)

# Rolling means over the PREVIOUS 7 / 30 rows. rolling(k) on its own includes
# the current row, which would put the current row's `sales` into its own
# features (target leakage, assuming `sales` is the value being predicted).
# Shifting by one row first makes every window end at the previous row.
# The first 30 rows are still the only ones with NaN, because sales_lag_30
# is already undefined there.
past_sales = df['sales'].shift(1)
df['sales_roll_7'] = past_sales.rolling(7).mean()
df['sales_roll_30'] = past_sales.rolling(30).mean()


In [None]:
# One-hot encode the categorical columns. drop_first=True omits one level per
# column, so that level is represented by all of its dummies being zero/False.
categorical_cols = ['region', 'product_category']
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Discard every row that has a missing value in any column (e.g. leading rows
# whose lag/rolling features are undefined), then renumber the index from 0.
df = df.dropna()
df = df.reset_index(drop=True)

# Quick preview of the resulting feature table and its dimensions.
print(df.head())
print(df.shape)
