In [1]:
import pandas as pd
import numpy as np

In [3]:
# --- Load Preprocessed Data ---
# Read the preprocessed e-commerce sales table; the engineered features in the
# following cells are added as new columns on this frame.
processed_csv = '../data/processed/ecommerce_sales_preprocessed.csv'
df = pd.read_csv(processed_csv)

In [4]:
# --- Load Raw Data to Get Original Columns (for monthly sales) ---
raw_df = pd.read_csv('../data/raw/ecommerce_sales.csv')

# Keep every raw column whose name contains 'sales_month', in the order the
# columns appear in the CSV.
# NOTE(review): the trend feature later slices the first/last three entries of
# this list, so it presumably expects chronological column order — confirm.
sales_cols = list(filter(lambda name: 'sales_month' in name, raw_df.columns))

In [5]:
# Merge raw sales with processed features
# The assignments below pair rows of `raw_df` with rows of `df` by index label,
# and the two frames were loaded from separate CSV files. If their indexes
# differ (e.g. preprocessing dropped rows), pandas would silently reindex and
# attach NaN / wrong-row sales values, so fail fast with a clear message.
if not df.index.equals(raw_df.index):
    raise ValueError(
        "Processed and raw data are not row-aligned "
        f"({len(df)} processed rows vs {len(raw_df)} raw rows); "
        "cannot attach the monthly sales columns."
    )
df[sales_cols] = raw_df[sales_cols]

# --- Feature: Sales Variability ---
# Row-wise standard deviation across the monthly sales columns
# (pandas default: sample std, ddof=1).
df['sales_variability'] = raw_df[sales_cols].std(axis=1)

In [6]:
# --- Feature: Sales Trend (growth ratio) ---
# Ratio of the mean of the last three monthly sales columns to the mean of the
# first three, computed per row.
first_3_months = raw_df.loc[:, sales_cols[:3]].mean(axis=1)
last_3_months = raw_df.loc[:, sales_cols[-3:]].mean(axis=1)

growth_ratio = last_3_months / first_3_months
has_positive_baseline = first_3_months > 0

# Rows whose early average is not strictly positive (zero, negative or NaN)
# get the neutral value 1 instead of the ratio.
df['sales_trend'] = np.where(has_positive_baseline, growth_ratio, 1)

In [7]:
# --- Feature: Price Bucket ---
# Split prices into three bands at the 0.33 and 0.66 quantiles of the raw
# price column; the outer edges are open-ended so every non-missing price
# falls into a band.
lower_cut, upper_cut = raw_df['price'].quantile([0.33, 0.66])
price_bins = [-np.inf, lower_cut, upper_cut, np.inf]
price_labels = ['Low', 'Medium', 'High']

# pd.cut uses right-closed intervals by default, so a price equal to a cut
# point lands in the lower band.
df['price_bucket'] = pd.cut(x=raw_df['price'], bins=price_bins, labels=price_labels)

In [8]:
# One-hot encode price bucket
# `drop_first=True` drops the first category's indicator column and keeps the
# remaining 'price_bucket_<label>' columns.
# NOTE(review): rows with a missing price_bucket get all-zero indicators and so
# look identical to the dropped first category — confirm that is intended.
#
# get_dummies consumes 'price_bucket' and this cell rebinds `df`, so re-running
# the cell as originally written raised KeyError. Skip the encoding when it has
# already been applied, but still fail loudly when neither the source column
# nor its indicator columns are present.
if 'price_bucket' in df.columns:
    df = pd.get_dummies(df, columns=['price_bucket'], drop_first=True)
elif not any(str(col).startswith('price_bucket_') for col in df.columns):
    raise KeyError("'price_bucket' column not found; run the Price Bucket cell first")

# --- Save Enhanced Dataset ---
# index=False keeps the row index out of the written CSV.
df.to_csv('../data/processed/ecommerce_sales_featured.csv', index=False)