In [None]:
# -----------------------------
# 1️⃣ Imports
# -----------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_absolute_error

# -----------------------------
# 2️⃣ Load dataset
# -----------------------------
# NOTE(review): the directory name 'p3rocessed' looks like a typo for
# 'processed' — confirm the path before relying on this cell.
df = pd.read_csv('../data/p3rocessed/data.csv')

# Encode categorical features as integer codes.
# `.cat.codes` maps missing values to -1, so no NaNs remain in these columns.
for col in ['Gender', 'Income', 'Customer_Segment', 'Product_Category',
            'Shipping_Method', 'Payment_Method']:
    df[col] = df[col].astype('category').cat.codes

# Extract temporal features
df['Date'] = pd.to_datetime(df['Date'])
df['Day'] = df['Date'].dt.day
df['Weekday'] = df['Date'].dt.weekday  # Monday=0 ... Sunday=6
# Parse 'Time' with an explicit format instead of letting pandas infer it:
# inference on time-only strings can fall back to slow element-wise parsing,
# and the LightGBM cell of this notebook already parses 'Time' this way.
# Assumes 'Time' is formatted as HH:MM:SS — TODO confirm against the CSV.
df['Hour'] = pd.to_datetime(df['Time'], format='%H:%M:%S').dt.hour

# -----------------------------
# 3️⃣ Select predictors and target
# -----------------------------
# NOTE(review): 'Total_Amount' and 'Amount' are used to predict
# 'Total_Purchases'; if Total_Amount is derived from the purchase count this
# leaks the target — verify against the dataset description.
feature_cols = [
    'Year', 'Month', 'Day', 'Weekday', 'Hour',
    'Product_Category', 'Customer_Segment',
    'Shipping_Method', 'Payment_Method',
    'Amount', 'Total_Amount', 'Ratings', 'Age',
]
X = df[feature_cols]
y = df['Total_Purchases']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# -----------------------------
# 4️⃣ Train Random Forest Regressor
# -----------------------------
# n_jobs=-1 builds the 500 trees on all available cores. scikit-learn's
# default (n_jobs=None) trains them one at a time; with a fixed random_state
# the fitted forest is the same either way, only faster.
rf = RandomForestRegressor(
    n_estimators=500,
    max_depth=12,
    random_state=42,
    n_jobs=-1,
)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Report hold-out performance, rounded to three decimals.
print("Random Forest Regressor")
print("R² Score:", round(r2_score(y_test, y_pred_rf), 3))
print("MAE:", round(mean_absolute_error(y_test, y_pred_rf), 3))

# -----------------------------
# 5️⃣ Train XGBoost Regressor
# -----------------------------
# Gradient-boosted trees fitted on the same train split as the forest above.
xgb = XGBRegressor(
    n_estimators=500,
    max_depth=6,
    learning_rate=0.1,
    random_state=42,
)
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)

# Hold-out metrics, rounded to three decimals for display.
r2_xgb = r2_score(y_test, y_pred_xgb)
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
print("\nXGBoost Regressor")
print("R² Score:", round(r2_xgb, 3))
print("MAE:", round(mae_xgb, 3))


In [None]:
# -----------------------------
# 1️⃣ Imports
# -----------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
import lightgbm as lgb

# -----------------------------
# 2️⃣ Load dataset
# -----------------------------
# NOTE(review): the directory name 'p4rocessed' looks like a typo for
# 'processed' — confirm the path before relying on this cell.
df = pd.read_csv('../data/p4rocessed/data.csv')

# Replace each categorical column with its integer category codes
# (`.cat.codes` uses -1 for missing values).
categorical_cols = [
    'Gender',
    'Income',
    'Customer_Segment',
    'Product_Category',
    'Shipping_Method',
    'Payment_Method',
]
for column in categorical_cols:
    as_category = df[column].astype('category')
    df[column] = as_category.cat.codes

# Calendar features derived from the parsed 'Date' column.
df['Date'] = pd.to_datetime(df['Date'])
date_parts = df['Date'].dt
df['Day'] = date_parts.day
df['Weekday'] = date_parts.weekday  # Monday=0 ... Sunday=6

# Hour of day taken from the 'Time' column, parsed as HH:MM:SS.
parsed_time = pd.to_datetime(df['Time'], format='%H:%M:%S')
df['Hour'] = parsed_time.dt.hour

# -----------------------------
# 3️⃣ Rolling features (mean of the previous 7 purchases per Product_Category)
# -----------------------------
# The window is row-based: it covers up to 7 earlier observations of the same
# Product_Category in Date order, not 7 calendar days.
# shift(1) moves the window back by one row so a row's own Total_Purchases
# (the prediction target) is never part of its feature value. Without it the
# rolling mean contains the target itself.
# The first row of each category has no history and gets NaN, which LightGBM
# handles natively as a missing value.
# NOTE(review): with a random train/test split, neighbouring rows' targets can
# still cross the split through this feature — consider a chronological split.
df = df.sort_values('Date')
df['Rolling_7d'] = (
    df.groupby('Product_Category')['Total_Purchases']
      .transform(lambda s: s.shift(1).rolling(7, min_periods=1).mean())
)

# -----------------------------
# 4️⃣ Interaction feature
# -----------------------------
# One integer id per distinct (Customer_Segment, Product_Category) pair.
# Multiplying the two integer codes does not identify the pair: every pair
# with a 0 code collapses to 0, swapped codes such as (1, 2) and (2, 1)
# collide, and the small integer dtype produced by `.cat.codes` can overflow.
# ngroup() numbers the groups in sorted key order, so the ids are stable for a
# given dataset.
df['Segment_Product'] = (
    df.groupby(['Customer_Segment', 'Product_Category']).ngroup()
)

# -----------------------------
# 5️⃣ Predictors and target
# -----------------------------
# Base columns plus the two engineered features built earlier in this cell.
feature_cols = [
    'Year', 'Month', 'Day', 'Weekday', 'Hour',
    'Product_Category', 'Customer_Segment',
    'Shipping_Method', 'Payment_Method',
    'Amount', 'Total_Amount', 'Ratings', 'Age',
    'Rolling_7d', 'Segment_Product',
]
X = df[feature_cols]
y = df['Total_Purchases']

# Train/test split: 20% of the rows held out, seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# -----------------------------
# 6️⃣ Train LightGBM Regressor
# -----------------------------
# Fit on the training split, then predict the held-out rows.
lgbm = lgb.LGBMRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=10,
    random_state=42,
)
lgbm.fit(X_train, y_train)
y_pred = lgbm.predict(X_test)

# Metrics on the hold-out set, rounded to three decimals for display.
r2_lgbm = r2_score(y_test, y_pred)
mae_lgbm = mean_absolute_error(y_test, y_pred)
print("LightGBM Regressor")
print("R² Score:", round(r2_lgbm, 3))
print("MAE:", round(mae_lgbm, 3))
