In [10]:
import os

import joblib
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold, cross_val_predict
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

In [2]:
# --- Load Data ---
# Read the processed table, then separate the 'success' column (target)
# from every remaining column (features).
df = pd.read_csv('../data/processed/ecommerce_sales_with_embeddings.csv')
y = df['success']
X = df.drop('success', axis=1)

In [3]:
# --- Train-Test Split ---
# Hold out 20% of the rows for evaluation. `stratify=y` keeps the label
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [4]:
# --- Base Model 1: XGBoost ---
# Gradient-boosted tree classifier fitted on the training split.
xgb_model = xgb.XGBClassifier(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
)
xgb_model.fit(X_train, y_train)

# Keep column 1 of predict_proba (the second entry of classes_) for each split.
xgb_preds_train, xgb_preds_test = (
    xgb_model.predict_proba(split)[:, 1] for split in (X_train, X_test)
)

In [5]:
# --- Base Model 2: Neural Network (MLP) ---
# Two hidden layers (256 then 128 units), ReLU activations, Adam optimiser.
# NOTE(review): max_iter=20 is a small budget for the 'adam' solver and the
# features are passed in unscaled - confirm the fit converges (scikit-learn
# emits a ConvergenceWarning when the iteration limit is reached first).
mlp = MLPClassifier(
    hidden_layer_sizes=(256, 128),
    activation='relu',
    solver='adam',
    max_iter=20,
    random_state=42,
)
mlp.fit(X_train, y_train)

# Keep column 1 of predict_proba (the second entry of classes_) for each split.
mlp_preds_train, mlp_preds_test = (
    mlp.predict_proba(split)[:, 1] for split in (X_train, X_test)
)



In [6]:
# --- Stacking: Meta Model ---
# The meta-learner has to be trained on base-model predictions for rows the
# base models did NOT see while fitting. In-sample predictions on X_train are
# optimistically accurate, so a meta-model fitted on them learns to over-trust
# the base models and generalises poorly (target leakage).
#
# cross_val_predict clones each estimator, refits the clone on k-1 folds and
# predicts the held-out fold, so every training row gets an out-of-fold
# probability. The already-fitted xgb_model / mlp objects are left untouched.
oof_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
xgb_oof_train = cross_val_predict(
    xgb_model, X_train, y_train, cv=oof_cv, method='predict_proba'
)[:, 1]
mlp_oof_train = cross_val_predict(
    mlp, X_train, y_train, cv=oof_cv, method='predict_proba'
)[:, 1]

# Meta features: one column per base model.
# Train side: out-of-fold probabilities.
# Test side: probabilities from the base models fitted on the full training split.
stack_train = np.column_stack((xgb_oof_train, mlp_oof_train))
stack_test = np.column_stack((xgb_preds_test, mlp_preds_test))

meta_model = LogisticRegression()
meta_model.fit(stack_train, y_train)

# Hard labels and column-1 probabilities for the held-out test rows.
final_preds = meta_model.predict(stack_test)
final_proba = meta_model.predict_proba(stack_test)[:, 1]

In [7]:
# --- Metrics ---
# Score the stacked ensemble on the held-out test split.
# accuracy and F1 use the hard label predictions; ROC AUC uses the
# predicted probabilities so that it reflects ranking quality.
acc = accuracy_score(y_test, final_preds)
f1 = f1_score(y_test, final_preds)
roc_auc = roc_auc_score(y_test, final_proba)

# Report the scores so the evaluation result is recorded in the notebook
# output rather than computed and silently discarded.
print(f"Accuracy: {acc:.4f} | F1: {f1:.4f} | ROC AUC: {roc_auc:.4f}")

In [11]:
# --- Save Models ---
# Every artifact path is derived from `models_dir`, so changing that one
# value relocates both the created directories and the written files.
models_dir = '../models'
model_artifacts = {
    os.path.join(models_dir, 'xgboost', 'xgb_model.pkl'): xgb_model,
    os.path.join(models_dir, 'nn', 'mlp_model.pkl'): mlp,
    os.path.join(models_dir, 'meta', 'meta_model.pkl'): meta_model,
}

for artifact_path, fitted_model in model_artifacts.items():
    # Create the target directory if it doesn't exist yet.
    os.makedirs(os.path.dirname(artifact_path), exist_ok=True)
    joblib.dump(fitted_model, artifact_path)

print("✅ All models saved for deployment.")

✅ All models saved for deployment.
