In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
import xgboost as xgb

In [3]:
# --- Load Combined Dataset ---
# The path is relative, so it resolves against the notebook's working directory.
data_path = '../data/processed/ecommerce_sales_with_embeddings.csv'
df = pd.read_csv(data_path)

In [4]:
# --- Split Features and Target ---
# The 'success' column is the label; every remaining column of df is kept as a feature.
y = df['success']
X = df.drop(columns='success')

In [5]:
# --- Train-Test Split ---
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions aligned between the two partitions, and the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [6]:
# --- XGBoost Model ---
# Hyperparameters are collected in one mapping so they can be reviewed (or
# tweaked) in a single place before the estimator is built.
xgb_params = dict(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    eval_metric='logloss',
)
model = xgb.XGBClassifier(**xgb_params)


In [7]:
# --- Fit ---
# Train the classifier on the training partition only.
model.fit(X_train, y_train)

# --- Predictions ---
# predict_proba returns one column per class; column 1 is kept as the score
# for the second class (presumably the positive 'success' label -- confirm
# the label encoding against the data).
test_class_probs = model.predict_proba(X_test)
y_proba = test_class_probs[:, 1]

# Hard class labels for the same held-out rows.
y_pred = model.predict(X_test)

In [8]:
# --- Metrics ---
# Accuracy and F1 are computed from the hard labels (y_pred); ROC AUC is
# computed from the predicted scores (y_proba).
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_proba)

# End the cell with the scores so they are rendered as the cell's output
# instead of being computed and silently discarded.
pd.Series({'accuracy': acc, 'f1': f1, 'roc_auc': roc_auc}, name='test_score')

In [10]:
# --- Save Model ---
import os
import joblib

# Where the serialized classifier is written.
models_dir = '../models/xgboost'
model_path = os.path.join(models_dir, 'xgb_model.pkl')

# Ensure the output directory is present; an existing directory is not an error.
os.makedirs(models_dir, exist_ok=True)

# Persist the fitted model. joblib.dump is the last expression, so its return
# value (the list of written file names) is shown as the cell output.
joblib.dump(model, model_path)


['../models/xgboost\\xgb_model.pkl']