In [9]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import VotingClassifier

from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

import matplotlib.pyplot as plt

# === 1. Load & Prepare Training Data ===
df = pd.read_csv('train.csv')

# Select only the required base columns
base_cols = [
    "user_id", "total_orders", "total_returns", "days_to_return_avg",
    "high_value_returns", "category_return_ratio", "exchange_ratio", "damaged_returns"
]

df = df[base_cols + ['label']].copy()
df['user_id'] = df['user_id'].astype(str).str[-5:]
df['label'] = df['label'].astype(int)

# === 2. Feature Engineering ===
df['return_rate'] = df['total_returns'] / (df['total_orders'] + 1e-5)
df['fast_return_flag'] = (df['days_to_return_avg'] < 3).astype(int)

# Drop leaky features
df.drop(columns=['user_id', 'high_value_returns', 'damaged_returns'], inplace=True, errors='ignore')

# Encode categoricals (if any)
cat_cols = ['location', 'device_fingerprint']
cat_cols = [col for col in cat_cols if col in df.columns]  # Handle absence
ordinal_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
if cat_cols:
    df[cat_cols] = ordinal_encoder.fit_transform(df[cat_cols])
    joblib.dump(ordinal_encoder, 'ordinal_encoder.pkl')

# Define features and target
X = df.drop(columns=['label'])
y = df['label']

joblib.dump(X.columns.tolist(), 'feature_columns.pkl')

# Scale features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
joblib.dump(scaler, 'scaler.pkl')

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, stratify=y, random_state=42
)

# === 3. Initialize Models ===
cat = CatBoostClassifier(verbose=0)
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
lgb = LGBMClassifier()

# Fit individual models
cat.fit(X_train, y_train)
xgb.fit(X_train, y_train)
lgb.fit(X_train, y_train)

# Save individual models (optional)
joblib.dump(cat, 'catboost_model.pkl')
joblib.dump(xgb, 'xgboost_model.pkl')
joblib.dump(lgb, 'lightgbm_model.pkl')

# Evaluate individual models
for name, model in zip(['CatBoost', 'XGBoost', 'LightGBM'], [cat, xgb, lgb]):
    preds = model.predict(X_test)
    print(f"\n{name} Accuracy: {accuracy_score(y_test, preds):.4f}")
    print(f"{name} Classification Report:\n{classification_report(y_test, preds)}")

# === 4. Voting Ensemble ===
ensemble = VotingClassifier(
    estimators=[('cat', cat), ('xgb', xgb), ('lgb', lgb)],
    voting='soft'
)
ensemble.fit(X_train, y_train)
joblib.dump(ensemble, 'voting_ensemble_model.pkl')

# Evaluate ensemble
ensemble_preds = ensemble.predict(X_test)
print(f"\n🧠 Voting Ensemble Accuracy: {accuracy_score(y_test, ensemble_preds):.4f}")
print(f"Voting Ensemble Classification Report:\n{classification_report(y_test, ensemble_preds)}")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 640, number of negative: 7360
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000136 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1052
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080000 -> initscore=-2.442347
[LightGBM] [Info] Start training from score -2.442347

CatBoost Accuracy: 1.0000
CatBoost Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1840
           1       1.00      1.00      1.00       160

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000


XGBoost Accuracy: 0.9995
XGBoost Classification Report:
              precision    recall  f1-score   support

           0 

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [10]:
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import accuracy_score, classification_report

# === 1. Load & Prepare Test Data ===
test_df = pd.read_csv("test.csv")

# Select same base columns
base_cols = [
    "user_id", "total_orders", "total_returns", "days_to_return_avg",
    "high_value_returns", "category_return_ratio", "exchange_ratio", "damaged_returns"
]
test_df = test_df[base_cols + (['label'] if 'label' in test_df.columns else [])].copy()
test_df['user_id'] = test_df['user_id'].astype(str).str[-5:]

# Feature engineering
test_df['return_rate'] = test_df['total_returns'] / (test_df['total_orders'] + 1e-5)
test_df['fast_return_flag'] = (test_df['days_to_return_avg'] < 3).astype(int)

# Save user IDs
user_ids = test_df['user_id']

# Drop leaky features
test_df.drop(columns=['user_id', 'high_value_returns', 'damaged_returns'], inplace=True, errors='ignore')

# Load and apply encoder
cat_cols = ['location', 'device_fingerprint']
cat_cols = [col for col in cat_cols if col in test_df.columns]
if cat_cols:
    ordinal_encoder = joblib.load('ordinal_encoder.pkl')
    test_df[cat_cols] = ordinal_encoder.transform(test_df[cat_cols])

# Extract true labels if available
has_label = 'label' in test_df.columns
if has_label:
    y_test_true = test_df['label'].astype(int)

# Prepare features
X_test_raw = test_df.drop(columns=['label'], errors='ignore')

# === Fix: Align columns with training ===
expected_cols = joblib.load("feature_columns.pkl")

# Add missing columns
for col in expected_cols:
    if col not in X_test_raw.columns:
        X_test_raw[col] = 0

# Reorder columns
X_test_raw = X_test_raw[expected_cols]

# Load scaler and scale
scaler = joblib.load('scaler.pkl')
X_test_scaled = scaler.transform(X_test_raw)

# Load ensemble model
ensemble = joblib.load('voting_ensemble_model.pkl')

# Predict
preds = ensemble.predict(X_test_scaled)

# Evaluate if ground truth exists
if has_label:
    acc = accuracy_score(y_test_true, preds)
    print(f"\n✅ Ensemble Accuracy on test.csv: {acc:.4f}")
    print(f"Classification Report:\n{classification_report(y_test_true, preds)}")

# Save predictions
results_df = pd.DataFrame({
    'user_id': user_ids,
    'prediction': preds
})

if has_label:
    results_df['True_Label'] = y_test_true

print("\n🔍 Sample Predictions:")
print(results_df.head(10))

# Optional: Save
# results_df.to_csv("final_test_predictions.csv", index=False)





✅ Ensemble Accuracy on test.csv: 0.9700
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98      2530
           1       1.00      0.81      0.89       470

    accuracy                           0.97      3000
   macro avg       0.98      0.90      0.94      3000
weighted avg       0.97      0.97      0.97      3000


🔍 Sample Predictions:
  user_id  prediction  True_Label
0   00028           0           0
1   00830           0           0
2   00501           0           0
3   01967           0           0
4   01636           0           0
5   02444           0           0
6   00148           1           1
7   01197           0           0
8   00012           0           0
9   01622           0           0
