In [11]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import VotingClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report

## Load & Prepare Training Data

In [14]:
# Read the labelled training data from the working directory.
train_csv_path = 'train.csv'
df = pd.read_csv(train_csv_path)

In [16]:
# Raw input columns kept from the CSV (the target column is appended separately).
base_cols = [
    "user_id",
    "total_orders",
    "total_returns",
    "days_to_return_avg",
    "high_value_returns",
    "category_return_ratio",
    "exchange_ratio",
    "damaged_returns",
]

In [18]:
# Keep only the base columns plus the target, working on an independent copy.
selected_cols = [*base_cols, 'label']
df = df.loc[:, selected_cols].copy()

# Target as integer; identifier reduced to its last five characters.
df['label'] = df['label'].astype(int)
df['user_id'] = df['user_id'].astype(str).str[-5:]

## Feature Engineering

In [21]:
# Share of orders that were returned; the small epsilon keeps the division
# finite when total_orders is zero.
order_counts = df['total_orders'] + 1e-5
df['return_rate'] = df['total_returns'] / order_counts

# 1 when the average time-to-return is under 3, otherwise 0.
is_fast_return = df['days_to_return_avg'] < 3
df['fast_return_flag'] = is_fast_return.astype(int)

In [23]:
# Remove the identifier and the columns this notebook treats as leaky.
# errors='ignore' makes the cell safe to re-run once the columns are gone.
excluded_cols = ['user_id', 'high_value_returns', 'damaged_returns']
df = df.drop(columns=excluded_cols, errors='ignore')

In [25]:
# Ordinal-encode whichever of the candidate categorical columns are present.
# Categories not seen during fitting are mapped to -1 at transform time.
cat_cols = [col for col in ('location', 'device_fingerprint') if col in df.columns]
ordinal_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
if len(cat_cols) > 0:
    encoded_values = ordinal_encoder.fit_transform(df[cat_cols])
    df[cat_cols] = encoded_values
    # Persist the fitted encoder only when there was something to encode.
    joblib.dump(ordinal_encoder, 'ordinal_encoder.pkl')

In [27]:
# Target vector first, then every remaining column as the feature matrix.
y = df['label']
feature_names = [col for col in df.columns if col != 'label']
X = df[feature_names]


In [29]:
# Persist the feature names in training order; the test-data cell reloads this
# list to rebuild the same column layout.
feature_columns = X.columns.tolist()
joblib.dump(feature_columns, 'feature_columns.pkl')

['feature_columns.pkl']

In [31]:
# Scale features
# Fit the scaler on the training rows only. Fitting on all of X would let the
# held-out rows influence the min/max used for scaling (preprocessing leakage).
# Splitting the row positions with the same test_size / stratify / random_state
# as the train/test split cell below selects the same training rows, because
# the stratified shuffle depends only on the labels, the sample count and the
# seed. Keep these arguments in sync with that cell.
train_rows, _ = train_test_split(
    np.arange(len(X)), test_size=0.2, stratify=y, random_state=42
)
scaler = MinMaxScaler()
scaler.fit(X.iloc[train_rows])

# Transform every row so X_scaled keeps the row count the later split expects.
# Held-out values may now fall slightly outside [0, 1].
X_scaled = scaler.transform(X)
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

## Train/test split

In [34]:
# Stratified 80/20 hold-out split with a fixed seed.
split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, **split_options)

## Training

In [37]:
# Base learners for the ensemble.
# verbose=0 is passed to CatBoost (presumably to silence its training log).
cat = CatBoostClassifier(verbose=0)
# `use_label_encoder` is intentionally not passed: in the recorded run XGBoost
# reported the parameter as "not used", so it only produced a warning.
xgb = XGBClassifier(eval_metric='logloss')
lgb = LGBMClassifier()

# Fit every learner on the same training split.
cat.fit(X_train, y_train)
xgb.fit(X_train, y_train)
lgb.fit(X_train, y_train)

# Persist each fitted model. The LightGBM dump stays last so its return value
# remains the displayed cell output.
joblib.dump(cat, 'catboost_model.pkl')
joblib.dump(xgb, 'xgboost_model.pkl')
joblib.dump(lgb, 'lightgbm_model.pkl')

[LightGBM] [Info] Number of positive: 640, number of negative: 7360
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000289 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1052
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080000 -> initscore=-2.442347
[LightGBM] [Info] Start training from score -2.442347


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


['lightgbm_model.pkl']

In [38]:
# Score each fitted base learner on the hold-out split.
fitted_models = {'CatBoost': cat, 'XGBoost': xgb, 'LightGBM': lgb}
for name, model in fitted_models.items():
    preds = model.predict(X_test)
    accuracy = accuracy_score(y_test, preds)
    report = classification_report(y_test, preds)
    print(f"\n{name} Accuracy: {accuracy:.4f}")
    print(f"{name} Classification Report:\n{report}")



CatBoost Accuracy: 1.0000
CatBoost Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1840
           1       1.00      1.00      1.00       160

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000


XGBoost Accuracy: 0.9995
XGBoost Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1840
           1       1.00      0.99      1.00       160

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000


LightGBM Accuracy: 0.9995
LightGBM Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1840
           1       0.99      1.00      1.00       160

    acc



In [39]:
# Soft-voting ensemble over the three base learners.
# The recorded output shows LightGBM training again in this cell, i.e. fitting
# the ensemble re-trains its members.
base_estimators = [('cat', cat), ('xgb', xgb), ('lgb', lgb)]
ensemble = VotingClassifier(estimators=base_estimators, voting='soft')
ensemble.fit(X_train, y_train)
joblib.dump(ensemble, 'voting_ensemble_model.pkl')

[LightGBM] [Info] Number of positive: 640, number of negative: 7360
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000163 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1052
[LightGBM] [Info] Number of data points in the train set: 8000, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080000 -> initscore=-2.442347
[LightGBM] [Info] Start training from score -2.442347


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


['voting_ensemble_model.pkl']

In [40]:
# Score the voting ensemble on the same hold-out split as the base learners.
ensemble_preds = ensemble.predict(X_test)
ensemble_accuracy = accuracy_score(y_test, ensemble_preds)
ensemble_report = classification_report(y_test, ensemble_preds)
print(f"\n🧠 Voting Ensemble Accuracy: {ensemble_accuracy:.4f}")
print(f"Voting Ensemble Classification Report:\n{ensemble_report}")



🧠 Voting Ensemble Accuracy: 1.0000
Voting Ensemble Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1840
           1       1.00      1.00      1.00       160

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000





## Load & Prepare Test Data

In [42]:
# Read the separate test file from the working directory.
test_csv_path = "test.csv"
test_df = pd.read_csv(test_csv_path)

In [43]:
# Rebuild the training-time feature pipeline on the test frame.
base_cols = [
    "user_id",
    "total_orders",
    "total_returns",
    "days_to_return_avg",
    "high_value_returns",
    "category_return_ratio",
    "exchange_ratio",
    "damaged_returns",
]

# The label is optional in test.csv; remember whether it is there.
has_label = 'label' in test_df.columns
keep_cols = list(base_cols)
if has_label:
    keep_cols.append('label')
test_df = test_df[keep_cols].copy()

# Identifier reduced to its last five characters, kept aside for the results table.
test_df['user_id'] = test_df['user_id'].astype(str).str[-5:]
user_ids = test_df['user_id']

# Same engineered features as in training.
order_counts = test_df['total_orders'] + 1e-5
test_df['return_rate'] = test_df['total_returns'] / order_counts
test_df['fast_return_flag'] = (test_df['days_to_return_avg'] < 3).astype(int)

# Same column removal as in training.
test_df = test_df.drop(
    columns=['user_id', 'high_value_returns', 'damaged_returns'], errors='ignore'
)

# Apply the persisted encoder to any categorical columns that are present.
cat_cols = [col for col in ('location', 'device_fingerprint') if col in test_df.columns]
if len(cat_cols) > 0:
    ordinal_encoder = joblib.load('ordinal_encoder.pkl')
    test_df[cat_cols] = ordinal_encoder.transform(test_df[cat_cols])

if has_label:
    y_test_true = test_df['label'].astype(int)

# Align to the training feature layout: columns missing from the test frame
# are created and filled with 0, and the training column order is restored.
expected_cols = joblib.load("feature_columns.pkl")
X_test_raw = test_df.drop(columns=['label'], errors='ignore')
X_test_raw = X_test_raw.reindex(columns=expected_cols, fill_value=0)

# Scale with the scaler persisted during training.
scaler = joblib.load('scaler.pkl')
X_test_scaled = scaler.transform(X_test_raw)


In [44]:
# Reload the ensemble persisted earlier in this notebook.
ensemble_path = 'voting_ensemble_model.pkl'
ensemble = joblib.load(ensemble_path)

# Class predictions for the prepared test features.
preds = ensemble.predict(X_test_scaled)



In [45]:
# Collect the predictions next to the user identifiers.
results_df = pd.DataFrame({'user_id': user_ids, 'prediction': preds})

# Metrics and the ground-truth column are only available for labelled test data.
if has_label:
    acc = accuracy_score(y_test_true, preds)
    test_report = classification_report(y_test_true, preds)
    print(f"\n Ensemble Accuracy on test.csv: {acc:.4f}")
    print(f"Classification Report:\n{test_report}")
    results_df['True_Label'] = y_test_true

print("\n Sample Predictions:")
print(results_df.head(10))


 Ensemble Accuracy on test.csv: 0.9700
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98      2530
           1       1.00      0.81      0.89       470

    accuracy                           0.97      3000
   macro avg       0.98      0.90      0.94      3000
weighted avg       0.97      0.97      0.97      3000


 Sample Predictions:
  user_id  prediction  True_Label
0   00028           0           0
1   00830           0           0
2   00501           0           0
3   01967           0           0
4   01636           0           0
5   02444           0           0
6   00148           1           1
7   01197           0           0
8   00012           0           0
9   01622           0           0
