# B5W8: Fraud Detection - Model Building and Evaluation

**Interim 2 Submission**

This notebook focuses on:
1. Model building and training
2. Handling class imbalance
3. Model evaluation with appropriate metrics
4. SHAP explainability implementation

**Author:** Kirubel Gizaw  
**Challenge:** B5W8 — Tenx Platform


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    classification_report, confusion_matrix, roc_auc_score, 
    precision_recall_curve, average_precision_score, f1_score
)

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

import shap
import xgboost as xgb
import lightgbm as lgb

# Global plotting appearance for every figure in the notebook.
# NOTE(review): both calls below write matplotlib global settings, so their
# relative order is kept as-is — confirm before reordering.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Simple marker so the reader can see the import cell finished without errors.
print("All libraries imported successfully!")


All libraries imported successfully!


In [2]:
# Read the three raw CSV inputs, keyed by the label used when reporting shapes.
raw_frames = {
    "Fraud Data": pd.read_csv('../data/Fraud_Data.csv'),
    "IP Country Data": pd.read_csv('../data/IpAddress_to_Country.csv'),
    "Credit Card Data": pd.read_csv('../data/creditcard.csv'),
}
fraud_data, ip_country_data, credit_data = raw_frames.values()

# Quick sanity check: one "<label> shape: (rows, cols)" line per dataset.
for frame_label, frame in raw_frames.items():
    print(f"{frame_label} shape: {frame.shape}")


Fraud Data shape: (151112, 11)
IP Country Data shape: (138846, 3)
Credit Card Data shape: (284807, 31)


In [None]:
# Parse both timestamp columns so datetime arithmetic and the .dt accessor work.
for timestamp_column in ('signup_time', 'purchase_time'):
    fraud_data[timestamp_column] = pd.to_datetime(fraud_data[timestamp_column])

purchase_timestamps = fraud_data['purchase_time']
signup_to_purchase = purchase_timestamps - fraud_data['signup_time']

# Time-based features: seconds between signup and purchase, plus the hour and
# weekday of the purchase.
fraud_data['time_since_signup'] = signup_to_purchase.dt.total_seconds()
fraud_data['hour_of_day'] = purchase_timestamps.dt.hour
fraud_data['day_of_week'] = purchase_timestamps.dt.dayofweek

def ip_to_int(ip):
    """Convert an IPv4 address to its 32-bit integer representation.

    Two input forms are accepted:

    * a dotted-quad string such as ``"192.168.1.1"``;
    * a number that is already in integer form (``int`` or ``float``); the
      fractional part, if any, is truncated.

    NOTE(review): the range table this value is compared against stores its
    bounds as numbers; if the ``ip_address`` column is numeric as well, a
    string-only parser maps every row to -1 — confirm against the CSV.

    Parameters
    ----------
    ip : str | int | float | None
        Address to convert.

    Returns
    -------
    int
        The address as an integer in ``[0, 2**32 - 1]``, or ``-1`` when the
        input is missing, malformed, or out of the IPv4 range.
    """
    max_ipv4 = 0xFFFFFFFF

    if isinstance(ip, str):
        octets = ip.split('.')
        if len(octets) != 4:
            return -1
        try:
            octet_values = [int(octet) for octet in octets]
        except ValueError:
            return -1  # a part was not an integer
        # Each octet must fit in one byte; otherwise the positional sum below
        # would silently produce an address that was never written.
        if any(not 0 <= value <= 255 for value in octet_values):
            return -1
        packed = 0
        for value in octet_values:
            packed = packed * 256 + value
        return packed

    # bool is a subclass of int but is never a meaningful address.
    if isinstance(ip, bool):
        return -1

    try:
        numeric = float(ip)
    except (TypeError, ValueError, OverflowError):
        return -1  # None or any other non-numeric object

    # NaN fails both comparisons, so missing values also land here.
    if not 0 <= numeric <= max_ipv4:
        return -1
    return int(numeric)

# Element-wise conversion of every address to its integer form.
fraud_data['ip_int'] = fraud_data['ip_address'].map(ip_to_int)


def find_country(ip_int, ip_ranges):
    """Return the country whose IP range contains ``ip_int``.

    The range check is evaluated for all rows at once with vectorised column
    comparisons instead of a Python-level ``iterrows`` scan, which matters
    because this function is called once per transaction.

    Parameters
    ----------
    ip_int : int | float
        Integer form of the address to look up.
    ip_ranges : pandas.DataFrame
        Must provide ``lower_bound_ip_address``, ``upper_bound_ip_address``
        and ``country`` columns; both bounds are inclusive.

    Returns
    -------
    str
        ``country`` of the first row (in frame order) whose range contains
        ``ip_int``, or ``'Unknown'`` when no row matches.
    """
    in_range = (
        (ip_ranges['lower_bound_ip_address'] <= ip_int)
        & (ip_ranges['upper_bound_ip_address'] >= ip_int)
    ).to_numpy()

    if not in_range.any():
        return 'Unknown'

    # argmax on a boolean array gives the position of the first True, which
    # keeps the "first matching row wins" behaviour for overlapping ranges.
    return ip_ranges['country'].iloc[int(in_range.argmax())]

# Look up the country for every transaction; the range table is forwarded to
# find_country as its second positional argument.
fraud_data['country'] = fraud_data['ip_int'].apply(find_country, args=(ip_country_data,))


In [None]:
# Raw model inputs; the categorical ones are still un-encoded at this point.
ecommerce_features = [
    'purchase_value', 'time_since_signup', 'hour_of_day', 'day_of_week',
    'age', 'source', 'browser', 'sex', 'country'
]

# One independent encoder per categorical column.
le_source, le_browser, le_sex, le_country = (LabelEncoder() for _ in range(4))
categorical_encoders = {
    'source': le_source,
    'browser': le_browser,
    'sex': le_sex,
    'country': le_country,
}

# Integer-encode each categorical column into a sibling "<name>_encoded" column.
for raw_column, encoder in categorical_encoders.items():
    fraud_data[f'{raw_column}_encoded'] = encoder.fit_transform(fraud_data[raw_column])

# Final feature list: same order as above, categoricals swapped for their
# encoded counterparts.
ecommerce_final_features = [
    f'{column}_encoded' if column in categorical_encoders else column
    for column in ecommerce_features
]

y_ecommerce = fraud_data['class']
X_ecommerce = fraud_data[ecommerce_final_features]

print(f"E-commerce features shape: {X_ecommerce.shape}")
print(f"E-commerce target distribution:\n{y_ecommerce.value_counts(normalize=True)}")


In [None]:
# Every column except the timestamp and the label is used as a model input.
non_feature_columns = ('Time', 'Class')
credit_features = list(
    filter(lambda column: column not in non_feature_columns, credit_data.columns)
)

y_credit = credit_data['Class']
X_credit = credit_data[credit_features]

print(f"Credit card features shape: {X_credit.shape}")
print(f"Credit card target distribution:\n{y_credit.value_counts(normalize=True)}")


In [None]:
# Resamplers for the class imbalance, both seeded for reproducibility.
# `smote` is applied to the training partitions further down; `rus` is
# created here but not referenced elsewhere in this notebook.
rus = RandomUnderSampler(random_state=42)
smote = SMOTE(random_state=42)

print("Sampling techniques initialized!")


In [None]:
# Candidate classifiers keyed by display name. All share one seed so repeated
# runs give comparable results.
model_seed = 42
models = dict([
    ('Logistic Regression', LogisticRegression(max_iter=1000, random_state=model_seed)),
    ('Random Forest', RandomForestClassifier(n_estimators=100, random_state=model_seed)),
    ('XGBoost', xgb.XGBClassifier(eval_metric='logloss', random_state=model_seed)),
    ('LightGBM', lgb.LGBMClassifier(verbose=-1, random_state=model_seed)),
])

print("Models initialized!")


In [None]:
def evaluate_model(y_true, y_pred, y_prob, model_name, dataset_name):
    """Score one classifier on one dataset with imbalance-aware metrics.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Hard class predictions.
    y_prob : array-like
        Scores for the positive class, used by the ranking metrics.
    model_name, dataset_name : str
        Identifiers copied verbatim into the result.

    Returns
    -------
    dict
        Keys ``model``, ``dataset``, ``auc_roc``, ``auc_pr``, ``f1_score``,
        ``precision``, ``recall`` (both read from the ``'1'`` entry of the
        classification report) and ``confusion_matrix``.
    """
    summary = {'model': model_name, 'dataset': dataset_name}

    # Ranking metrics work on scores; the rest work on hard predictions.
    summary['auc_roc'] = roc_auc_score(y_true, y_prob)
    summary['auc_pr'] = average_precision_score(y_true, y_prob)
    summary['f1_score'] = f1_score(y_true, y_pred)

    per_class_report = classification_report(y_true, y_pred, output_dict=True)
    matrix = confusion_matrix(y_true, y_pred)

    positive_class = per_class_report['1']
    summary['precision'] = positive_class['precision']
    summary['recall'] = positive_class['recall']
    summary['confusion_matrix'] = matrix
    return summary

print("Evaluation function defined!")


In [None]:
# Hold out 20% for testing, stratified on the label.
X_train_ecom, X_test_ecom, y_train_ecom, y_test_ecom = train_test_split(
    X_ecommerce,
    y_ecommerce,
    test_size=0.2,
    stratify=y_ecommerce,
    random_state=42,
)

# Standardise with statistics learned from the training partition only.
scaler_ecom = StandardScaler().fit(X_train_ecom)
X_train_ecom_scaled = scaler_ecom.transform(X_train_ecom)
X_test_ecom_scaled = scaler_ecom.transform(X_test_ecom)

# Oversample the training partition only; the test partition is left untouched.
X_train_ecom_balanced, y_train_ecom_balanced = smote.fit_resample(
    X_train_ecom_scaled, y_train_ecom
)

# Fit every candidate on the balanced data and score it on the held-out split.
ecommerce_results = []

for model_name, estimator in models.items():
    print(f"Training {model_name} on e-commerce data...")
    estimator.fit(X_train_ecom_balanced, y_train_ecom_balanced)
    predicted_labels = estimator.predict(X_test_ecom_scaled)
    fraud_scores = estimator.predict_proba(X_test_ecom_scaled)[:, 1]
    metrics = evaluate_model(y_test_ecom, predicted_labels, fraud_scores, model_name, 'E-commerce')
    ecommerce_results.append(metrics)
    print(f"{model_name} - AUC-ROC: {metrics['auc_roc']:.4f}, F1: {metrics['f1_score']:.4f}")


In [None]:
# Hold out 20% for testing, stratified on the label.
X_train_credit, X_test_credit, y_train_credit, y_test_credit = train_test_split(
    X_credit,
    y_credit,
    test_size=0.2,
    stratify=y_credit,
    random_state=42,
)

# Standardise with statistics learned from the training partition only.
scaler_credit = StandardScaler().fit(X_train_credit)
X_train_credit_scaled = scaler_credit.transform(X_train_credit)
X_test_credit_scaled = scaler_credit.transform(X_test_credit)

# Oversample the training partition only; the test partition is left untouched.
X_train_credit_balanced, y_train_credit_balanced = smote.fit_resample(
    X_train_credit_scaled, y_train_credit
)

# Fit every candidate on the balanced data and score it on the held-out split.
# Note: these are the same estimator objects used for the e-commerce data, so
# after this loop each one is fitted on credit-card data.
credit_results = []

for model_name, estimator in models.items():
    print(f"Training {model_name} on credit card data...")
    estimator.fit(X_train_credit_balanced, y_train_credit_balanced)
    predicted_labels = estimator.predict(X_test_credit_scaled)
    fraud_scores = estimator.predict_proba(X_test_credit_scaled)[:, 1]
    metrics = evaluate_model(y_test_credit, predicted_labels, fraud_scores, model_name, 'Credit Card')
    credit_results.append(metrics)
    print(f"{model_name} - AUC-ROC: {metrics['auc_roc']:.4f}, F1: {metrics['f1_score']:.4f}")


In [None]:
results_df = pd.DataFrame(ecommerce_results + credit_results)

dataset_names = ['E-commerce', 'Credit Card']
# Model order as first seen in the results, without duplicates.
model_names = list(dict.fromkeys(results_df['model']))
group_positions = np.arange(len(model_names))
# Split a fixed group width between the datasets so their bars sit side by
# side instead of being drawn on top of each other.
bar_width = 0.8 / len(dataset_names)


def plot_grouped_metric(ax, metric, title):
    """Draw one bar group per model and one bar per dataset for ``metric``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on.
    metric : str
        Column of ``results_df`` holding the score to plot.
    title : str
        Title of the panel.
    """
    for slot, dataset in enumerate(dataset_names):
        data = results_df[results_df['dataset'] == dataset]
        # Align scores to the shared model order; a model missing for this
        # dataset becomes NaN and simply draws no bar.
        heights = data.set_index('model')[metric].reindex(model_names)
        ax.bar(
            group_positions + slot * bar_width,
            heights.to_numpy(),
            width=bar_width,
            label=dataset,
            alpha=0.7,
        )
    # Centre each tick under its group of bars.
    ax.set_xticks(group_positions + bar_width * (len(dataset_names) - 1) / 2)
    ax.set_xticklabels(model_names, rotation=15)
    ax.set_ylabel('Score')
    ax.set_title(title)
    ax.legend()


fig, axes = plt.subplots(2, 2, figsize=(15, 12))

plot_grouped_metric(axes[0, 0], 'auc_roc', 'AUC-ROC Score Comparison')
plot_grouped_metric(axes[0, 1], 'f1_score', 'F1 Score Comparison')

# Precision vs Recall: one point per model, coloured by dataset.
for dataset in dataset_names:
    data = results_df[results_df['dataset'] == dataset]
    axes[1, 0].scatter(data['precision'], data['recall'], s=100, alpha=0.7, label=dataset)
axes[1, 0].set_xlabel('Precision')
axes[1, 0].set_ylabel('Recall')
axes[1, 0].set_title('Precision vs Recall')
axes[1, 0].legend()

plot_grouped_metric(axes[1, 1], 'auc_pr', 'AUC-PR Score Comparison')

plt.tight_layout()
plt.show()


In [None]:
# Pick the model with the highest F1 score for each dataset.
# idxmax() returns an index *label*, so the lookup must use label-based .loc.
# Positional .iloc only works by coincidence for the first dataset (labels
# 0-3) and is out of bounds for the Credit Card rows, whose labels continue
# after the e-commerce rows in results_df.
ecom_scores = results_df[results_df['dataset'] == 'E-commerce']
best_ecom_model_name = ecom_scores.loc[ecom_scores['f1_score'].idxmax(), 'model']

credit_scores = results_df[results_df['dataset'] == 'Credit Card']
best_credit_model_name = credit_scores.loc[credit_scores['f1_score'].idxmax(), 'model']

print(f"Best E-commerce model: {best_ecom_model_name}")
print(f"Best Credit Card model: {best_credit_model_name}")


In [None]:
print("\n=== SHAP Analysis for E-commerce Data ===")

# The estimators in `models` are shared between datasets and were last fitted
# on the credit-card data, so the chosen one is refitted on e-commerce data
# before it is explained.
best_ecom_model = models[best_ecom_model_name]
best_ecom_model.fit(X_train_ecom_balanced, y_train_ecom_balanced)

# Explain a small slice of the test set to keep the cell fast.
shap_sample_size = 100
shap_sample = X_test_ecom_scaled[:shap_sample_size]

# Tree ensembles expose feature_importances_; anything else is treated as a
# linear model, with the training data as background.
if hasattr(best_ecom_model, 'feature_importances_'):
    explainer = shap.TreeExplainer(best_ecom_model)
else:
    explainer = shap.LinearExplainer(best_ecom_model, X_train_ecom_balanced)
shap_values = explainer.shap_values(shap_sample)

# Depending on the model type and shap version, classifiers may return one
# array per class (a list) or a 3-D array with a trailing class axis. Keep
# only the positive (fraud) class so the summary plot gets a 2-D
# (samples, features) matrix.
if isinstance(shap_values, list):
    shap_values = shap_values[1]
elif np.ndim(shap_values) == 3:
    shap_values = shap_values[:, :, 1]

plt.figure(figsize=(10, 8))
shap.summary_plot(shap_values, shap_sample, feature_names=ecommerce_final_features, show=False)
plt.title(f'SHAP Summary Plot - {best_ecom_model_name} (E-commerce)')
plt.tight_layout()
plt.show()


In [None]:
print("\n=== FINAL MODEL SELECTION ===")

# Best row per dataset, chosen by F1 score (label-based lookup via idxmax).
ecom_rows = results_df[results_df['dataset'] == 'E-commerce']
ecom_best = ecom_rows.loc[ecom_rows['f1_score'].idxmax()]

credit_rows = results_df[results_df['dataset'] == 'Credit Card']
credit_best = credit_rows.loc[credit_rows['f1_score'].idxmax()]

# (printed label, results_df column) pairs, in report order.
reported_metrics = (
    ('F1 Score', 'f1_score'),
    ('AUC-ROC', 'auc_roc'),
    ('AUC-PR', 'auc_pr'),
    ('Precision', 'precision'),
    ('Recall', 'recall'),
)

for dataset_label, best_row in (('E-commerce', ecom_best), ('Credit Card', credit_best)):
    print(f"\n{dataset_label} Dataset:")
    print(f"Best Model: {best_row['model']}")
    for metric_label, metric_column in reported_metrics:
        print(f"{metric_label}: {best_row[metric_column]:.4f}")


In [None]:
import joblib
import os

from sklearn.base import clone

os.makedirs('../models', exist_ok=True)

# Persist the metrics table alongside the fitted artefacts.
joblib.dump(results_df, '../models/model_results.pkl')

# `models` holds a single estimator object per algorithm. If the same
# algorithm wins on both datasets, fitting it a second time would leave
# `best_ecom_model` and `best_credit_model` pointing at one object trained on
# credit-card data. clone() gives each dataset its own unfitted copy with
# identical hyper-parameters, so the two variables stay independent.
best_ecom_model = clone(models[ecom_best['model']])
best_ecom_model.fit(X_train_ecom_balanced, y_train_ecom_balanced)
joblib.dump(best_ecom_model, '../models/best_ecommerce_model.pkl')
# The scaler is saved too: the model was trained on scaled features.
joblib.dump(scaler_ecom, '../models/ecommerce_scaler.pkl')

best_credit_model = clone(models[credit_best['model']])
best_credit_model.fit(X_train_credit_balanced, y_train_credit_balanced)
joblib.dump(best_credit_model, '../models/best_credit_model.pkl')
joblib.dump(scaler_credit, '../models/credit_scaler.pkl')

print("Models and results saved successfully!")


In [None]:
%%markdown
## 11. Interim 2 Summary

### Key Achievements:
1. ✅ Built and trained Logistic Regression, Random Forest, XGBoost, and LightGBM models
2. ✅ Applied SMOTE to handle class imbalance
3. ✅ Evaluated models with AUC-ROC, AUC-PR, F1, Precision, and Recall
4. ✅ Used SHAP for explainability
5. ✅ Identified and saved best-performing models

### Next Steps:
1. 🔄 Write a final report (PDF or markdown)
2. 🔄 Build a dashboard or web application
3. 🔄 Add real-time prediction capability
4. 🔄 Write deployment and usage documentation
