In [9]:
!pip install xgboost



In [10]:
!pip install lightgbm



In [11]:
import pandas as pd
import numpy as np
import joblib

In [12]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE  # Install: pip install imbalanced-learn

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

In [14]:
df_sample = pd.read_csv("OneDrive/Desktop/NPCI_Project/processed_data.csv")

if "Is Fraud?" not in df_sample.columns:
    raise ValueError("Target column 'Is Fraud?' is missing!")

X = df_sample.drop(columns=["Is Fraud?"])
y = df_sample["Is Fraud?"]

# Spliting Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Appling SMOTE to balance the dataset
smote = SMOTE(sampling_strategy=1.0, random_state=42)  # Adjust ratio if needed
X_train, y_train = smote.fit_resample(X_train, y_train)

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Selected Models & Hyperparameters
models = {
    "Logistic Regression": (LogisticRegression(class_weight='balanced'), {
        "C": [0.1, 1, 10],
        "solver": ["liblinear"]
    })

}

In [15]:
# Training & Evaluating Models
best_models = {}

for name, (model, params) in models.items():
    print(f"\nTraining {name} with Hyperparameter Tuning...")

    grid_search = GridSearchCV(model, params, cv=3, scoring="roc_auc", n_jobs=-1)
    grid_search.fit(X_train_scaled, y_train)

    best_model = grid_search.best_estimator_
    best_models[name] = best_model

    y_pred = best_model.predict(X_test_scaled)
    y_prob = best_model.predict_proba(X_test_scaled)[:, 1]

    print(f"\nBest Model for {name}: {grid_search.best_params_}")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(f"ROC-AUC Score: {roc_auc_score(y_test, y_prob):.4f}")
    print(f"Classification Report:\n{classification_report(y_test, y_pred, zero_division=1)}")

#  Best Models
for name, model in best_models.items():
    joblib.dump(model, f"best_{name.replace(' ', '_')}.pkl")

print("\nModel training & hyperparameter tuning completed. Best model saved.")


Training Logistic Regression with Hyperparameter Tuning...

Best Model for Logistic Regression: {'C': 10, 'solver': 'liblinear'}
Accuracy: 0.9046
ROC-AUC Score: 0.9147
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.90      0.95    487131
           1       0.01      0.77      0.02       607

    accuracy                           0.90    487738
   macro avg       0.50      0.84      0.48    487738
weighted avg       1.00      0.90      0.95    487738


Model training & hyperparameter tuning completed. Best models saved.
