In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
import mlflow
import mlflow.sklearn
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import sys
import os

In [2]:
sys.path.append(os.path.abspath('../scripts'))

In [3]:
from model_building_and_training import (
    separate_features_target_creditcard,
    separate_features_target_fraud,
    split_train_test,
    train_and_evaluate_model,get_column_types
)
from data_analysis_and_preprocessing import correct_data_types

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load the datasets
creditcard_df = pd.read_csv('../data/creditcard.csv')
merged_fraud_df = pd.read_csv('../data/merged_fraud_data.csv')

# Correct data types for fraud data
merged_fraud_df1 = correct_data_types(merged_fraud_df)

In [5]:
merged_fraud_df1.head(5)

Unnamed: 0,user_id,signup_time,purchase_time,purchase_value,device_id,source,browser,sex,age,ip_address,class,lower_bound_ip_address,upper_bound_ip_address,country,time_to_purchase,hour_of_day,day_of_week
0,247547,2015-06-28 03:00:34,2015-08-09 03:57:29,47,KIXYSVCHIPQBR,SEO,Safari,F,30,16778864,0,16778240.0,16779263.0,Australia,3632215.0,3,6
1,220737,2015-01-28 14:21:11,2015-02-11 20:28:28,15,PKYOWQKWGJNJI,SEO,Chrome,F,34,16842045,0,16809984.0,16842751.0,Thailand,1231637.0,20,2
2,390400,2015-03-19 20:49:09,2015-04-11 23:41:23,44,LVCSXLISZHVUO,Ads,IE,M,29,16843656,0,16843264.0,16843775.0,China,1997534.0,23,5
3,69592,2015-02-24 06:11:57,2015-05-23 16:40:14,55,UHAUHNXXUADJE,Direct,Chrome,F,30,16938732,0,16924672.0,16941055.0,China,7640897.0,16,5
4,174987,2015-07-07 12:58:11,2015-11-03 04:04:30,51,XPGPMOHIDRMGE,SEO,Chrome,F,37,16971984,0,16941056.0,16973823.0,Thailand,10249579.0,4,1


In [6]:
# Drop datetime columns from fraud data
fraud_df_cleaned = merged_fraud_df1.drop(columns=['signup_time', 'purchase_time'])

In [7]:
fraud_df_cleaned.dtypes


user_id                      int64
purchase_value               int32
device_id                 category
source                    category
browser                   category
sex                       category
age                          int32
ip_address                   int32
class                        int64
lower_bound_ip_address     float64
upper_bound_ip_address     float64
country                     object
time_to_purchase           float64
hour_of_day                  int64
day_of_week                  int64
dtype: object

In [8]:
# Function to split the data into train and test sets
# For credit card data
X_creditcard, y_creditcard = separate_features_target_creditcard(creditcard_df)
X_creditcard_train, X_creditcard_test, y_creditcard_train, y_creditcard_test = split_train_test(X_creditcard, y_creditcard)

# For fraud data (merged fraud data)
X_fraud, y_fraud = separate_features_target_fraud(fraud_df_cleaned)
X_fraud_train, X_fraud_test, y_fraud_train, y_fraud_test = split_train_test(X_fraud, y_fraud)

In [9]:
# Print the shapes to verify the splits
print(f"Credit card train shape: {X_creditcard_train.shape}, Credit card test shape: {X_creditcard_test.shape}")
print(f"Fraud data train shape: {X_fraud_train.shape}, Fraud data test shape: {X_fraud_test.shape}")

Credit card train shape: (227845, 30), Credit card test shape: (56962, 30)
Fraud data train shape: (103316, 14), Fraud data test shape: (25830, 14)


In [10]:
numerical_columns_fraud = ['age', 'ip_address', 'purchase_value', 'lower_bound_ip_address', 'upper_bound_ip_address', 'time_to_purchase', 'hour_of_day', 'day_of_week']
categorical_columns_fraud = ['device_id', 'source', 'browser', 'sex', 'country']

In [11]:
preprocessor_fraud = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns_fraud),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns_fraud)
    ])


In [12]:
# Initialize models
logistic_regression = LogisticRegression(max_iter=1000)
decision_tree = DecisionTreeClassifier()
random_forest = RandomForestClassifier()
gradient_boosting = GradientBoostingClassifier()
mlp = MLPClassifier(max_iter=1000)


In [13]:
# Define a list of models
models = [logistic_regression, decision_tree, random_forest,gradient_boosting, mlp]

# Preprocess the training and test data for fraud dataset
X_fraud_train_transformed = preprocessor_fraud.fit_transform(X_fraud_train)
X_fraud_test_transformed = preprocessor_fraud.transform(X_fraud_test)

In [14]:
X_fraud_test_transformed

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 312654 stored elements and shape (25830, 95649)>

In [None]:
mlflow.set_experiment("Fraud Detection Models_v1")

<Experiment: artifact_location='file:///c:/Users/ftesfaye/Desktop/KIFIYA/KIFIYA_PROJECT_WEEK_8/Improved-Fraud-Detection-for-E-commerce-and-Bank-Transactions/notebooks/mlruns/845907924946011961', creation_time=1729753292179, experiment_id='845907924946011961', last_update_time=1729753292179, lifecycle_stage='active', name='Fraud Detection Models_v1', tags={}>

In [16]:
# Train and evaluate models for fraud data
for model in models:
    with mlflow.start_run(run_name=f"{model.__class__.__name__}_fraud"):
        print(f"Training and evaluating {model.__class__.__name__} on fraud data...")
        trained_model = train_and_evaluate_model(model, X_fraud_train_transformed, X_fraud_test_transformed, y_fraud_train, y_fraud_test)
        
        # Log model and performance metrics to MLflow
        mlflow.sklearn.log_model(trained_model, f"{model.__class__.__name__}_fraud_model")
        y_pred = trained_model.predict(X_fraud_test_transformed)
        acc = accuracy_score(y_fraud_test, y_pred)
        precision = precision_score(y_fraud_test, y_pred)
        recall = recall_score(y_fraud_test, y_pred)
        f1 = f1_score(y_fraud_test, y_pred)
        
        # Log metrics
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("f1_score", f1)
        mlflow.log_params(model.get_params())

Training and evaluating LogisticRegression on fraud data...
LogisticRegression Accuracy: 0.9524
              precision    recall  f1-score   support

           0       0.95      1.00      0.97     23427
           1       0.96      0.51      0.67      2403

    accuracy                           0.95     25830
   macro avg       0.96      0.75      0.82     25830
weighted avg       0.95      0.95      0.95     25830





Training and evaluating DecisionTreeClassifier on fraud data...
DecisionTreeClassifier Accuracy: 0.9525
              precision    recall  f1-score   support

           0       0.96      0.99      0.97     23427
           1       0.90      0.55      0.68      2403

    accuracy                           0.95     25830
   macro avg       0.93      0.77      0.83     25830
weighted avg       0.95      0.95      0.95     25830





Training and evaluating RandomForestClassifier on fraud data...
RandomForestClassifier Accuracy: 0.9567
              precision    recall  f1-score   support

           0       0.95      1.00      0.98     23427
           1       1.00      0.54      0.70      2403

    accuracy                           0.96     25830
   macro avg       0.98      0.77      0.84     25830
weighted avg       0.96      0.96      0.95     25830





Training and evaluating GradientBoostingClassifier on fraud data...
GradientBoostingClassifier Accuracy: 0.9568
              precision    recall  f1-score   support

           0       0.95      1.00      0.98     23427
           1       1.00      0.54      0.70      2403

    accuracy                           0.96     25830
   macro avg       0.98      0.77      0.84     25830
weighted avg       0.96      0.96      0.95     25830





Training and evaluating MLPClassifier on fraud data...
MLPClassifier Accuracy: 0.9499
              precision    recall  f1-score   support

           0       0.96      0.99      0.97     23427
           1       0.86      0.55      0.67      2403

    accuracy                           0.95     25830
   macro avg       0.91      0.77      0.82     25830
weighted avg       0.95      0.95      0.94     25830





In [15]:
# Adjust this based on the actual columns in the credit card dataset
numerical_columns_creditcard = ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15',
                                'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount']

In [16]:
preprocessor_creditcard = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns_creditcard)
    ])

In [17]:
# Preprocess and train for credit card data
X_creditcard_train_transformed = preprocessor_creditcard.fit_transform(X_creditcard_train)
X_creditcard_test_transformed = preprocessor_creditcard.transform(X_creditcard_test)

In [18]:
X_creditcard_test_transformed

array([[ -8.43953559,   5.18252006, -12.31324996, ...,  -5.02350367,
         -3.16294175,   1.0815138 ],
       [  0.1730534 ,  -1.65602032,  -0.08791401, ...,   0.10210496,
          0.30863538,   1.69317258],
       [  0.71421679,  -0.35634288,   0.11194078, ...,   0.02847206,
          0.013272  ,  -0.22547388],
       ...,
       [  0.02018331,  -0.06601034,  -0.83557264, ...,   0.89063984,
          1.0738269 ,  -0.31714621],
       [ -0.25325795,   0.59873588,   1.10429699, ...,   0.10094377,
         -0.81883767,  -0.33299371],
       [ -0.81263064,   0.59930001,  -0.33800407, ...,   0.31303049,
         -1.8421089 ,   3.2080607 ]])

In [21]:
mlflow.set_experiment("Credit Card Fraud Detection Models_v1")


<Experiment: artifact_location='file:///c:/Users/ftesfaye/Desktop/KIFIYA/KIFIYA_PROJECT_WEEK_8/Improved-Fraud-Detection-for-E-commerce-and-Bank-Transactions/notebooks/mlruns/794011329361410109', creation_time=1729753314429, experiment_id='794011329361410109', last_update_time=1729753314429, lifecycle_stage='active', name='Credit Card Fraud Detection Models_v1', tags={}>

In [23]:
# Train and evaluate models for credit card data
for model in models:
    with mlflow.start_run(run_name=f"{model.__class__.__name__}_creditcard"):
        print(f"Training and evaluating {model.__class__.__name__} on credit card data...")
        trained_model = train_and_evaluate_model(model, X_creditcard_train_transformed, X_creditcard_test_transformed, y_creditcard_train, y_creditcard_test)
        
        # Log model and performance metrics to MLflow
        mlflow.sklearn.log_model(trained_model, f"{model.__class__.__name__}_creditcard_model")
        y_pred = trained_model.predict(X_creditcard_test_transformed)
        acc = accuracy_score(y_creditcard_test, y_pred)
        precision = precision_score(y_creditcard_test, y_pred)
        recall = recall_score(y_creditcard_test, y_pred)
        f1 = f1_score(y_creditcard_test, y_pred)
        
        # Log metrics
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("f1_score", f1)
        mlflow.log_params(model.get_params())

Training and evaluating LogisticRegression on credit card data...
LogisticRegression Accuracy: 0.9991
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.86      0.57      0.69        98

    accuracy                           1.00     56962
   macro avg       0.93      0.79      0.84     56962
weighted avg       1.00      1.00      1.00     56962





Training and evaluating DecisionTreeClassifier on credit card data...
DecisionTreeClassifier Accuracy: 0.9991
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.73      0.79      0.76        98

    accuracy                           1.00     56962
   macro avg       0.87      0.89      0.88     56962
weighted avg       1.00      1.00      1.00     56962





Training and evaluating RandomForestClassifier on credit card data...
RandomForestClassifier Accuracy: 0.9996
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.96      0.78      0.86        98

    accuracy                           1.00     56962
   macro avg       0.98      0.89      0.93     56962
weighted avg       1.00      1.00      1.00     56962





Training and evaluating GradientBoostingClassifier on credit card data...
GradientBoostingClassifier Accuracy: 0.9989
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.74      0.60      0.66        98

    accuracy                           1.00     56962
   macro avg       0.87      0.80      0.83     56962
weighted avg       1.00      1.00      1.00     56962





Training and evaluating MLPClassifier on credit card data...
MLPClassifier Accuracy: 0.9994
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.81      0.83      0.82        98

    accuracy                           1.00     56962
   macro avg       0.90      0.91      0.91     56962
weighted avg       1.00      1.00      1.00     56962





In [19]:
import pickle

# Load the   model from the specified path
model_path = r'C:\Users\ftesfaye\Desktop\KIFIYA\KIFIYA_PROJECT_WEEK_8\Improved-Fraud-Detection-for-E-commerce-and-Bank-Transactions\notebooks\DecisionTreeClassifier().pkl'
with open(model_path, 'rb') as file:
    model = pickle.load(file)

# Now you can use this `model` for SHAP and LIME analysis
print("Model loaded successfully for explainability tasks.")

Model loaded successfully for explainability tasks.


In [20]:
# Function to split the data into train and test sets
# For credit card data
X_creditcard, y_creditcard = separate_features_target_creditcard(creditcard_df)
X_creditcard_train, X_creditcard_test, y_creditcard_train, y_creditcard_test = split_train_test(X_creditcard, y_creditcard)

# For fraud data (merged fraud data)
X_fraud, y_fraud = separate_features_target_fraud(fraud_df_cleaned)
X_fraud_train, X_fraud_test, y_fraud_train, y_fraud_test = split_train_test(X_fraud, y_fraud)

In [None]:
import pickle
import shap
import matplotlib.pyplot as plt

# Load the model
model_path = r'C:\Users\ftesfaye\Desktop\KIFIYA\KIFIYA_PROJECT_WEEK_8\Improved-Fraud-Detection-for-E-commerce-and-Bank-Transactions\notebooks\DecisionTreeClassifier().pkl'
with open(model_path, 'rb') as file:
    model = pickle.load(file)

# Separate features and target for credit card data
X_creditcard, y_creditcard = separate_features_target_creditcard(creditcard_df)
X_creditcard_train, X_creditcard_test, y_creditcard_train, y_creditcard_test = split_train_test(X_creditcard, y_creditcard)

# Initialize SHAP
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_creditcard_test)

# SHAP Summary Plot
shap.summary_plot(shap_values[1] if len(shap_values) > 1 else shap_values, X_creditcard_test)
plt.show()

# Force Plot (single prediction)
if len(shap_values) > 1:
    shap.force_plot(explainer.expected_value[1], shap_values[1][0, :], X_creditcard_test.iloc[0, :])
else:
    shap.force_plot(explainer.expected_value, shap_values[0, :], X_creditcard_test.iloc[0, :])

# Dependence Plot for a specific feature (e.g., 'age')
shap.dependence_plot("purchase_value", shap_values[1] if len(shap_values) > 1 else shap_values, X_creditcard_test)
plt.show()



In [22]:
import pickle
import lime
import lime.lime_tabular
import numpy as np

# Load the saved model
model_path = r'C:\Users\ftesfaye\Desktop\KIFIYA\KIFIYA_PROJECT_WEEK_8\Improved-Fraud-Detection-for-E-commerce-and-Bank-Transactions\notebooks\DecisionTreeClassifier().pkl'
with open(model_path, 'rb') as file:
    model = pickle.load(file)

# Separate features and target for fraud data
X_fraud, y_fraud = separate_features_target_fraud(fraud_df_cleaned)
X_fraud_train, X_fraud_test, y_fraud_train, y_fraud_test = split_train_test(X_fraud, y_fraud)

# Initialize LIME Explainer
explainer = lime.lime_tabular.LimeTabularExplainer(
    training_data=X_fraud_train.values,
    feature_names=X_fraud_train.columns,
    class_names=['Non-Fraud', 'Fraud'],
    mode='classification'
)

# Pick an instance to explain (e.g., first instance in the test set)
i = 0
exp = explainer.explain_instance(
    data_row=X_fraud_test.iloc[i],
    predict_fn=model.predict_proba
)

# Display the explanation for this instance
exp.show_in_notebook()  # If using Jupyter, or use exp.as_list() for a list output
print(exp.as_list())  # Prints the feature importance for the instance


  ret[feature] = int(self.lambdas[feature](ret[feature]))
  ret[feature] = int(self.lambdas[feature](ret[feature]))
  binary_column = (inverse_column == first_row[column]).astype(int)


ValueError: X has 14 features, but DecisionTreeClassifier is expecting 95649 features as input.