In [1]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [3]:
#Dataframe
dataset =pd.read_csv('trans_cleaned_df.csv')


In [4]:
#faire une copie 
df = dataset.copy()

In [5]:
# Features and target
x = df.drop('isFraud', axis=1)
y = df['isFraud']

# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Apply class weighting
class_weights = 'balanced'


In [6]:
# Random Forest Classifier
rf_model = RandomForestClassifier(class_weight=class_weights)
rf_cv_scores = cross_val_score(rf_model, X_train, y_train, cv=5, scoring='accuracy')

# Decision Tree Classifier
dt_model = DecisionTreeClassifier(class_weight=class_weights)
dt_cv_scores = cross_val_score(dt_model, X_train, y_train, cv=5, scoring='accuracy')

# K-Nearest Neighbors Classifier
knn_model = KNeighborsClassifier()
knn_cv_scores = cross_val_score(knn_model, X_train, y_train, cv=5, scoring='accuracy')


In [7]:
print("Random Forest Cross-Validation Scores:", rf_cv_scores)
print("Decision Tree Cross-Validation Scores:", dt_cv_scores)
print("K-Nearest Neighbors Cross-Validation Scores:", knn_cv_scores)


Random Forest Cross-Validation Scores: [0.98574498 0.98570519 0.98573503 0.98574498 0.98580467]
Decision Tree Cross-Validation Scores: [0.97405646 0.97374809 0.97394704 0.9736685  0.97349939]
K-Nearest Neighbors Cross-Validation Scores: [0.98574498 0.9856853  0.98569524 0.98560571 0.98567535]


In [11]:
# Train the models on the full training set
rf_model.fit(X_train, y_train)
dt_model.fit(X_train, y_train)
knn_model.fit(X_train, y_train)

# Make predictions on the test set
rf_pred = rf_model.predict(X_test)
dt_pred = dt_model.predict(X_test)
knn_pred = knn_model.predict(X_test)

# Evaluer le modele de random forest
rf_accuracy = accuracy_score(y_test, rf_pred)
rf_precision = precision_score(y_test, rf_pred)
rf_recall = recall_score(y_test, rf_pred)
rf_pred_prob = rf_model.predict_proba(X_test)[:, 1]  # Compute predicted probabilities for the positive class
rf_roc_auc = roc_auc_score(y_test, rf_pred_prob)

print("Random Forest Test :")
print("Accuracy:", rf_accuracy)
print("Precision:", rf_precision)
print("Recall:", rf_recall)
print("ROC AUC:", rf_roc_auc)
print('____________________________')

# Evaluer le modele de decision tree
dt_accuracy = accuracy_score(y_test, dt_pred)
dt_precision = precision_score(y_test, dt_pred)
dt_recall = recall_score(y_test, dt_pred)
dt_pred_prob = dt_model.predict_proba(X_test)[:, 1]  # Compute predicted probabilities for the positive class
dt_roc_auc = roc_auc_score(y_test, dt_pred_prob)

print("Decision Tree Test :")
print("Accuracy:", dt_accuracy)
print("Precision:", dt_precision)
print("Recall:", dt_recall)
print("ROC AUC:", dt_roc_auc)
print('____________________________')

# Evaluer le modele de K-Nearest Neighbors
knn_accuracy = accuracy_score(y_test, knn_pred)
knn_precision = precision_score(y_test, knn_pred)
knn_recall = recall_score(y_test, knn_pred)
knn_pred_prob = knn_model.predict_proba(X_test)[:, 1]  # Compute predicted probabilities for the positive class
knn_roc_auc = roc_auc_score(y_test, knn_pred_prob)

print("K-Nearest Neighbors Test :")
print("Accuracy:", knn_accuracy)
print("Precision:", knn_precision)
print("Recall:", knn_recall)
print("ROC AUC:", knn_roc_auc)

# Print the test set accuracies


Random Forest Test :
Accuracy: 0.9856515303442678
Precision: 0.09090909090909091
Recall: 0.0005574136008918618
ROC AUC: 0.6808397728590878
____________________________
Decision Tree Test :
Accuracy: 0.9734358337710293
Precision: 0.05835240274599542
Recall: 0.056856187290969896
ROC AUC: 0.5217837094822091
____________________________
K-Nearest Neighbors Test :
Accuracy: 0.9855799073676169
Precision: 0.0
Recall: 0.0
ROC AUC: 0.5351081441968466
