# The wisdom of the crowd

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
#Dataframe
dataset =pd.read_csv('trans_cleaned_df.csv')


In [3]:
#faire une copie 
df = dataset.copy()

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier

from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier


# Features and target
x = df.drop('isFraud', axis=1)
y = df['isFraud']

# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)


# voting

In [5]:
# Créer les modèles
model_1 = SGDClassifier(random_state=0)
model_2 = DecisionTreeClassifier(random_state=0)
model_3 = KNeighborsClassifier(n_neighbors=2)

model_4 = VotingClassifier([('SGD', model_1),
                            ('Tree', model_2),
                            ('KNN', model_3)],
                          voting='hard')

# Liste des modèles
models = [model_1, model_2, model_3, model_4]

# Entraîner et évaluer chaque modèle
for model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred)
    
    print(f"Model: {model.__class__.__name__}")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"ROC AUC: {roc_auc:.2f}")
    print("=" * 30)

Model: SGDClassifier
Accuracy: 0.99
Precision: 0.00
Recall: 0.00
ROC AUC: 0.50
Model: DecisionTreeClassifier
Accuracy: 0.97
Precision: 0.06
Recall: 0.07
ROC AUC: 0.53
Model: KNeighborsClassifier
Accuracy: 0.99
Precision: 0.07
Recall: 0.00
ROC AUC: 0.50
Model: VotingClassifier
Accuracy: 0.99
Precision: 0.12
Recall: 0.00
ROC AUC: 0.50


# bagging

In [6]:
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.model_selection import train_test_split
import pandas as pd


# Instancier les modèles individuels
model_1 = SGDClassifier(random_state=0)
model_2 = DecisionTreeClassifier(random_state=0)
model_3 = KNeighborsClassifier(n_neighbors=2)

# Instancier le BaggingClassifier en utilisant les modèles individuels
bagging_model = BaggingClassifier(base_estimator=None, n_estimators=10, random_state=0)

# Entraîner le BaggingClassifier
bagging_model.fit(X_train, y_train)

# Faire des prédictions avec le BaggingClassifier
y_pred = bagging_model.predict(X_test)

# Calculer et afficher les métriques pour le modèle ensembliste
print("Bagging Classifier Metrics:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_pred))


Bagging Classifier Metrics:
Accuracy: 0.9861182272319713
Precision: 0.3559322033898305
Recall: 0.0122306348281887
ROC AUC: 0.5059610903756594


# stacking

In [7]:
# features
x = df.drop('isFraud', axis=1)
# target
y = df['isFraud']

# Diviser l'ensemble d'entraînement en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Instancier les modèles individuels
model_1 = SGDClassifier(random_state=0)
model_2 = DecisionTreeClassifier(random_state=0)
model_3 = KNeighborsClassifier(n_neighbors=2)

# Instancier le modèle de méta-apprentissage
meta_model = RandomForestClassifier(random_state=0)

# Créer le modèle de stacking en utilisant les modèles individuels et le méta-modèle
stacking_model = StackingClassifier(
    estimators=[('SGD', model_1), ('Tree', model_2), ('KNN', model_3)],
    final_estimator=meta_model,
    stack_method='auto'  # Vous pouvez également utiliser 'predict_proba' ou 'decision_function'
)

# Entraîner le modèle de stacking
stacking_model.fit(X_train, y_train)

# Faire des prédictions avec le modèle de stacking
y_pred = stacking_model.predict(X_test)

# Calculer et afficher les métriques pour le modèle de stacking
print("Stacking Classifier Metrics:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_pred))


Stacking Classifier Metrics:
Accuracy: 0.9730210067887793
Precision: 0.014679976512037582
Recall: 0.014560279557367502
ROC AUC: 0.500469798449896
