In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd

# Load the cleaned dataset from disk.
df = pd.read_csv("../data/processed/cleaned_creditcard.csv")

# Separate the target column ('Class') from the feature columns.
y = df['Class']
X = df.drop(columns='Class')

# Hold out 20% of the rows for testing; stratify on the target so both
# splits keep the same class proportions, and fix the seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics reach the model.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a logistic regression with default settings on the scaled training data.
model = LogisticRegression().fit(X_train_scaled, y_train)

# Predict labels for the held-out split.
y_pred = model.predict(X_test_scaled)

# Report the confusion matrix followed by per-class precision/recall/F1.
print(
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred, digits=4),
    sep="\n",
)


[[56851    13]
 [   34    64]]
              precision    recall  f1-score   support

           0     0.9994    0.9998    0.9996     56864
           1     0.8312    0.6531    0.7314        98

    accuracy                         0.9992     56962
   macro avg     0.9153    0.8264    0.8655     56962
weighted avg     0.9991    0.9992    0.9991     56962



In [6]:
from pathlib import Path

from joblib import dump

# Build the output location with pathlib instead of a backslash-separated
# string literal: sequences such as '\d', '\m' and '\s' are invalid escapes in
# a normal (non-raw) Python string, and a hard-coded '\' separator only works
# on Windows. Path joins the parts with the separator of the current platform.
model_dir = Path("..") / "deployment" / "model"

# dump() does not create missing directories, so make sure the target exists.
model_dir.mkdir(parents=True, exist_ok=True)

# Persist the fitted classifier and the fitted scaler side by side; both are
# needed to reproduce predictions on new data. Paths are passed as str so the
# list of written filenames returned by dump() contains plain strings.
dump(model, str(model_dir / "model.pkl"))
dump(scaler, str(model_dir / "scaler.pkl"))

['..\\deployment\\model\\scaler.pkl']