In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

In [2]:
# Read the credit-card CSV (path is relative to the working directory)
# into a DataFrame and report its (rows, columns) shape.
data = pd.read_csv('creditcard.csv')
print("Dataset loaded. Shape:", data.shape)

# Target vector: the 'Class' label column (0 = legit, 1 = fraud).
y = data['Class']
# Feature matrix: every other column. drop() returns a new frame, so `data`
# itself still contains 'Class' afterwards.
X = data.drop(columns='Class')

Dataset loaded. Shape: (284807, 31)


In [3]:
# Hold out 20% of the rows for evaluation. stratify=y asks the splitter to keep
# the class proportions of y in both partitions; random_state pins the shuffle
# so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit a 100-tree random forest on the training partition (seeded for
# repeatable results).
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict the held-out rows and print the per-class precision / recall / F1
# table. The heading and the report are printed separately on purpose:
# passing the report as a second print() argument inserts the default " "
# separator right after the "\n", which pushes the report's header row one
# column to the right of the rows beneath it.
y_pred = model.predict(X_test)
print("Model Performance:")
print(classification_report(y_test, y_pred))

# Persist the fitted estimator to the working directory so it can be reloaded
# later with joblib.load without retraining.
joblib.dump(model, 'fraud_model.pkl')
print("Model saved as 'fraud_model.pkl'")

Model Performance:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.94      0.82      0.87        98

    accuracy                           1.00     56962
   macro avg       0.97      0.91      0.94     56962
weighted avg       1.00      1.00      1.00     56962

Model saved as 'fraud_model.pkl'
