In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from imblearn.over_sampling import SMOTE  # For handling class imbalance

In [2]:
# Read the dataset from a CSV file located next to the notebook.
csv_path = './creditcard.csv'  # Replace with your file path
data = pd.read_csv(csv_path)

In [3]:
# Pull the label column out of the frame; every other column is a model input.
target_col = 'Class'
y = data[target_col]  # Labels (0 = legitimate, 1 = fraudulent)
X = data.drop(columns=[target_col])  # Feature matrix without the label

In [4]:
# Hold out 30% of the rows for testing. The split is stratified on the label
# and seeded so that it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [5]:
# Standardise the features. The scaling statistics are learned from the
# training split only and then applied unchanged to the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
# Balance the classes with SMOTE. Only the training split is resampled;
# the test split is not passed to the sampler.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train, y_train)
X_train_res, y_train_res = resampled

In [7]:
# Train the model
# Fit a random forest on the SMOTE-resampled training set.
# n_jobs=-1 builds the trees on all available CPU cores instead of one;
# random_state is still fixed, so the run stays reproducible.
model = RandomForestClassifier(random_state=42, n_jobs=-1)
model.fit(X_train_res, y_train_res)

In [8]:
# Predict a class label for every row of the scaled test split.
y_pred = model.predict(X=X_test)

In [9]:
# Score the predictions against the true test labels: a confusion matrix
# plus precision, recall and F1.
conf_matrix = confusion_matrix(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (precision_score, recall_score, f1_score)
)

In [10]:
# Report each scalar metric on its own line, then the confusion matrix.
for label, value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(label, value)
print("Confusion Matrix:\n", conf_matrix)

Precision: 0.8923076923076924
Recall: 0.7837837837837838
F1 Score: 0.8345323741007195
Confusion Matrix:
 [[85281    14]
 [   32   116]]
