In [None]:

# CREDIT CARD FRAUD DETECTION NOTEBOOK

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score, f1_score

# Read the transactions from a relative path (resolved against the kernel's
# working directory).
df = pd.read_csv(filepath_or_buffer="creditcard.csv")

# Report (rows, columns), then leave the first five rows as the cell's output.
print("Shape:", df.shape)
df.head(n=5)


In [None]:

# Bar chart of how many rows carry each Class label.
fig, ax = plt.subplots(figsize=(5, 4))
sns.countplot(x='Class', data=df, ax=ax)
ax.set_title("Class Distribution (0 = Normal, 1 = Fraud)")
plt.show()

# Same information as a table: each label's share of all rows, in percent.
class_share = df['Class'].value_counts(normalize=True)
fraud_ratio = class_share * 100
print("Class Distribution (%):\n", fraud_ratio)


In [None]:

# Separate the label column from the predictor columns.
y = df["Class"]
X = df.drop(columns=["Class"])

# Hold out 20% of the rows for testing. Stratifying on y keeps the label
# proportions aligned between the two partitions, and the fixed seed makes
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# The mean of the label column is reported as each partition's fraud ratio.
print("Training Set Fraud Ratio:", y_train.mean())
print("Testing Set Fraud Ratio:", y_test.mean())


In [None]:

# Depth-limited decision tree with balanced class weights, fitted on the
# training partition (fit returns the estimator itself).
dt = DecisionTreeClassifier(
    max_depth=6,
    class_weight="balanced",
    random_state=42,
).fit(X_train, y_train)

# Hard labels for the threshold-based metrics, and the second probability
# column as the score used for ROC-AUC.
dt_pred = dt.predict(X_test)
dt_prob = dt.predict_proba(X_test)[:, 1]

# Compute the headline numbers once, then print them with the full report.
dt_f1 = f1_score(y_test, dt_pred)
dt_auc = roc_auc_score(y_test, dt_prob)

print("Decision Tree Metrics")
print("F1-score:", dt_f1)
print("ROC-AUC:", dt_auc)
print(classification_report(y_test, dt_pred))


In [None]:

# Random forest of 200 trees with balanced class weights.
# n_jobs=-1 spreads fitting and prediction over all available cores. Each
# tree's seed is derived from random_state, so the fitted trees do not depend
# on how many jobs are used -- this only shortens the run time.
rf = RandomForestClassifier(
    n_estimators=200,
    class_weight="balanced",
    random_state=42,
    n_jobs=-1,
)

rf.fit(X_train, y_train)

# Hard labels for the threshold-based metrics, and the second probability
# column as the score used for ROC-AUC.
rf_pred = rf.predict(X_test)
rf_prob = rf.predict_proba(X_test)[:, 1]

print("Random Forest Metrics")
print("F1-score:", f1_score(y_test, rf_pred))
print("ROC-AUC:", roc_auc_score(y_test, rf_prob))
print(classification_report(y_test, rf_pred))


In [None]:

# Confusion matrix for the random forest's hard predictions.
# labels=[0, 1] pins the row/column order explicitly so that the tick names
# below always line up with the right cells (0 = Normal, 1 = Fraud).
class_names = ["Normal", "Fraud"]
cm = confusion_matrix(y_test, rf_pred, labels=[0, 1])

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',  # cells hold integer counts
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title("Confusion Matrix - Random Forest")
ax.set_xlabel("Predicted")  # columns of cm
ax.set_ylabel("Actual")     # rows of cm
plt.show()


In [None]:

# ROC points and the area under the curve for the random forest's scores.
fpr, tpr, thresholds = roc_curve(y_test, rf_prob)
rf_auc = roc_auc_score(y_test, rf_prob)

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(fpr, tpr, label=f"AUC = {rf_auc:.4f}")
ax.plot([0, 1], [0, 1], 'r--')  # dashed red diagonal as a visual reference
ax.set_title("ROC Curve - Random Forest")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.legend()
plt.show()
