# 🛡️ Credit Card Fraud Detection using Machine Learning

In [None]:
# 📦 1. Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

from imblearn.over_sampling import SMOTE


In [None]:
# 📂 2. Load the Dataset
# Read creditcard.csv into a DataFrame and preview its first rows.
csv_path = "creditcard.csv"  # Ensure the dataset is in your working directory
df = pd.read_csv(csv_path)
df.head()

In [None]:
# 📊 3. Dataset Exploration
# Report the frame's dimensions, the per-column count of null entries and
# the number of rows for each value of the 'Class' column.
print("Dataset shape:", df.shape)
# The "\n" escape puts each summary below its label. A literal line break
# inside a double-quoted string is a SyntaxError, so the escape is required.
print("Missing values:\n", df.isnull().sum())
print("Class distribution:\n", df['Class'].value_counts())

# Countplot for imbalance: one bar per 'Class' value.
sns.countplot(x='Class', data=df)
plt.title("Class Distribution (0 = Genuine, 1 = Fraud)")
plt.show()

In [None]:
# ⚙️ 4. Preprocessing & Normalization
# Standardize the 'Amount' column in place and remove the 'Time' column.
# NOTE(review): the scaler is fitted on the whole frame, i.e. before the
# train/test split done further down — confirm this is intended.
scaler = StandardScaler()
amount_scaled = scaler.fit_transform(df[['Amount']])  # 2-D input: one column
df['Amount'] = amount_scaled
df.drop(columns='Time', inplace=True)
df.head()

In [None]:
# ✂️ 5. Train-Test Split
# y is the 'Class' label; X is every remaining column. 20% of the rows are
# held out for testing. Stratifying on y keeps the class proportions
# approximately equal in both partitions, and the fixed seed makes the
# split repeatable.
y = df['Class']
X = df.drop(columns='Class')

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# ⚖️ 6. Handling Class Imbalance using SMOTE
# Only the training split is resampled; X_test / y_test are not touched here.
sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X_train, y_train)

# Show the per-class row counts before and after resampling.
for label, target in (("Before SMOTE:", y_train), ("After SMOTE:", y_resampled)):
    print(label, target.value_counts())

In [None]:
# 🧠 7. Model Training - Logistic Regression
# Fit on the resampled training data, then predict on the held-out test split.
# fit() returns the estimator itself, so construction and fitting are chained.
lr = LogisticRegression(max_iter=1000).fit(X_resampled, y_resampled)
y_pred_lr = lr.predict(X_test)

# Per-class precision / recall / F1 against the true test labels.
lr_report = classification_report(y_test, y_pred_lr)
print("Logistic Regression Results:")
print(lr_report)

In [None]:
# 🌲 8. Model Training - Random Forest Classifier
# A 100-tree forest with a fixed seed, trained on the resampled training data
# and evaluated on the held-out test split.
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
rf.fit(X_resampled, y_resampled)
y_pred_rf = rf.predict(X_test)

# Per-class precision / recall / F1 against the true test labels.
rf_report = classification_report(y_test, y_pred_rf)
print("Random Forest Results:")
print(rf_report)

In [None]:
# 📈 9. Confusion Matrix
# Heatmap of the random-forest predictions on the test split: rows are the
# actual labels, columns the predicted ones (see the axis labels below).
class_names = ["Genuine", "Fraud"]
cm = confusion_matrix(y_test, y_pred_rf)
sns.heatmap(
    cm,
    annot=True,   # write the count inside every cell
    fmt='d',      # counts are integers
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix - Random Forest")
plt.show()

In [None]:
# 🔍 10. ROC AUC Score
# ROC AUC is computed from scores rather than hard predictions, so take the
# second column of predict_proba (the second entry of rf.classes_ —
# presumably label 1, i.e. fraud) as the positive-class score.
rf_fraud_scores = rf.predict_proba(X_test)[:, 1]
print("ROC AUC Score (Random Forest):", roc_auc_score(y_test, rf_fraud_scores))