<a href="https://colab.research.google.com/github/mrsaurabhtanwer/Credit-Card-Fraud-Detection-System/blob/main/Credit_Card_Fraud_Detection_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE
import joblib

In [2]:
# Step 2: Create Synthetic Fraud Dataset
# Number of synthetic transactions to generate.
n_samples = 10_000
# Seed NumPy's global generator so the random draws in the following cells
# are repeatable on a fresh "Restart & Run All".
np.random.seed(42)


In [3]:

# Features
# One row per synthetic transaction. Every column is drawn independently from
# NumPy's global generator; the draw order (amount, old_balance, new_balance,
# transaction_type) is significant because it determines the seeded values.
features = pd.DataFrame(dict(
    amount=np.random.exponential(scale=100, size=n_samples),
    old_balance=np.random.uniform(low=0, high=5000, size=n_samples),
    new_balance=np.random.uniform(low=0, high=5000, size=n_samples),
    # 0 = debit, 1 = credit
    transaction_type=np.random.choice(a=[0, 1], size=n_samples),
))


In [4]:
# Fraud label: 1% fraud
# Each label is sampled with P(fraud) = 0.01, independently of the feature
# columns, and stored as the 'is_fraud' column.
fraud_labels = np.random.choice([0, 1], size=n_samples, p=[0.99, 0.01])
features['is_fraud'] = fraud_labels

# Persist the full table (features + label) without the index column.
features.to_csv("synthetic_creditcard.csv", index=False)

In [5]:
# Step 3: Load and Preprocess Data
data = pd.read_csv("synthetic_creditcard.csv")
X = data.drop('is_fraud', axis=1)
y = data['is_fraud']


In [6]:

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [7]:
# Handle Class Imbalance
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X_scaled, y)


In [8]:
# Step 4: Train Models
# Logistic Regression
lr = LogisticRegression()
lr.fit(X_res, y_res)



In [9]:
# Random Forest
rf = RandomForestClassifier()
rf.fit(X_res, y_res)


In [10]:
# Step 5: Evaluate Models
y_pred_lr = lr.predict(X_scaled)
y_pred_rf = rf.predict(X_scaled)

print("Logistic Regression Report:\n", classification_report(y, y_pred_lr))
print("Random Forest Report:\n", classification_report(y, y_pred_rf))
print("Random Forest AUC:", roc_auc_score(y, rf.predict_proba(X_scaled)[:, 1]))


Logistic Regression Report:
               precision    recall  f1-score   support

           0       0.99      0.54      0.70      9886
           1       0.01      0.55      0.03       114

    accuracy                           0.54     10000
   macro avg       0.50      0.55      0.36     10000
weighted avg       0.98      0.54      0.69     10000

Random Forest Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      9886
           1       1.00      1.00      1.00       114

    accuracy                           1.00     10000
   macro avg       1.00      1.00      1.00     10000
weighted avg       1.00      1.00      1.00     10000

Random Forest AUC: 0.9999999999999999


In [11]:
# Step 6: Save Model
# Persist the fitted random forest together with the scaler fitted alongside
# it, so both can be reloaded as a pair.
joblib.dump(value=rf, filename="fraud_model.pkl")
joblib.dump(value=scaler, filename="fraud_scaler.pkl")


['fraud_scaler.pkl']