# Credit Card Fraud Detection

**Import Libraries**

In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score
from imblearn.over_sampling import SMOTE

In [12]:
df = pd.read_csv('/content/sample_data/creditcard.csv')
print(df.head())
print(df.isnull().sum())
print(df.describe())

   Time        V1        V2        V3        V4        V5        V6        V7  \
0     0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1     0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2     1 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3     1 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4     2 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

**Data Preprocessing**

In [13]:
scaler = StandardScaler()
df['Amount'] = scaler.fit_transform(df['Amount'].values.reshape(-1, 1))
df.drop(['Time'], axis=1, inplace=True)
x = df.drop(['Class'], axis=1)
y=df['Class']

**Handle Class Imbalance**

In [17]:
print(df['Class'].isnull().sum())
df.dropna(subset=['Class'], inplace=True)
smote = SMOTE(random_state=42)
# Extract x and y again after dropping NaN values
x = df.drop(['Class'], axis=1)  # Extract features again
y = df['Class']  # Extract target variable again
x_res, y_res = smote.fit_resample(x, y)
print(y_res.value_counts())

0
Class
0.0    9926
1.0    9926
Name: count, dtype: int64


In [18]:
x_train, x_test, y_train, y_test = train_test_split(x_res, y_res, test_size=0.5, random_state=42)

**Train the Model**

In [19]:
ref = LogisticRegression()
ref.fit(x_train, y_train)

In [20]:
rf= RandomForestClassifier()
rf.fit(x_train, y_train)

**Evaluate the Model**

In [23]:
y_pred_ref = ref.predict(x_test)
print("Logistic Regression Classification Report:\n",classification_report(y_test, y_pred_ref))
print("Logistic Regression Confusion Matrix:\n",confusion_matrix(y_test, y_pred_ref))
print("Logistic Regression ROC AUC Score:", roc_auc_score(y_test, y_pred_ref))

Logistic Regression Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      5009
         1.0       1.00      1.00      1.00      4917

    accuracy                           1.00      9926
   macro avg       1.00      1.00      1.00      9926
weighted avg       1.00      1.00      1.00      9926

Logistic Regression Confusion Matrix:
 [[5004    5]
 [   0 4917]]
Logistic Regression ROC AUC Score: 0.9995008983829108


In [24]:
y_pred_rf = rf.predict(x_test)
print("Random Forest Classification Report:\n",classification_report(y_test, y_pred_rf))
print("Random Forest Confusion Matrix:\n",confusion_matrix(y_test, y_pred_rf))
print("Random Forest ROC AUC Score:", roc_auc_score(y_test, y_pred_rf))

Random Forest Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      5009
         1.0       1.00      1.00      1.00      4917

    accuracy                           1.00      9926
   macro avg       1.00      1.00      1.00      9926
weighted avg       1.00      1.00      1.00      9926

Random Forest Confusion Matrix:
 [[5009    0]
 [   0 4917]]
Random Forest ROC AUC Score: 1.0
