importing libraries and data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
data = pd.read_csv("creditcardfraud.csv")
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,82450,1.314539,0.590643,-0.666593,0.716564,0.301978,-1.125467,0.388881,-0.28839,-0.132137,...,-0.170307,-0.429655,-0.141341,-0.200195,0.639491,0.399476,-0.034321,0.031692,0.76,0
1,50554,-0.798672,1.185093,0.904547,0.694584,0.219041,-0.319295,0.495236,0.139269,-0.760214,...,0.202287,0.578699,-0.092245,0.013723,-0.246466,-0.380057,-0.39603,-0.112901,4.18,0
2,55125,-0.391128,-0.24554,1.122074,-1.308725,-0.639891,0.008678,-0.701304,-0.027315,-2.628854,...,-0.133485,0.117403,-0.191748,-0.488642,-0.309774,0.0081,0.163716,0.239582,15.0,0
3,116572,-0.060302,1.065093,-0.987421,-0.029567,0.176376,-1.348539,0.775644,0.134843,-0.149734,...,0.355576,0.90757,-0.018454,-0.126269,-0.339923,-0.150285,-0.023634,0.04233,57.0,0
4,90434,1.848433,0.373364,0.269272,3.866438,0.088062,0.970447,-0.721945,0.235983,0.683491,...,0.103563,0.620954,0.197077,0.692392,-0.20653,-0.021328,-0.019823,-0.042682,0.0,0


dropping Time columns from the data

In [3]:
data.drop('Time', axis=1, inplace=True)

Using StandardScaler to scale the "Amount" column

In [4]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
data[['Amount']] = scaler.fit_transform(data[['Amount']])

Dividing the dataset into train and test

In [5]:
from sklearn.model_selection import train_test_split
y = data['Class']
X = data.drop('Class', axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Logistic regression using default hyperparamenters

In [6]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(X_train, y_train)

In [7]:
pred = lr.predict(X_test)

Confusion matrix and classification report

In [8]:
from sklearn.metrics import confusion_matrix, classification_report
conf_matrix1 = confusion_matrix(y_test, pred)
print("Confusion Matrix:")
print(conf_matrix1)

class_report1 = classification_report(y_test, pred)
print("\nClassification Report:")
print(class_report1)

Confusion Matrix:
[[59  3]
 [ 4 54]]

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.95      0.94        62
           1       0.95      0.93      0.94        58

    accuracy                           0.94       120
   macro avg       0.94      0.94      0.94       120
weighted avg       0.94      0.94      0.94       120



SVM using default hyperparamenters

In [9]:
from sklearn.svm import SVC

svm_model = SVC()
svm_model.fit(X_train, y_train)

In [10]:
y_pred_svm = svm_model.predict(X_test)

Confusion matrix and classification report

In [11]:
conf_matrix_svm1 = confusion_matrix(y_test, y_pred_svm)
print("SVM Confusion Matrix:")
print(conf_matrix_svm1)

class_report_svm1 = classification_report(y_test, y_pred_svm)
print("\nSVM Classification Report:")
print(class_report_svm1)

SVM Confusion Matrix:
[[62  0]
 [ 6 52]]

SVM Classification Report:
              precision    recall  f1-score   support

           0       0.91      1.00      0.95        62
           1       1.00      0.90      0.95        58

    accuracy                           0.95       120
   macro avg       0.96      0.95      0.95       120
weighted avg       0.95      0.95      0.95       120



Grid Search CV for both models

In [12]:
lr_params = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'max_iter': [1000, 2000, 3000, 4000, 5000],
}

svm_params = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

In [13]:
from sklearn.model_selection import GridSearchCV

In [14]:
logistic_regression_grid = GridSearchCV(
    estimator=LogisticRegression(),
    param_grid=lr_params,
    scoring='accuracy',
    cv=15
)

svm_grid = GridSearchCV(
    estimator=SVC(),
    param_grid=svm_params,
    scoring='accuracy',
    cv=15
)

logistic_regression_grid.fit(X_train, y_train)
svm_grid.fit(X_train, y_train)

best_logistic_regression = logistic_regression_grid.best_estimator_
best_svm = svm_grid.best_estimator_

In [15]:
print("Best Logistic Regression Hyperparameters:")
print(logistic_regression_grid.best_params_)
print("Best Logistic Regression Accuracy Score:", logistic_regression_grid.best_score_)

print("\nBest SVM Hyperparameters:")
print(svm_grid.best_params_)
print("Best SVM Accuracy Score:", svm_grid.best_score_)

Best Logistic Regression Hyperparameters:
{'C': 0.1, 'max_iter': 1000}
Best Logistic Regression Accuracy Score: 0.9458333333333333

Best SVM Hyperparameters:
{'C': 10, 'kernel': 'rbf'}
Best SVM Accuracy Score: 0.94375


In [16]:
best_logistic_regression.fit(X_train, y_train)
y_pred = best_logistic_regression.predict(X_test)

In [17]:
conf_matrix2 = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix2)

class_report2 = classification_report(y_test, y_pred)
print("\nClassification Report:")
print(class_report2)

Confusion Matrix:
[[61  1]
 [ 4 54]]

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.98      0.96        62
           1       0.98      0.93      0.96        58

    accuracy                           0.96       120
   macro avg       0.96      0.96      0.96       120
weighted avg       0.96      0.96      0.96       120



In [18]:
best_svm.fit(X_train, y_train)
y_pred_svm = best_svm.predict(X_test)

In [19]:
conf_matrix_svm2 = confusion_matrix(y_test, y_pred_svm)
print("SVM Confusion Matrix:")
print(conf_matrix_svm2)

class_report_svm2 = classification_report(y_test, y_pred_svm)
print("\nSVM Classification Report:")
print(class_report_svm2)

SVM Confusion Matrix:
[[61  1]
 [ 4 54]]

SVM Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.98      0.96        62
           1       0.98      0.93      0.96        58

    accuracy                           0.96       120
   macro avg       0.96      0.96      0.96       120
weighted avg       0.96      0.96      0.96       120



General report of overall performance

In [20]:
print("SVM Confusion Matrix before hyperparameter tuning :")
print(conf_matrix_svm1)

print("SVM Confusion Matrix after hyperparameter tuning :")
print(conf_matrix_svm2)

SVM Confusion Matrix before hyperparameter tuning :
[[62  0]
 [ 6 52]]
SVM Confusion Matrix after hyperparameter tuning :
[[61  1]
 [ 4 54]]


In [21]:
print("Confusion Matrix before hyperparameter tuning :")
print(conf_matrix1)

print("Confusion Matrix after hyperparameter tuning:")
print(conf_matrix2)

Confusion Matrix before hyperparameter tuning :
[[59  3]
 [ 4 54]]
Confusion Matrix after hyperparameter tuning:
[[61  1]
 [ 4 54]]
