In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [9]:
# Load the Credit Card Fraud Detection dataset

In [10]:
credit_card_df = pd.read_csv("creditcardfraud.csv")

In [11]:
credit_card_df

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,82450,1.314539,0.590643,-0.666593,0.716564,0.301978,-1.125467,0.388881,-0.288390,-0.132137,...,-0.170307,-0.429655,-0.141341,-0.200195,0.639491,0.399476,-0.034321,0.031692,0.76,0
1,50554,-0.798672,1.185093,0.904547,0.694584,0.219041,-0.319295,0.495236,0.139269,-0.760214,...,0.202287,0.578699,-0.092245,0.013723,-0.246466,-0.380057,-0.396030,-0.112901,4.18,0
2,55125,-0.391128,-0.245540,1.122074,-1.308725,-0.639891,0.008678,-0.701304,-0.027315,-2.628854,...,-0.133485,0.117403,-0.191748,-0.488642,-0.309774,0.008100,0.163716,0.239582,15.00,0
3,116572,-0.060302,1.065093,-0.987421,-0.029567,0.176376,-1.348539,0.775644,0.134843,-0.149734,...,0.355576,0.907570,-0.018454,-0.126269,-0.339923,-0.150285,-0.023634,0.042330,57.00,0
4,90434,1.848433,0.373364,0.269272,3.866438,0.088062,0.970447,-0.721945,0.235983,0.683491,...,0.103563,0.620954,0.197077,0.692392,-0.206530,-0.021328,-0.019823,-0.042682,0.00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,160243,-2.783865,1.596824,-2.084844,2.512986,-1.446749,-0.828496,-0.732262,-0.203329,-0.347046,...,0.203563,0.293268,0.199568,0.146868,0.163602,-0.624085,-1.333100,0.428634,156.00,1
596,110547,-1.532810,2.232752,-5.923100,3.386708,-0.153443,-1.419748,-3.878576,1.444656,-1.465542,...,0.632505,-0.070838,-0.490291,-0.359983,0.050678,1.095671,0.471741,-0.106667,0.76,1
597,70071,-0.440095,1.137239,-3.227080,3.242293,-2.033998,-1.618415,-3.028013,0.764555,-1.801937,...,0.764187,-0.275578,-0.343572,0.233085,0.606434,-0.315433,0.768291,0.459623,227.30,1
598,93879,-13.086519,7.352148,-18.256576,10.648505,-11.731476,-3.659167,-14.873658,8.810473,-5.418204,...,2.761157,-0.266162,-0.412861,0.519952,-0.743909,-0.167808,-2.498300,-0.711066,30.31,1


In [12]:
# Checking Null Values

In [13]:
credit_card_df.isnull().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

In [14]:
# Drop the Time column

In [15]:
credit_card_df = credit_card_df.drop(columns=["Time"])

In [16]:
# Scale the Amount column using a standard scaler

In [17]:
scaler = StandardScaler()
credit_card_df["Amount"] = scaler.fit_transform(credit_card_df[["Amount"]])

In [18]:
# Separating features (x) and target variable (y: Class)

In [19]:
x = credit_card_df.drop(columns=["Class"])

In [20]:
y = credit_card_df["Class"]

In [21]:
# Split the data into training and test sets (80:20 split ratio)

In [22]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [36]:
# Train a logistic regression model on the training set

In [23]:
logistic_model = LogisticRegression()
logistic_model.fit(x_train, y_train)

In [37]:
# Evaluate logistic regression model on the test set

In [24]:
y_pred_logistic = logistic_model.predict(x_test)

In [38]:
# Evaluate performance using confusion matrix and classification report

In [25]:
print("Logistic Regression Evaluation: Credit Card Fraud Detection")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_logistic))
print("\nClassification Report:\n", classification_report(y_test, y_pred_logistic))
print("Accuracy Score:", accuracy_score(y_test, y_pred_logistic))

Logistic Regression Evaluation: Credit Card Fraud Detection
Confusion Matrix:
 [[59  3]
 [ 4 54]]

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.95      0.94        62
           1       0.95      0.93      0.94        58

    accuracy                           0.94       120
   macro avg       0.94      0.94      0.94       120
weighted avg       0.94      0.94      0.94       120

Accuracy Score: 0.9416666666666667


In [39]:
# Train an SVM model on the training set

In [26]:
svm_model = SVC()
svm_model.fit(x_train, y_train)

In [40]:
# Evaluate SVM model on the test set

In [27]:
y_pred_svm = svm_model.predict(x_test)

In [41]:
# Evaluate performance using confusion matrix and classification report

In [28]:
print("\nSVM Evaluation: Credit Card Fraud Detection")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_svm))
print("\nClassification Report:\n", classification_report(y_test, y_pred_svm))
print("Accuracy Score:", accuracy_score(y_test, y_pred_svm))


SVM Evaluation: Credit Card Fraud Detection
Confusion Matrix:
 [[62  0]
 [ 6 52]]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      1.00      0.95        62
           1       1.00      0.90      0.95        58

    accuracy                           0.95       120
   macro avg       0.96      0.95      0.95       120
weighted avg       0.95      0.95      0.95       120

Accuracy Score: 0.95


In [42]:
# Tune hyperparameters using grid search cross-validation
# For Logistic Regression

In [29]:
param_grid_logistic = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}
grid_search_logistic = GridSearchCV(LogisticRegression(), param_grid_logistic, cv=5, scoring='accuracy')
grid_search_logistic.fit(x_train, y_train)
best_logistic_model = grid_search_logistic.best_estimator_

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [43]:
# For SVM

In [30]:
param_grid_svm = {'C': [0.1, 1, 10], 'gamma': [0.1, 1, 10]}
grid_search_svm = GridSearchCV(SVC(), param_grid_svm, cv=5, scoring='accuracy')
grid_search_svm.fit(x_train, y_train)
best_svm_model = grid_search_svm.best_estimator_

In [44]:
# Train models with optimal hyperparameters and evaluate on the test set

In [31]:
best_logistic_model.fit(x_train, y_train)
best_svm_model.fit(x_train, y_train)

In [32]:
y_pred_logistic_best = best_logistic_model.predict(x_test)
y_pred_svm_best = best_svm_model.predict(x_test)

In [45]:
# Evaluate performance

In [33]:
print("\nBest Logistic Regression Model Evaluation: Credit Card Fraud Detection")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_logistic_best))
print("\nClassification Report:\n", classification_report(y_test, y_pred_logistic_best))
print("Accuracy Score:", accuracy_score(y_test, y_pred_logistic_best))


Best Logistic Regression Model Evaluation: Credit Card Fraud Detection
Confusion Matrix:
 [[61  1]
 [ 4 54]]

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.98      0.96        62
           1       0.98      0.93      0.96        58

    accuracy                           0.96       120
   macro avg       0.96      0.96      0.96       120
weighted avg       0.96      0.96      0.96       120

Accuracy Score: 0.9583333333333334


In [34]:
print("\nBest SVM Model Evaluation: Credit Card Fraud Detection")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_svm_best))
print("\nClassification Report:\n", classification_report(y_test, y_pred_svm_best))
print("Accuracy Score:", accuracy_score(y_test, y_pred_svm_best))


Best SVM Model Evaluation: Credit Card Fraud Detection
Confusion Matrix:
 [[54  8]
 [ 1 57]]

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.87      0.92        62
           1       0.88      0.98      0.93        58

    accuracy                           0.93       120
   macro avg       0.93      0.93      0.92       120
weighted avg       0.93      0.93      0.92       120

Accuracy Score: 0.925


In [46]:
# Compare the performance of the models

In [35]:
print("\nModel Comparison: Credit Card Fraud Detection")
print("Logistic Regression Model Accuracy:", accuracy_score(y_test, y_pred_logistic_best))
print("SVM Model Accuracy:", accuracy_score(y_test, y_pred_svm_best))


Model Comparison: Credit Card Fraud Detection
Logistic Regression Model Accuracy: 0.9583333333333334
SVM Model Accuracy: 0.925
