In [1]:
#import necessary libraries
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
# load the data
# The location is configurable: set the CREDITCARD_CSV environment variable to
# point at the file. Otherwise fall back to ~/Downloads/creditcard_2023.csv for
# the current user, rather than one specific machine's absolute path.
DEFAULT_DATA_PATH = Path.home() / 'Downloads' / 'creditcard_2023.csv'
DATA_PATH = Path(os.environ.get('CREDITCARD_CSV', DEFAULT_DATA_PATH))
df = pd.read_csv(DATA_PATH)

In [3]:
# feature selection: every column except the row identifier and the label is a
# model input; the 'Class' column is the prediction target
non_feature_columns = ['id', 'Class']
X = df.drop(columns=non_feature_columns)
y = df['Class']

In [4]:
# scale the features: learn per-column mean/variance, then standardise
# NOTE(review): the scaler is fitted on every row of X (no train/test
# separation has happened at this point).
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [5]:
# split the data into training and testing sets
# The raw features are split first and the scaler is then re-fitted on the
# training rows only, so test-set statistics never leak into preprocessing.
# train_test_split picks rows from n_samples and random_state alone, so the
# row partition is the same as when the pre-scaled matrix was split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Rebind `scaler` so it matches the preprocessing the models are trained with.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [6]:
# logistic regression model (unfitted); the fixed seed keeps runs repeatable
logistic_model = LogisticRegression(
    random_state=42,
)

In [7]:
# fit the logistic regression on the training split
# (left as a bare expression so the notebook displays the fitted estimator)
logistic_model.fit(X=X_train, y=y_train)

In [8]:
# class predictions of the logistic regression for the held-out test rows
logistic_pred = logistic_model.predict(X=X_test)

In [9]:
# random forest model (unfitted): 100 trees, seeded for repeatable results
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)


In [12]:
# fit the random forest on the training split
# (left as a bare expression so the notebook displays the fitted estimator)
rf_model.fit(X=X_train, y=y_train)

In [13]:
# class predictions of the random forest for the held-out test rows
rf_pred = rf_model.predict(X=X_test)

In [14]:
# evaluate logistic regression: apply each metric to the same
# (true labels, predictions) pair, in the order the names are unpacked
logistic_accuracy, logistic_precision, logistic_recall, logistic_f1 = [
    metric(y_test, logistic_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
]


In [15]:
# evaluate random forest: apply each metric to the same
# (true labels, predictions) pair, in the order the names are unpacked
rf_accuracy, rf_precision, rf_recall, rf_f1 = [
    metric(y_test, rf_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
]


In [16]:
# print evaluation metrics: one heading per model followed by its four scores
metric_labels = ('Accuracy', 'Precision', 'Recall', 'F1 Score')
model_reports = (
    ("Logistic Regression:", (logistic_accuracy, logistic_precision, logistic_recall, logistic_f1)),
    ("\nRandom Forest:", (rf_accuracy, rf_precision, rf_recall, rf_f1)),
)
for heading, values in model_reports:
    print(heading)
    for label, value in zip(metric_labels, values):
        print(f'{label}: {value}')

Logistic Regression:
Accuracy: 0.965214638692999
Precision: 0.9773652177044694
Recall: 0.9526291771974165
F1 Score: 0.9648386810061328

Random Forest:
Accuracy: 0.9998856901675958
Precision: 0.9997718858025233
Recall: 1.0
F1 Score: 0.999885929890756


In [17]:
# cross-validation
# Scaling is done inside a pipeline on the raw features, so every fold fits its
# own StandardScaler on that fold's training rows only. Cross-validating on a
# matrix that was scaled with all rows would let the held-out rows influence
# the scaling statistics. cross_val_score clones the estimator it is given, so
# the already-fitted logistic_model / rf_model objects are left untouched.
logistic_cv_scores = cross_val_score(make_pipeline(StandardScaler(), logistic_model), X, y, cv=10, scoring='accuracy')
rf_cv_scores = cross_val_score(make_pipeline(StandardScaler(), rf_model), X, y, cv=10, scoring='accuracy')

print(f'\nLogistic Regression Cross-Validation Accuracy: {logistic_cv_scores.mean()}')
print(f'Random Forest Cross-Validation Accuracy: {rf_cv_scores.mean()}')


Logistic Regression Cross-Validation Accuracy: 0.9631887167402352
Random Forest Cross-Validation Accuracy: 0.9998100698169285
