In [1]:
# Step 1: Import the required libraries
import pandas as pd
from pandas.plotting import scatter_matrix
import numpy as np
import matplotlib.pyplot as plt
import os
from imblearn.over_sampling import ADASYN 
from collections import Counter
import seaborn as sn


ModuleNotFoundError: No module named 'pandas'

In [None]:
# Step 2: Import the plot_functions module
import plot_functions as pf

In [None]:
# Step 3: Import scikit-learn packages
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import BernoulliNB 
from sklearn import metrics

In [None]:
# Step 4: Configure settings for visualizations
# IPython line magic: render matplotlib figures inline in the notebook output.
# (Recent Jupyter versions usually do this by default, so the line is harmless.)
%matplotlib inline
# Global seaborn styling; these calls affect every figure drawn after this cell.
sn.set_style("dark")
sn.set_palette("colorblind")

In [None]:
# Step 5: Read the credit card transactions CSV into a DataFrame.
# The path is relative to the notebook's working directory.
df = pd.read_csv(
    "data/creditcard.csv",
)


In [None]:
# Step 6: Preview the first five rows of the dataset (rich display as cell output)
df.head(n=5)


In [None]:
# Step 7: Print information about the dataset
# DataFrame.info() writes its summary directly to the cell output; it is called
# here only for that printed summary, so nothing is assigned.
df.info()

In [None]:
# Step 8: Print the counts of normal and fraudulent transactions
# Count each class by its explicit label (0 = normal, 1 = fraudulent) instead of
# by position in value_counts(): value_counts() orders by frequency, so positional
# access would swap the labels if fraud ever outnumbered normal rows, and would
# raise IndexError if one class were missing entirely.
normal_count = (df['Class'] == 0).sum()
fraud_count = (df['Class'] == 1).sum()
print('Normal transactions count: ', normal_count)
print('Fraudulent transactions count: ', fraud_count)

In [None]:
# Step 9: Create the feature matrix X and target vector y
# Select the features by dropping the target column by name rather than slicing
# off "the last column": the result no longer depends on column order in the CSV.
y = df['Class']
X = df.drop(columns='Class')

In [None]:
# Step 10: Standardize every feature column of X
# NOTE(review): the scaler is fitted on the full dataset here, before the
# train/test split in the next step, so test-set statistics influence the scaling.
scaler = StandardScaler()
scaled_X = scaler.fit(X).transform(X)

In [None]:
# Step 11: Split the dataset into training and testing sets
# Split the *unscaled* features first, then fit the scaler on the training part
# only. Fitting StandardScaler on all rows before splitting leaks the test set's
# mean/variance into training (data leakage).
# stratify=y keeps the class ratio of y identical in both splits.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # learn scaling from training rows only
X_test = scaler.transform(X_test_raw)        # apply the same scaling to the held-out rows

In [None]:
# Step 12: Perform oversampling on the training data using ADASYN
# Only the training split is resampled; the test split keeps its original
# class distribution so the evaluation reflects real-world imbalance.
ada = ADASYN(random_state=42)
print('Original dataset shape {}'.format(Counter(y_train)))
# fit_resample is the supported sampler API; the old fit_sample alias was
# deprecated and is no longer available in current imbalanced-learn releases.
X_res, y_res = ada.fit_resample(X_train, y_train)
print('Resampled dataset shape {}'.format(Counter(y_res)))

In [None]:
# Step 13: From here on, train on the ADASYN-resampled data.
# NOTE: this rebinds X_train / y_train, so re-running Step 12 after this cell
# would resample the already-resampled data.
X_train = X_res
y_train = y_res

In [None]:
# Step 14: Fit a logistic regression model (default hyper-parameters) on the training data
LGR_Classifier = LogisticRegression().fit(X_train, y_train)
LGR_Classifier  # display the fitted estimator as the cell output

In [None]:
# Step 15: Fit a random forest model (seeded for reproducibility) on the training data
RDF_Classifier = RandomForestClassifier(random_state=0).fit(X_train, y_train)
RDF_Classifier  # display the fitted estimator as the cell output


In [None]:
# Step 16: Fit a Bernoulli naive Bayes model (default hyper-parameters) on the training data
BNB_Classifier = BernoulliNB().fit(X_train, y_train)
BNB_Classifier  # display the fitted estimator as the cell output

In [None]:
# Step 17: Create a list of models for evaluation
# Each entry is a (display name, fitted estimator) pair; the name is what the
# evaluation cells print as the section header for that model.
modlist = [
    ('RandomForest Classifier', RDF_Classifier),
    ('LogisticRegression', LGR_Classifier),
    ('Naive Bayes Classifier', BNB_Classifier),  # label typo fixed (was "Baiye")
]
# Shallow copy, so `models` can be reordered or filtered without touching `modlist`.
models = list(modlist)

In [None]:
# Step 18: Print model evaluation results (on the training data)
# For every (name, model) pair: 10-fold cross-validation score plus accuracy,
# confusion matrix and classification report of the fitted model on X_train.
# NOTE(review): X_train is the ADASYN-resampled set (Step 13), so the CV folds
# contain synthetic samples and these scores are likely optimistic.
print('\n========================== Model Evaluation Results ========================\n')
for name, model in models:
    scores = cross_val_score(model, X_train, y_train, cv=10)
    # Predict once and reuse the result for all three metrics.
    train_pred = model.predict(X_train)
    accuracy = metrics.accuracy_score(y_train, train_pred)
    confusion_matrix = metrics.confusion_matrix(y_train, train_pred)
    classification = metrics.classification_report(y_train, train_pred)
    print('===== {} ====='.format(name))
    print()
    # Format to one decimal place: `np.round(x, 3) * 100` can print float
    # artifacts such as 56.89999999999999%.
    print("Cross Validation Mean Score: ", '{:.1f}%'.format(scores.mean() * 100))
    print()
    print("Model Accuracy: ", '{:.1f}%'.format(accuracy * 100))
    print()
    print("Confusion Matrix:\n", confusion_matrix)
    print()
    print("Classification Report:\n", classification)
    print()

In [None]:
# Step 19: Test the models on the testing data and print the results
# For every (name, model) pair: accuracy, confusion matrix (printed and plotted)
# and classification report on the held-out test split.
classdict = {'normal':0, 'fraudulent':1}  # class name -> label; keys are the plot's tick labels
print('\n========================== Model Test Results ========================\n')
for name, model in models:
    # Predict once and reuse the result for all three metrics.
    test_pred = model.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, test_pred)
    confusion_matrix = metrics.confusion_matrix(y_test, test_pred)
    classification = metrics.classification_report(y_test, test_pred)
    print('=== {} ==='.format(name))
    print()
    # Format to one decimal place: `np.round(x, 3) * 100` can print float
    # artifacts such as 56.89999999999999%.
    print("Model Accuracy: ", '{:.1f}%'.format(accuracy * 100))
    print()
    print("Confusion Matrix:\n", confusion_matrix)
    print()
    pf.plot_confusion_matrix(confusion_matrix, classes = list(classdict.keys()), title='Confusion Matrix Plot', cmap=plt.cm.summer)
    print()
    print("Classification Report:\n", classification)
    print()

In [None]:
# Step 20: Draw the ROC curve via the project's plot_functions helper.
# The helper receives the (name, model) pairs and the held-out test split.
print('\n============================= ROC Curve ===============================\n')
pf.plot_roc_auc(
    arg1=models,
    arg2=X_test,
    arg3=y_test,
)