In [None]:
# Install CatBoost, which the feature-importance cell below depends on.
# Pinned to the version the recorded output of this cell reports as installed.
%pip install -q catboost==1.2.2


Collecting catboost
  Downloading catboost-1.2.2-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2.2


In [None]:
# Pin TensorFlow to the exact version the recorded output of this cell installed,
# so a fresh runtime does not silently pick up a newer release. %pip targets the
# environment of the running kernel.
%pip install -q tensorflow==2.15.0.post1

Collecting tensorflow
  Downloading tensorflow-2.15.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (475.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.2/475.2 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow
  Attempting uninstall: tensorflow
    Found existing installation: tensorflow 2.15.0
    Uninstalling tensorflow-2.15.0:
      Successfully uninstalled tensorflow-2.15.0
Successfully installed tensorflow-2.15.0.post1


In [None]:

# Mount Google Drive at /content/drive so the dataset stored on Drive can be
# read by path further down in this cell.
from google.colab import drive
drive.mount('/content/drive')

# Import necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.io.arff import loadarff
from sklearn.metrics import roc_curve, auc, accuracy_score, confusion_matrix, f1_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# from catboost import CatBoostClassifier
from tensorflow.keras.layers import SimpleRNN, LSTM, GRU, Bidirectional, Conv1D, Dense, Flatten
from tensorflow.keras.models import Sequential

# Load the credit-card CSV from the mounted Google Drive folder.
CREDITCARD_CSV = '/content/drive/My Drive/Datasets/creditcard.csv'
df = pd.read_csv(CREDITCARD_CSV)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Preprocess the dataset: hold out a test set, then standardize the features.
# Note: no missing-value handling is performed in this cell.

# Split the dataset into training and testing sets.
# stratify=y keeps the class proportions the same in both splits. The target is
# presumably heavily imbalanced (the notebook reports sensitivity and balanced
# accuracy) -- TODO confirm; without stratification the share of positives in
# the test split is left to chance.
X = df.drop("Class", axis=1)
y = df["Class"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize features. The scaler is fitted on the training split only and then
# reused on the test split, so test statistics do not influence the scaling.
# From here on X_train / X_test are NumPy arrays (X stays a DataFrame).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [None]:
# Fit a CatBoost classifier and plot the importance it assigns to each feature.
# The top-level catboost import is commented out, so CatBoostClassifier was an
# undefined name here; import it in this cell so only this optional step depends
# on the catboost package being installed.
from catboost import CatBoostClassifier

catboost_model = CatBoostClassifier()
catboost_model.fit(X_train, y_train)

# Plot feature importance
feature_importance = catboost_model.feature_importances_
# Label the bars with the columns of X (the frame the training data was split
# from) instead of df.columns[:-1], which is only correct when "Class" happens
# to be the last column of df.
plt.barh(X.columns, feature_importance)
plt.xlabel('Feature Importance')
plt.ylabel('Features')
plt.title('CatBoost Feature Importance')
plt.show()

In [None]:

# Helper that overlays the ROC curve (with its AUC) of several models in one figure
def plot_roc_auc_multiple(models, X_test, y_test):
    """Plot one ROC curve per model on a shared set of axes.

    Args:
        models: Mapping of display name -> fitted model exposing ``predict``.
        X_test: 2-D array of test features. It is reshaped to
            ``(samples, n_features, 1)`` before being handed to ``predict``.
        y_test: True binary labels for the rows of ``X_test``.

    Every legend entry shows the model name and the area under its curve.
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    _, ax = plt.subplots(figsize=(8, 6))

    for name, estimator in models.items():
        # Models are scored on a 3-D view of the features (one channel per feature).
        features_3d = X_test.reshape(-1, X_test.shape[1], 1)
        scores = estimator.predict(features_3d)

        false_pos_rate, true_pos_rate, _ = roc_curve(y_test, scores)
        area = auc(false_pos_rate, true_pos_rate)

        ax.plot(false_pos_rate, true_pos_rate, label=f'{name} (AUC = {area:.2f})')

    # Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
    ax.plot([0, 1], [0, 1], 'k--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve Comparison')
    ax.legend(loc="lower right")
    plt.show()

# Create a dictionary of trained models.
# None of the *_model variables below is assigned in the visible part of this
# notebook, so on a fresh kernel this cell aborted with a NameError. Guard the
# lookup: when the variables are missing, say so and skip the plot instead of
# stopping the whole run.
try:
    models = {
        "SimpleRNN": simple_rnn_model,
        "LSTM": lstm_model,
        "GRU": gru_model,
        "BiLSTM": bi_lstm_model,
        "BiGRU": bi_gru_model,
        "CNN": cnn_model,
        "DNN": dnn_model
    }
except NameError as missing_model:
    print(f"Skipping ROC comparison: {missing_model}. Train the models before running this cell.")
else:
    # Call the function to plot ROC curves for all models. This sits in the
    # else-branch so errors raised while plotting are not swallowed by the guard.
    plot_roc_auc_multiple(models, X_test, y_test)



NameError: name 'simple_rnn_model' is not defined

In [None]:
# Initialize the candidate networks.
# The training loop feeds every model the features reshaped to
# (samples, n_features, 1), so each network declares that input shape and ends
# in a single sigmoid unit that yields one score per sample.
n_features = X_train.shape[1]
sequence_shape = (n_features, 1)

models = {
    "SimpleRNN": Sequential([SimpleRNN(64, input_shape=sequence_shape), Dense(1, activation='sigmoid')]),
    "LSTM": Sequential([LSTM(64, input_shape=sequence_shape), Dense(1, activation='sigmoid')]),
    "GRU": Sequential([GRU(64, input_shape=sequence_shape), Dense(1, activation='sigmoid')]),
    "BiLSTM": Sequential([Bidirectional(LSTM(64), input_shape=sequence_shape), Dense(1, activation='sigmoid')]),
    "BiGRU": Sequential([Bidirectional(GRU(64), input_shape=sequence_shape), Dense(1, activation='sigmoid')]),
    # Conv1D emits a feature vector per time step. Without Flatten the final
    # Dense is applied per step and the model outputs (steps, 1) values per
    # sample, which cannot be compared against one label per sample.
    "CNN": Sequential([
        Conv1D(64, kernel_size=3, activation='relu', input_shape=sequence_shape),
        Flatten(),
        Dense(1, activation='sigmoid'),
    ]),
    # The dense baseline used to declare a flat (n_features,) input, which does
    # not match the 3-D arrays passed to fit/predict. Accept the shared 3-D
    # input and flatten it first instead.
    "DNN": Sequential([
        Flatten(input_shape=sequence_shape),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]),
}

# Train and evaluate each model.
# Every model is fitted and scored on the features reshaped to
# (samples, n_features, 1); build those views once instead of per call.
X_train_seq = X_train.reshape(-1, X_train.shape[1], 1)
X_test_seq = X_test.reshape(-1, X_test.shape[1], 1)

results = {}
for model_name, model in models.items():
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X_train_seq, y_train, epochs=10, batch_size=64, verbose=0)

    # Threshold the predicted scores at 0.5 and flatten to a 1-D label vector.
    y_pred_binary = (model.predict(X_test_seq) > 0.5).astype(int).ravel()

    accuracy = accuracy_score(y_test, y_pred_binary)

    # labels=[0, 1] pins a 2x2 matrix (rows = true class, columns = predicted
    # class) even if a class is absent from the predictions, so ravel() always
    # unpacks as tn, fp, fn, tp.
    confusion = confusion_matrix(y_test, y_pred_binary, labels=[0, 1])
    tn, fp, fn, tp = confusion.ravel()
    sensitivity = tp / (tp + fn)  # true-positive rate
    # Previously never computed, which raised NameError when building `results`.
    specificity = tn / (tn + fp)  # true-negative rate

    f1 = f1_score(y_test, y_pred_binary)
    balanced_acc = balanced_accuracy_score(y_test, y_pred_binary)

    results[model_name] = {
        "Accuracy": accuracy,
        "Sensitivity": sensitivity,
        "Specificity": specificity,
        "F1 Score": f1,
        "Balanced Accuracy": balanced_acc
    }

# The loop used to call plot_roc_auc(...), which is not defined in the visible
# notebook. Use the plot_roc_auc_multiple helper defined earlier, once, after all
# models are trained; it reshapes X_test itself and overlays every ROC curve.
plot_roc_auc_multiple(models, X_test, y_test)

# Print the collected metrics, one block per model, each value to two decimals
for model_name, metrics in results.items():
    print(f"Model: {model_name}")
    # Fixed order so every model's block lists the metrics identically.
    for metric_name in ("Accuracy", "Sensitivity", "Specificity", "F1 Score", "Balanced Accuracy"):
        print(f"{metric_name}: {metrics[metric_name]:.2f}")
    print("=" * 30)



NameError: name 'specificity' is not defined