In [83]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM, GRU, Dropout
import mlflow
import mlflow.tensorflow

In [84]:
# Load preprocessed data
pre_credit_data = pd.read_csv('../data/preprocessed_creditcard_data.csv')
pre_fraud_data_df = pd.read_csv('../data/preprocessed_fraud_data.csv')

In [87]:
 #Define functions for model building and evaluation

def build_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    return model

In [88]:
def build_rnn_model(input_shape):
    model = Sequential()
    model.add(GRU(units=64, return_sequences=True, input_shape=input_shape))
    model.add(GRU(units=32))
    model.add(Dense(1, activation='sigmoid'))
    return model

In [89]:
def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=64, return_sequences=True, input_shape=input_shape))
    model.add(LSTM(units=32))
    model.add(Dense(1, activation='sigmoid'))
    return model

In [90]:
def train_and_evaluate_model(model, X_train, y_train, X_test, y_test, model_name, experiment_name):
    # MLflow tracking
    mlflow.set_tracking_uri('http://localhost:5000')  # Set your MLflow tracking server
    mlflow.set_experiment(experiment_name)
    with mlflow.start_run():
        mlflow.log_param('model', model_name)

        # Compile the model
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        # Train the model
        history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

        # Evaluate the model
        y_pred = model.predict(X_test)
        y_pred_binary = (y_pred > 0.5).astype(int)

        accuracy = accuracy_score(y_test, y_pred_binary)
        precision = precision_score(y_test, y_pred_binary)
        recall = recall_score(y_test, y_pred_binary)
        f1 = f1_score(y_test, y_pred_binary)
        roc_auc = roc_auc_score(y_test, y_pred)

        # Log metrics to MLflow
        mlflow.log_metric('accuracy', accuracy)
        mlflow.log_metric('precision', precision)
        mlflow.log_metric('recall', recall)
        mlflow.log_metric('f1_score', f1)
        mlflow.log_metric('roc_auc', roc_auc)

        # Log model to MLflow
        mlflow.tensorflow.log_model(model, artifact_path='model')

        print(f'Model: {model_name}')
        print(f'Accuracy: {accuracy:.4f}')
        print(f'Precision: {precision:.4f}')
        print(f'Recall: {recall:.4f}')
        print(f'F1 Score: {f1:.4f}')
        print(f'ROC AUC: {roc_auc:.4f}')


In [8]:
# Data Preparation

# Credit Card Data
X_credit = pre_credit_data.drop('Class', axis=1)
y_credit = pre_credit_data['Class']
X_credit_train, X_credit_test, y_credit_train, y_credit_test = train_test_split(X_credit, y_credit, test_size=0.2, random_state=42)


In [15]:
print("Credit Card Data - LSTM")
lstm_model_credit = build_lstm_model(X_credit_train.shape[1:])
train_and_evaluate_model(lstm_model_credit, X_credit_train, y_credit_train, X_credit_test, y_credit_test, 'LSTM', 'Credit Card Fraud Detection')


Credit Card Data - LSTM


  super().__init__(**kwargs)


Epoch 1/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m152s[0m 21ms/step - accuracy: 0.9979 - loss: 0.0136 - val_accuracy: 0.9993 - val_loss: 0.0039
Epoch 2/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 19ms/step - accuracy: 0.9994 - loss: 0.0035 - val_accuracy: 0.9994 - val_loss: 0.0037
Epoch 3/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 19ms/step - accuracy: 0.9994 - loss: 0.0033 - val_accuracy: 0.9990 - val_loss: 0.0042
Epoch 4/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 26ms/step - accuracy: 0.9993 - loss: 0.0035 - val_accuracy: 0.9994 - val_loss: 0.0038
Epoch 5/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 29ms/step - accuracy: 0.9993 - loss: 0.0033 - val_accuracy: 0.9994 - val_loss: 0.0037
Epoch 6/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 21ms/step - accuracy: 0.9993 - loss: 0.0036 - val_accuracy: 0.9992 - val_loss: 0.004



Model: LSTM
Accuracy: 0.9994
Precision: 0.9375
Recall: 0.6667
F1 Score: 0.7792
ROC AUC: 0.9561


In [14]:
print("Credit Card Data - RNN")
rnn_model_credit = build_rnn_model(X_credit_train.shape[1:])
train_and_evaluate_model(rnn_model_credit, X_credit_train, y_credit_train, X_credit_test, y_credit_test, 'RNN', 'Credit Card Fraud Detection')

Credit Card Data - RNN


  super().__init__(**kwargs)


Epoch 1/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 20ms/step - accuracy: 0.9974 - loss: 0.0161 - val_accuracy: 0.9994 - val_loss: 0.0039
Epoch 2/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 20ms/step - accuracy: 0.9994 - loss: 0.0035 - val_accuracy: 0.9994 - val_loss: 0.0037
Epoch 3/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 19ms/step - accuracy: 0.9994 - loss: 0.0033 - val_accuracy: 0.9993 - val_loss: 0.0038
Epoch 4/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 24ms/step - accuracy: 0.9994 - loss: 0.0030 - val_accuracy: 0.9995 - val_loss: 0.0037
Epoch 5/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 36ms/step - accuracy: 0.9994 - loss: 0.0034 - val_accuracy: 0.9994 - val_loss: 0.0037
Epoch 6/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 22ms/step - accuracy: 0.9995 - loss: 0.0030 - val_accuracy: 0.9994 - val_loss: 0.003



Model: RNN
Accuracy: 0.9994
Precision: 0.9508
Recall: 0.6444
F1 Score: 0.7682
ROC AUC: 0.9447


In [13]:
# Model Training and Evaluation

# Credit Card Data
print("Credit Card Data - CNN")
cnn_model_credit = build_cnn_model(X_credit_train.shape[1:])
train_and_evaluate_model(cnn_model_credit, X_credit_train, y_credit_train, X_credit_test, y_credit_test, 'CNN', 'Credit Card Fraud Detection')

Credit Card Data - CNN


2024/06/22 11:23:33 INFO mlflow.tracking.fluent: Experiment with name 'Credit Card Fraud Detection' does not exist. Creating a new experiment.


Epoch 1/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 2ms/step - accuracy: 0.9851 - loss: 13.3743 - val_accuracy: 0.9984 - val_loss: 3.7045
Epoch 2/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 0.9980 - loss: 0.9271 - val_accuracy: 0.9981 - val_loss: 0.4358
Epoch 3/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 0.9983 - loss: 0.4000 - val_accuracy: 0.9992 - val_loss: 0.0731
Epoch 4/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 0.9986 - loss: 0.2576 - val_accuracy: 0.9994 - val_loss: 0.0791
Epoch 5/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 0.9987 - loss: 0.1440 - val_accuracy: 0.9994 - val_loss: 0.0410
Epoch 6/10
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step - accuracy: 0.9991 - loss: 0.0441 - val_accuracy: 0.9993 - val_loss: 0.0166
Epoch 7/1



Model: CNN
Accuracy: 0.9994
Precision: 0.9524
Recall: 0.6667
F1 Score: 0.7843
ROC AUC: 0.9095
