In [None]:
# Set up the notebook to import modules from relative paths
import os, sys

# Resolve the notebook's working directory and expose its parent on sys.path
# so that sibling modules can be imported.
# NOTE: both paths are derived from the kernel's current working directory,
# so the result depends on where the kernel was started.

# e.g. '/home/user/example/parent/child'
current_path = os.path.abspath('.')

# e.g. '/home/user/example/parent'
parent_path = os.path.dirname(current_path)

# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if parent_path not in sys.path:
    sys.path.append(parent_path)

In [None]:
from data_pipeline import ETL_Pipeline 

# Build the ETL pipeline on the shared data directory, then run it on
# 'transactions-1.csv'. The process() call stays last so its return value
# is what the cell displays.
shared_data_dir = '/workspace/shared-data/'
dp = ETL_Pipeline(shared_data_dir)
dp.process('transactions-1.csv')

In [None]:
import pandas as pd
import numpy as np
import sklearn
import seaborn as sns

from IPython.display import display, HTML

# Display properties for pandas output.
# (`display` and `HTML` are already imported earlier in this cell, so the
# import is not repeated here.)

# No limit on the number of rows / columns rendered.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Wide output with centred column headers.
pd.set_option('display.width', 1000)
pd.set_option('display.colheader_justify', 'center')

# NOTE(review): precision is set to 2 while float_format below renders three
# decimals; float_format appears to take precedence for float cells — confirm
# which of the two is actually intended.
pd.set_option('display.precision', 2)
pd.set_option('display.float_format', '{:.3f}'.format)


# Apply seaborn's "ticks" style to subsequent plots; color_codes=True asks
# seaborn to remap matplotlib's shorthand colour codes to its own palette.
sns.set(style="ticks", color_codes=True)

In [None]:
# Load the transformed data CSV from the shared data directory
# (presumably the output of the ETL step above — confirm).
transformed_csv = '/workspace/shared-data/transformed_data.csv'
df = pd.read_csv(transformed_csv)

# Preview the first rows as the cell output.
df.head()

In [None]:
from dataset import Fraud_Dataset

# Wrap the loaded frame in a Fraud_Dataset, passing 'is_fraud' as the second
# argument (presumably the name of the label column — confirm).
# NOTE(review): this was described as a dataset "with 5 folds", but no fold
# count is passed here; if that holds it must come from a Fraud_Dataset
# default — verify.
label_column = 'is_fraud'
fd = Fraud_Dataset(df, label_column)

In [None]:
from model import Fraud_Detector_Model
from metrics import Metrics

# Metrics helper; its generate_report() is called once all models are scored.
metrics = Metrics()

# Fetch the training, testing and validation splits. Every getter receives 0
# (presumably a fold index — confirm against Fraud_Dataset).
X_train, y_train = fd.get_training_dataset(0)
X_test, y_test = fd.get_testing_dataset(0)
X_val, y_val = fd.get_validation_dataset(0)

# Classifier codes passed one by one to Fraud_Detector_Model.
classifiers = ['RF', 'GB', 'AB']

# One independent, empty list per metric; each gains one entry per classifier.
(accs, acc_bals, specificitys, sensitivitys, precs,
 recalls, f1s, roc_aucs, avg_precs) = ([] for _ in range(9))

for classifier in classifiers:
    # Fresh model for this classifier code.
    model = Fraud_Detector_Model(classifier)

    # Every classifier except 'AB' is also handed the validation split;
    # 'AB' is trained on the training split alone.
    if classifier != 'AB':
        model.train(X_train, y_train, X_val, y_val)
    else:
        model.train(X_train, y_train)

    # Score on the test split; test() is unpacked into exactly nine metrics.
    (acc, acc_bal, specificity, sensitivity, prec,
     recall, f1, roc_auc, avg_prec) = model.test(X_test, y_test)

    # Record each score in its per-metric list, in classifier order.
    accs.append(acc)
    acc_bals.append(acc_bal)
    specificitys.append(specificity)
    sensitivitys.append(sensitivity)
    precs.append(prec)
    recalls.append(recall)
    f1s.append(f1)
    roc_aucs.append(roc_auc)
    avg_precs.append(avg_prec)

# Generate the report: the nine per-metric lists first, then the classifier
# codes, then the results path (presumably where the report is written).
metrics.generate_report(
    accs, acc_bals, specificitys, sensitivitys, precs,
    recalls, f1s, roc_aucs, avg_precs,
    classifiers,
    '/workspace/shared-data/hemdev-705.603Spring24/FraudDetection/results/model-results.txt',
)


In [None]:
# Write a plain-text summary of each classifier's scores.
# NOTE(review): this opens, in "w" mode, the same path that is passed to
# metrics.generate_report(...) in the previous cell, so anything that call
# wrote there is overwritten — confirm that both writers are intended.
results_path = '/workspace/shared-data/hemdev-705.603Spring24/FraudDetection/results/model-results.txt'

# `with` guarantees the file is closed even if a format or write call raises.
with open(results_path, "w") as f:
    # Walk the classifier codes and the per-metric lists in lockstep instead
    # of maintaining a manual index.
    for (cls, acc, acc_bal, specificity, sensitivity,
         prec, recall, f1, roc_auc, avg_prec) in zip(
            classifiers, accs, acc_bals, specificitys, sensitivitys,
            precs, recalls, f1s, roc_aucs, avg_precs):

        f.write(f"Model Results for {cls}:\n")

        # One indented line per metric. Each line ends with a newline so the
        # metrics no longer run together and the next classifier's header
        # starts on a line of its own.
        for label, value in (
            ("Accuracy", acc),
            ("Balanced Accuracy", acc_bal),
            ("Specificity", specificity),
            ("Sensitivity", sensitivity),
            ("Precision", prec),
            ("Recall", recall),
            ("F1 Score", f1),
            ("ROC AUC Score", roc_auc),
            ("Average Precision Score", avg_prec),
        ):
            f.write(f"\t\t{label} = {value:.2%}\n")