# SVM - MLPClassifier - Gaussian Naive Bayes  - Randon Forest on NewFeatures

# Train Test Split

## Stratified Cross validation (k = 5)

P1 Train and P2 Test, P1 Train and P3 Test, P1 Train and P4 Test, P1 Train and P5 Test
P2 Train and P3 Test, P2 Train and P4 Test, P2 Train and P5 Test
P3 Train and P4 Test, P3 Train and P5 Test
P4 Train and P5 Test

# Evaluation Metrics

In [1]:
# float("{:.2f}".format(13.949999999999999))

def TSS(TP,TN,FP,FN):
    TSS_value = (TP / (TP + FN)) - (FP / (FP + TN))
    return TSS_value

def HSS1(TP,TN,FP,FN):
    HSS1_value = (2 * (TP * TN - FP * FN)) / ((TP + FN) * (FN + TN) + (TP + FP) * (FP + TN))
    return HSS1_value
    
def HSS2(TP,TN,FP,FN):
    HSS2_value = (2 * (TP * TN - FP * FN)) / ((TP + FP) * (FN + TN) + (TP + FN) * (FP + TN))
    return HSS2_value

def GSS(TP,TN,FP,FN):
    GSS_value = (TP - (TP + FP) * (TP + FN) / (TP + FP + FN + TN))
    return GSS_value

def Recall(TP,TN,FP,FN):
    Recall_value = (TP) / (TP + FN)
    return Recall_value

def FPR(TP,TN,FP,FN):
    fpr_value = (FP) / (FP + TN)
    return fpr_value

def Accuracy(TP,TN,FP,FN):
    accuracy_value = (TP + TN) / (TP + TN + FP + FN)
    return accuracy_value

def Precision(TP,TN,FP,FN):
    precision_value = (FP) / (TP + FP)
    return precision_value

# Loading the Final Datasets

## New Features 

### LSBZM

In [2]:
import pickle
import numpy as np

data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/5_2_FinalData_NewFeatures_LSBZM_KnnImputation/"
X_train_NewF_LSBZM = []
Y_train_NewF_LSBZM = []


num_partitions = 5

for i in range(0,num_partitions):
    with open(data_dir + "Partition" + str(i+1) + "_NewFeatures_LSBZM_KnnImputation" + ".pkl", 'rb') as f:
        X_train_NewF_LSBZM.append(pickle.load(f))
    print("P"+str(i+1)+" Nan-Value: "+ str(np.isnan(X_train_NewF_LSBZM[i]).any() or np.isinf(X_train_NewF_LSBZM[i]).any()))
    
    with open(data_dir + "Partition" + str(i+1) + "_Labels_NewFeatures_LSBZM_KnnImputation" + ".pkl", 'rb') as f:
        Y_train_NewF_LSBZM.append(pickle.load(f))

P1 Nan-Value: False
P2 Nan-Value: False
P3 Nan-Value: False
P4 Nan-Value: False
P5 Nan-Value: False


# Useful Functions

In [3]:
def kfold_training(name, X_train, Y_train, training_func, num, rocket_kernels= 1500, tsf_estimator=25):
    kfold = np.array([[1,2],[1,3],[1,4],[1,5],[2,3],[2,4],[2,5],[3,4],[3,5],[4,5]])
    metrics = []
    metrics_values = np.array([])
    
    for i in range(0, num):
        train_index = kfold[i,0]
        test_index = kfold[i,1]
        metrics_values = training_func(X_train[train_index-1], Y_train[train_index-1], X_train[test_index-1], Y_train[test_index-1])
        while (metrics_values[0] == 0):
            metrics_values = training_func(X_train[train_index-1], Y_train[train_index-1], X_train[test_index-1], Y_train[test_index-1])
        
        metrics.append(np.append(np.append(train_index, test_index), metrics_values))
    return metrics

# Training

## SVM

In [4]:
# Import necessary libraries
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

def svm_model(X_train, Y_train, X_test, Y_test):
    
    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/models/"


    # Create an SVM classifier (you can choose different kernels like 'linear', 'rbf', etc.)
    svm_classifier = SVC(kernel='rbf', C=1.0)
    svm_classifier.fit(X_train, Y_train)
    y_pred = svm_classifier.predict(X_test)
    
    
    print(str(X_train.shape)+': SVM Classifier is Done! \n')
    

    confusion = confusion_matrix(Y_test, y_pred)
    tn, fp, fn, tp = confusion.ravel()

    tss = TSS(tp,tn,fp,fn)
    hss1 = HSS1(tp,tn,fp,fn)
    hss2 = HSS2(tp,tn,fp,fn)
    gss = GSS(tp,tn,fp,fn)
    recall = Recall(tp,tn,fp,fn)
    precision = Precision(tp,tn,fp,fn)
    
    output_values = np.array([tp, fn, fp, tn, tss, hss1, hss2, gss, recall, precision])


    #joblib.dump(classifier, data_dir + "svm_model.pkl")

    #loaded_svm_model = joblib.load(data_dir + "svm_model.pkl")
    
    return output_values

## MPLClassifier 

In [5]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import confusion_matrix

def mlp_model(X_train, Y_train, X_test, Y_test):
    
    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/models/"

    # Define the MLP model
    # Define the MLP model with four hidden layers
    model = keras.Sequential([
        layers.Input(shape=(216,)),  # Input layer with 216 features
        layers.Dense(64, activation='relu'),  # Hidden layer with 64 units and ReLU activation
        layers.Dense(32, activation='relu'),  # Hidden layer with 32 units and ReLU activation
        layers.Dense(16, activation='relu'),  # Hidden layer with 16 units and ReLU activation
        layers.Dense(8, activation='relu'),  # Hidden layer with 16 units and ReLU activation
        layers.Dense(1, activation='sigmoid')  # Output layer with 1 unit and sigmoid activation (binary classification)
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=keras.metrics.Recall(name='recall'))

    # Train the model
    model.fit(X_train, Y_train, epochs=15, batch_size=32, verbose=0)  # Adjust epochs and batch_size as needed

    y_pred = model.predict(X_test)
    threshold = 0.35  # Adjust the threshold as needed
    y_pred_binary = (y_pred > threshold).astype(int)
    
    print(str(X_train.shape)+': MLP Classifier is Done! \n')


    confusion = confusion_matrix(Y_test, y_pred_binary)
    tn, fp, fn, tp = confusion.ravel()

    tss = TSS(tp,tn,fp,fn)
    hss1 = HSS1(tp,tn,fp,fn)
    hss2 = HSS2(tp,tn,fp,fn)
    gss = GSS(tp,tn,fp,fn)
    recall = Recall(tp,tn,fp,fn)
    precision = Precision(tp,tn,fp,fn)
    
    output_values = np.array([tp, fn, fp, tn, tss, hss1, hss2, gss, recall, precision])


    #joblib.dump(classifier, data_dir + "mlp_model.pkl")

    #loaded_mlp_model = joblib.load(data_dir + "mlp_model.pkl")
    
    return output_values

# Complement Naive Bayes 

In [6]:
import numpy as np
from sklearn.naive_bayes import ComplementNB
from sklearn.metrics import confusion_matrix

def naive_bayes_model(X_train, Y_train, X_test, Y_test):
    
    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/models/"

    # Create a Gaussian Naive Bayes classifier
    nb_classifier = ComplementNB(force_alpha=True)
    nb_classifier.fit(X_train, Y_train)
    y_pred = nb_classifier.predict(X_test)
    
    print(str(X_train.shape) + ': Naive Bayes Classifier is Done! \n')

    confusion = confusion_matrix(Y_test, y_pred)
    tn, fp, fn, tp = confusion.ravel()

    tss = TSS(tp,tn,fp,fn)
    hss1 = HSS1(tp,tn,fp,fn)
    hss2 = HSS2(tp,tn,fp,fn)
    gss = GSS(tp,tn,fp,fn)
    recall = Recall(tp,tn,fp,fn)
    precision = Precision(tp,tn,fp,fn)
    
    output_values = np.array([tp, fn, fp, tn, tss, hss1, hss2, gss, recall, precision])

    #joblib.dump(nb_classifier, data_dir + "naive_bayes_model.pkl")

    #loaded_nb_model = joblib.load(data_dir + "naive_bayes_model.pkl")

    return output_values


# Random Forest

In [7]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix

def random_forest_model(X_train, Y_train, X_test, Y_test):
    
    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/models/"

    # Create a Random Forest classifier
    # You can adjust 'n_estimators' and other parameters as needed
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_train, Y_train)
    y_pred = rf_classifier.predict(X_test)
    
    print(str(X_train.shape) + ': Random Forest Classifier is Done! \n')

    confusion = confusion_matrix(Y_test, y_pred)
    tn, fp, fn, tp = confusion.ravel()

    tss = TSS(tp, tn, fp, fn)
    hss1 = HSS1(tp, tn, fp, fn)
    hss2 = HSS2(tp, tn, fp, fn)
    gss = GSS(tp, tn, fp, fn)
    recall = Recall(tp, tn, fp, fn)
    precision = Precision(tp, tn, fp, fn)
    
    output_values = np.array([tp, fn, fp, tn, tss, hss1, hss2, gss, recall, precision])

    #joblib.dump(rf_classifier, data_dir + "random_forest_model.pkl")

    #loaded_rf_model = joblib.load(data_dir + "random_forest_model.pkl")

    return output_values


# Results

In [8]:
def save_results(reslut, name):
    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/results/"

    with open(data_dir + name + ".pkl", 'wb') as f:
        pickle.dump(reslut, f)

## SVM

In [9]:
# SVM NewFeatures
svm_newf = kfold_training('SVM', X_train_NewF_LSBZM, Y_train_NewF_LSBZM, svm_model, 10)

(73492, 216): SVM Classifier is Done! 

(73492, 216): SVM Classifier is Done! 

(73492, 216): SVM Classifier is Done! 

(73492, 216): SVM Classifier is Done! 

(88557, 216): SVM Classifier is Done! 

(88557, 216): SVM Classifier is Done! 

(88557, 216): SVM Classifier is Done! 

(42510, 216): SVM Classifier is Done! 

(42510, 216): SVM Classifier is Done! 

(51261, 216): SVM Classifier is Done! 



In [10]:
save_results(svm_newf, "SVM_NewFeatures_Results")

## MLPClassifier

In [11]:
# MLPClassifier NewFeatures
mlp_newf = kfold_training('MLP', X_train_NewF_LSBZM, Y_train_NewF_LSBZM, mlp_model, 10)

(73492, 216): MLP Classifier is Done! 

(73492, 216): MLP Classifier is Done! 

(73492, 216): MLP Classifier is Done! 

(73492, 216): MLP Classifier is Done! 

(88557, 216): MLP Classifier is Done! 

(88557, 216): MLP Classifier is Done! 

(88557, 216): MLP Classifier is Done! 

(88557, 216): MLP Classifier is Done! 

(42510, 216): MLP Classifier is Done! 

(42510, 216): MLP Classifier is Done! 

(51261, 216): MLP Classifier is Done! 



In [12]:
save_results(mlp_newf, "MLPClassifier_NewFeatures_Results")

### Naive Bayes

In [15]:
naive_newf = kfold_training('NaiveBayes', X_train_NewF_LSBZM, Y_train_NewF_LSBZM, naive_bayes_model, 10)

(73492, 216): Naive Bayes Classifier is Done! 

(73492, 216): Naive Bayes Classifier is Done! 

(73492, 216): Naive Bayes Classifier is Done! 

(73492, 216): Naive Bayes Classifier is Done! 

(88557, 216): Naive Bayes Classifier is Done! 

(88557, 216): Naive Bayes Classifier is Done! 

(88557, 216): Naive Bayes Classifier is Done! 

(42510, 216): Naive Bayes Classifier is Done! 

(42510, 216): Naive Bayes Classifier is Done! 

(51261, 216): Naive Bayes Classifier is Done! 



In [16]:
save_results(naive_newf, "NaiveBayes_NewFeatures_Results")

### Random Forest

In [13]:
forest_newf = kfold_training('RandomForest', X_train_NewF_LSBZM, Y_train_NewF_LSBZM, random_forest_model, 10)

(73492, 216): Random Forest Classifier is Done! 

(73492, 216): Random Forest Classifier is Done! 

(73492, 216): Random Forest Classifier is Done! 

(73492, 216): Random Forest Classifier is Done! 

(88557, 216): Random Forest Classifier is Done! 

(88557, 216): Random Forest Classifier is Done! 

(88557, 216): Random Forest Classifier is Done! 

(42510, 216): Random Forest Classifier is Done! 

(42510, 216): Random Forest Classifier is Done! 

(51261, 216): Random Forest Classifier is Done! 



In [14]:
save_results(forest_newf, "RandomForest_NewFeatures_Results")

# Comparison 

In [17]:
data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/results/"
with open(data_dir + 'SVM_NewFeatures_Results' + ".pkl", 'rb') as f:
    svm_newf=pickle.load(f)
with open(data_dir + 'MLPClassifier_NewFeatures_Results' + ".pkl", 'rb') as f:
    mlp_newf=pickle.load(f)
with open(data_dir + 'NaiveBayes_NewFeatures_Results' + ".pkl", 'rb') as f:
    naive_newf=pickle.load(f)
with open(data_dir + 'RandomForest_NewFeatures_Results' + ".pkl", 'rb') as f:
    forest_newf=pickle.load(f)



names = ['SVM', 'MLP', 'NaiveBayes', 'RandomForest']
values = np.array([svm_newf, mlp_newf, naive_newf, forest_newf])

In [18]:
def compare_results(names, values):
    np.printoptions(precision=4, suppress=True)
    for i in range(0, values.shape[1]):
        print("P_Train = "+ str(values[0,i,0]) + " & " + "P_Test = " + str(values[0,i,1]))
        for j in range(0, values.shape[0]):
            print(names[j] + ' :' +  ' TP={:.0f}'.format(values[j,i,2]) + ' FN={:.0f}'.format(values[j,i,3]) + ' FP={:.0f}'.format(values[j,i,4])
                 + ' TN={:.0f}'.format(values[j,i,5]) + ' TSS={:.3f}'.format(values[j,i,6]) + ' HSS1={:.3f}'.format(values[j,i,7]) + ' HSS2={:.3f}'.format(values[j,i,8])
                 + ' GSS={:.3f}'.format(values[j,i,9]) + ' Recall={:.3f}'.format(values[j,i,10]) + ' Precision={:.3f}'.format(values[j,i,11]))
        print('\n')


In [19]:
compare_results(names, values)

P_Train = 1.0 & P_Test = 2.0
SVM : TP=100 FN=1301 FP=79 TN=87077 TSS=0.070 HSS1=0.123 HSS2=0.125 GSS=97.168 Recall=0.071 Precision=0.441
MLP : TP=573 FN=828 FP=719 TN=86437 TSS=0.401 HSS1=0.417 HSS2=0.417 GSS=552.560 Recall=0.409 Precision=0.557
NaiveBayes : TP=1334 FN=67 FP=18509 TN=68647 TSS=0.740 HSS1=0.099 HSS2=0.122 GSS=1020.077 Recall=0.952 Precision=0.933
RandomForest : TP=224 FN=1177 FP=199 TN=86957 TSS=0.158 HSS1=0.240 HSS2=0.241 GSS=217.308 Recall=0.160 Precision=0.470


P_Train = 1.0 & P_Test = 3.0
SVM : TP=80 FN=1344 FP=17 TN=41069 TSS=0.056 HSS1=0.101 HSS2=0.104 GSS=76.751 Recall=0.056 Precision=0.175
MLP : TP=583 FN=841 FP=671 TN=40415 TSS=0.393 HSS1=0.417 HSS2=0.417 GSS=540.994 Recall=0.409 Precision=0.535
NaiveBayes : TP=1345 FN=79 FP=7685 TN=33401 TSS=0.757 HSS1=0.212 HSS2=0.246 GSS=1042.513 Recall=0.945 Precision=0.851
RandomForest : TP=73 FN=1351 FP=112 TN=40974 TSS=0.049 HSS1=0.084 HSS2=0.086 GSS=66.803 Recall=0.051 Precision=0.605


P_Train = 1.0 & P_Test = 4.0
SVM