# SVM - MLPClassifier - Rocket - TSF - LSTM on NewFeatures (After Resampling)

# Train Test Split

## Stratified Cross validation (k = 5)

P1 Train and P2 Test, P1 Train and P3 Test, P1 Train and P4 Test, P1 Train and P5 Test
P2 Train and P3 Test, P2 Train and P4 Test, P2 Train and P5 Test
P3 Train and P4 Test, P3 Train and P5 Test
P4 Train and P5 Test

# Evaluation Metrics

In [20]:
# float("{:.2f}".format(13.949999999999999))

def TSS(TP,TN,FP,FN):
    TSS_value = (TP / (TP + FN)) - (FP / (FP + TN))
    return TSS_value

def HSS1(TP,TN,FP,FN):
    HSS1_value = (2 * (TP * TN - FP * FN)) / ((TP + FN) * (FN + TN) + (TP + FP) * (FP + TN))
    return HSS1_value
    
def HSS2(TP,TN,FP,FN):
    HSS2_value = (2 * (TP * TN - FP * FN)) / ((TP + FP) * (FN + TN) + (TP + FN) * (FP + TN))
    return HSS2_value

def GSS(TP,TN,FP,FN):
    GSS_value = (TP - (TP + FP) * (TP + FN) / (TP + FP + FN + TN))
    return GSS_value

def Recall(TP,TN,FP,FN):
    Recall_value = (TP) / (TP + FN)
    return Recall_value

def FPR(TP,TN,FP,FN):
    fpr_value = (FP) / (FP + TN)
    return fpr_value

def Accuracy(TP,TN,FP,FN):
    accuracy_value = (TP + TN) / (TP + TN + FP + FN)
    return accuracy_value

def Precision(TP,TN,FP,FN):
    precision_value = (FP) / (TP + FP)
    return precision_value

# Loading the Final Datasets

## New Features 

### ZM Norm

In [21]:
import pickle
import numpy as np

data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/8_1_FinalData_OUSampling_NewFeatures_ZM_KnnImputation/"
X_train_NewF_ZM = []
Y_train_NewF_ZM = []


num_partitions = 5

for i in range(0,num_partitions):
    with open(data_dir + "RUS_Tomek_Adasyn_" +"Partition" + str(i+1) + "_OUSampling_NewFeatures_ZM_KnnImputation" +".pkl", 'rb') as f:
        X_train_NewF_ZM.append(pickle.load(f))
    with open(data_dir + "RUS_Tomek_Adasyn_" + "Partition" + str(i+1) + "_Labels_OUSampling_NewFeatures_ZM_KnnImputation" +".pkl", 'rb') as f:
        Y_train_NewF_ZM.append(pickle.load(f))
    print("P"+str(i+1)+" Nan-Value: "+ str(np.isnan(X_train_NewF_ZM[i]).any() or np.isinf(X_train_NewF_ZM[i]).any()))

P1 Nan-Value: False
P2 Nan-Value: False
P3 Nan-Value: False
P4 Nan-Value: False
P5 Nan-Value: False


In [22]:
import pickle
import numpy as np

data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/5_1_FinalData_NewFeatures_ZM_KnnImputation/"
X_test_NewF_ZM = []
Y_test_NewF_ZM = []


num_partitions = 5

for i in range(0,num_partitions):
    with open(data_dir + "Partition" + str(i+1) + "_NewFeatures_ZM_KnnImputation" + ".pkl", 'rb') as f:
        X_test_NewF_ZM.append(pickle.load(f))
    print("P"+str(i+1)+" Nan-Value: "+ str(np.isnan(X_test_NewF_ZM[i]).any() or np.isinf(X_test_NewF_ZM[i]).any()))
    
    with open(data_dir + "Partition" + str(i+1) + "_Labels_NewFeatures_ZM_KnnImputation" + ".pkl", 'rb') as f:
        Y_test_NewF_ZM.append(pickle.load(f))

P1 Nan-Value: False
P2 Nan-Value: False
P3 Nan-Value: False
P4 Nan-Value: False
P5 Nan-Value: False


### LSBZM

# Useful Functions

In [23]:
def kfold_training(name, X_train, Y_train, X_test,Y_test, training_func, num):
    kfold = np.array([[1,2],[1,3],[1,4],[1,5],[2,3],[2,4],[2,5],[3,4],[3,5],[4,5]])
    metrics = []
    metrics_values = np.array([])
    
    for i in range(0, num):
        train_index = kfold[i,0]
        test_index = kfold[i,1]
        metrics_values = training_func(X_train[train_index-1], Y_train[train_index-1], X_test[test_index-1], Y_test[test_index-1])

        metrics.append(np.append(np.append(train_index, test_index), metrics_values))
    return metrics

# Training

## SVM

In [24]:
# Import necessary libraries
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

def svm_model(X_train, Y_train, X_test, Y_test):
    
    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/models/"


    # Create an SVM classifier (you can choose different kernels like 'linear', 'rbf', etc.)
    svm_classifier = SVC(kernel='linear', C=1.0)
    svm_classifier.fit(X_train, Y_train)
    y_pred = svm_classifier.predict(X_test)
    
    
    print(str(X_train.shape)+': SVM Classifier is Done! \n')
    

    confusion = confusion_matrix(Y_test, y_pred)
    tn, fp, fn, tp = confusion.ravel()

    tss = TSS(tp,tn,fp,fn)
    hss1 = HSS1(tp,tn,fp,fn)
    hss2 = HSS2(tp,tn,fp,fn)
    gss = GSS(tp,tn,fp,fn)
    recall = Recall(tp,tn,fp,fn)
    precision = Precision(tp,tn,fp,fn)
    
    output_values = np.array([tp, fn, fp, tn, tss, hss1, hss2, gss, recall, precision])


    #joblib.dump(classifier, data_dir + "svm_model.pkl")

    #loaded_svm_model = joblib.load(data_dir + "svm_model.pkl")
    
    return output_values

## MPLClassifier 

In [25]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import confusion_matrix

def mlp_model(X_train, Y_train, X_test, Y_test):
    
    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/models/"

    # Define the MLP model
    # Define the MLP model with four hidden layers
    model = keras.Sequential([
        layers.Input(shape=(216,)),  # Input layer with 216 features
        layers.Dense(64, activation='relu'),  # Hidden layer with 64 units and ReLU activation
        layers.Dense(32, activation='relu'),  # Hidden layer with 32 units and ReLU activation
        layers.Dense(16, activation='relu'),  # Hidden layer with 16 units and ReLU activation
        layers.Dense(8, activation='relu'),  # Hidden layer with 16 units and ReLU activation
        layers.Dense(1, activation='sigmoid')  # Output layer with 1 unit and sigmoid activation (binary classification)
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=keras.metrics.Recall(name='recall'))

    # Train the model
    model.fit(X_train, Y_train, epochs=15, batch_size=32, verbose=0)  # Adjust epochs and batch_size as needed

    y_pred = model.predict(X_test)
    threshold = 0.35  # Adjust the threshold as needed
    y_pred_binary = (y_pred > threshold).astype(int)
    
    print(str(X_train.shape)+': MLP Classifier is Done! \n')


    confusion = confusion_matrix(Y_test, y_pred_binary)
    tn, fp, fn, tp = confusion.ravel()

    tss = TSS(tp,tn,fp,fn)
    hss1 = HSS1(tp,tn,fp,fn)
    hss2 = HSS2(tp,tn,fp,fn)
    gss = GSS(tp,tn,fp,fn)
    recall = Recall(tp,tn,fp,fn)
    precision = Precision(tp,tn,fp,fn)
    
    output_values = np.array([tp, fn, fp, tn, tss, hss1, hss2, gss, recall, precision])


    #joblib.dump(classifier, data_dir + "mlp_model.pkl")

    #loaded_mlp_model = joblib.load(data_dir + "mlp_model.pkl")
    
    return output_values

## Rocket (RidgeClassifierCV)  

In [26]:
# ROCKET with RidgeClassifierCV

import numpy as np
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import Rocket
from sklearn.metrics import confusion_matrix
import joblib
from sktime.datatypes._panel._convert import from_2d_array_to_nested

def rocket_model(X_train, Y_train, X_test, Y_test, rocket_kernels=1500):

    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/models/rocket/"
    
    X_train = from_2d_array_to_nested(X_train)
    X_test = from_2d_array_to_nested(X_test)

    rocket = Rocket(num_kernels=rocket_kernels)
    rocket.fit(X_train)
    X_train_transform = rocket.transform(X_train)
        
    classifier = RidgeClassifierCV()
    classifier.fit(X_train_transform, Y_train)
    
    X_test_transform = rocket.transform(X_test)
    
    y_pred = classifier.predict(X_test_transform)
    
    print(str(X_train.shape)+': Rocket Classifier is Done! \n')

    confusion = confusion_matrix(Y_test, y_pred)
    tn, fp, fn, tp = confusion.ravel()

    tss = TSS(tp,tn,fp,fn)
    hss1 = HSS1(tp,tn,fp,fn)
    hss2 = HSS2(tp,tn,fp,fn)
    gss = GSS(tp,tn,fp,fn)
    recall = Recall(tp,tn,fp,fn)
    precision = Precision(tp,tn,fp,fn)
    
    output_values = np.array([tp, fn, fp, tn, tss, hss1, hss2, gss, recall, precision])


    #joblib.dump(classifier, data_dir + "rocket_model.pkl")

    #loaded_rocket_model = joblib.load(data_dir + "rocket_model_sgd.pkl")
    
    return output_values

## TimeSeriesForest

In [27]:
# TimeSeriesForest

from sktime.classification.interval_based import TimeSeriesForestClassifier
import numpy as np
from sklearn.metrics import confusion_matrix
import joblib

def tsf_model(X_train, Y_train, X_test, Y_test, tsf_estimator=25):

    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/models/TSF/"
    
    
    tsf_classifier = TimeSeriesForestClassifier(n_estimators=tsf_estimator)
    tsf_classifier.fit(X_train, Y_train)
    y_pred = tsf_classifier.predict(X_test)
    
    print(str(X_train.shape)+': TSF Classifier is Done! \n')
    

    confusion = confusion_matrix(Y_test, y_pred)
    tn, fp, fn, tp = confusion.ravel()

    tss = TSS(tp,tn,fp,fn)
    hss1 = HSS1(tp,tn,fp,fn)
    hss2 = HSS2(tp,tn,fp,fn)
    gss = GSS(tp,tn,fp,fn)
    recall = Recall(tp,tn,fp,fn)
    precision = Precision(tp,tn,fp,fn)
    
    output_values = np.array([tp, fn, fp, tn, tss, hss1, hss2, gss, recall, precision])


    #joblib.dump(classifier, data_dir + "tsf_model.pkl")

    #loaded_rocket_model = joblib.load(data_dir + "tsf_model.pkl")
    
    return output_values

# Results

In [28]:
def save_results(reslut, name):
    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/results/"

    with open(data_dir + name + ".pkl", 'wb') as f:
        pickle.dump(reslut, f)

## SVM

In [29]:
# SVM NewFeatures
svm_newf = kfold_training('SVM', X_train_NewF_ZM, Y_train_NewF_ZM, X_test_NewF_ZM, Y_test_NewF_ZM, svm_model, 10)

(14024, 216): SVM Classifier is Done! 

(14024, 216): SVM Classifier is Done! 

(14024, 216): SVM Classifier is Done! 

(14024, 216): SVM Classifier is Done! 

(13979, 216): SVM Classifier is Done! 

(13979, 216): SVM Classifier is Done! 

(13979, 216): SVM Classifier is Done! 

(14029, 216): SVM Classifier is Done! 

(14029, 216): SVM Classifier is Done! 

(13982, 216): SVM Classifier is Done! 



In [30]:
save_results(svm_newf, "SVM_OUSampling_WithoutB&C_NewFeatures_Results")

## MLPClassifier

In [31]:
# MLPClassifier NewFeatures
mlp_newf = kfold_training('MLP', X_train_NewF_ZM, Y_train_NewF_ZM, X_test_NewF_ZM, Y_test_NewF_ZM, mlp_model, 10)

(14024, 216): MLP Classifier is Done! 

(14024, 216): MLP Classifier is Done! 

(14024, 216): MLP Classifier is Done! 

(14024, 216): MLP Classifier is Done! 

(13979, 216): MLP Classifier is Done! 

(13979, 216): MLP Classifier is Done! 

(13979, 216): MLP Classifier is Done! 

(14029, 216): MLP Classifier is Done! 

(14029, 216): MLP Classifier is Done! 

(13982, 216): MLP Classifier is Done! 



In [32]:
save_results(mlp_newf, "MLPClassifier_OUSampling_WithoutB&C_NewFeatures_Results")

### Rocket 

In [33]:
rocket_newf = kfold_training('Rocket', X_train_NewF_ZM, Y_train_NewF_ZM, X_test_NewF_ZM, Y_test_NewF_ZM, rocket_model, 10)

(14024, 1): Rocket Classifier is Done! 

(14024, 1): Rocket Classifier is Done! 

(14024, 1): Rocket Classifier is Done! 

(14024, 1): Rocket Classifier is Done! 

(13979, 1): Rocket Classifier is Done! 

(13979, 1): Rocket Classifier is Done! 

(13979, 1): Rocket Classifier is Done! 

(14029, 1): Rocket Classifier is Done! 

(14029, 1): Rocket Classifier is Done! 

(13982, 1): Rocket Classifier is Done! 



In [34]:
save_results(rocket_newf, "Rocket_OUSampling_WithoutB&C_NewFeatures_Results")

### TimeSeriesForest

In [35]:
tsf_newf = kfold_training('TSF', X_train_NewF_ZM, Y_train_NewF_ZM, X_test_NewF_ZM, Y_test_NewF_ZM, tsf_model, 10)

(14024, 216): TSF Classifier is Done! 

(14024, 216): TSF Classifier is Done! 

(14024, 216): TSF Classifier is Done! 

(14024, 216): TSF Classifier is Done! 

(13979, 216): TSF Classifier is Done! 

(13979, 216): TSF Classifier is Done! 

(13979, 216): TSF Classifier is Done! 

(14029, 216): TSF Classifier is Done! 

(14029, 216): TSF Classifier is Done! 

(13982, 216): TSF Classifier is Done! 



In [36]:
save_results(tsf_newf, "TSF_OUSampling_WithoutB&C_NewFeatures_Results")

# Comparison 

In [40]:
data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/results/"
with open(data_dir + 'SVM_OUSampling_WithoutB&C_NewFeatures_Results' + ".pkl", 'rb') as f:
    svm_newf=pickle.load(f)
with open(data_dir + 'MLPClassifier_OUSampling_WithoutB&C_NewFeatures_Results' + ".pkl", 'rb') as f:
    mlp_newf=pickle.load(f)
with open(data_dir + 'Rocket_OUSampling_WithoutB&C_NewFeatures_Results' + ".pkl", 'rb') as f:
    rocket_newf=pickle.load(f)
with open(data_dir + 'TSF_OUSampling_WithoutB&C_NewFeatures_Results' + ".pkl", 'rb') as f:
    tsf_newf=pickle.load(f)



names = ['SVM', 'MLP', 'Rocket', 'TSF']
values = np.array([svm_newf, mlp_newf, rocket_newf, tsf_newf])

In [41]:
def compare_results(names, values):
    np.printoptions(precision=4, suppress=True)
    for i in range(0, values.shape[1]):
        print("P_Train = "+ str(values[0,i,0]) + " & " + "P_Test = " + str(values[0,i,1]))
        for j in range(0, values.shape[0]):
            print(names[j] + ' :' +  ' TP={:.0f}'.format(values[j,i,2]) + ' FN={:.0f}'.format(values[j,i,3]) + ' FP={:.0f}'.format(values[j,i,4])
                 + ' TN={:.0f}'.format(values[j,i,5]) + ' TSS={:.3f}'.format(values[j,i,6]) + ' HSS1={:.3f}'.format(values[j,i,7]) + ' HSS2={:.3f}'.format(values[j,i,8])
                 + ' GSS={:.3f}'.format(values[j,i,9]) + ' Recall={:.3f}'.format(values[j,i,10]) + ' Precision={:.3f}'.format(values[j,i,11]))
        print('\n')


In [42]:
compare_results(names, values)

P_Train = 1.0 & P_Test = 2.0
SVM : TP=1369 FN=32 FP=26696 TN=60460 TSS=0.671 HSS1=0.065 HSS2=0.090 GSS=925.003 Recall=0.977 Precision=0.951
MLP : TP=966 FN=435 FP=5903 TN=81253 TSS=0.622 HSS1=0.213 HSS2=0.222 GSS=857.330 Recall=0.690 Precision=0.859
Rocket : TP=394 FN=1007 FP=1441 TN=85715 TSS=0.265 HSS1=0.230 HSS2=0.230 GSS=364.970 Recall=0.281 Precision=0.785
TSF : TP=774 FN=627 FP=2013 TN=85143 TSS=0.529 HSS1=0.356 HSS2=0.358 GSS=729.909 Recall=0.552 Precision=0.722


P_Train = 1.0 & P_Test = 3.0
SVM : TP=1413 FN=11 FP=13261 TN=27825 TSS=0.670 HSS1=0.122 HSS2=0.168 GSS=921.450 Recall=0.992 Precision=0.904
MLP : TP=1096 FN=328 FP=3729 TN=37357 TSS=0.679 HSS1=0.315 HSS2=0.331 GSS=934.372 Recall=0.770 Precision=0.773
Rocket : TP=417 FN=1007 FP=904 TN=40182 TSS=0.271 HSS1=0.281 HSS2=0.281 GSS=372.749 Recall=0.293 Precision=0.684
TSF : TP=933 FN=491 FP=2858 TN=38228 TSS=0.586 HSS1=0.325 HSS2=0.334 GSS=806.009 Recall=0.655 Precision=0.754


P_Train = 1.0 & P_Test = 4.0
SVM : TP=1160 FN=5 