# SVM - MLPClassifier on NewFeatures

# Train Test Split

## Stratified Cross validation (k = 5)

P1 Train and P2 Test, P1 Train and P3 Test, P1 Train and P4 Test, P1 Train and P5 Test
P2 Train and P3 Test, P2 Train and P4 Test, P2 Train and P5 Test
P3 Train and P4 Test, P3 Train and P5 Test
P4 Train and P5 Test

# Evaluation Metrics

In [13]:
# float("{:.2f}".format(13.949999999999999))

def TSS(TP,TN,FP,FN):
    TSS_value = (TP / (TP + FN)) - (FP / (FP + TN))
    return TSS_value

def HSS1(TP,TN,FP,FN):
    HSS1_value = (2 * (TP * TN - FP * FN)) / ((TP + FN) * (FN + TN) + (TP + FP) * (FP + TN))
    return HSS1_value
    
def HSS2(TP,TN,FP,FN):
    HSS2_value = (2 * (TP * TN - FP * FN)) / ((TP + FP) * (FN + TN) + (TP + FN) * (FP + TN))
    return HSS2_value

def GSS(TP,TN,FP,FN):
    GSS_value = (TP - (TP + FP) * (TP + FN) / (TP + FP + FN + TN))
    return GSS_value

def Recall(TP,TN,FP,FN):
    Recall_value = (TP) / (TP + FN)
    return Recall_value

def FPR(TP,TN,FP,FN):
    fpr_value = (FP) / (FP + TN)
    return fpr_value

def Accuracy(TP,TN,FP,FN):
    accuracy_value = (TP + TN) / (TP + TN + FP + FN)
    return accuracy_value

def Precision(TP,TN,FP,FN):
    precision_value = (FP) / (TP + FP)
    return precision_value

# Loading the Final Datasets

## New Features 

### ZM Norm

In [14]:
import pickle
import numpy as np

data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/5_1_FinalData_NewFeatures_ZM_KnnImputation/"
X_train_NewF_ZM = []
Y_train_NewF_ZM = []


num_partitions = 5

for i in range(0,num_partitions):
    with open(data_dir + "Partition" + str(i+1) + "_NewFeatures_ZM_KnnImputation" + ".pkl", 'rb') as f:
        X_train_NewF_ZM.append(pickle.load(f))
    print("P"+str(i+1)+" Nan-Value: "+ str(np.isnan(X_train_NewF_ZM[i]).any() or np.isinf(X_train_NewF_ZM[i]).any()))
    
    with open(data_dir + "Partition" + str(i+1) + "_Labels_NewFeatures_ZM_KnnImputation" + ".pkl", 'rb') as f:
        Y_train_NewF_ZM.append(pickle.load(f))

P1 Nan-Value: False
P2 Nan-Value: False
P3 Nan-Value: False
P4 Nan-Value: False
P5 Nan-Value: False


### LSBZM

In [7]:
import pickle
import numpy as np

data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/5_2_FinalData_NewFeatures_LSBZM_KnnImputation/"
X_train_NewF_LSBZM = []
Y_train_NewF_LSBZM = []


num_partitions = 5

for i in range(0,num_partitions):
    with open(data_dir + "Partition" + str(i+1) + "_NewFeatures_LSBZM_KnnImputation" + ".pkl", 'rb') as f:
        X_train_NewF_LSBZM.append(pickle.load(f))
    print("P"+str(i+1)+" Nan-Value: "+ str(np.isnan(X_train_NewF_LSBZM[i]).any() or np.isinf(X_train_NewF_LSBZM[i]).any()))
    
    with open(data_dir + "Partition" + str(i+1) + "_Labels_NewFeatures_LSBZM_KnnImputation" + ".pkl", 'rb') as f:
        Y_train_NewF_LSBZM.append(pickle.load(f))

P1 Nan-Value: False
P2 Nan-Value: False
P3 Nan-Value: False
P4 Nan-Value: False
P5 Nan-Value: False


# Useful Functions

In [15]:
def kfold_training(name, X_train, Y_train, training_func, num, rocket_kernels= 1500, tsf_estimator=25):
    kfold = np.array([[1,2],[1,3],[1,4],[1,5],[2,3],[2,4],[2,5],[3,4],[3,5],[4,5]])
    metrics = []
    metrics_values = np.array([])
    
    for i in range(0, num):
        train_index = kfold[i,0]
        test_index = kfold[i,1]
        metrics_values = training_func(X_train[train_index-1], Y_train[train_index-1], X_train[test_index-1], Y_train[test_index-1])

        metrics.append(np.append(np.append(train_index, test_index), metrics_values))
    return metrics

# Training

## SVM

In [16]:
# Import necessary libraries
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

def svm_model(X_train, Y_train, X_test, Y_test):
    
    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/models/"


    # Create an SVM classifier (you can choose different kernels like 'linear', 'rbf', etc.)
    svm_classifier = SVC(kernel='linear', C=1.0)
    svm_classifier.fit(X_train, Y_train)
    y_pred = svm_classifier.predict(X_test)
    
    
    print(str(X_train.shape)+': SVM Classifier is Done! \n')
    

    confusion = confusion_matrix(Y_test, y_pred)
    tn, fp, fn, tp = confusion.ravel()

    tss = TSS(tp,tn,fp,fn)
    hss1 = HSS1(tp,tn,fp,fn)
    hss2 = HSS2(tp,tn,fp,fn)
    gss = GSS(tp,tn,fp,fn)
    recall = Recall(tp,tn,fp,fn)
    precision = Precision(tp,tn,fp,fn)
    
    output_values = np.array([tp, fn, fp, tn, tss, hss1, hss2, gss, recall, precision])


    #joblib.dump(classifier, data_dir + "svm_model.pkl")

    #loaded_svm_model = joblib.load(data_dir + "svm_model.pkl")
    
    return output_values

## MPLClassifier 

In [17]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import confusion_matrix

def mlp_model(X_train, Y_train, X_test, Y_test):
    
    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/models/"

    # Define the MLP model
    # Define the MLP model with four hidden layers
    model = keras.Sequential([
        layers.Input(shape=(216,)),  # Input layer with 216 features
        layers.Dense(64, activation='relu'),  # Hidden layer with 64 units and ReLU activation
        layers.Dense(32, activation='relu'),  # Hidden layer with 32 units and ReLU activation
        layers.Dense(16, activation='relu'),  # Hidden layer with 16 units and ReLU activation
        layers.Dense(8, activation='relu'),  # Hidden layer with 16 units and ReLU activation
        layers.Dense(1, activation='sigmoid')  # Output layer with 1 unit and sigmoid activation (binary classification)
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=keras.metrics.Recall(name='recall'))

    # Train the model
    model.fit(X_train, Y_train, epochs=15, batch_size=32, verbose=0)  # Adjust epochs and batch_size as needed

    y_pred = model.predict(X_test)
    threshold = 0.35  # Adjust the threshold as needed
    y_pred_binary = (y_pred > threshold).astype(int)
    
    print(str(X_train.shape)+': MLP Classifier is Done! \n')


    confusion = confusion_matrix(Y_test, y_pred_binary)
    tn, fp, fn, tp = confusion.ravel()

    tss = TSS(tp,tn,fp,fn)
    hss1 = HSS1(tp,tn,fp,fn)
    hss2 = HSS2(tp,tn,fp,fn)
    gss = GSS(tp,tn,fp,fn)
    recall = Recall(tp,tn,fp,fn)
    precision = Precision(tp,tn,fp,fn)
    
    output_values = np.array([tp, fn, fp, tn, tss, hss1, hss2, gss, recall, precision])


    #joblib.dump(classifier, data_dir + "mlp_model.pkl")

    #loaded_mlp_model = joblib.load(data_dir + "mlp_model.pkl")
    
    return output_values

# Results

In [22]:
def save_results(reslut, name):
    data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/results/"

    with open(data_dir + name + ".pkl", 'wb') as f:
        pickle.dump(reslut, f)

## SVM

In [11]:
# SVM NewFeatures
svm_newf = kfold_training('SVM', X_train_NewF_ZM, Y_train_NewF_ZM, svm_model, 10)

(73492, 216): SVM Classifier is Done! 

(73492, 216): SVM Classifier is Done! 

(73492, 216): SVM Classifier is Done! 

(73492, 216): SVM Classifier is Done! 

(88557, 216): SVM Classifier is Done! 

(88557, 216): SVM Classifier is Done! 

(88557, 216): SVM Classifier is Done! 

(42510, 216): SVM Classifier is Done! 

(42510, 216): SVM Classifier is Done! 

(51261, 216): SVM Classifier is Done! 



In [26]:
save_results(svm_newf, "SVM_NewFeatures_Results")

## MLPClassifier

In [23]:
# MLPClassifier NewFeatures
mlp_newf = kfold_training('MLP', X_train_NewF_ZM, Y_train_NewF_ZM, mlp_model, 10)

(73492, 216): MLP Classifier is Done! 

(73492, 216): MLP Classifier is Done! 

(73492, 216): MLP Classifier is Done! 

(73492, 216): MLP Classifier is Done! 

(88557, 216): MLP Classifier is Done! 

(88557, 216): MLP Classifier is Done! 

(88557, 216): MLP Classifier is Done! 

(42510, 216): MLP Classifier is Done! 

(42510, 216): MLP Classifier is Done! 

(51261, 216): MLP Classifier is Done! 



In [25]:
save_results(mlp_newf, "MLPClassifier_NewFeatures_Results")

### Rocket 

In [14]:
rocket_newf = kfold_training('Rocket', X_train_NewF_ZM, Y_train_NewF_ZM, rocket_model, 10)

(73492, 1): Rocket Classifier is Done! 

(73492, 1): Rocket Classifier is Done! 

(73492, 1): Rocket Classifier is Done! 

(73492, 1): Rocket Classifier is Done! 

(88557, 1): Rocket Classifier is Done! 

(88557, 1): Rocket Classifier is Done! 

(88557, 1): Rocket Classifier is Done! 

(42510, 1): Rocket Classifier is Done! 

(42510, 1): Rocket Classifier is Done! 

(51261, 1): Rocket Classifier is Done! 



In [15]:
save_results(rocket_newf, "Rocket_NewFeatures_Results")

### TimeSeriesForest

In [16]:
tsf_newf = kfold_training('TSF', X_train_NewF_ZM, Y_train_NewF_ZM, tsf_model, 10)

(73492, 216): TSF Classifier is Done! 

(73492, 216): TSF Classifier is Done! 

(73492, 216): TSF Classifier is Done! 

(73492, 216): TSF Classifier is Done! 

(88557, 216): TSF Classifier is Done! 

(88557, 216): TSF Classifier is Done! 

(88557, 216): TSF Classifier is Done! 

(42510, 216): TSF Classifier is Done! 

(42510, 216): TSF Classifier is Done! 

(51261, 216): TSF Classifier is Done! 



In [17]:
save_results(tsf_newf, "TSF_NewFeatures_Results")

### LSTM 

In [31]:
num_partitions = 5
X_train_NewF_ZM_3D = []
for i in range(0,num_partitions):
    X_train_NewF_ZM_3D.append(X_train_NewF_ZM[i].reshape(X_train_NewF_ZM[i].shape[0], 24, 9).transpose(0, 2, 1))

In [32]:
lstm_newf = kfold_training('LSTM', X_train_NewF_ZM_3D, Y_train_NewF_ZM, lstm_model, 10)

(73492, 9, 24): LSTM Classifier is Done! 

(73492, 9, 24): LSTM Classifier is Done! 

(73492, 9, 24): LSTM Classifier is Done! 

(73492, 9, 24): LSTM Classifier is Done! 

(88557, 9, 24): LSTM Classifier is Done! 

(88557, 9, 24): LSTM Classifier is Done! 

(88557, 9, 24): LSTM Classifier is Done! 

(42510, 9, 24): LSTM Classifier is Done! 

(42510, 9, 24): LSTM Classifier is Done! 

(51261, 9, 24): LSTM Classifier is Done! 



In [33]:
save_results(lstm_newf, "LSTM_NewFeatures_Results")

### CNN 

In [29]:
num_partitions = 5
X_train_NewF_ZM_3D = []
for i in range(0,num_partitions):
    X_train_NewF_ZM_3D.append(X_train_NewF_ZM[i].reshape(X_train_NewF_ZM[i].shape[0], 24, 9).transpose(0, 2, 1))

In [None]:
cnn_newf = kfold_training('CNN', X_train_NewF_ZM_3D, Y_train_NewF_ZM, cnn_model, 10)

In [None]:
save_results(cnn_newf, "CNN_NewFeatures_Results")

# Comparison 

In [27]:
data_dir = "/Users/samskanderi/Documents/Research_Project/SWANSF/code/results/"
with open(data_dir + 'SVM_NewFeatures_Results' + ".pkl", 'rb') as f:
    svm_newf=pickle.load(f)
with open(data_dir + 'MLPClassifier_NewFeatures_Results' + ".pkl", 'rb') as f:
    mlp_newf=pickle.load(f)
with open(data_dir + 'Rocket_NewFeatures_Results' + ".pkl", 'rb') as f:
    rocket_newf=pickle.load(f)
with open(data_dir + 'TSF_NewFeatures_Results' + ".pkl", 'rb') as f:
    tsf_newf=pickle.load(f)



names = ['SVM', 'MLP', 'Rocket', 'TSF']
values = np.array([svm_newf, mlp_newf, rocket_newf, tsf_newf])

In [28]:
def compare_results(names, values):
    np.printoptions(precision=4, suppress=True)
    for i in range(0, values.shape[1]):
        print("P_Train = "+ str(values[0,i,0]) + " & " + "P_Test = " + str(values[0,i,1]))
        for j in range(0, values.shape[0]):
            print(names[j] + ' :' +  ' TP={:.0f}'.format(values[j,i,2]) + ' FN={:.0f}'.format(values[j,i,3]) + ' FP={:.0f}'.format(values[j,i,4])
                 + ' TN={:.0f}'.format(values[j,i,5]) + ' TSS={:.3f}'.format(values[j,i,6]) + ' HSS1={:.3f}'.format(values[j,i,7]) + ' HSS2={:.3f}'.format(values[j,i,8])
                 + ' GSS={:.3f}'.format(values[j,i,9]) + ' Recall={:.3f}'.format(values[j,i,10]) + ' Precision={:.3f}'.format(values[j,i,11]))
        print('\n')


In [29]:
compare_results(names, values)

P_Train = 1.0 & P_Test = 2.0
SVM : TP=206 FN=1195 FP=176 TN=86980 TSS=0.145 HSS1=0.226 HSS2=0.227 GSS=199.957 Recall=0.147 Precision=0.461
MLP : TP=215 FN=1186 FP=366 TN=86790 TSS=0.149 HSS1=0.210 HSS2=0.210 GSS=205.808 Recall=0.153 Precision=0.630
Rocket : TP=33 FN=1368 FP=66 TN=87090 TSS=0.023 HSS1=0.042 HSS2=0.043 GSS=31.434 Recall=0.024 Precision=0.667
TSF : TP=140 FN=1261 FP=186 TN=86970 TSS=0.098 HSS1=0.157 HSS2=0.158 GSS=134.843 Recall=0.100 Precision=0.571


P_Train = 1.0 & P_Test = 3.0
SVM : TP=67 FN=1357 FP=8 TN=41078 TSS=0.047 HSS1=0.086 HSS2=0.089 GSS=64.488 Recall=0.047 Precision=0.107
MLP : TP=58 FN=1366 FP=70 TN=41016 TSS=0.039 HSS1=0.070 HSS2=0.071 GSS=53.712 Recall=0.041 Precision=0.547
Rocket : TP=93 FN=1331 FP=164 TN=40922 TSS=0.061 HSS1=0.101 HSS2=0.103 GSS=84.391 Recall=0.065 Precision=0.638
TSF : TP=176 FN=1248 FP=163 TN=40923 TSS=0.120 HSS1=0.189 HSS2=0.192 GSS=164.644 Recall=0.124 Precision=0.481


P_Train = 1.0 & P_Test = 4.0
SVM : TP=40 FN=1125 FP=2 TN=50094 T