In [1]:
from sklearnex import patch_sklearn
patch_sklearn()

ModuleNotFoundError: No module named 'sklearnex'

In [None]:
%matplotlib widget 

import os
import matplotlib.pyplot as plt
import matplotlib
import matplotlib as mpl
import pandas as pd
import numpy as np
import hyperopt
import sklearn.metrics as mt
import pickle

from xgboost import Booster
from xgboost import XGBClassifier
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from pylab import *
pd.set_option('display.max_rows', 500)

In [None]:
#################################################################################################
###### The function load_window_dataset() that takes in input window data file, and 
###### label to be assigned and returns numpy arrays with features and labels
#################################################################################################

def load_window_dataset(X, y, filename, label):
#Inputs: - X: current matrix of datapoints where we want to APPEND the datapoints retrieved from filename (only features)
#        - y: current matrix of datapoints where we want to APPEND the datapoints retrieved from filename (only labels)
#        - filename: full name (with path) of the file to be read (it must be a window dataset file created above)
#        - label: integer, label to be assigned to the datapoints retrieved from filename; it may differ from labels already included in current y
#Outputs: - X: updated X (including features for the new data points retrieved from filename)
#         - y: updated y (including labels for the new data points)
#This function to X and y in input the new datapoints retrieved from filename and return updated X and y
#The function handle the case when X and y are empty (initialized as None)

    data = pd.read_csv(filename)
    
    if X is None:
        X = data.to_numpy()
        # full() function puts in all X.shape[0] elements the value "label"
        y = np.full(X.shape[0], label)
    else:
        X_temp = data.to_numpy()
        y_temp = np.full(X_temp.shape[0], label)
        X = np.append(X, X_temp, axis = 0) #F: axis=0-->stack X and X_temp vertically (increase no of rows)
        y = np.append(y, y_temp)

    return X, y

In [None]:
def train_classifier_XGB(X_train, y_train): 
    xgb = XGBClassifier(use_label_encoder=False, eta = 0.7, max_depth= 7, subsample = 1, verbosity = 0)
    xgb.fit(X_train, y_train)

    return xgb

In [None]:
def train_classifier_DNN(X_train, y_train): 
    size = (50,) * 2
    dnn = MLPClassifier(hidden_layer_sizes=size, activation='tanh',
                        solver='adam', learning_rate='invscaling', max_iter=1000)
    dnn.fit(X_train, y_train)

    return dnn

In [None]:
def train_classifier_KNN(X_train, y_train): 
    knn = KNeighborsClassifier(leaf_size=43, p=1, n_neighbors=14)
    knn.fit(X_train, y_train)

    return knn

In [None]:
########################################################################################################
###### The function performance_eval() takes in input ground truth and predicted labels, 
###### prints results in a result file passed in input, and returns global metrics
########################################################################################################

def performance_eval(y_true, y_pred, lab, l_names):

    #Compute metrics and print/write them
    accuracy = mt.accuracy_score(y_true, y_pred)
    precision = mt.precision_score(y_true, y_pred, labels=lab, average=None) #F: average=None gives per-class results
    global_precision = mt.precision_score(y_true, y_pred, labels=lab, average='weighted') 
    recall = mt.recall_score(y_true, y_pred, labels=lab, average=None)
    global_recall = mt.recall_score(y_true, y_pred, labels=lab, average='weighted') 
    f1score = mt.f1_score(y_true, y_pred, labels=lab, average=None)
    global_f1score = mt.f1_score(y_true, y_pred, labels=lab, average='weighted')

    return accuracy, global_precision, global_recall, global_f1score 

In [None]:
noise_intervals = [20, 60, 120, 300, 600]
noise_lengths = [3, 5, 7, 15]
noise_means = [1, 3, 5, 7, 12]

A_XGB = np.zeros([len(noise_intervals),len(noise_lengths),len(noise_means)])
GP_XGB = np.zeros([len(noise_intervals),len(noise_lengths),len(noise_means)])
GR_XGB = np.zeros([len(noise_intervals),len(noise_lengths),len(noise_means)])
GF1_XGB = np.zeros([len(noise_intervals),len(noise_lengths),len(noise_means)])

A_DNN = np.zeros([len(noise_intervals),len(noise_lengths),len(noise_means)])
GP_DNN = np.zeros([len(noise_intervals),len(noise_lengths),len(noise_means)])
GR_DNN = np.zeros([len(noise_intervals),len(noise_lengths),len(noise_means)])
GF1_DNN = np.zeros([len(noise_intervals),len(noise_lengths),len(noise_means)])

A_KNN = np.zeros([len(noise_intervals),len(noise_lengths),len(noise_means)])
GP_KNN = np.zeros([len(noise_intervals),len(noise_lengths),len(noise_means)])
GR_KNN = np.zeros([len(noise_intervals),len(noise_lengths),len(noise_means)])
GF1_KNN = np.zeros([len(noise_intervals),len(noise_lengths),len(noise_means)])

def training_loop(noise_lengths, noise_intervals, noise_means):
    global A_XGB, GP_XGB, GR_XGB, GF1_XGB
    global A_DNN, GP_DNN, GR_DNN, GF1_DNN
    global A_KNN, GP_KNN, GR_KNN, GF1_KNN
    
    lbl = [0, 1]
    label_names=['Attenuation', 'Filtering']

    spacing = 1
    w_length = 10
    sampling = 1
    
    for i, n_interval in enumerate(noise_intervals): #enumerate(range(minsp,maxsp+1,stepsp)):
        for j, n_length in enumerate(noise_lengths): #enumerate(range(minlength,maxlength+1,steplength)):
            for m, n_mean in enumerate(noise_means):
                print('********************************')
                print('Iteration for spacing={}; window length={}; noise interval={}; noise length={}; noise mean={}'.format(spacing, w_length, n_interval, n_length, n_mean))

                ####### 1) Load dataset #######
                print('1) Loading dataset into (XX,yy)...')

                XX = None
                yy = None
                folderpath='../Features_1_3'

                for filename in os.listdir(folderpath):
                    if filename.endswith('_nm' + str(n_mean) + '_ni' + str(n_interval) + '_nl' + str(n_length) 
                                         + '_sa' + str(sampling) + '_sp' + str(spacing) + '_w' + str(w_length) + '.dat'):
                        label = 0
                        if int(filename[9]) > 5:
                            label = 1
                        fullname = folderpath + '/' + filename
                        XX, yy = load_window_dataset(XX, yy, fullname, label)

                scaler = StandardScaler()
                XX = scaler.fit_transform(XX)

                X_train, X_test, y_train, y_test = train_test_split(XX, yy, stratify=yy, test_size=0.99, random_state=42)
                print('Training XGB...')
                xgb = train_classifier_XGB(X_train, y_train)

                print('Training DNN...')
                dnn = train_classifier_DNN(X_train, y_train)

                print('Training KNN...')
                knn = train_classifier_KNN(X_train, y_train)

                y_pred_XGB = xgb.predict(X_test)
                y_pred_DNN = dnn.predict(X_test)
                y_pred_KNN = knn.predict(X_test)

                A_XGB[i,j,m], GP_XGB[i,j,m], GR_XGB[i,j,m], GF1_XGB[i,j,m] = performance_eval(y_test, y_pred_XGB, lbl, label_names)
                print(A_XGB[i,j,m], GP_XGB[i,j,m], GR_XGB[i,j,m], GF1_XGB[i,j,m])
                A_DNN[i,j,m], GP_DNN[i,j,m], GR_DNN[i,j,m], GF1_DNN[i,j,m] = performance_eval(y_test, y_pred_DNN, lbl, label_names)
                print(A_DNN[i,j,m], GP_DNN[i,j,m], GR_DNN[i,j,m], GF1_DNN[i,j,m])
                A_KNN[i,j,m], GP_KNN[i,j,m], GR_KNN[i,j,m], GF1_KNN[i,j,m] = performance_eval(y_test, y_pred_KNN, lbl, label_names)
                print(A_KNN[i,j,m], GP_KNN[i,j,m], GR_KNN[i,j,m], GF1_KNN[i,j,m])


In [None]:
fig_folder = '1_3_Figures'
if not os.path.exists(fig_folder):
    os.makedirs(fig_folder)

training_loop(noise_lengths, noise_intervals, noise_means)

In [None]:
fig_folder = '1_3_Figures'
if not os.path.exists(fig_folder):
    os.makedirs(fig_folder)

noise_intervals_x = np.repeat(noise_intervals, len(noise_lengths)*len(noise_means))
noise_lengths_y = list(np.repeat(noise_lengths, len(noise_means))) * len(noise_intervals)
noise_means_z = noise_means * (len(noise_lengths)*len(noise_intervals))

A_XGB = A_XGB.reshape(-1)
GP_XGB = GP_XGB.reshape(-1)
GR_XGB = GR_XGB.reshape(-1)
GF1_XGB = GF1_XGB.reshape(-1)

A_DNN = A_DNN.reshape(-1)
GP_DNN = GP_DNN.reshape(-1)
GR_DNN = GR_DNN.reshape(-1)
GF1_DNN = GF1_DNN.reshape(-1)

A_KNN = A_KNN.reshape(-1)
GP_KNN = GP_KNN.reshape(-1)
GR_KNN = GR_KNN.reshape(-1)
GF1_KNN = GF1_KNN.reshape(-1)

df = pd.DataFrame(list(zip(noise_intervals_x, noise_lengths_y, noise_means_z, A_XGB)),
               columns =['X', 'Y', 'Z', 'Accuracy'])
#display(df)

# creating 3d figures
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111,projection='3d')

# Value Normalization
colors = cm.hsv((A_XGB-min(A_XGB))/(max(A_XGB)-min(A_XGB)))

# configuring colorbar
color_map = cm.ScalarMappable(cmap=cm.hsv)
color_map.set_array(A_XGB)

# creating the heatmap
img = ax.scatter3D(noise_intervals_x, noise_lengths_y, noise_means_z, c=colors, marker='o')
plt.colorbar(color_map)

# adding title and labels
title = "XGB Impact of Noise"
ax.set_title(title)
ax.set_xlabel('X - Noise Interval')
ax.set_ylabel('Y - Noise Length')
ax.set_zlabel('Z - Noise Mean')

plt.show()
fig.savefig(fig_folder+'/'+title.replace(" ", "_")+'.png')