In [None]:
import random
import timeit

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from imblearn.over_sampling import SMOTE
from sklearn import metrics
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
from sklearn.utils import shuffle
from tensorflow.keras import callbacks
from tensorflow.keras import layers,models
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import plot_model
from tensorflow.keras.utils import to_categorical
from xgboost import XGBClassifier
from xgboost import XGBRegressor

# Global matplotlib styling applied to every figure in this notebook.
plt.rcParams.update({'font.size': 18})
plt.rcParams.update({'font.family': 'Arial'})

# Chunk size used by matplotlib's Agg backend when drawing long paths.
plt.rcParams['agg.path.chunksize'] = 10000

# Seed every random number generator the notebook relies on, not only NumPy:
# the undersampling cells use Python's `random` module and the neural network
# is initialised and trained by TensorFlow, so seeding NumPy alone leaves the
# results non-reproducible.
SEED = 7
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Feed forward neural network

In [None]:
# Read the combined window dataset from disk.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
data_csv_path = r'M:\THESIS_IPT\MRIDUL\data_overlapping_windows\points1000_20ms\Combined_final_ALL_noOutliers0_10.csv'
load_data = pd.read_csv(data_csv_path)

# Show how many rows each 'avg_peak' class has before any balancing.
load_data['avg_peak'].value_counts()

In [None]:
# Undersample class 0: remove a random subset of the rows whose target
# ('avg_peak') equals 0 so that this class is better balanced with the others.
# NOTE: re-running this cell without reloading the CSV removes a further batch.
n_zero_to_drop = 12587

# Positional indices of all zero-labelled rows, computed in one vectorised pass
# instead of a Python loop with a per-row .iloc lookup.
zero_index = np.flatnonzero(load_data['avg_peak'].to_numpy() == 0).tolist()
if len(zero_index) < n_zero_to_drop:
    raise ValueError(
        f"Cannot drop {n_zero_to_drop} zero-labelled rows: only {len(zero_index)} present"
    )

# A dedicated, seeded generator makes the dropped subset identical on every run;
# the module-level `random` generator is not affected by np.random.seed.
rand_zero_index = random.Random(7).sample(zero_index, n_zero_to_drop)
load_data = load_data.drop(load_data.index[rand_zero_index], axis=0)

# Class distribution after undersampling.
load_data['avg_peak'].value_counts()

In [None]:
# Shift the target labels 1..10 down by one, giving labels in the range 0..9.
# The replacements run in ascending order, so a freshly written label is never
# replaced a second time.
# NOTE(review): rows already labelled 0 keep that label, so original classes
# 0 and 1 end up sharing label 0 - confirm this merge is intended.
target = load_data['avg_peak']
for old_label in range(1, 11):
    target = target.replace(old_label, old_label - 1)
load_data['avg_peak'] = target

# Class distribution after relabelling.
load_data['avg_peak'].value_counts()

In [None]:
# Target as a flat NumPy array.
Y_nn = np.ravel(load_data['avg_peak'])

# Fit the label encoder on the complete target; it is applied to each split
# later, right before one-hot encoding.
encoder = LabelEncoder().fit(Y_nn)

# Feature matrix: every column except the target.
X_nn = load_data.drop(columns=['avg_peak'])

# Both splits share the same settings. Holding out 20 % for testing and then
# 20 % of the remainder for validation yields 64 % / 16 % / 20 %.
split_options = dict(test_size=0.2, random_state=3, shuffle=True)

X_TrainVal, X_test, Y_TrainVal, Y_test = train_test_split(X_nn, Y_nn, **split_options)
X_train, X_val, Y_train, Y_val = train_test_split(X_TrainVal, Y_TrainVal, **split_options)



In [None]:
# Step 1: standardise the features. The scaler is fitted on the training split
# only and then applied unchanged to the validation and test splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

# Step 2: pass the standardised data through sklearn's Normalizer, which
# rescales each sample (row) rather than each feature.
scaler2 = Normalizer().fit(X_train)
X_train = scaler2.transform(X_train)
X_test = scaler2.transform(X_test)
X_val = scaler2.transform(X_val)

# One-hot encode the targets. The width is pinned to the number of classes the
# encoder was fitted on; otherwise to_categorical infers it from the largest
# label present in each split, and a split that happens to miss the highest
# class would get a narrower matrix than the others.
n_classes = len(encoder.classes_)
Y_train = to_categorical(encoder.transform(Y_train), num_classes=n_classes)
Y_val = to_categorical(encoder.transform(Y_val), num_classes=n_classes)
Y_test = to_categorical(encoder.transform(Y_test), num_classes=n_classes)

In [None]:
# Feed-forward classifier: three ReLU hidden layers, each half as wide as the
# input, light dropout after the first one, and a 10-way softmax output.
a = X_nn.shape[1]  # number of input features
hidden_units = a // 2

model = Sequential()
model.add(Dense(hidden_units, activation='relu', input_shape=(a,)))
model.add(Dropout(0.1))
model.add(Dense(units=hidden_units, activation='relu'))
model.add(Dense(units=hidden_units, activation='relu'))
model.add(Dense(units=10, activation='softmax'))

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

# Compile model. `metrics` is passed as a list: newer Keras releases reject a
# bare string here.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Fit the model, tracking loss and accuracy on the validation split per epoch.
history = model.fit(X_train, Y_train,
                    batch_size=5000,
                    epochs=300,
                    validation_data=(X_val, Y_val),
                    verbose=1)

In [None]:
# Training curves: one figure per tracked quantity, each comparing the training
# series with its validation counterpart (stored by Keras under 'val_<name>').
for curve_name in ('loss', 'accuracy'):
    plt.figure(figsize=(10, 5))
    plt.plot(history.history[curve_name], label='train')
    plt.plot(history.history['val_' + curve_name], label='validation')
    plt.legend()
    plt.show()

In [None]:
# Evaluate the trained network on the held-out test split; the returned values
# (loss followed by the compiled metrics) are displayed as the cell output.
model.evaluate(x=X_test, y=Y_test, verbose=1)

In [None]:
# Per-class scores from the softmax output for every test sample.
class_scores = model.predict(X_test)

# Convert both the predicted scores and the one-hot test targets to integer
# class indices by taking the position of the largest entry in each row.
Y_prediction = np.argmax(class_scores, axis=1)
Y_test = np.argmax(Y_test, axis=1)

In [None]:
# Confusion matrix of the neural network on the test split, drawn as a heatmap.
# The ten rows/columns of the matrix are displayed with tick labels 1..10.

x_axis_labels = [1,2,3,4,5,6,7,8,9,10] # labels for x-axis
y_axis_labels = [1,2,3,4,5,6,7,8,9,10] # labels for y-axis

sns.set(rc = {'figure.figsize':(8,4)})
# seaborn is imported as `sns`; the previous `sn.heatmap` raised a NameError.
sns.heatmap(confusion_matrix(Y_test, Y_prediction),
            annot=True, fmt='g', cmap='Greens', cbar=False,
            xticklabels=x_axis_labels, yticklabels=y_axis_labels)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
#plt.savefig(r'M:\THESIS_IPT\REPORT\images\nn_10.png',bbox_inches='tight',dpi=1000)

# XGBoost classifier, Random forest classifier, Support vector machine

In [None]:
# Reload the raw dataset so the classical classifiers start from the untouched CSV.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
data_csv_path = r'M:\THESIS_IPT\MRIDUL\data_overlapping_windows\points1000_20ms\Combined_final_ALL_noOutliers0_10.csv'
load_data = pd.read_csv(data_csv_path)

In [None]:
# Undersample class 0: remove a random subset of the rows whose target
# ('avg_peak') equals 0 so that this class is better balanced with the others.
# NOTE: re-running this cell without reloading the CSV removes a further batch.
# NOTE(review): the neural-network section of this notebook drops 12587 zero
# rows, this section drops 12000 - confirm the difference is intentional.
n_zero_to_drop = 12000

# Positional indices of all zero-labelled rows, computed in one vectorised pass
# instead of a Python loop with a per-row .iloc lookup.
zero_index = np.flatnonzero(load_data['avg_peak'].to_numpy() == 0).tolist()
if len(zero_index) < n_zero_to_drop:
    raise ValueError(
        f"Cannot drop {n_zero_to_drop} zero-labelled rows: only {len(zero_index)} present"
    )

# A dedicated, seeded generator makes the dropped subset identical on every run;
# the module-level `random` generator is not affected by np.random.seed.
rand_zero_index = random.Random(7).sample(zero_index, n_zero_to_drop)
load_data = load_data.drop(load_data.index[rand_zero_index], axis=0)

# Class distribution after undersampling.
load_data['avg_peak'].value_counts()

In [None]:
# Shift the target labels 1..10 down by one, giving labels in the range 0..9.
# The replacements run in ascending order, so a freshly written label is never
# replaced a second time.
# NOTE(review): rows already labelled 0 keep that label, so original classes
# 0 and 1 end up sharing label 0 - confirm this merge is intended.
target = load_data['avg_peak']
for old_label in range(1, 11):
    target = target.replace(old_label, old_label - 1)
load_data['avg_peak'] = target

# Class distribution after relabelling.
load_data['avg_peak'].value_counts()

In [None]:
# Separate the feature matrix from the target column.
X_nn = load_data.drop(columns=['avg_peak'])
Y_nn = load_data['avg_peak']

# Hold out 20 % of the rows as the test set (fixed seed for a repeatable split).
X_train, X_test, Y_train, Y_test = train_test_split(
    X_nn, Y_nn, test_size=0.2, random_state=3, shuffle=True
)

# Standardise the classifier inputs; the statistics come from the training
# split only and are reused for the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Select which classifier the following cell trains and evaluates.

# i = 0 : XGBoost
# i = 1 : Random forest
# i = 2 : SVM (RBF kernel)

i = 0

In [None]:
# Train the classifier chosen by `i` on the standardised training split and
# predict the test split into `y_pred`. The branches are mutually exclusive, and
# an unknown selector fails loudly instead of leaving `y_pred` undefined (or
# stale from an earlier run).
if i == 0:
    # XGBoost classifier; its training time in seconds is printed.
    model = XGBClassifier(objective='multi:softmax')
    start_time = timeit.default_timer()
    model.fit(X_train, Y_train)
    end_time = timeit.default_timer()
    print(end_time - start_time)
    y_pred = model.predict(X_test)

elif i == 1:
    # Random forest classifier with class weights balanced per bootstrap sample.
    model = RandomForestClassifier(class_weight='balanced_subsample')
    model.fit(X_train, Y_train)
    y_pred = model.predict(X_test)

elif i == 2:
    # SVM with class-balanced weights.
    clf = svm.SVC(kernel='rbf', gamma=0.1, class_weight='balanced') # set Kernel: ‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’
    clf.fit(X_train, Y_train)
    y_pred = clf.predict(X_test)

else:
    raise ValueError(f"Unknown classifier selector i={i!r}; expected 0, 1 or 2")

In [None]:
# Overall accuracy of the selected classifier on the test split.
test_accuracy = metrics.accuracy_score(Y_test, y_pred)
print("Accuracy:", test_accuracy)


# Per-class report followed by the raw confusion matrix, each printed under
# its own heading.
for heading, build_report in (('classification report', classification_report),
                              ('confusion matrix', confusion_matrix)):
    print(heading)
    print(build_report(Y_test, y_pred))

In [None]:
# Confusion matrix of the selected classifier on the test split, drawn as a
# heatmap. The ten rows/columns of the matrix are displayed with tick labels 1..10.

x_axis_labels = [1,2,3,4,5,6,7,8,9,10] # labels for x-axis
y_axis_labels = [1,2,3,4,5,6,7,8,9,10] # labels for y-axis

sns.set(rc = {'figure.figsize':(8,4)})
# seaborn is imported as `sns`; the previous `sn.heatmap` raised a NameError.
sns.heatmap(confusion_matrix(Y_test, y_pred),
            annot=True, fmt='g', cmap='Greens', cbar=False,
            xticklabels=x_axis_labels, yticklabels=y_axis_labels)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
#plt.savefig(r'M:\THESIS_IPT\REPORT\images\svm_10.png',bbox_inches='tight',dpi=1000)