In [26]:
#Import packages
import numpy as np
import pandas as pd

from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.neural_network import MLPClassifier

class DataPreprocessing():
    def __init__(self):
        pass

    def load_data(self):
        #Load normal operation data
        data = pd.read_excel("Data/Know_Normal_Operation.xlsx", header = 0).dropna()

        data = data.to_numpy()

        return data

    def split_data(self, data):
        #Split normal operation data
        x = data[:, :-1]
        y = data[:, -1]
        #Converts y to int
        y = y.astype(int)

        #Data preprocessing
        scaler = StandardScaler()
        x = scaler.fit_transform(x)

        #Create train and test datasets
        X_train, X_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.25,
                                                            shuffle=True)

        return X_train, X_test, y_train, y_test

    def split_data_fault(self, data):
        #Split actual data
        x = data[:, :-1]
        y = data[:, -1]
        #Converts y to int
        y = y.astype(int)

        #Data preprocessing
        scaler = StandardScaler()
        x = scaler.fit_transform(x)

        #Create train and test datasets
        X_train, X_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.25,
                                                            shuffle=True)

        return X_train, X_test, y_train, y_test
        
    def load_data_fault(self):
        #Load actual data
        data = pd.read_excel("Data/RawData.xlsx", header = 0).dropna()

        data = data.to_numpy()

        return data


preprocessor = DataPreprocessing()
knowndata = preprocessor.load_data()

X_train, X_test, y_train, y_test = preprocessor.split_data(knowndata) #Splits for normal operation

clf = svm.SVC(kernel='linear', C=1.0) #SVM model
clf.fit(X_train, y_train)

faultdata = preprocessor.load_data_fault()
X_trainf, X_testf, y_trainf, y_testf = preprocessor.split_data_fault(faultdata) #Splits for actual data

y_pred = clf.predict(X_testf)

score = accuracy_score(y_testf,y_pred)

print("SVM Accuracy =",score)

classifier=MLPClassifier(hidden_layer_sizes=(150,),max_iter=300, activation='relu',solver='lbfgs',random_state=1) 
#First set of hyperparameters tested

classifier.fit(X_train, y_train) #ANN model

y_pred=classifier.predict(X_trainf)

score2 = accuracy_score(y_trainf,y_pred)
print("ANN Accuracy =",score2)





SVM Accuracy = 0.24048096192384769
ANN Accuracy = 0.22794117647058823


In [27]:
classifier=MLPClassifier(hidden_layer_sizes=(150,),max_iter=300, activation='identity',solver='lbfgs',random_state=1) 
#Second set of hyperparameters tested, identity activation

classifier.fit(X_train, y_train) #ANN model

y_pred=classifier.predict(X_trainf)

score2 = accuracy_score(y_trainf,y_pred)
print("ANN Accuracy =",score2)

ANN Accuracy = 0.20788770053475936


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [28]:
classifier=MLPClassifier(hidden_layer_sizes=(150,),max_iter=300, activation='logistic',solver='lbfgs',random_state=1) 
#Third set of hyperparameters tested, logistic activation

classifier.fit(X_train, y_train) #ANN model

y_pred=classifier.predict(X_trainf)

score2 = accuracy_score(y_trainf,y_pred)
print("ANN Accuracy =",score2)

ANN Accuracy = 0.20989304812834225


In [29]:
classifier=MLPClassifier(hidden_layer_sizes=(100,),max_iter=300, activation='tanh',solver='lbfgs',random_state=1) 
#Fourth set of hyperparameters tested, tanh activation

classifier.fit(X_train, y_train) #ANN model

y_pred=classifier.predict(X_trainf)

score2 = accuracy_score(y_trainf,y_pred)
print("ANN Accuracy =",score2)

ANN Accuracy = 0.21657754010695188


In [30]:
classifier=MLPClassifier(hidden_layer_sizes=(100,),max_iter=300, activation='relu',solver='lbfgs',random_state=1) 
#Fifth set of hyperparameters tested, varying hidden layer sizes to 100

classifier.fit(X_train, y_train) #ANN model

y_pred=classifier.predict(X_trainf)

score2 = accuracy_score(y_trainf,y_pred)
print("ANN Accuracy =",score2)

ANN Accuracy = 0.25


In [31]:
classifier=MLPClassifier(hidden_layer_sizes=(200,),max_iter=300, activation='relu',solver='lbfgs',random_state=1) 
#Sixth set of hyperparameters tested, varying hidden layer sizes to 200

classifier.fit(X_train, y_train) #ANN model

y_pred=classifier.predict(X_trainf)

score2 = accuracy_score(y_trainf,y_pred)
print("ANN Accuracy =",score2)

ANN Accuracy = 0.24398395721925134
