# Loading of Datasets

In [4]:
from utils import *
import pandas as pd
import numpy as np

# Load every previously pickled dataset through load_from_pickle
# (presumably provided by the star import from utils).

# Original and untouched data
combined_df = load_from_pickle("unprocessed_data")

# Clustering performed without applying PCA first
clustered_data_load = load_from_pickle("direct_clustered_data")
clustered_data_1 = clustered_data_load["cir_data_pca"]

# Clustering performed but WITH PCA applied first
clustered_data_load2 = load_from_pickle("pca_clustered_data")
clustered_data_2 = clustered_data_load2["cir_data_pca"]

# DBSCAN with Method 1: Drop all CIR columns, then PCA
dbscan_data_1 = load_from_pickle("8_features_DBSCAN_data")
# DBSCAN with Method 2: Apply PCA directly
dbscan_data_2 = load_from_pickle("direct_PCA_DBSCAN_data")
# DBSCAN with Method 3: DF Without PCA
dbscan_data_3 = load_from_pickle("unscaled_DBSCAN_data")

# The four prepared datasets, loaded in order and bound in one unpacking
dataset_1, dataset_2, dataset_3, dataset_4 = (
    load_from_pickle(pickle_name)
    for pickle_name in ("dataset_1", "dataset_2", "dataset_3", "dataset_4")
)

# Multi-Layer Perceptron (MLP)

## Dataset 4

Setting up the training and testing dataset with a 70:30 split

In [5]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler

# Select the features (every column except the label) and the 'NLOS' target used to
# train the MLP classifier
X = dataset_4.drop(columns = 'NLOS')
Y = dataset_4['NLOS'].to_numpy()

# Split dataset into 70% training and 30% test BEFORE scaling, so that the scaler never
# sees the test rows. The row assignment depends only on the number of samples and
# random_state, so it is the same partition as splitting the scaled array would give.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(X, Y, test_size = 0.3, random_state = 12)

# Standardizing the data: mean and variance are learned from the training rows only and
# then applied unchanged to the test rows (avoids leaking test statistics into training)
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

# Full dataset under the training-set scaling, kept so any cell that still refers to
# x_scaled continues to work
x_scaled = scaler.transform(X)

### Hyperparameter Tuning for MLPClassifier

Hyperparameter tuning evaluates every combination defined in the parameter space and reports the combination that gives the best cross-validated score.

In [6]:
# Base estimator cloned by the grid search for every candidate. random_state is fixed
# (same seed as the train/test split and the final model) so that weight initialisation,
# and therefore the selected "best" parameters, are repeatable between runs.
mlp = MLPClassifier(max_iter = 1000, random_state = 12)

# Search grid for the MLP hyperparameters.
# - hidden_layer_sizes: each entry is a tuple with one integer per hidden layer. Single-layer
#   networks are written with a trailing comma, because a bare `(50)` is just the int 50.
#   Every architecture appears exactly once so no candidate is fitted twice.
# - alpha: strength of the L2 regularisation term.
# - learning_rate: learning-rate schedule option passed to MLPClassifier.
parameter_space = {
    'hidden_layer_sizes': [
        (50,),
        (50, 50),
        (50, 50, 50),
        (50, 100, 50),
        (100, 100, 100),
        (50, 50, 50, 50),
        (100, 100, 100, 100),
        (100, 100),
        (100,),
    ],
    'activation': ['relu', 'tanh'],
    'solver': ['adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant', 'adaptive']
}

In [7]:
from sklearn.model_selection import GridSearchCV

# Exhaustive search over parameter_space: every candidate is scored with 3-fold
# cross-validation on the training split. n_jobs = 1 runs the fits without parallelism.
clf = GridSearchCV(
    estimator = mlp,
    param_grid = parameter_space,
    cv = 3,
    n_jobs = 1,
)
clf.fit(x_train, y_train)



In [None]:
# Show the hyperparameter combination with the best mean cross-validation score
print(
    "Best Parameters Found: \n",
    clf.best_params_,
)

In [None]:
# For every candidate tried by the grid search, print the mean cross-validation score,
# a band of two standard deviations across the folds, and the parameters used
cv_results = clf.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
for idx, params in enumerate(cv_results['params']):
    print("%0.3f (+/-%0.03f) for %r" % (means[idx], stds[idx] * 2, params))

In [None]:
# Final model: one hidden layer of 50 units, ReLU activation, Adam solver.
# hidden_layer_sizes is written as a one-element tuple; a bare `(50)` is only the int 50.
# random_state is fixed so the fitted weights and the metrics below are repeatable.
# NOTE: this rebinds `clf`, which previously held the grid-search object. The cells that
# read clf.best_params_ / clf.cv_results_ must therefore be run before this one.
clf = MLPClassifier(activation = 'relu', solver = 'adam', hidden_layer_sizes = (50,), random_state = 12, max_iter = 1000)

clf.fit(x_train, y_train)

# Predictions on both splits; later cells reuse these arrays for their metrics
y_train_pred = clf.predict(x_train)
y_test_pred = clf.predict(x_test)

# Persist the fitted classifier.
# NOTE(review): only the classifier is saved here; the fitted StandardScaler is not, so
# anything loading this pickle must apply the same scaling itself - confirm with consumers.
save_to_pickle(f'{MODEL_FOLDER}/mlp_optimised.pkl', clf, complete_path=False)

In [None]:
# Fraction of correctly classified samples on each split. accuracy_score is reached through
# the explicitly imported sklearn `metrics` module so this cell does not depend on the name
# happening to be re-exported by a star import.
print("Training Accuracy: %.4f" % metrics.accuracy_score(y_train, y_train_pred))
print("Testing Accuracy: %.4f" % metrics.accuracy_score(y_test, y_test_pred))

### Classification Metrics of the Training Dataset

In [None]:
# Metrics for the training split from the project's classifier_metrics helper.
# The true labels are handed over as a plain Python list; print_results = True presumably
# makes the helper print its results (see utils).
classifier_metrics(
    list(y_train),
    y_train_pred,
    print_results = True,
)

### Classification Metrics of the Testing Dataset

In [None]:
# Metrics for the held-out test split from the project's classifier_metrics helper.
# The true labels are handed over as a plain Python list; print_results = True presumably
# makes the helper print its results (see utils).
classifier_metrics(
    list(y_test),
    y_test_pred,
    print_results = True,
)

### Classification Report of the Testing Dataset

In [None]:
# Per-class precision, recall and F1 on the held-out test split.
# y_test_pred is the array already produced when the final model was fitted, so it is not
# recomputed here. classification_report is reached through the explicitly imported sklearn
# `metrics` module instead of relying on a star import to provide the bare name.
print(metrics.classification_report(y_test, y_test_pred))

### Confusion Matrix of Training Dataset

In [None]:
import matplotlib.pyplot as plt

# Confusion matrix on the training split, with rows/columns ordered by the classes the
# fitted model knows about
class_labels = clf.classes_
predictions = clf.predict(x_train)
cm = confusion_matrix(y_train, predictions, labels = class_labels)

# plot() draws the matrix on a new figure and returns the display object itself
disp = ConfusionMatrixDisplay(
    confusion_matrix = cm,
    display_labels = class_labels,
).plot()
plt.show()

### Confusion Matrix of Testing Dataset

In [None]:
# Confusion matrix on the held-out test split, with rows/columns ordered by the classes
# the fitted model knows about
class_labels = clf.classes_
predictions = clf.predict(x_test)
cm = confusion_matrix(y_test, predictions, labels = class_labels)

# plot() draws the matrix on a new figure and returns the display object itself
disp = ConfusionMatrixDisplay(
    confusion_matrix = cm,
    display_labels = class_labels,
).plot()
plt.show()

### Loss Curve of MLP Classifier

In [None]:
# Training loss recorded by the fitted MLP at each iteration, drawn on its own axes
fig, ax = plt.subplots()
ax.plot(clf.loss_curve_)
ax.set_title("Loss Curve")
ax.set_xlabel("Iterations")
ax.set_ylabel("Cost")
plt.show()