### IMPORT LIBRARIES

In [1]:
import file_reader as fr 
import numpy as np 
import time 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 
from sklearn import metrics 

### LOAD THE DATASET

use a scaler to transform your data so it can be easily used by all the algorithms.

In [4]:
training_set_file = '/Users/umakantmanore/Desktop/amu/Dev_Enviroment2023/test_env/Optical_networks/dataset/balanced-20372.csv'
testing_set_file = '/Users/umakantmanore/Desktop/amu/Dev_Enviroment2023/test_env/Optical_networks/dataset/testset-2351.csv'

In [5]:
X_train, y_train = fr.FileReader.read_array_three_class(fr.FileReader(), training_set_file)

X_test, y_test = fr.FileReader.read_array_three_class(fr.FileReader(), testing_set_file)

Class A samples : 5093 
Class B samples : 5092 
Class C samples : 5093 
Class D samples : 5093
Class A samples : 864 
Class B samples : 378 
Class C samples : 499 
Class D samples : 609


In [6]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

### PARAMETERS

In [8]:
names = ["Nearest Neighbors", "Logistic Regression", "Decision Tree", "Neural Network"]
ensemble_name = ["Random Forest", "AdaBoost", "Bagging"]

parameters = {
    "Nearest Neighbors": {"n_neighbors" : [1]},
    "Logistic Regression" : {"solver":["lbfgs"], "multi_class" : ["multinomial"], "random_state": [1]},
    "Decision Tree" : {'max_depth': [5]}
}

ensemble_parameters = {
    "Random Forest": {"max_dept":[5], "n_estimators":[10], "max_features":[1]},
    "AdaBoost": {'n_estimator': [10]},
    "Bagging": {'n_estimator':[100], 'max_samples':[0.8], 'max_features':[0.8]}
}

classifiers = [
    KNeighborsClassifier(1),
    LogisticRegression(),
    DecisionTreeClassifier()
]

ensemble_classifier = [
    RandomForestClassifier(),
    AdaBoostClassifier(),
    BaggingClassifier()
]

In [9]:
y_train2 = np.argmax(y_train, axis = 1)
y_test2 = np.argmax(y_test, axis = 1)

y2_clfs = ["Logistic Regression", "Decision Tree", "AdaBoost", "Bagging"]

classifier_stats = {}

### FITTING THE DATA INTO MODELS

In [10]:
for name, clf in zip(names, classifiers):
    print(f"Running execution for classifier: {name}")
    clf_grid = GridSearchCV(clf, parameters[name], n_jobs=10, cv=5)

    if name in y2_clfs:
        ts = time.time()
        clf_grid.fit(X_train, y_train2)
        new_ts = time.time()
        total_time = new_ts - ts 
        score = clf_grid.score(X_test, y_test2)
        y_pred = clf_grid.predict(X_test)
        f1_score = metrics.f1_score(y_test2, y_pred, average='micro')
    else:
        ts = time.time()
        clf_grid.fit(X_train, y_train)
        new_ts = time.time()
        total_time = new_ts - ts 
        score = clf_grid.score(X_test, y_test)
        y_pred = clf_grid.predict(X_test)
        f1_score = metrics.f1_score(y_test, y_pred, average='micro')
    classifier_stats[name] = (score, f1_score, total_time)

print("\n\n")

for clfs in classifier_stats:
    (score, f1_score, total_time) = classifier_stats[clfs]
    print(f"Classifier: {clfs} \nF1_Score: {f1_score} \nExecution Time: {total_time}")
    print("\n")

Running execution for classifier: Nearest Neighbors
Running execution for classifier: Logistic Regression




Running execution for classifier: Decision Tree



Classifier: Nearest Neighbors 
F1_Score: 0.3485106382978723 
Execution Time: 7.271537780761719


Classifier: Logistic Regression 
F1_Score: 0.3872340425531915 
Execution Time: 4.914545059204102


Classifier: Decision Tree 
F1_Score: 0.4651063829787234 
Execution Time: 1.2636759281158447


