# Classifiers Training

In [None]:
import numpy as np
import pandas as pd
import pickle
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import recall_score, f1_score, precision_score

The *input_id* identifies the dataset used in this analysis (see the *Input* directory for more information).

In [None]:
# Identifier of the dataset; it is interpolated into every pickle/CSV file name used below.
input_id = 1

### Load Reduced Features

In [None]:
# Read the pickled reduced features that belong to the selected dataset.
# NOTE: pickle.load executes arbitrary code from the file; only load trusted files.
reduced_features_path = 'Kernel/reduced_features_{}.pkl'.format(input_id)
with open(reduced_features_path, 'rb') as features_file:
    reduced_features = pickle.load(features_file)

### Load GA_parameters

In [None]:
# Read the pickled GA_parameters mapping for the selected dataset; it is
# indexed by granularity key further down in the notebook.
# NOTE: pickle.load executes arbitrary code from the file; only load trusted files.
ga_parameters_path = 'Kernel/GA_parameters_{}.pkl'.format(input_id)
with open(ga_parameters_path, 'rb') as parameters_file:
    GA_parameters = pickle.load(parameters_file)

### Load Original Target

In [None]:
# Read the pickled original target for the selected dataset; every classifier
# is scored against it.
# NOTE: pickle.load executes arbitrary code from the file; only load trusted files.
target_path = 'Kernel/final_target_{}.pkl'.format(input_id)
with open(target_path, 'rb') as target_file:
    target = pickle.load(target_file)

### Train Classifiers

In [None]:
# Initialize Classifiers
#
# `classifiers` and `classifiers_names` are parallel lists: the estimator at
# position i is reported under the label at position i, so the two lists must
# stay in the same order and have the same length.

# Fixed seed for the stochastic estimators so that a fresh
# "Restart Kernel -> Run All" reproduces the same models and metrics.
RANDOM_STATE = 42

classifiers = [
        KNeighborsClassifier(3),
        # SVC defaults to the RBF kernel; the linear kernel has to be requested
        # explicitly for this entry to match its 'Linear SVM' label.
        SVC(kernel='linear'),
        SVC(gamma=2, C=1),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        DecisionTreeClassifier(random_state=RANDOM_STATE),
        RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
        MLPClassifier(alpha=1, max_iter=500, random_state=RANDOM_STATE),
        AdaBoostClassifier(random_state=RANDOM_STATE),
        GaussianNB(),
        QuadraticDiscriminantAnalysis()]

classifiers_names = [
        'Nearest Neighbors',
        'Linear SVM',
        'RBF SVM',
        'Gaussian Process',
        'Decision Tree',
        'Random Forest',
        'Neural Net',
        'AdaBoost',
        'Naive Bayes',
        'QDA'
        ]

In [None]:
# Fit every classifier on each granularity's labels and score it against the
# original target.
#
# Layout of the result:
#   results_dict[gra][classifier_name] = {'metrics': {...}, 'predict': [...]}
# or, when any classifier fails for that granularity:
#   results_dict[gra] = 'Error: <message>'   (a plain string; the next cell
#   skips string entries)
results_dict = {}

for gra in GA_parameters:
    print('-------------------------------------')
    print(gra)

    # Labels of this granularity; they are what the classifiers are fitted on.
    y_train = GA_parameters[gra]

    gra_results = {}

    # Take each estimator's label from the parallel `classifiers_names` list
    # rather than from the key order of a hand-copied dict.
    for name, clf in zip(classifiers_names, classifiers):
        try:
            clf.fit(reduced_features, y_train)
            # No train/test split is made: the model is evaluated on the same
            # samples it was fitted on, but against `target` instead of y_train.
            score = clf.score(reduced_features, target)
            y_predict = list(clf.predict(reduced_features))
            gra_results[name] = {
                    'metrics':{
                        'Accuracy': score,
                        'Precision': precision_score(target, y_predict, zero_division=0),
                        'Recall': recall_score(target, y_predict, zero_division=0),
                        'F1': f1_score(target, y_predict, zero_division=0)
                    },
                    'predict': y_predict
                }
        except Exception as err:
            # Best effort: a failing classifier (e.g. labels with a single
            # class) invalidates the whole granularity. Keep the real message
            # instead of assuming the cause, and let KeyboardInterrupt /
            # SystemExit propagate so the run can still be stopped.
            print('{} failed: {}'.format(name, err))
            gra_results = 'Error: {}'.format(err)
            break

    results_dict[gra] = gra_results

In [None]:
# Flatten results_dict into a table with one row per (classifier, granularity)
# pair and one column per metric, expressed in percent.
metric_names = ['Accuracy', 'Precision', 'Recall', 'F1']
records = []

for gra, gra_results in results_dict.items():
    # Granularities whose training failed hold an error string, not a dict.
    if isinstance(gra_results, str):
        continue
    granularity = int(gra.split('_')[-1])
    for clf_name, clf_results in gra_results.items():
        # Copy the metrics so the extra keys do not leak back into results_dict.
        record = dict(clf_results['metrics'])
        record['Granularity'] = granularity
        record['Classifier'] = clf_name
        records.append(record)

# Build the frame once (DataFrame.append was removed in pandas 2.0). Passing
# the columns explicitly keeps set_index working when there are no records.
df = pd.DataFrame(records, columns=metric_names + ['Granularity', 'Classifier'])
df = df.set_index(['Classifier', 'Granularity'])*100


In [None]:
# Granularity levels present in GA_parameters, parsed from keys of the form
# 'granularity_<n>'. The whole suffix after the last underscore is used so that
# multi-digit levels (e.g. 'granularity_10') are read as 10 rather than 0.
gra_list = [int(key.split('_')[-1]) for key in GA_parameters]

In [None]:
# Display the metrics table built above, indexed by (Classifier, Granularity).
df

In [None]:
# Save the metrics table; the file name records the dataset id together with
# the smallest and largest granularity found in gra_list.
results_csv_path = 'Classification/results__{}__min_gra_{}__max_gra__{}.csv'.format(
    input_id,
    min(gra_list),
    max(gra_list),
)
df.to_csv(results_csv_path)

### Choose Granularity and Classifier Model for Online Stage

In [None]:
# Granularity level of the final model; it selects the 'granularity_<n>' entry
# of GA_parameters that is used as training labels.
Granularity = 4
# Label of the estimator to train; it is looked up in classifiers_names.
Model = 'Neural Net'

In [None]:
# Training data for the final model: the labels of the chosen granularity,
# and the reduced features they are fitted against.
y = GA_parameters['granularity_{}'.format(Granularity)]
X = reduced_features

In [None]:
# Position of the chosen model in classifiers_names (list.index raises
# ValueError when Model is not one of the labels).
idx = classifiers_names.index(Model)

In [None]:
# Pick the estimator at the chosen position. This is the same object stored in
# `classifiers`, not a copy.
clf = classifiers[idx]

# Fit it on the labels of the chosen granularity; as the last expression of
# the cell, the value returned by fit() is displayed.
clf.fit(X, y)

In [None]:
# Persist the fitted classifier for the selected dataset.
classifier_path = 'Kernel/Classifier_{}.pkl'.format(input_id)
with open(classifier_path, 'wb') as model_file:
    pickle.dump(clf, model_file)

In [12]:
def foo():
    """Return the global ``a``, or fail with a 'Model not fitted!' error.

    Returns:
        The current value of the global name ``a``.

    Raises:
        Exception: with the message 'Model not fitted!' when ``a`` is not
            defined.
    """
    try:
        return a
    except NameError as err:
        # Looking up an undefined name is the only failure this body can
        # produce; catching it specifically avoids also trapping
        # KeyboardInterrupt/SystemExit as the bare `except:` did.
        raise Exception('Model not fitted!') from err

In [13]:
# Calls foo(); while no global `a` is defined this raises
# Exception('Model not fitted!') and `b` is not assigned by this cell.
# NOTE(review): looks like a leftover scratch cell unrelated to the training
# pipeline - confirm whether it can be removed.
b = foo()

Exception: Model not fitted!

In [7]:
# Displays `b`.
# NOTE(review): this cell's execution count (7) is lower than that of the cells
# defining foo() and b (12, 13), so it ran out of order - confirm whether it
# is still needed.
b