# Scopo del notebook di Testing

Lo scopo di questa parte è misurare le performance dei modelli supervisionati su dati di test non utilizzati durante il loro addestramento, oppure esplorare come un modello non supervisionato si comporta sul dataset di training.

# Settings

In [None]:
# IMPORTAZIONE LIBRERIE
import pandas as pd
import pickle
import utils

# Dataset Loading

In [None]:
# Load the prepared test set from its zip-compressed pickle.
X = pd.read_pickle(
    '../Data/Prepared/test_prep.pkl.zip',
    compression='zip',
)
# Preview the first rows (rendered as the cell output).
X.head()

In [None]:
# Separate the label column from the rest of the data: `pop` returns the
# 'Target' column and removes it from X in place, so afterwards X holds only
# the remaining columns (the ones later passed to each model's `predict`).
# Re-running this cell without reloading X raises a KeyError, because
# 'Target' is no longer present in X.
Y = X.pop('Target')
# Preview the first target values (rendered as the cell output).
Y.head()

# Dummy Classifier

## Model loading

In [None]:
# Restore the pickled dummy classifier from disk.
# NOTE: unpickling can execute arbitrary code, so only load model files that
# come from a trusted source.
with open('../Models/dummy.pkl', 'rb') as model_file:
    model_dummy = pickle.load(model_file)
model_dummy

# Predict on the test set.
y_pred_DUMMY = model_dummy.predict(X)

## Model Performances on Test set

In [None]:
# Evaluate the dummy classifier's test-set predictions; utils.performance
# returns nine values, unpacked here in order.
# NOTE(review): the trailing boolean is True only for this baseline model --
# confirm its meaning in utils.performance.
(
    acc_DUMMY,
    prec_DUMMY,
    rec_DUMMY,
    f1_DUMMY,
    best_est_DUMMY,
    best_score_DUMMY,
    best_para_DUMMY,
    roc_DUMMY,
    cm_DUMMY,
) = utils.performance(Y, y_pred_DUMMY, model_dummy, True)

# Logistic regression

## Model loading

In [None]:
# Restore the pickled logistic regression model from disk.
# NOTE: unpickling can execute arbitrary code, so only load model files that
# come from a trusted source.
with open('../Models/log_reg.pkl', 'rb') as model_file:
    model_log_reg = pickle.load(model_file)
model_log_reg

# Predict on the test set.
y_pred_LOG_REG = model_log_reg.predict(X)

## Model Performances on Test set

In [None]:
# Evaluate the logistic regression predictions on the test set;
# utils.performance returns nine values, unpacked here in order.
# NOTE(review): the meaning of the trailing boolean flag (False here) is
# defined in utils.performance, which is not visible in this notebook.
(
    acc_LOG_REG,
    prec_LOG_REG,
    rec_LOG_REG,
    f1_LOG_REG,
    best_est_LOG_REG,
    best_score_LOG_REG,
    best_para_LOG_REG,
    roc_LOG_REG,
    cm_LOG_REG,
) = utils.performance(Y, y_pred_LOG_REG, model_log_reg, False)

# Linear Discriminant Analysis

## Model loading

In [None]:
# Restore the pickled linear discriminant analysis model from disk.
# NOTE: unpickling can execute arbitrary code, so only load model files that
# come from a trusted source.
with open('../Models/linear_discriminant_analysis.pkl', 'rb') as model_file:
    model_linear_discriminant_analysis = pickle.load(model_file)
model_linear_discriminant_analysis

# Predict on the test set.
y_pred_LIN_DISC = model_linear_discriminant_analysis.predict(X)

## Model Performances on Test set

In [None]:
# Evaluate the linear discriminant analysis predictions on the test set;
# utils.performance returns nine values, unpacked here in order.
# NOTE(review): the meaning of the trailing boolean flag (False here) is
# defined in utils.performance, which is not visible in this notebook.
(
    acc_LIN_DISC,
    prec_LIN_DISC,
    rec_LIN_DISC,
    f1_LIN_DISC,
    best_est_LIN_DISC,
    best_score_LIN_DISC,
    best_para_LIN_DISC,
    roc_LIN_DISC,
    cm_LIN_DISC,
) = utils.performance(
    Y, y_pred_LIN_DISC, model_linear_discriminant_analysis, False
)

# K-Nearest Neighbors

## Model loading

In [None]:
# Restore the pickled k-nearest neighbors model from disk.
# NOTE: unpickling can execute arbitrary code, so only load model files that
# come from a trusted source.
with open('../Models/knn.pkl', 'rb') as model_file:
    model_knn = pickle.load(model_file)
model_knn

# Predict on the test set.
y_pred_KNN = model_knn.predict(X)

## Model Performances on Test set

In [None]:
# Evaluate the k-nearest neighbors predictions on the test set;
# utils.performance returns nine values, unpacked here in order.
# NOTE(review): the meaning of the trailing boolean flag (False here) is
# defined in utils.performance, which is not visible in this notebook.
(
    acc_KNN,
    prec_KNN,
    rec_KNN,
    f1_KNN,
    best_est_KNN,
    best_score_KNN,
    best_para_KNN,
    roc_KNN,
    cm_KNN,
) = utils.performance(Y, y_pred_KNN, model_knn, False)

# Decision Tree

## Model loading

In [None]:
# Restore the pickled decision tree model from disk.
# NOTE: unpickling can execute arbitrary code, so only load model files that
# come from a trusted source.
with open('../Models/decision_tree.pkl', 'rb') as model_file:
    model_decision_tree = pickle.load(model_file)
model_decision_tree

# Predict on the test set.
y_pred_DEC_TREE = model_decision_tree.predict(X)

## Model Performances on Test set

In [None]:
# Evaluate the decision tree predictions on the test set;
# utils.performance returns nine values, unpacked here in order.
# NOTE(review): the meaning of the trailing boolean flag (False here) is
# defined in utils.performance, which is not visible in this notebook.
(
    acc_DEC_TREE,
    prec_DEC_TREE,
    rec_DEC_TREE,
    f1_DEC_TREE,
    best_est_DEC_TREE,
    best_score_DEC_TREE,
    best_para_DEC_TREE,
    roc_DEC_TREE,
    cm_DEC_TREE,
) = utils.performance(Y, y_pred_DEC_TREE, model_decision_tree, False)

# Random Forest

## Model loading

In [None]:
# Restore the pickled random forest model from disk.
# NOTE: unpickling can execute arbitrary code, so only load model files that
# come from a trusted source.
with open('../Models/random_forest.pkl', 'rb') as model_file:
    model_random_forest = pickle.load(model_file)
model_random_forest

# Predict on the test set.
y_pred_RAND_FOR = model_random_forest.predict(X)

## Model Performances on Test set

In [None]:
# Evaluate the random forest predictions on the test set;
# utils.performance returns nine values, unpacked here in order.
# NOTE(review): the meaning of the trailing boolean flag (False here) is
# defined in utils.performance, which is not visible in this notebook.
(
    acc_RAND_FOR,
    prec_RAND_FOR,
    rec_RAND_FOR,
    f1_RAND_FOR,
    best_est_RAND_FOR,
    best_score_RAND_FOR,
    best_para_RAND_FOR,
    roc_RAND_FOR,
    cm_RAND_FOR,
) = utils.performance(Y, y_pred_RAND_FOR, model_random_forest, False)

# Naive Bayes

## Model loading

In [None]:
# Restore the pickled naive Bayes model from disk.
# NOTE: unpickling can execute arbitrary code, so only load model files that
# come from a trusted source.
with open('../Models/naive_bayes.pkl', 'rb') as model_file:
    model_naive_bayes = pickle.load(model_file)
model_naive_bayes

# Predict on the test set.
y_pred_NB = model_naive_bayes.predict(X)

## Model Performances on Test set

In [None]:
# Evaluate the naive Bayes predictions on the test set;
# utils.performance returns nine values, unpacked here in order.
# NOTE(review): the meaning of the trailing boolean flag (False here) is
# defined in utils.performance, which is not visible in this notebook.
(
    acc_NB,
    prec_NB,
    rec_NB,
    f1_NB,
    best_est_NB,
    best_score_NB,
    best_para_NB,
    roc_NB,
    cm_NB,
) = utils.performance(Y, y_pred_NB, model_naive_bayes, False)

# Support Vector Machine

## Model loading

In [None]:
# Restore the pickled support vector machine model from disk.
# NOTE: unpickling can execute arbitrary code, so only load model files that
# come from a trusted source.
with open('../Models/support_vector_machine.pkl', 'rb') as model_file:
    model_support_vector_machine = pickle.load(model_file)
model_support_vector_machine

# Predict on the test set.
y_pred_SVM = model_support_vector_machine.predict(X)

## Model Performances on Test set

In [None]:
# Evaluate the support vector machine predictions on the test set;
# utils.performance returns nine values, unpacked here in order.
# NOTE(review): the meaning of the trailing boolean flag (False here) is
# defined in utils.performance, which is not visible in this notebook.
(
    acc_SVM,
    prec_SVM,
    rec_SVM,
    f1_SVM,
    best_est_SVM,
    best_score_SVM,
    best_para_SVM,
    roc_SVM,
    cm_SVM,
) = utils.performance(Y, y_pred_SVM, model_support_vector_machine, False)

# Summary delle Confusion matrix

In [None]:
# One row per model: (display name, test-set predictions, confusion matrix).
# Keeping the three values side by side ensures the lists derived below stay
# aligned index by index.
model_rows = [
    ('Dummy Classifier', y_pred_DUMMY, cm_DUMMY),
    ('Logistic Regression', y_pred_LOG_REG, cm_LOG_REG),
    ('Linear Discriminant Analysis', y_pred_LIN_DISC, cm_LIN_DISC),
    ('KNN', y_pred_KNN, cm_KNN),
    ('Decision Tree', y_pred_DEC_TREE, cm_DEC_TREE),
    ('Random Forest', y_pred_RAND_FOR, cm_RAND_FOR),
    ('Naive Bayes', y_pred_NB, cm_NB),
    ('Support Vector Machine', y_pred_SVM, cm_SVM),
]

# Parallel lists consumed by the plotting helpers in utils.
models_name = [name for name, _, _ in model_rows]
pred_list = [y_pred for _, y_pred, _ in model_rows]
cm_list = [cm for _, _, cm in model_rows]

# Confusion matrices for all models. nrows * ncols gives eight slots, which
# matches the number of models -- presumably one subplot each; confirm in
# utils.plot_conf_matrix.
utils.plot_conf_matrix(
    models_name,
    cm_list,
    nrows=2,
    ncols=4,
    fig_a=10,
    fig_b=8,
)

# Summary delle ROC curves

In [None]:
# ROC curves for all models
# NOTE(review): pred_list holds the outputs of each model's `.predict` call,
# not `.predict_proba` / `.decision_function` scores. If utils.roc_auc_curve
# builds the curves directly from these values, each curve would be based on
# hard predictions only -- confirm against the helper's implementation.

utils.roc_auc_curve(models_name, pred_list, Y)