In [None]:
import os
import sys
sys.path.append("../")
from utils import settings
from utils.file_managers import get_json_content
from utils.customized_utils import get_top_chart, get_feature_elimination_chart,\
                                   confusion_matrix, get_rfe_comparison_chart,\
                                   get_accuracy_distribution, get_friedman_chart,\
                                   get_jaccard_matrix, get_representative_slices, \
                                   execute_test_5x2_ftest, get_jaccard_matrix_chb_vs_siena

In [None]:
# Dataset and feature-set selection shared by the cells below.
database = "chb-mit"
feature_set = 1

# Bind the per-database entry to its own name instead of overwriting the
# imported `settings` object. Rebinding `settings` made this cell
# non-idempotent: on a second run `settings[database]` would index the
# per-database entry rather than the full mapping.
db_settings = settings[database]
stats_directory = db_settings["stats"].format(feature_set)
# The helpers below receive the parent of the stats directory.
base_directory = os.path.join(stats_directory, "..")
features_map_file = db_settings["features_map_file"].format(feature_set)
classes = ["ictal", "no_ictal"]

In [None]:
# Representative slices for the listed model names.
# NOTE(review): the first list presumably names classifiers and the second
# feature-selection methods -- confirm against utils.customized_utils.
get_representative_slices(
    base_directory,
    ["ann", "svm", "tree", "forest", "knn"],
    ["tree", "svm", "lime", "shap", "embeddedrandomforest", "reciprocalranking"],
    start=450,
    end=50,
    num_slices=4,
)

In [None]:
# Confusion matrix over the two classes defined in the configuration cell.
# NOTE(review): 100 / "knn" / "tree" are presumably the feature count, the
# classifier and the feature-selection method -- confirm against the helper.
confusion_matrix(
    base_directory,
    classes,
    100,
    "knn",
    "tree",
)

In [None]:
# Chart of the top attributes for one feature-selector / classifier pair.
fs_model = "reciprocalranking"
ml_model = "ann"
num_features = 5
# Output image path encodes the feature set, database and both model names.
image_name = (
    f"../../Images/features_fs_{feature_set}_db_{database}"
    f"_fsm_{fs_model}_ml_{ml_model}.png"
)

print(
    f"Valor asignado a los {num_features} atributos más relevantes, se usa {fs_model}\n"
    "como seleccionador de características."
)
features_map = get_json_content(features_map_file)
get_top_chart(
    base_directory,
    features_map,
    num_features,
    ml_model,
    fs_model,
    image_name,
)

In [None]:
# Feature-elimination chart for one feature-selector / classifier pair.
fs_model = "reciprocalranking"
classifier = "knn"
# Output image path encodes the feature set, database and both model names.
image_name = "../../Images/RFE_fs_{}_db_{}_fsm_{}_ml_{}.png".format(feature_set, database,
                                                                    fs_model, classifier)

# The caption previously swapped the roles: it reported `fs_model` as the
# classifier and `classifier` as the feature selector.
print(
    f"Desempeño al usar los X atributos más importantes. Se usa {classifier} como clasificador\n"
    f"y {fs_model} como seleccionador de características."
)
get_feature_elimination_chart(base_directory, classifier, fs_model, image_name)

In [None]:
# Comparison chart across several classifiers and feature-selection methods.
# Note that `classifier` holds a list in this cell, and the caption prints it as such.
classifier = ["tree", "svm", "ann", "forest", "knn"]
image_name = f"../../Images/AccuracyOverFeatures_fs_{feature_set}_db_{database}.png"

print(
    "\n".join(
        (
            "Desempeño para diferente cantidad de atributos y métodos de selección de características.",
            f"Se usa {classifier} como clasificador.",
        )
    )
)
get_rfe_comparison_chart(
    base_directory,
    classifier,
    ["tree", "svm", "lime", "shap", "embeddedrandomforest", "reciprocalranking"],
    image_name,
)

In [None]:
# Accuracy distribution for the selected feature-count slices; the chart is
# handed the image path through the `save` keyword.
features_slices = [500, 150, 100, 50]
image_name = f"../../Images/AccuracyComp_fs_{feature_set}_db_{database}.png"

print(
    "\n".join(
        (
            "Precisión obtenida mediante las diversas combinaciones de modelos clasificadores",
            f"y seleccionadores de características. Se usa el top {features_slices} de atributos.",
        )
    )
)
get_accuracy_distribution(
    base_directory,
    features_slices,
    save=image_name,
)

In [None]:
# Friedman ranking chart for the selected feature-count slices; the chart is
# handed the image path through the `save` keyword.
features_slices = [500, 150, 100, 50]
image_name = f"../../Images/FriedmanChart_fs_{feature_set}_db_{database}.png"

print(
    "\n".join(
        (
            "Comparación entre modelos de selección de características; se muestra el ranking de Friedman,",
            "que es el ranking (de acuerdo al performance) promedio del modelo seleccionador",
            f"al usar diversos clasificadores y los {features_slices} atributos más importantes. En el",
            "otro eje se muestra la desviación estándar del ranking.",
        )
    )
)
get_friedman_chart(
    base_directory,
    features_slices,
    save=image_name,
)

In [None]:
# Jaccard-index matrix between the top attributes chosen by each
# feature-selection method, for a single database.
features_slices = [500, 25, 12, 6]
_model = "tree"
# The image used to be saved with a "FriedmanChart" prefix, which looks
# copy-pasted from the Friedman cell; this cell draws a Jaccard matrix, so it
# now uses the same "JaccardIndex" prefix as the CHB-vs-Siena Jaccard cell.
image_name = "../../Images/JaccardIndex{}_fs_{}_db_{}.png".format(_model, feature_set, database)

print(
    f"Comparación de los {features_slices} atributos de mayor importancia de acuerdo con cada\n"
    "modelo seleccionador de características. La similitud se calcula mediante el índice de Jaccard."
)
features_map = get_json_content(features_map_file)
get_jaccard_matrix(
    base_directory,
    features_map,
    features_slices,
    _model,
    ["tree", "svm", "lime", "shap", "embeddedrandomforest", "reciprocalranking"],
    save=image_name,
)

In [None]:
# Jaccard-index matrix comparing the top attributes between the CHB-MIT and
# Siena data directories.
features_slices = [500, 25, 12, 6]
_model = "knn"
image_name = "../../Images/JaccardIndex{}_fs_{}.png".format(_model, feature_set)
chbmit_path = "../../mldata/MLDataChbmit_fs{}/".format(feature_set)
# The template previously read "MLDataSiena_fs1{}", which expands to
# ".../MLDataSiena_fs11/" for feature_set=1; it now mirrors the CHB-MIT template.
siena_path = "../../mldata/MLDataSiena_fs{}/".format(feature_set)

print(
    f"Comparación de los {features_slices} atributos de mayor importancia de acuerdo con cada\n"
    "modelo seleccionador de características. La similitud se calcula mediante el índice de Jaccard."
)
features_map = get_json_content(features_map_file)
get_jaccard_matrix_chb_vs_siena(
    chbmit_path,
    siena_path,
    features_map,
    features_slices,
    _model,
    ["tree", "svm", "lime", "shap", "embeddedrandomforest", "reciprocalranking"],
    save=image_name,
)

In [None]:
# Run the 5x2 F-test helper comparing two feature-selection methods under one
# classification method.
classification_method = "knn"
best_fsm = "embeddedrandomforest"
# NOTE(review): num_features is a string here, while the top-chart cell uses
# an int -- confirm which type execute_test_5x2_ftest expects.
num_features = "6"
compare_with = "reciprocalranking"

execute_test_5x2_ftest(
    base_directory,
    classification_method,
    best_fsm,
    compare_with,
    num_features,
)