In [1]:
# numerical computation
import numpy as np
import itertools
from random import randint
# import sklearn

from sklearn.tree import DecisionTreeClassifier

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import hamming_loss
from sklearn.model_selection import GridSearchCV

# dataframe management
import pandas as pd

from sklearn.exceptions import DataConversionWarning
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=DataConversionWarning)
from IPython.display import clear_output


%autosave 60


Autosaving every 60 seconds


## Funciones a utilizar

In [2]:
def opened(path=''):
    """Load the 50 pre-split train/test partitions from CSV files.

    Partition ``j`` (0..49) is read from four files whose names are built by
    inserting ``path`` and ``j`` into the templates listed below.

    Parameters
    ----------
    path : str
        Fragment placed verbatim between ``test_train_dataset`` and the
        partition index in every file name.

    Returns
    -------
    tuple of four lists of pandas.DataFrame
        ``(X_training, X_testing, y_training, y_testing)``; position ``j`` of
        each list holds partition ``j``.
    """
    # One template per returned list, in the order the lists are returned.
    templates = (
        'test_train_dataset{}{}_X_train.csv',
        'test_train_dataset{}{}_X_test.csv',
        'test_train_dataset{}{}_y_train.csv',
        'test_train_dataset{}{}_y_test.csv',
    )
    X_training, X_testing, y_training, y_testing = [], [], [], []
    buckets = (X_training, X_testing, y_training, y_testing)

    for j in range(50):
        # Files of one partition are read together, partition by partition.
        for bucket, template in zip(buckets, templates):
            bucket.append(pd.read_csv(template.format(path, j)))

    return X_training, X_testing, y_training, y_testing

In [3]:
def frequency(valor):
    """Return the most frequent element of ``valor``.

    Ties are resolved in favour of the element that appears first.

    Parameters
    ----------
    valor : iterable
        Any iterable, including one-shot iterators/generators; it is
        materialised exactly once.

    Returns
    -------
    object
        The element with the highest number of occurrences.

    Raises
    ------
    IndexError
        If ``valor`` is empty.
    """
    from collections import Counter  # local import keeps the cell self-contained

    # Materialise once: re-calling list() on an iterator would yield nothing
    # the second time and silently return the first element.
    items = list(valor)
    if not items:
        raise IndexError('frequency() requires a non-empty iterable')

    try:
        counts = Counter(items)
    except TypeError:
        # Unhashable elements (e.g. lists): fall back to equality-based counting.
        return max(items, key=items.count)

    # dicts keep insertion order and max() returns the first maximum, so the
    # earliest-seen element wins a tie.
    return max(counts, key=counts.get)

In [4]:
def maximun(df, name):
    """Return the ``name`` value of the row with the highest ``accuracy_model``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``accuracy_model`` column and a ``name`` column.
    name : str
        Column whose value is returned.

    Returns
    -------
    scalar
        Value of ``df[name]`` in the top row after sorting by
        ``accuracy_model`` in descending order.
    """
    ranked = df.sort_values(by='accuracy_model', ascending=False)
    top_row = ranked.head(n=1)
    return top_row[name].tolist()[0]

#  Aplicación del algoritmo DecisionTree

In [5]:
def hyper_DT(path, features, name, multiclass=False):
    """Grid-search decision-tree hyper-parameters on every training partition.

    For each partition returned by ``opened(path)`` the training rows are
    reduced to ``features`` plus the target, rows whose feature vector occurs
    more than once are all discarded, the features are standardised, and a
    5-fold ``GridSearchCV`` over ``max_depth`` and ``min_samples_split`` is
    fitted with accuracy as the score.

    The best candidate of every partition is written to
    ``results/DT/DT_hyper_{name}.csv`` with the columns ``max_depth``,
    ``min_samples_split``, ``accuracy_model`` (mean cross-validated accuracy,
    rounded to 3 decimals) and ``std`` (its standard deviation across folds).

    Parameters
    ----------
    path : str
        Forwarded to ``opened`` to locate the partition CSV files.
    features : list
        Column labels used as model inputs.
    name : str
        Experiment name used in the output file name.
    multiclass : bool, default False
        If true the target is the single column ``'CRG'``; otherwise the three
        columns ``['HP', 'Diabetes', 'Otros']`` are used as a multi-label target.

    Returns
    -------
    None
    """
    import os  # the notebook only binds ``os.path`` (as ``path``) at module level

    out_file = 'results/DT/DT_hyper_{}.csv'.format(name)
    # Create the output directory up front so a missing folder cannot discard
    # the results after the (very long) grid search has finished.
    os.makedirs(os.path.dirname(out_file), exist_ok=True)

    # Only the training halves are needed for the hyper-parameter search.
    x_train, _, y_train, _ = opened(path=path)

    print('Terminada la apertura de BBDD')

    maxi_depth = 204
    turn_depth = list(range(5, maxi_depth))

    # The upper bound of min_samples_split is the row count of the first
    # partition (before duplicates are dropped), so the grid holds
    # len(turn_depth) * (rows - 5) candidates per partition.
    maxi_samples = x_train[0].shape[0]
    turn_min_samples_split = list(range(5, maxi_samples))

    param_grid = [
        {
            'max_depth': turn_depth,
            'min_samples_split': turn_min_samples_split,
        }
    ]

    DT_evaluate = []

    for j in range(len(x_train)):

        # keep=False removes every row whose feature vector is repeated.
        droping = pd.concat([x_train[j][features], y_train[j]], axis=1, sort=False)
        droping = droping.drop_duplicates(subset=features, keep=False)
        xtrain = droping[features]
        if multiclass:
            y_training = droping['CRG'].values.ravel()
        else:
            y_training = droping[['HP', 'Diabetes', 'Otros']]

        # Standardise with the mean and variance of this training partition.
        ss_train = StandardScaler().fit_transform(xtrain)

        # Search for the best parameters on the standardised partition.
        clf = GridSearchCV(DecisionTreeClassifier(criterion='entropy'),
                           param_grid, cv=KFold(n_splits=5), scoring='accuracy', n_jobs=-1)
        clf.fit(ss_train, y_training)

        # GridSearchCV's "test" scores are validation scores computed inside
        # the training partition; best_index_ points at the rank-1 candidate.
        best = clf.best_index_
        DT_evaluate.append([
            clf.best_params_['max_depth'],
            clf.best_params_['min_samples_split'],
            round(clf.cv_results_['mean_test_score'][best], 3),
            round(clf.cv_results_['std_test_score'][best], 3),
        ])
        print('Particion: ', j)

    labels_comp = ['max_depth', 'min_samples_split',
                   'accuracy_model', 'std']

    comparacion = pd.DataFrame(data=DT_evaluate, columns=labels_comp)

    comparacion.to_csv(out_file, index=False)
    

In [6]:
def _safe_divide(numerator, denominator):
    """Element-wise ``numerator / denominator`` that yields 0.0 where the denominator is 0."""
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    return np.divide(numerator, denominator,
                     out=np.zeros_like(numerator), where=denominator != 0)


def predict_DT(path, features, name, multiclass=False):
    """Train and evaluate a decision tree on every partition with tuned hyper-parameters.

    ``max_depth`` and ``min_samples_split`` are taken from the row with the
    highest ``accuracy_model`` in ``results/DT/DT_hyper_{name}.csv``. For each
    partition returned by ``opened(path)`` a tree is fitted on the
    de-duplicated, standardised training rows and scored on the test rows
    (standardised with the training statistics). One row of metrics per
    partition is written to ``results/DT/DT_predict_{name}.csv``.

    Parameters
    ----------
    path : str
        Forwarded to ``opened`` to locate the partition CSV files.
    features : list
        Column labels used as model inputs.
    name : str
        Experiment name used in the input and output file names.
    multiclass : bool, default False
        If true the target is ``'CRG'`` and the output columns are
        ``accuracy, precision, recall, f1`` (class-averaged values derived
        from the confusion matrix). Otherwise the target is
        ``['HP', 'Diabetes', 'Otros']`` and the output columns are
        ``accuracy, hamming_loss`` plus macro/micro precision, recall and F1.

    Returns
    -------
    None
    """
    x_train, x_test, y_train, y_test = opened(path=path)

    print('Terminada la apertura de BBDD')

    comparacion = pd.read_csv('results/DT/DT_hyper_{}.csv'.format(name))

    Depth = maximun(comparacion, 'max_depth')
    print('Max Depth:', Depth)
    Split = maximun(comparacion, 'min_samples_split')
    print('Min Samples Split: ', Split)

    if multiclass:
        target = 'CRG'
        columns = ['accuracy', 'precision', 'recall', 'f1']
    else:
        target = ['HP', 'Diabetes', 'Otros']
        columns = ['accuracy', 'hamming_loss',
                   'precision_macro', 'precision_micro',
                   'recall_macro', 'recall_micro',
                   'f1_macro', 'f1_micro']

    rows = []  # one list of metric values per partition, ordered like ``columns``

    for i in range(len(x_train)):

        # keep=False removes every training row whose feature vector is repeated.
        droping_train = pd.concat([x_train[i][features], y_train[i]], axis=1, sort=False)
        droping_train = droping_train.drop_duplicates(subset=features, keep=False)
        xtrain = droping_train[features]
        ytrain = droping_train[target]

        # The test partition is evaluated in full. The previous code computed a
        # de-duplicated copy of it but stored it in an unused variable, so it
        # never affected the metrics; that dead statement has been removed.
        # NOTE(review): confirm whether de-duplicating the test rows was intended.
        droping_test = pd.concat([x_test[i][features], y_test[i]], axis=1, sort=False)
        xtest = droping_test[features]
        ytest = droping_test[target]

        # Standardise both halves with the training mean and variance.
        ss = StandardScaler()
        ss_train = ss.fit_transform(xtrain)
        ss_test = ss.transform(xtest)

        clf = DecisionTreeClassifier(criterion='entropy', max_depth=Depth,
                                     min_samples_split=Split)
        clf.fit(ss_train, ytrain.values.ravel() if multiclass else ytrain)

        y_true, y_pred = ytest, clf.predict(ss_test)
        if multiclass:
            cm = confusion_matrix(y_true, y_pred)
            TP = np.diag(cm)
            FP = np.sum(cm, axis=0) - TP  # column sums: predicted as the class
            FN = np.sum(cm, axis=1) - TP  # row sums: actually of the class

            # A class that is never predicted (or never present) would give
            # 0/0 = NaN and turn the whole average into NaN; score it as 0.
            precision = _safe_divide(TP, TP + FP)
            recall = _safe_divide(TP, TP + FN)
            f1 = _safe_divide(2 * precision * recall, precision + recall)

            rows.append([accuracy_score(y_true, y_pred),
                         np.mean(precision), np.mean(recall), np.mean(f1)])
        else:
            rows.append([
                accuracy_score(y_true, y_pred),
                hamming_loss(y_true, y_pred),
                precision_score(y_true, y_pred, average='macro'),
                precision_score(y_true, y_pred, average='micro'),
                recall_score(y_true, y_pred, average='macro'),
                recall_score(y_true, y_pred, average='micro'),
                f1_score(y_true, y_pred, average='macro'),
                f1_score(y_true, y_pred, average='micro'),
            ])

    predict = pd.DataFrame(rows, columns=columns)

    predict.to_csv('results/DT/DT_predict_{}.csv'.format(name), index=False)


## Bucles para las diferentes ejecuciones

In [7]:
# ``path`` is the os.path module; the run cells use path.exists() to skip
# experiments whose result files are already on disk.
import os.path as path

# NOTE(review): not referenced anywhere in the visible part of the notebook.
boolean_class = [False, True] * 2

### Selección de Características: Frecuencia

In [8]:
# Frequency-based feature selection: load one entry per variant listed in
# ``names``. The first line of each file is evaluated as a Python expression and
# the result is later passed as the ``features`` argument of the DT functions.
# NOTE(review): eval() executes whatever the file contains. If the files hold
# plain list literals, ast.literal_eval would be a safer drop-in; confirm the
# file format before switching.
names = [
    'ocurrencia_all',
    'ocurrencia_ill',
    'presencia_all',
    'presencia_ill',
]
features_freq = []
for n in names:
    with open("feature_selection/freq_{}.txt".format(n), "r") as file:
        first_line = file.readline()
    features_freq.append(eval(first_line))

In [9]:
# Dataset path fragments (inserted into the CSV file names by opened()) and the
# matching experiment names for the multi-class runs with frequency-selected
# features. Both lists are zipped position by position with features_freq.
# Presumably O/P stand for ocurrencia/presencia and A/WO for all/ill, going by
# the paired names; verify against the dataset files.
paths_CLASS = [
    '/class/O_WC_A_',
    '/class/O_WC_WO_',
    '/class/P_WC_A_',
    '/class/P_WC_WO_',
]
names_CLASS_fr = [
    'freq_all_class_O',
    'freq_ill_class_O',
    'freq_all_class_P',
    'freq_ill_class_P',
]

In [10]:
# Multi-class runs with frequency-selected features. For each (path, name,
# features) triple the grid search and the evaluation are executed only when
# their result CSV is missing, so results/DT/ acts as a cache.
for p, n, f in zip(paths_CLASS, names_CLASS_fr, features_freq):
    if not path.exists('results/DT/DT_hyper_{}.csv'.format(n)):
        hyper_DT(p, f, n, True)
        print()
        print('--------------------------------------------------------')
        print()
    else:
        print('Ya existe el hyperparametro:', n)

    if not path.exists('results/DT/DT_predict_{}.csv'.format(n)):
        predict_DT(p, f, n, True)
        print()
        print('--------------------------------------------------------')
        print()
    else:
        print('Ya existe los resultados:', n)

Ya existe el hyperparametro: freq_all_class_O
Ya existe los resultados: freq_all_class_O
Ya existe el hyperparametro: freq_ill_class_O
Ya existe los resultados: freq_ill_class_O
Ya existe el hyperparametro: freq_all_class_P
Ya existe los resultados: freq_all_class_P
Ya existe el hyperparametro: freq_ill_class_P
Ya existe los resultados: freq_ill_class_P


In [11]:
# Dataset path fragments and experiment names for the multi-label runs with
# frequency-selected features; zipped position by position with features_freq.
paths_LABEL = [
    '/label/O_WL_A_',
    '/label/O_WL_WO_',
    '/label/P_WL_A_',
    '/label/P_WL_WO_',
]
names_LABEL_fr = [
    'freq_all_label_O',
    'freq_ill_label_O',
    'freq_all_label_P',
    'freq_ill_label_P',
]

In [12]:
# Multi-label runs with frequency-selected features (multiclass left at its
# default of False). Each step is skipped when its result CSV already exists.
for p, n, f in zip(paths_LABEL, names_LABEL_fr, features_freq):
    if not path.exists('results/DT/DT_hyper_{}.csv'.format(n)):
        hyper_DT(p, f, n)
        print()
        print('--------------------------------------------------------')
        print()
    else:
        print('Ya existe el hyperparametro:', n)

    if not path.exists('results/DT/DT_predict_{}.csv'.format(n)):
        predict_DT(p, f, n)
        print()
        print('--------------------------------------------------------')
        print()
    else:
        print('Ya existe los resultados:', n)

Ya existe el hyperparametro: freq_all_label_O
Ya existe los resultados: freq_all_label_O
Ya existe el hyperparametro: freq_ill_label_O
Ya existe los resultados: freq_ill_label_O
Ya existe el hyperparametro: freq_all_label_P
Ya existe los resultados: freq_all_label_P
Ya existe el hyperparametro: freq_ill_label_P
Ya existe los resultados: freq_ill_label_P


### Selección de Características: Random Forest

In [13]:
# Random-forest feature selection, multi-label variants: evaluate the first
# line of each rf_<variant>.txt file and collect the results in order.
# NOTE(review): eval() executes whatever the file contains. If the files hold
# plain list literals, ast.literal_eval would be a safer drop-in; confirm the
# file format before switching.
names = [
    'label_o_all',
    'label_o_ill',
    'label_p_all',
    'label_p_ill',
]
features_rf_label = []
for n in names:
    with open("feature_selection/rf_{}.txt".format(n), "r") as file:
        first_line = file.readline()
    features_rf_label.append(eval(first_line))

In [14]:
# Dataset path fragments and experiment names for the multi-label runs with
# random-forest-selected features; zipped with features_rf_label.
path_label = [
    '/label/O_WL_A_',
    '/label/O_WL_WO_',
    '/label/P_WL_A_',
    '/label/P_WL_WO_',
]
names_label_rf = [
    'rf_all_label_O',
    'rf_ill_label_O',
    'rf_all_label_P',
    'rf_ill_label_P',
]

In [15]:
# Multi-label runs with random-forest-selected features. Each step is skipped
# when its result CSV already exists under results/DT/.
for p, n, f in zip(path_label, names_label_rf, features_rf_label):
    if not path.exists('results/DT/DT_hyper_{}.csv'.format(n)):
        hyper_DT(p, f, n)
        print()
        print('--------------------------------------------------------')
        print()
    else:
        print('Ya existe el hyperparametro:', n)

    if not path.exists('results/DT/DT_predict_{}.csv'.format(n)):
        predict_DT(p, f, n)
        print()
        print('--------------------------------------------------------')
        print()
    else:
        print('Ya existe los resultados:', n)

Ya existe el hyperparametro: rf_all_label_O
Ya existe los resultados: rf_all_label_O
Ya existe el hyperparametro: rf_ill_label_O
Ya existe los resultados: rf_ill_label_O
Ya existe el hyperparametro: rf_all_label_P
Ya existe los resultados: rf_all_label_P
Ya existe el hyperparametro: rf_ill_label_P
Ya existe los resultados: rf_ill_label_P


In [16]:
# Random-forest feature selection, multi-class variants: evaluate the first
# line of each rf_<variant>.txt file and collect the results in order.
# NOTE(review): eval() executes whatever the file contains. If the files hold
# plain list literals, ast.literal_eval would be a safer drop-in; confirm the
# file format before switching.
names = [
    'class_o_all',
    'class_o_ill',
    'class_p_all',
    'class_p_ill',
]
features_rf_class = []
for n in names:
    with open("feature_selection/rf_{}.txt".format(n), "r") as file:
        first_line = file.readline()
    features_rf_class.append(eval(first_line))

In [17]:
# Dataset path fragments and experiment names for the multi-class runs with
# random-forest-selected features; zipped with features_rf_class.
path_class = [
    '/class/O_WC_A_',
    '/class/O_WC_WO_',
    '/class/P_WC_A_',
    '/class/P_WC_WO_',
]
name_class_rf = [
    'rf_all_class_O',
    'rf_ill_class_O',
    'rf_all_class_P',
    'rf_ill_class_P',
]

In [18]:
# Multi-class runs with random-forest-selected features. Each step is skipped
# when its result CSV already exists under results/DT/.
for p, n, f in zip(path_class, name_class_rf, features_rf_class):
    if not path.exists('results/DT/DT_hyper_{}.csv'.format(n)):
        hyper_DT(p, f, n, multiclass=True)
        print()
        print('--------------------------------------------------------')
        print()
    else:
        print('Ya existe el hyperparametro:', n)

    if not path.exists('results/DT/DT_predict_{}.csv'.format(n)):
        predict_DT(p, f, n, multiclass=True)
        print()
        print('--------------------------------------------------------')
        print()
    else:
        print('Ya existe los resultados:', n)

Ya existe el hyperparametro: rf_all_class_O
Ya existe los resultados: rf_all_class_O
Ya existe el hyperparametro: rf_ill_class_O
Ya existe los resultados: rf_ill_class_O
Ya existe el hyperparametro: rf_all_class_P
Ya existe los resultados: rf_all_class_P
Ya existe el hyperparametro: rf_ill_class_P
Ya existe los resultados: rf_ill_class_P


### Selección de características: F de Fisher

In [19]:
# Fisher-score (fc) feature selection, multi-label variants: evaluate the first
# line of each fc_<variant>.txt file and collect the results in order.
# NOTE(review): eval() executes whatever the file contains. If the files hold
# plain list literals, ast.literal_eval would be a safer drop-in; confirm the
# file format before switching.
names = [
    'label_o_all',
    'label_o_ill',
    'label_p_all',
    'label_p_ill',
]
features_fc_label = []
for n in names:
    with open("feature_selection/fc_{}.txt".format(n), "r") as file:
        first_line = file.readline()
    features_fc_label.append(eval(first_line))

In [20]:
# Dataset path fragments and experiment names for the multi-label runs with
# Fisher-selected features; zipped with features_fc_label.
path_label = [
    '/label/O_WL_A_',
    '/label/O_WL_WO_',
    '/label/P_WL_A_',
    '/label/P_WL_WO_',
]
names_label_fc = [
    'fc_all_label_O',
    'fc_ill_label_O',
    'fc_all_label_P',
    'fc_ill_label_P',
]

In [21]:
# Multi-label runs with Fisher-selected features. Each step is skipped when its
# result CSV already exists under results/DT/.
for p, n, f in zip(path_label, names_label_fc, features_fc_label):
    if not path.exists('results/DT/DT_hyper_{}.csv'.format(n)):
        hyper_DT(p, f, n)
        print()
        print('--------------------------------------------------------')
        print()
    else:
        print('Ya existe el hyperparametro:', n)

    if not path.exists('results/DT/DT_predict_{}.csv'.format(n)):
        predict_DT(p, f, n)
        print()
        print('--------------------------------------------------------')
        print()
    else:
        print('Ya existe los resultados:', n)

Ya existe el hyperparametro: fc_all_label_O
Ya existe los resultados: fc_all_label_O
Ya existe el hyperparametro: fc_ill_label_O
Ya existe los resultados: fc_ill_label_O
Ya existe el hyperparametro: fc_all_label_P
Ya existe los resultados: fc_all_label_P
Ya existe el hyperparametro: fc_ill_label_P
Ya existe los resultados: fc_ill_label_P


In [22]:
# Fisher-score (fc) feature selection, multi-class variants.
# This cell previously re-read the random-forest files (rf_<variant>.txt), so
# the fc_*_class_* experiments would have been run with random-forest features.
# It now reads the fc_<variant>.txt files, mirroring the multi-label fc cell.
# NOTE(review): assumes feature_selection/fc_class_*.txt exist alongside the
# fc_label_*.txt files; confirm.
# NOTE(review): eval() executes whatever the file contains. If the files hold
# plain list literals, ast.literal_eval would be a safer drop-in; confirm the
# file format before switching.
names = ['class_o_all', 'class_o_ill', 'class_p_all', 'class_p_ill']
features_fc_class = []
for n in names:
    with open("feature_selection/fc_{}.txt".format(n), "r") as file:
        features_fc_class.append(eval(file.readline()))

# The run cell for the fc_*_class_* experiments iterates over
# ``features_rf_class`` (the name this cell used to rebind), so keep that name
# pointing at the Fisher feature lists.
features_rf_class = features_fc_class

In [23]:
# Dataset path fragments and experiment names for the multi-class runs with
# Fisher-selected features.
path_class = [
    '/class/O_WC_A_',
    '/class/O_WC_WO_',
    '/class/P_WC_A_',
    '/class/P_WC_WO_',
]
name_class_fc = [
    'fc_all_class_O',
    'fc_ill_class_O',
    'fc_all_class_P',
    'fc_ill_class_P',
]

In [24]:
# Multi-class runs for the Fisher (fc) experiment names. Each step is skipped
# when its result CSV already exists under results/DT/.
# NOTE(review): the feature lists come from ``features_rf_class``, whose name
# suggests random-forest features; make sure it holds the Fisher feature lists
# when this cell runs.
for p, n, f in zip(path_class, name_class_fc, features_rf_class):
    if not path.exists('results/DT/DT_hyper_{}.csv'.format(n)):
        hyper_DT(p, f, n, multiclass=True)
        print()
        print('--------------------------------------------------------')
        print()
    else:
        print('Ya existe el hyperparametro:', n)

    if not path.exists('results/DT/DT_predict_{}.csv'.format(n)):
        predict_DT(p, f, n, multiclass=True)
        print()
        print('--------------------------------------------------------')
        print()
    else:
        print('Ya existe los resultados:', n)

Ya existe el hyperparametro: fc_all_class_O
Ya existe los resultados: fc_all_class_O
Ya existe el hyperparametro: fc_ill_class_O
Ya existe los resultados: fc_ill_class_O
Ya existe el hyperparametro: fc_all_class_P
Ya existe los resultados: fc_all_class_P
Ya existe el hyperparametro: fc_ill_class_P
Ya existe los resultados: fc_ill_class_P


### Resultados

In [25]:
def resultados_etiquetas(names):
    """Print a summary of the multi-label results of each experiment in ``names``.

    For every name the files ``results/DT/DT_hyper_{name}.csv`` and
    ``results/DT/DT_predict_{name}.csv`` are read. The selected ``max_depth``
    and ``min_samples_split`` are printed, followed by the mean and standard
    deviation (rounded to 3 decimals) of every metric column across partitions.

    Parameters
    ----------
    names : list of str
        Experiment names, as used in the result file names.

    Returns
    -------
    None
    """
    # (printed label, column of the predict CSV). The recall rows used to be
    # labelled "exactitud", which is Spanish for accuracy; they now read
    # "exhaustividad" (recall) so they cannot be mistaken for the accuracy row.
    metrics = (
        ('Tasa de acierto:', 'accuracy'),
        ('Tasa de Hamming Loss:', 'hamming_loss'),
        ('Tasa de precision(macro)', 'precision_macro'),
        ('Tasa de precision(micro)', 'precision_micro'),
        ('Tasa de exhaustividad(macro):', 'recall_macro'),
        ('Tasa de exhaustividad(micro):', 'recall_micro'),
        ('Tasa F1-Score(macro)', 'f1_macro'),
        ('Tasa F1-Score(micro)', 'f1_micro'),
    )

    # Read every file before printing anything, so a missing file fails early.
    loaded = [
        (name,
         pd.read_csv('results/DT/DT_hyper_{}.csv'.format(name)),
         pd.read_csv('results/DT/DT_predict_{}.csv'.format(name)))
        for name in names
    ]

    for n, hyper, predict in loaded:
        print(n)
        print()
        print('Max Depth:', maximun(hyper, 'max_depth'))
        print('Min Samples Split: ', maximun(hyper, 'min_samples_split'))

        for label, column in metrics:
            values = predict[column]
            print(label, round(np.mean(values), 3), '+/-', round(np.std(values), 3))
        print('---------------------------------------------------------------')

In [26]:
# Multi-label summary for the frequency-selected feature experiments.
resultados_etiquetas(names_LABEL_fr)

freq_all_label_O

Max Depth: 18
Min Samples Split:  25
Tasa de acierto: 0.816 +/- 0.014
Tasa de Hamming Loss: 0.071 +/- 0.005
Tasa de precision(macro) 0.892 +/- 0.012
Tasa de precision(micro) 0.938 +/- 0.008
Tasa de exactitud(macro): 0.847 +/- 0.018
Tasa de exactitud(micro): 0.907 +/- 0.01
Tasa F1-Score(macro) 0.867 +/- 0.011
Tasa F1-Score(micro) 0.922 +/- 0.006
---------------------------------------------------------------
freq_ill_label_O

Max Depth: 82
Min Samples Split:  25
Tasa de acierto: 0.798 +/- 0.019
Tasa de Hamming Loss: 0.079 +/- 0.008
Tasa de precision(macro) 0.901 +/- 0.017
Tasa de precision(micro) 0.944 +/- 0.011
Tasa de exactitud(macro): 0.857 +/- 0.018
Tasa de exactitud(micro): 0.92 +/- 0.011
Tasa F1-Score(macro) 0.876 +/- 0.012
Tasa F1-Score(micro) 0.932 +/- 0.007
---------------------------------------------------------------
freq_all_label_P

Max Depth: 14
Min Samples Split:  21
Tasa de acierto: 0.807 +/- 0.017
Tasa de Hamming Loss: 0.075 +/- 0.007
Tasa de precisio

In [27]:
# Multi-label summary for the Fisher-selected (fc) feature experiments.
resultados_etiquetas(names_label_fc)

fc_all_label_O

Max Depth: 32
Min Samples Split:  25
Tasa de acierto: 0.831 +/- 0.015
Tasa de Hamming Loss: 0.066 +/- 0.006
Tasa de precision(macro) 0.908 +/- 0.012
Tasa de precision(micro) 0.945 +/- 0.008
Tasa de exactitud(macro): 0.856 +/- 0.021
Tasa de exactitud(micro): 0.912 +/- 0.011
Tasa F1-Score(macro) 0.879 +/- 0.014
Tasa F1-Score(micro) 0.928 +/- 0.007
---------------------------------------------------------------
fc_ill_label_O

Max Depth: 30
Min Samples Split:  7
Tasa de acierto: 0.807 +/- 0.019
Tasa de Hamming Loss: 0.076 +/- 0.009
Tasa de precision(macro) 0.91 +/- 0.017
Tasa de precision(micro) 0.948 +/- 0.01
Tasa de exactitud(macro): 0.865 +/- 0.017
Tasa de exactitud(micro): 0.92 +/- 0.011
Tasa F1-Score(macro) 0.886 +/- 0.013
Tasa F1-Score(micro) 0.934 +/- 0.008
---------------------------------------------------------------
fc_all_label_P

Max Depth: 18
Min Samples Split:  13
Tasa de acierto: 0.835 +/- 0.013
Tasa de Hamming Loss: 0.063 +/- 0.005
Tasa de precision(macro)

In [28]:
# Multi-label summary for the random-forest-selected (rf) feature experiments.
resultados_etiquetas(names_label_rf)

rf_all_label_O

Max Depth: 74
Min Samples Split:  5
Tasa de acierto: 0.812 +/- 0.017
Tasa de Hamming Loss: 0.075 +/- 0.007
Tasa de precision(macro) 0.888 +/- 0.016
Tasa de precision(micro) 0.935 +/- 0.01
Tasa de exactitud(macro): 0.847 +/- 0.02
Tasa de exactitud(micro): 0.903 +/- 0.012
Tasa F1-Score(macro) 0.866 +/- 0.015
Tasa F1-Score(micro) 0.919 +/- 0.008
---------------------------------------------------------------
rf_ill_label_O

Max Depth: 46
Min Samples Split:  43
Tasa de acierto: 0.805 +/- 0.017
Tasa de Hamming Loss: 0.076 +/- 0.007
Tasa de precision(macro) 0.914 +/- 0.017
Tasa de precision(micro) 0.948 +/- 0.009
Tasa de exactitud(macro): 0.855 +/- 0.02
Tasa de exactitud(micro): 0.92 +/- 0.012
Tasa F1-Score(macro) 0.88 +/- 0.014
Tasa F1-Score(micro) 0.934 +/- 0.007
---------------------------------------------------------------
rf_all_label_P

Max Depth: 10
Min Samples Split:  13
Tasa de acierto: 0.823 +/- 0.013
Tasa de Hamming Loss: 0.068 +/- 0.005
Tasa de precision(macro) 0

In [29]:
def resultados_clases(names):
    """Print a summary of the multi-class results of each experiment in ``names``.

    For every name the files ``results/DT/DT_hyper_{name}.csv`` and
    ``results/DT/DT_predict_{name}.csv`` are read. The selected ``max_depth``
    and ``min_samples_split`` are printed, followed by the mean and standard
    deviation (rounded to 3 decimals) of every metric column across partitions.

    Parameters
    ----------
    names : list of str
        Experiment names, as used in the result file names.

    Returns
    -------
    None
    """
    # (printed label, column of the predict CSV). The recall row used to be
    # labelled "exactitud", which is Spanish for accuracy; it now reads
    # "exhaustividad" (recall) so it cannot be mistaken for the accuracy row.
    metrics = (
        ('Tasa de acierto:', 'accuracy'),
        ('Tasa de precision', 'precision'),
        ('Tasa de exhaustividad:', 'recall'),
        ('Tasa F1-Score', 'f1'),
    )

    # Read every file before printing anything, so a missing file fails early.
    loaded = [
        (name,
         pd.read_csv('results/DT/DT_hyper_{}.csv'.format(name)),
         pd.read_csv('results/DT/DT_predict_{}.csv'.format(name)))
        for name in names
    ]

    for n, hyper, predict in loaded:
        print(n)
        print()
        print('Max Depth:', maximun(hyper, 'max_depth'))
        print('Min Samples Split: ', maximun(hyper, 'min_samples_split'))

        for label, column in metrics:
            values = predict[column]
            print(label, round(np.mean(values), 3), '+/-', round(np.std(values), 3))
        print('---------------------------------------------------------------')

In [30]:
# Multi-class summary for the frequency-selected feature experiments.
resultados_clases(names_CLASS_fr)

freq_all_class_O

Max Depth: 68
Min Samples Split:  9
Tasa de acierto: 0.807 +/- 0.016
Tasa de precision 0.806 +/- 0.017
Tasa de exactitud: 0.807 +/- 0.016
Tasa F1-Score 0.805 +/- 0.017
---------------------------------------------------------------
freq_ill_class_O

Max Depth: 10
Min Samples Split:  7
Tasa de acierto: 0.793 +/- 0.017
Tasa de precision 0.798 +/- 0.017
Tasa de exactitud: 0.793 +/- 0.017
Tasa F1-Score 0.791 +/- 0.017
---------------------------------------------------------------
freq_all_class_P

Max Depth: 66
Min Samples Split:  33
Tasa de acierto: 0.801 +/- 0.015
Tasa de precision 0.802 +/- 0.016
Tasa de exactitud: 0.801 +/- 0.015
Tasa F1-Score 0.799 +/- 0.015
---------------------------------------------------------------
freq_ill_class_P

Max Depth: 14
Min Samples Split:  11
Tasa de acierto: 0.781 +/- 0.021
Tasa de precision 0.783 +/- 0.021
Tasa de exactitud: 0.781 +/- 0.021
Tasa F1-Score 0.78 +/- 0.022
---------------------------------------------------------------

In [31]:
# Multi-class summary for the Fisher (fc) experiment names.
resultados_clases(name_class_fc)

fc_all_class_O

Max Depth: 76
Min Samples Split:  25
Tasa de acierto: 0.822 +/- 0.015
Tasa de precision 0.822 +/- 0.015
Tasa de exactitud: 0.822 +/- 0.015
Tasa F1-Score 0.82 +/- 0.016
---------------------------------------------------------------
fc_ill_class_O

Max Depth: 14
Min Samples Split:  33
Tasa de acierto: 0.811 +/- 0.022
Tasa de precision 0.816 +/- 0.024
Tasa de exactitud: 0.811 +/- 0.022
Tasa F1-Score 0.81 +/- 0.022
---------------------------------------------------------------
fc_all_class_P

Max Depth: 12
Min Samples Split:  15
Tasa de acierto: 0.832 +/- 0.015
Tasa de precision 0.839 +/- 0.014
Tasa de exactitud: 0.832 +/- 0.015
Tasa F1-Score 0.831 +/- 0.015
---------------------------------------------------------------
fc_ill_class_P

Max Depth: 14
Min Samples Split:  11
Tasa de acierto: 0.818 +/- 0.018
Tasa de precision 0.826 +/- 0.018
Tasa de exactitud: 0.818 +/- 0.018
Tasa F1-Score 0.818 +/- 0.018
---------------------------------------------------------------


In [32]:
# Multi-class summary for the random-forest-selected (rf) feature experiments.
resultados_clases(name_class_rf)

rf_all_class_O

Max Depth: 38
Min Samples Split:  21
Tasa de acierto: 0.817 +/- 0.014
Tasa de precision 0.817 +/- 0.014
Tasa de exactitud: 0.817 +/- 0.014
Tasa F1-Score 0.816 +/- 0.014
---------------------------------------------------------------
rf_ill_class_O

Max Depth: 10
Min Samples Split:  31
Tasa de acierto: 0.8 +/- 0.018
Tasa de precision 0.806 +/- 0.018
Tasa de exactitud: 0.8 +/- 0.018
Tasa F1-Score 0.799 +/- 0.018
---------------------------------------------------------------
rf_all_class_P

Max Depth: 14
Min Samples Split:  11
Tasa de acierto: 0.816 +/- 0.015
Tasa de precision 0.817 +/- 0.016
Tasa de exactitud: 0.816 +/- 0.015
Tasa F1-Score 0.814 +/- 0.015
---------------------------------------------------------------
rf_ill_class_P

Max Depth: 14
Min Samples Split:  47
Tasa de acierto: 0.801 +/- 0.021
Tasa de precision 0.807 +/- 0.024
Tasa de exactitud: 0.801 +/- 0.021
Tasa F1-Score 0.801 +/- 0.021
---------------------------------------------------------------


# Mejor Configuración

## Multi-clase

In [33]:
# Best multi-class configuration: for each variant load the 'atc', 'cie' and
# 'cie_atc' feature files and add an ['Edad', 'Sexo'] baseline, giving four
# feature sets per variant (eight in total) that line up with ``names_class``.
# NOTE(review): eval() executes whatever the file contains. If the files hold
# plain list literals, ast.literal_eval would be a safer drop-in; confirm the
# file format before switching.
names = ['fc_class_p_all', 'fc_class_p_ill']
names_features = ['atc', 'cie', 'cie_atc']
features = []
for n in names:
    for f in names_features:
        with open("feature_selection/best/{}_{}.txt".format(n, f), "r") as file:
            first_line = file.readline()
        features.append(eval(first_line))
    # Baseline feature set appended after the three file-based sets.
    features.append(['Edad', 'Sexo'])

paths_class = ['/class/P_WC_A_', '/class/P_WC_WO_']
names_class = [
    'fc_all_class_P_atc',
    'fc_all_class_P_cie',
    'fc_all_class_P_cie_atc',
    'fc_all_class_P_E_S',
    'fc_ill_class_P_atc',
    'fc_ill_class_P_cie',
    'fc_ill_class_P_cie_atc',
    'fc_ill_class_P_E_S',
]

In [34]:
# Best multi-class configuration without the ['Edad', 'Sexo'] baseline: three
# feature sets per variant (six in total) lining up with ``names_class``.
# NOTE(review): this rebinds ``features``, ``paths_class`` and ``names_class``;
# any code that pairs them with ``paths_class`` must use three entries per
# path, not four.
# NOTE(review): eval() executes whatever the file contains. If the files hold
# plain list literals, ast.literal_eval would be a safer drop-in; confirm the
# file format before switching.
names = ['fc_class_p_all', 'fc_class_p_ill']
names_features = ['atc', 'cie', 'cie_atc']
features = []
for n in names:
    for f in names_features:
        with open("feature_selection/best/{}_{}.txt".format(n, f), "r") as file:
            first_line = file.readline()
        features.append(eval(first_line))

paths_class = ['/class/P_WC_A_', '/class/P_WC_WO_']
names_class = [
    'fc_all_class_P_atc',
    'fc_all_class_P_cie',
    'fc_all_class_P_cie_atc',
    'fc_ill_class_P_atc',
    'fc_ill_class_P_cie',
    'fc_ill_class_P_cie_atc',
]

In [35]:
# Multi-class runs for the best configuration. ``names_class`` and ``features``
# hold the experiments of the first path followed by those of the second one,
# so each path gets an equal, contiguous share of both lists.
# The share size is derived from the list lengths: the previous hard-coded
# slices [0:4] / [4:8] paired 'fc_ill_class_P_atc' with the "all" dataset path
# whenever the lists hold three entries per path.
assert len(names_class) == len(features), 'names_class and features must line up'
assert len(names_class) % len(paths_class) == 0, 'uneven number of experiments per path'

k = ['all', 'ill']
per_path = len(names_class) // len(paths_class)
for idx, (p, i) in enumerate(zip(paths_class, k)):
    names = names_class[idx * per_path:(idx + 1) * per_path]
    feat = features[idx * per_path:(idx + 1) * per_path]
    for n, f in zip(names, feat):
        # Each step is skipped when its result CSV already exists.
        if path.exists('results/DT/DT_hyper_{}.csv'.format(n)):
            print('Ya existe el hyperparametro:', n)
        else:
            hyper_DT(p, f, n, multiclass=True)
            print()
            print('--------------------------------------------------------')
            print()

        if path.exists('results/DT/DT_predict_{}.csv'.format(n)):
            print('Ya existe los resultados:', n)
        else:
            predict_DT(p, f, n, multiclass=True)
            print()
            print('--------------------------------------------------------')
            print()

Ya existe el hyperparametro: fc_all_class_P_atc
Ya existe los resultados: fc_all_class_P_atc
Ya existe el hyperparametro: fc_all_class_P_cie
Ya existe los resultados: fc_all_class_P_cie
Ya existe el hyperparametro: fc_all_class_P_cie_atc
Ya existe los resultados: fc_all_class_P_cie_atc
Ya existe el hyperparametro: fc_ill_class_P_atc
Ya existe los resultados: fc_ill_class_P_atc
Ya existe el hyperparametro: fc_ill_class_P_cie
Ya existe los resultados: fc_ill_class_P_cie
Ya existe el hyperparametro: fc_ill_class_P_cie_atc
Ya existe los resultados: fc_ill_class_P_cie_atc


In [36]:
# Multi-class summary for the experiments listed in names_class.
resultados_clases(names_class)

fc_all_class_P_atc

Max Depth: 60
Min Samples Split:  25
Tasa de acierto: 0.773 +/- 0.029
Tasa de precision 0.774 +/- 0.023
Tasa de exactitud: 0.773 +/- 0.029
Tasa F1-Score 0.769 +/- 0.032
---------------------------------------------------------------
fc_all_class_P_cie

Max Depth: 30
Min Samples Split:  30
Tasa de acierto: 0.589 +/- 0.088
Tasa de precision 0.651 +/- 0.059
Tasa de exactitud: 0.589 +/- 0.088
Tasa F1-Score 0.573 +/- 0.098
---------------------------------------------------------------
fc_all_class_P_cie_atc

Max Depth: 70
Min Samples Split:  35
Tasa de acierto: 0.845 +/- 0.019
Tasa de precision 0.851 +/- 0.019
Tasa de exactitud: 0.845 +/- 0.019
Tasa F1-Score 0.844 +/- 0.019
---------------------------------------------------------------
fc_ill_class_P_atc

Max Depth: 125
Min Samples Split:  15
Tasa de acierto: 0.768 +/- 0.029
Tasa de precision 0.77 +/- 0.022
Tasa de exactitud: 0.768 +/- 0.029
Tasa F1-Score 0.765 +/- 0.031
------------------------------------------------

## Multi-label

In [37]:
# Best multi-label configuration: for each variant load the 'atc', 'cie' and
# 'cie_atc' feature files and add an ['Edad', 'Sexo'] baseline, giving four
# feature sets per variant (eight in total) that line up with ``names_label``.
# NOTE(review): eval() executes whatever the file contains. If the files hold
# plain list literals, ast.literal_eval would be a safer drop-in; confirm the
# file format before switching.
names = ['fc_label_p_all', 'fc_label_p_ill']
names_features = ['atc', 'cie', 'cie_atc']
features = []
for n in names:
    for f in names_features:
        with open("feature_selection/best/{}_{}.txt".format(n, f), "r") as file:
            first_line = file.readline()
        features.append(eval(first_line))
    # Baseline feature set appended after the three file-based sets.
    features.append(['Edad', 'Sexo'])

paths_label = ['/label/P_WL_A_', '/label/P_WL_WO_']
names_label = [
    'fc_all_label_P_atc',
    'fc_all_label_P_cie',
    'fc_all_label_P_cie_atc',
    'fc_all_label_P_E_S',
    'fc_ill_label_P_atc',
    'fc_ill_label_P_cie',
    'fc_ill_label_P_cie_atc',
    'fc_ill_label_P_E_S',
]

In [38]:
# Multi-label runs for the best configuration. ``names_label`` and ``features``
# hold the experiments of the first path followed by those of the second one,
# so each path gets an equal, contiguous share of both lists.
# The share size is derived from the list lengths instead of the hard-coded
# slices [0:4] / [4:8], which silently mis-pair experiments and dataset paths
# when the lists do not hold exactly four entries per path.
assert len(names_label) == len(features), 'names_label and features must line up'
assert len(names_label) % len(paths_label) == 0, 'uneven number of experiments per path'

k = ['all', 'ill']
per_path = len(names_label) // len(paths_label)
for idx, (p, i) in enumerate(zip(paths_label, k)):
    names = names_label[idx * per_path:(idx + 1) * per_path]
    feat = features[idx * per_path:(idx + 1) * per_path]
    for n, f in zip(names, feat):
        # Each step is skipped when its result CSV already exists.
        if path.exists('results/DT/DT_hyper_{}.csv'.format(n)):
            print('Ya existe el hyperparametro:', n)
        else:
            hyper_DT(p, f, n)
            print()
            print('--------------------------------------------------------')
            print()

        if path.exists('results/DT/DT_predict_{}.csv'.format(n)):
            print('Ya existe los resultados:', n)
        else:
            predict_DT(p, f, n)
            print()
            print('--------------------------------------------------------')
            print()

Ya existe el hyperparametro: fc_all_label_P_atc
Ya existe los resultados: fc_all_label_P_atc
Ya existe el hyperparametro: fc_all_label_P_cie
Ya existe los resultados: fc_all_label_P_cie
Ya existe el hyperparametro: fc_all_label_P_cie_atc
Ya existe los resultados: fc_all_label_P_cie_atc
Ya existe el hyperparametro: fc_all_label_P_E_S
Ya existe los resultados: fc_all_label_P_E_S
Ya existe el hyperparametro: fc_ill_label_P_atc
Ya existe los resultados: fc_ill_label_P_atc
Ya existe el hyperparametro: fc_ill_label_P_cie
Ya existe los resultados: fc_ill_label_P_cie
Ya existe el hyperparametro: fc_ill_label_P_cie_atc
Ya existe los resultados: fc_ill_label_P_cie_atc
Ya existe el hyperparametro: fc_ill_label_P_E_S
Ya existe los resultados: fc_ill_label_P_E_S


In [39]:
# Multi-label summary for the experiments listed in names_label.
resultados_etiquetas(names_label)

fc_all_label_P_atc

Max Depth: 85
Min Samples Split:  35
Tasa de acierto: 0.768 +/- 0.026
Tasa de Hamming Loss: 0.09 +/- 0.01
Tasa de precision(macro) 0.904 +/- 0.021
Tasa de precision(micro) 0.932 +/- 0.019
Tasa de exactitud(macro): 0.815 +/- 0.02
Tasa de exactitud(micro): 0.87 +/- 0.013
Tasa F1-Score(macro) 0.854 +/- 0.015
Tasa F1-Score(micro) 0.9 +/- 0.01
---------------------------------------------------------------
fc_all_label_P_cie

Max Depth: 100
Min Samples Split:  20
Tasa de acierto: 0.577 +/- 0.081
Tasa de Hamming Loss: 0.173 +/- 0.037
Tasa de precision(macro) 0.774 +/- 0.063
Tasa de precision(micro) 0.798 +/- 0.069
Tasa de exactitud(macro): 0.801 +/- 0.03
Tasa de exactitud(micro): 0.855 +/- 0.028
Tasa F1-Score(macro) 0.778 +/- 0.028
Tasa F1-Score(micro) 0.823 +/- 0.027
---------------------------------------------------------------
fc_all_label_P_cie_atc

Max Depth: 115
Min Samples Split:  20
Tasa de acierto: 0.845 +/- 0.013
Tasa de Hamming Loss: 0.059 +/- 0.006
Tasa de pr