# Dengue

Este notebook faz a análise dos dados de dengue da cidade do Recife, obtidos em 2016, é feito o preprocessamento, análise exploratória dos dados, e teste com modelos de aprendizagem nos dados.

In [1]:
import os.path
import numpy as np
import pandas as pd
import requests
import holoviews as hv

hv.extension('bokeh')

Download da base de dados

In [2]:
database_url = 'http://dados.recife.pe.gov.br/dataset/2a9b1c39-0700-4ddf-9a10-b3c8d5d9396c/resource/2a2ef847-7063-462e-bf76-a49ebc9a6d13/download/casos-dengue2016.csv'
database_file = 'casos-dengue2016.csv'

if not os.path.exists(database_file):
    response = requests.get(database_url, stream=True)
    if response.status_code == 200:
        with open(database_file, 'wb') as file:
            file.write(response.content)

Criação do dataframe e remoção de muitas colunas não usadas

In [3]:
raw_df = pd.read_csv(database_file, sep=';', header='infer', encoding='latin1', low_memory=False)

drop_columns = [
    'nu_notificacao', 'tp_notificacao', 'co_cid', 'dt_notificacao', 'ds_semana_notificacao', 'notificacao_ano',
    'co_uf_notificacao', 'notificacao_ano', 'co_municipio_notificacao', 'id_regional', 'co_unidade_notificacao',
    'dt_diagnostico_sintoma', 'ds_semana_sintoma', 'nu_idade', 'co_uf_residencia', 'co_municipio_residencia',
    'co_regional_residencia', 'co_distrito_residencia', 'co_bairro_residencia', 'co_logradouro_residencia',
    'nome_logradouro_residencia', 'co_geo_campo_1', 'co_geo_campo_2', 'tp_zona_residencia', 'co_pais_residencia',
    'dt_investigacao', 'co_cbo_ocupacao', 'dt_chil_s1', 'dt_chil_s2', 'dt_prnt', 'res_chiks1', 'res_chiks2',
    'resul_prnt', 'dt_coleta_exame', 'dt_coleta_NS1', 'dt_coleta_isolamento', 'dt_coleta_rtpcr', 'tp_sorotipo',
    'dt_internacao', 'co_uf_hospital', 'co_municipio_hospital', 'co_unidade_hospital', 'nu_ddd_hospital',
    'nu_telefone_hospital', 'tp_autoctone_residencia', 'co_pais_infeccao', 'co_municipio_infeccao',
    'co_distrito_infeccao', 'co_bairro_infeccao', 'no_bairro_infeccao', 'tp_criterio_confirmacao',
    'st_doenca_trabalho', 'clinc_chik', 'dt_encerramento', 'alrm_hipot', 'alrm_plaq', 'alrm_vom', 'alrm_sang',
    'alrm_hemat', 'alrm_abdom', 'alrm_letar', 'dt_alrm', 'mani_hemor', 'alrm_hepat', 'alrm_liq', 'epistaxe',
    'gengivo', 'metro', 'petequias', 'hematura', 'sangram', 'laco_n', 'plasmatico', 'evidencia', 'plaq_menor',
    'complica', 'ds_obs', 'tp_sistema', 'tp_duplicidade', 'dt_digitacao', 'tp_fluxo_retorno',
    'ds_identificador_registro', 'nome_referencia', 'nu_cep'
]
drp_df = raw_df.drop(drop_columns,  axis='columns', errors='ignore')

display('raw df', raw_df.sample(5), 'drp df', drp_df.sample(5))

'raw df'

Unnamed: 0,nu_notificacao,tp_notificacao,co_cid,dt_notificacao,ds_semana_notificacao,notificacao_ano,co_uf_notificacao,co_municipio_notificacao,id_regional,co_unidade_notificacao,...,plasmatico,evidencia,plaq_menor,complica,ds_obs,tp_sistema,tp_duplicidade,dt_digitacao,tp_fluxo_retorno,ds_identificador_registro
15014,2594937,2,A90,16-02-2016,201607,2016,26,261160,1497.0,22454,...,,,,,,2.0,,23-08-2016,1,4.0
11909,2334331,2,A90,15-03-2016,201611,2016,26,261160,1497.0,2427427,...,,,,,MENOR ENCAMINHADO DA UPA PARA HBL COM HISTËRIA...,2.0,,17-03-2016,0,7.0
11899,2175996,2,A90,22-02-2016,201608,2016,26,261160,1497.0,1120,...,,,,,FEBRE EXANTEMA ARTRALGIA MIALGIA,2.0,,16-03-2016,1,4.0
4015,2318536,2,A90,27-01-2016,201604,2016,26,261160,1497.0,20516,...,,,,,,2.0,,26-05-2016,0,6.0
4910,2126693,2,A90,01-04-2016,201601,2016,26,261160,1497.0,531,...,,,,,MIALGIA FEBRE VOMITO HA 3DIAS,2.0,,19-01-2016,0,4.0


'drp df'

Unnamed: 0,dt_nascimento,tp_sexo,tp_gestante,tp_raca_cor,tp_escolaridade,no_bairro_residencia,febre,mialgia,cefaleia,exantema,...,Tp_result_NS1,tp_result_isolamento,tp_result_rtpcr,tp_result_histopatologia,tp_result_imunohistoquimica,st_ocorreu_hospitalizacao,co_uf_infeccao,tp_classificacao_final,tp_evolucao_caso,dt_obito
6645,01-11-2008,F,6.0,4.0,9.0,AFOGADOS,,,,,...,,,,,,,,10.0,9.0,
12952,14-01-1983,F,9.0,9.0,9.0,AREIAS,,,,,...,,,,,,,,10.0,9.0,
11863,22-11-1959,M,6.0,9.0,9.0,VARZEA,,,,,...,,,,,,,26.0,10.0,1.0,
3897,12-01-1982,M,6.0,9.0,9.0,COHAB,,,,,...,4.0,4.0,4.0,4.0,4.0,,26.0,10.0,1.0,
1130,09-11-1949,F,5.0,9.0,9.0,ENGENHO DO MEIO,1.0,1.0,1.0,1.0,...,,,,,,,,5.0,1.0,


Transformação dos dados de várias colunas

In [4]:
yes_no_columns = {
    1: 'sim',
    2: 'não',
    None: 'sem resposta'
}
exam_columns = {
    1: 'reagente',
    2: 'não reagente',
    3: 'inconclusivo',
    4: 'não realizado',
    None: 'sem resposta'
}
columns_maps = {
    'tp_gestante': {
        1: '1º trimestre',
        2: '2º trimestre',
        3: '3º trimestre',
        4: 'gestante - ignorada',
        5: 'não se aplica',
        6: 'ignorado',
        None: 'não se aplica'
    },
    'tp_raca_cor': {
        1: 'branca',
        2: 'preta',
        3: 'amarela',
        4: 'parda',
        5: 'indigena',
        None: 'ignorado'
    },
    'tp_escolaridade': {
        1: 'fundamental 1 incompleto',
        2: 'fundamental 1 completo',
        3: 'fundamental 2 incompleto',
        4: 'fundamental 2 completo',
        5: 'médio incompleto',
        6: 'médio completo',
        7: 'superior incompleto',
        8: 'superior completo',
        9: 'ignorado',
        43: 'analfabeto',
        None: 'ignorado',
    },
    'tp_classificacao_final': {
        1: 'dengue',
        2: 'dengue com complicações',
        3: 'dengue hemorrágica',
        4: 'dengue síndrome choque',
        5: 'descartado',
        8: 'inconclusivo',
        10: 'dengue',
        11: 'dengue com complicações',
        12: 'dengue hemorrágica',
        None: 'sem resposta'
    },
    'febre': yes_no_columns,
    'mialgia': yes_no_columns,
    'cefaleia': yes_no_columns,
    'exantema': yes_no_columns,
    'vomito': yes_no_columns,
    'nausea': yes_no_columns,
    'dor_costas': yes_no_columns,
    'conjutivite': yes_no_columns,
    'artrite': yes_no_columns,
    'artralgia': yes_no_columns,
    'petequia_n': yes_no_columns,
    'leucopenia': yes_no_columns,
    'laco': yes_no_columns,
    'dor_retro': yes_no_columns,
    'diabetes': yes_no_columns,
    'hematolog': yes_no_columns,
    'hepatopat': yes_no_columns,
    'renal': yes_no_columns,
    'hipertensao': yes_no_columns,
    'acido_pept': yes_no_columns,
    'auto_imune': yes_no_columns,
    'tp_result_exame': exam_columns,
    'Tp_result_NS1': exam_columns,
    'tp_result_isolamento': exam_columns,
    'tp_result_rtpcr': exam_columns,
    'tp_result_histopatologia': exam_columns,
    'tp_result_imunohistoquimica': exam_columns,
    'st_ocorreu_hospitalizacao': yes_no_columns,
    'tp_evolucao_caso': {
        1: 'cura',
        2: 'óbito dengue',
        3: 'óbito outro',
        4: 'óbito investigação',
        9: 'ignorado',
        None: 'sem resposta'
    }
}

df = drp_df.copy()
for c, maps in columns_maps.items():
    df[c] = df[c].apply(lambda v: maps.get(v, maps[None]))
    
df.sample(10)

Unnamed: 0,dt_nascimento,tp_sexo,tp_gestante,tp_raca_cor,tp_escolaridade,no_bairro_residencia,febre,mialgia,cefaleia,exantema,...,Tp_result_NS1,tp_result_isolamento,tp_result_rtpcr,tp_result_histopatologia,tp_result_imunohistoquimica,st_ocorreu_hospitalizacao,co_uf_infeccao,tp_classificacao_final,tp_evolucao_caso,dt_obito
8634,31-10-1955,M,ignorado,ignorado,ignorado,MADALENA,sem resposta,sem resposta,sem resposta,sem resposta,...,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,26.0,dengue,cura,
6760,08-06-1977,F,não se aplica,ignorado,ignorado,IPUTINGA,sem resposta,sem resposta,sem resposta,sem resposta,...,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,26.0,dengue,cura,
10891,24-12-1944,M,ignorado,parda,ignorado,AGUA FRIA,sem resposta,sem resposta,sem resposta,sem resposta,...,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,,dengue,cura,
4915,17-06-1999,F,não se aplica,ignorado,ignorado,CASA AMARELA,sem resposta,sem resposta,sem resposta,sem resposta,...,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,,descartado,cura,
5830,22-09-1964,M,ignorado,ignorado,ignorado,TORROES,sem resposta,sem resposta,sem resposta,sem resposta,...,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,26.0,dengue,cura,
1242,05-01-1951,M,ignorado,ignorado,ignorado,NOVA DESCOBERTA,sem resposta,sem resposta,sem resposta,sem resposta,...,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,não,,descartado,cura,
1825,31-01-2002,F,não se aplica,ignorado,ignorado,SAN MARTIN,sim,sim,sim,sim,...,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,,dengue,cura,
3134,10-08-1988,F,2º trimestre,branca,ignorado,SAO JOSE,sim,sim,não,sim,...,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,,inconclusivo,sem resposta,
387,11-10-1990,M,ignorado,ignorado,ignorado,TOTO,sim,sim,sim,não,...,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,não,26.0,dengue,cura,
5754,21-03-2014,M,ignorado,parda,ignorado,TORROES,sim,não,não,não,...,sem resposta,sem resposta,sem resposta,sem resposta,sem resposta,sim,,inconclusivo,sem resposta,


Verificação de casos de dengue por bairro

In [45]:
%%opts Bars [width=700 tools=['hover']]
district_classification_ratio = df.groupby(['no_bairro_residencia', 'tp_classificacao_final']).size()\
.rename('ratio').groupby(level=0).apply(lambda br_data: br_data / br_data.sum()).to_frame()

display('district classification ratio', district_classification_ratio.sample(3))

hv.DynamicMap(
    lambda district: hv.Bars(district_classification_ratio.loc[district].sort_index()),
    kdims='District'
).redim.values(District=district_classification_ratio.index.levels[0])

'district classification ratio'

Unnamed: 0_level_0,Unnamed: 1_level_0,ratio
no_bairro_residencia,tp_classificacao_final,Unnamed: 2_level_1
ALTO SANTA TEREZINHA,dengue,0.818182
CURADO,descartado,0.033333
JIQUIA,descartado,0.044944


Verificação de casos de dengue por idade

In [6]:
%%opts Bars [width=700 tools=['hover']]

gender_count = df[df['tp_classificacao_final'].apply(lambda c: c.startswith('dengue'))].groupby('tp_sexo')\
    .size().rename('count').to_frame()

display(gender_count.T)

hv.Bars(gender_count)

tp_sexo,F,I,M
count,5924,6,4581


Verificação de casos de dengue por nível de escolaridade

In [7]:
%%opts Bars [width=700 xrotation=30 tools=['hover']]

scholarity = df[df['tp_classificacao_final'].apply(lambda c: c.startswith('dengue'))]\
    .groupby('tp_escolaridade').size().rename('count').to_frame()

display(scholarity.T)

hv.Bars(scholarity)

tp_escolaridade,fundamental 1 completo,fundamental 1 incompleto,fundamental 2 completo,fundamental 2 incompleto,ignorado,médio completo,médio incompleto,superior completo,superior incompleto
count,18,36,23,38,10228,76,23,57,12


Analise dos casos de dengue por idade

In [8]:
%%opts Histogram [width=700 tools=['hover']] (alpha=0.5)

age_bins = np.arange(0, 101, 5)
df['idade'] = df['dt_nascimento'].apply(lambda data: 2016 - int(data[-4:]))
df['idade'] = pd.cut(df['idade'], bins=age_bins)

display('idade todos', df.groupby('idade').size().sort_index().rename('idade').to_frame().T)
display(
    'idade doentes',
    df[df['tp_classificacao_final'].apply(lambda c: c.startswith('dengue'))]
        .groupby('idade').size().sort_index().rename('idade').to_frame().T
)

hv.Histogram(np.histogram(df['idade'].apply(lambda i: i.mid), bins=20,  range=(0, 100))) *\
hv.Histogram(np.histogram(df[df['tp_classificacao_final'].apply(lambda c: c.startswith('dengue'))]['idade'].apply(lambda i: i.mid), bins=20,  range=(0, 100)))

'idade todos'

idade,"(0, 5]","(5, 10]","(10, 15]","(15, 20]","(20, 25]","(25, 30]","(30, 35]","(35, 40]","(40, 45]","(45, 50]","(50, 55]","(55, 60]","(60, 65]","(65, 70]","(70, 75]","(75, 80]","(80, 85]","(85, 90]","(90, 95]","(95, 100]"
idade,1006,1198,1292,1145,1542,1522,1603,1360,1300,1176,1034,786,656,471,402,333,183,108,35,13


'idade doentes'

idade,"(0, 5]","(5, 10]","(10, 15]","(15, 20]","(20, 25]","(25, 30]","(30, 35]","(35, 40]","(40, 45]","(45, 50]","(50, 55]","(55, 60]","(60, 65]","(65, 70]","(70, 75]","(75, 80]","(80, 85]","(85, 90]","(90, 95]","(95, 100]"
idade,533,719,792,722,917,919,981,839,828,751,604,504,415,289,239,187,111,59,12,5


  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Verificação de ligação dos casos de dengue por exames sorológicos realizados

In [9]:
exams = [
    'tp_result_exame', 'Tp_result_NS1', 'tp_result_isolamento', 'tp_result_rtpcr', 'tp_result_histopatologia',
    'tp_result_imunohistoquimica'
]

[display(df[df['tp_classificacao_final'].apply(lambda c: c.startswith('dengue'))]
     .groupby(['tp_classificacao_final', exam]).size().to_frame())
 for exam in exams]
pass

Unnamed: 0_level_0,Unnamed: 1_level_0,0
tp_classificacao_final,tp_result_exame,Unnamed: 2_level_1
dengue,inconclusivo,12
dengue,não reagente,30
dengue,não realizado,3430
dengue,reagente,96
dengue,sem resposta,6907
dengue com complicações,não realizado,12
dengue com complicações,reagente,3
dengue com complicações,sem resposta,18
dengue hemorrágica,não reagente,1
dengue hemorrágica,sem resposta,2


Unnamed: 0_level_0,Unnamed: 1_level_0,0
tp_classificacao_final,Tp_result_NS1,Unnamed: 2_level_1
dengue,inconclusivo,1
dengue,não reagente,5
dengue,não realizado,2654
dengue,reagente,2
dengue,sem resposta,7813
dengue com complicações,não realizado,15
dengue com complicações,sem resposta,18
dengue hemorrágica,sem resposta,3


Unnamed: 0_level_0,Unnamed: 1_level_0,0
tp_classificacao_final,tp_result_isolamento,Unnamed: 2_level_1
dengue,inconclusivo,1
dengue,não reagente,6
dengue,não realizado,2840
dengue,sem resposta,7628
dengue com complicações,não realizado,18
dengue com complicações,sem resposta,15
dengue hemorrágica,sem resposta,3


Unnamed: 0_level_0,Unnamed: 1_level_0,0
tp_classificacao_final,tp_result_rtpcr,Unnamed: 2_level_1
dengue,inconclusivo,2
dengue,não reagente,4
dengue,não realizado,2809
dengue,sem resposta,7660
dengue com complicações,não realizado,18
dengue com complicações,sem resposta,15
dengue hemorrágica,reagente,1
dengue hemorrágica,sem resposta,2


Unnamed: 0_level_0,Unnamed: 1_level_0,0
tp_classificacao_final,tp_result_histopatologia,Unnamed: 2_level_1
dengue,não reagente,1
dengue,não realizado,2431
dengue,reagente,1
dengue,sem resposta,8042
dengue com complicações,não realizado,15
dengue com complicações,sem resposta,18
dengue hemorrágica,sem resposta,3


Unnamed: 0_level_0,Unnamed: 1_level_0,0
tp_classificacao_final,tp_result_imunohistoquimica,Unnamed: 2_level_1
dengue,não reagente,1
dengue,não realizado,2439
dengue,reagente,3
dengue,sem resposta,8032
dengue com complicações,não realizado,15
dengue com complicações,sem resposta,18
dengue hemorrágica,sem resposta,3


verificação do prograsso da dengue de acordo com o caso

In [10]:
%%opts Bars [width=700 tools=['hover']]

case_evolution = df.groupby(['tp_classificacao_final', 'tp_evolucao_caso']).size().rename('count').to_frame()\
    .sort_index()

hv.DynamicMap(
    lambda classification: hv.Bars(case_evolution.loc[classification].sort_index()),
    kdims='Classificacao'
).redim.values(Classificacao=case_evolution.index.levels[0])

analise dos sintomas da dengue

In [11]:
%%opts Bars [width=700 height=500 xrotation=90 tools=['hover'] show_legend=False]

symptoms = [
    'febre', 'mialgia', 'cefaleia', 'exantema', 'vomito', 'nausea', 'dor_costas', 'conjutivite', 'artrite',
    'artralgia', 'petequia_n', 'leucopenia', 'laco', 'dor_retro'
]
symptoms_df = df[[*symptoms, 'tp_classificacao_final']]
symptoms_df = symptoms_df[(symptoms_df['febre'] != 'sem resposta') & (symptoms_df['tp_classificacao_final'] != 'sem resposta')]

display(symptoms_df.sample(5))

def compute_symptoms_chart(symptom):
    return hv.Bars(symptoms_df.groupby(['tp_classificacao_final', symptom]).size()\
        .groupby(level=0).apply(lambda s: s / s.sum()).rename('ratio').to_frame())

# Dynamic map was getting buggy
hv.HoloMap({s: compute_symptoms_chart(s) for s in symptoms}, kdims='Sintoma')

Unnamed: 0,febre,mialgia,cefaleia,exantema,vomito,nausea,dor_costas,conjutivite,artrite,artralgia,petequia_n,leucopenia,laco,dor_retro,tp_classificacao_final
14194,sim,sim,sim,não,não,não,não,não,não,sim,não,não,não,não,dengue
8519,sim,não,sim,não,sim,não,não,não,sim,não,não,não,não,não,dengue com complicações
3042,sim,sim,sim,não,não,não,não,não,não,não,não,não,não,não,dengue
1221,não,não,não,não,não,não,não,não,não,não,não,não,não,não,descartado
9783,sim,sim,sim,não,sim,sim,não,não,não,não,não,não,não,não,dengue


Testes com classificação

In [12]:
from pprint import pprint
from scipy import interp
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, cross_validate, StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc, confusion_matrix

from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

from imblearn.over_sampling import RandomOverSampler

numeric_symptoms_df = symptoms_df.apply(lambda x: x.apply(lambda v: 0 if v == 'não' else 1 if v == 'sim' else v))
numeric_symptoms_df['tp_classificacao_final'] =\
    numeric_symptoms_df['tp_classificacao_final'].apply(lambda v: 1 if v.startswith('dengue') else 0)
display('transform symptoms categories to numbers and merge dengue types', numeric_symptoms_df.sample(5))

data = numeric_symptoms_df.drop('tp_classificacao_final', axis='columns')
classes = numeric_symptoms_df['tp_classificacao_final']

classifiers_order = ['rfc', 'mlp', 'knn', 'nbs']
classifiers = {
    'rfc': RandomForestClassifier,
    'mlp': MLPClassifier,
    'knn': KNeighborsClassifier,
    'nbs': GaussianNB,
}

configs = {
    'rfc': [
        {
            'n_estimators': 10,
            'max_depth': None,
        },
        {
            'n_estimators': 100,
            'max_depth': 4,
        },
        {
            'n_estimators': 200,
            'max_depth': 2,
        },
        {
            'n_estimators': 500,
            'max_depth': 1,
        }
    ],
    'mlp': [
        {
            'hidden_layer_sizes': (10,),
            'activation': 'relu',
            'solver': 'adam', # adam is a sgd variation (it does not support adaptative learning)
            'max_iter': 400,
            'early_stopping': True
        },
        {
            'hidden_layer_sizes': (40,),
            'activation': 'logistic',
            'solver': 'adam', # adam is a sgd variation (it does not support adaptative learning)
            'max_iter': 400,
            'early_stopping': True
        },
        {
            'hidden_layer_sizes': (10, 10),
            'activation': 'relu',
            'solver': 'adam', # adam is a sgd variation (it does not support adaptative learning)
            'max_iter': 400,
            'early_stopping': True
        },
        {
            'hidden_layer_sizes': (40, 10),
            'activation': 'logistic',
            'solver': 'adam', # adam is a sgd variation (it does not support adaptative learning)
            'max_iter': 400,
            'early_stopping': True
        }
    ],
    'knn': [
        {
            'n_neighbors': 1
        },
        {
            'n_neighbors': 3,
            'weights': 'distance'
        },
        {
            'n_neighbors': 5,
            'weights': 'distance'
        },
        {
            'n_neighbors': 10,
            'weights': 'distance'
        }
    ],
    'nbs': [{}] # only prios attribute to test, but isn't needed in this case
}

# for name, cls in classifiers.items():
#     for config in configs[name]:
#         instance = cls(**config)
#         print(f'classifier: {name}')
#         print(f'config: {config}')
#         results = cross_validate(instance, data, classes, cv=5, scoring=['accuracy', 'precision', 'roc_auc'])
#         print('scores:')
#         pprint(results)
#         print('\n')
        

skf = StratifiedKFold(n_splits=6)
ros = RandomOverSampler()
mean_fpr = np.linspace(0, 1, 1000)

classifier_configurations_results = {}
for name, cls in classifiers.items():
    for config in configs[name]:
        instance = cls(**config)
        splits_results = []
        for train, test in skf.split(data, classes):
            # Oversampling fold training data only
            data_fold_resampled, classes_fold_resampled = ros.fit_sample(data.iloc[train], classes.iloc[train])
            
            instance.fit(data_fold_resampled, classes_fold_resampled)
            
            predictions = instance.predict(data.iloc[test])
            probas = instance.predict_proba(data.iloc[test])
            
            # scores
            accuracy = accuracy_score(classes.iloc[test], predictions) # normalized by default
            precision = precision_score(classes.iloc[test], predictions)
            recall = recall_score(classes.iloc[test], predictions)
            f_measure = f1_score(classes.iloc[test], predictions)
            
            # ROC
            fpr, tpr, thresholds = roc_curve(classes.iloc[test], probas[:, 1])
            mean_tpr = interp(mean_fpr, fpr, tpr)
            mean_tpr[0] = 0.0
            roc_auc = auc(fpr, tpr)
            
            # Confusion matrix
            cm = confusion_matrix(classes.iloc[test], predictions)
            
            result = {
                'accuracy': accuracy,
                'precision':precision,
                'recall': recall,
                'f_measure': f_measure,
                'roc': {'fpr': mean_fpr, 'tpr': mean_tpr, 'area': roc_auc},
                'cm': cm
            }
            splits_results.append(result)
        
        print(f'classifier: {name}')
        print(f'config: {config}')
        print(splits_results)
        classifier_configurations_results[(name, str(config))] = splits_results
        print('\n')


'transform symptoms categories to numbers and merge dengue types'

Unnamed: 0,febre,mialgia,cefaleia,exantema,vomito,nausea,dor_costas,conjutivite,artrite,artralgia,petequia_n,leucopenia,laco,dor_retro,tp_classificacao_final
10700,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1
8368,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0
16923,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0
329,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
5793,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0


classifier: rfc
config: {'n_estimators': 10, 'max_depth': None}
[{'accuracy': 0.6965299684542586, 'precision': 0.809823677581864, 'recall': 0.6608427543679343, 'f_measure': 0.7277872099603849, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 0.05305305, 0.05405405,
       0.05505506, 0.05605606, 0.05705706, 0.05805806, 

classifier: rfc
config: {'n_estimators': 100, 'max_depth': 4}
[{'accuracy': 0.7186119873817035, 'precision': 0.7736240913811008, 'recall': 0.7656731757451182, 'f_measure': 0.7696280991735538, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 0.05305305, 0.05405405,
       0.05505506, 0.05605606, 0.05705706, 0.05805806, 0

classifier: rfc
config: {'n_estimators': 200, 'max_depth': 2}
[{'accuracy': 0.7274447949526814, 'precision': 0.7439134355275022, 'recall': 0.8478931140801644, 'f_measure': 0.792507204610951, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 0.05305305, 0.05405405,
       0.05505506, 0.05605606, 0.05705706, 0.05805806, 0.

classifier: rfc
config: {'n_estimators': 500, 'max_depth': 1}
[{'accuracy': 0.7003154574132492, 'precision': 0.7436399217221135, 'recall': 0.7810894141829393, 'f_measure': 0.7619047619047619, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 0.05305305, 0.05405405,
       0.05505506, 0.05605606, 0.05705706, 0.05805806, 0

classifier: mlp
config: {'hidden_layer_sizes': (10,), 'activation': 'relu', 'solver': 'adam', 'max_iter': 400, 'early_stopping': True}
[{'accuracy': 0.6958990536277603, 'precision': 0.7898465171192444, 'recall': 0.6875642343268242, 'f_measure': 0.7351648351648351, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 0.05305

classifier: mlp
config: {'hidden_layer_sizes': (40,), 'activation': 'logistic', 'solver': 'adam', 'max_iter': 400, 'early_stopping': True}
[{'accuracy': 0.6839116719242903, 'precision': 0.7706422018348624, 'recall': 0.6906474820143885, 'f_measure': 0.7284552845528457, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 0.0

classifier: mlp
config: {'hidden_layer_sizes': (10, 10), 'activation': 'relu', 'solver': 'adam', 'max_iter': 400, 'early_stopping': True}
[{'accuracy': 0.6908517350157729, 'precision': 0.7837837837837838, 'recall': 0.6855087358684481, 'f_measure': 0.731359649122807, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 0.053

classifier: mlp
config: {'hidden_layer_sizes': (40, 10), 'activation': 'logistic', 'solver': 'adam', 'max_iter': 400, 'early_stopping': True}
[{'accuracy': 0.6883280757097792, 'precision': 0.7670011148272018, 'recall': 0.7070914696813977, 'f_measure': 0.7358288770053476, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 

classifier: knn
config: {'n_neighbors': 1}
[{'accuracy': 0.6435331230283912, 'precision': 0.7312925170068028, 'recall': 0.6628982528263104, 'f_measure': 0.6954177897574124, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 0.05305305, 0.05405405,
       0.05505506, 0.05605606, 0.05705706, 0.05805806, 0.05905906,
       0

classifier: knn
config: {'n_neighbors': 3, 'weights': 'distance'}
[{'accuracy': 0.705993690851735, 'precision': 0.7801104972375691, 'recall': 0.7255909558067831, 'f_measure': 0.751863684771033, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 0.05305305, 0.05405405,
       0.05505506, 0.05605606, 0.05705706, 0.05805806,

classifier: knn
config: {'n_neighbors': 5, 'weights': 'distance'}
[{'accuracy': 0.7091482649842271, 'precision': 0.782560706401766, 'recall': 0.7286742034943474, 'f_measure': 0.7546567323044172, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 0.05305305, 0.05405405,
       0.05505506, 0.05605606, 0.05705706, 0.05805806

classifier: knn
config: {'n_neighbors': 10, 'weights': 'distance'}
[{'accuracy': 0.701577287066246, 'precision': 0.7886836027713626, 'recall': 0.7019527235354573, 'f_measure': 0.742794997281131, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 0.05305305, 0.05405405,
       0.05505506, 0.05605606, 0.05705706, 0.05805806

classifier: nbs
config: {}
[{'accuracy': 0.6977917981072556, 'precision': 0.7193605683836589, 'recall': 0.8324768756423433, 'f_measure': 0.7717960933777991, 'roc': {'fpr': array([0.        , 0.001001  , 0.002002  , 0.003003  , 0.004004  ,
       0.00500501, 0.00600601, 0.00700701, 0.00800801, 0.00900901,
       0.01001001, 0.01101101, 0.01201201, 0.01301301, 0.01401401,
       0.01501502, 0.01601602, 0.01701702, 0.01801802, 0.01901902,
       0.02002002, 0.02102102, 0.02202202, 0.02302302, 0.02402402,
       0.02502503, 0.02602603, 0.02702703, 0.02802803, 0.02902903,
       0.03003003, 0.03103103, 0.03203203, 0.03303303, 0.03403403,
       0.03503504, 0.03603604, 0.03703704, 0.03803804, 0.03903904,
       0.04004004, 0.04104104, 0.04204204, 0.04304304, 0.04404404,
       0.04504505, 0.04604605, 0.04704705, 0.04804805, 0.04904905,
       0.05005005, 0.05105105, 0.05205205, 0.05305305, 0.05405405,
       0.05505506, 0.05605606, 0.05705706, 0.05805806, 0.05905906,
       0.06006006, 0.061

In [13]:
classifier_configurations_results_df = pd.DataFrame(classifier_configurations_results)
display('results', classifier_configurations_results_df)

'results'

Unnamed: 0_level_0,rfc,rfc,rfc,rfc,mlp,mlp,mlp,mlp,knn,knn,knn,knn,nbs
Unnamed: 0_level_1,"{'n_estimators': 10, 'max_depth': None}","{'n_estimators': 100, 'max_depth': 4}","{'n_estimators': 200, 'max_depth': 2}","{'n_estimators': 500, 'max_depth': 1}","{'hidden_layer_sizes': (10,), 'activation': 'relu', 'solver': 'adam', 'max_iter': 400, 'early_stopping': True}","{'hidden_layer_sizes': (40,), 'activation': 'logistic', 'solver': 'adam', 'max_iter': 400, 'early_stopping': True}","{'hidden_layer_sizes': (10, 10), 'activation': 'relu', 'solver': 'adam', 'max_iter': 400, 'early_stopping': True}","{'hidden_layer_sizes': (40, 10), 'activation': 'logistic', 'solver': 'adam', 'max_iter': 400, 'early_stopping': True}",{'n_neighbors': 1},"{'n_neighbors': 3, 'weights': 'distance'}","{'n_neighbors': 5, 'weights': 'distance'}","{'n_neighbors': 10, 'weights': 'distance'}",{}
0,"{'accuracy': 0.6965299684542586, 'precision': ...","{'accuracy': 0.7186119873817035, 'precision': ...","{'accuracy': 0.7274447949526814, 'precision': ...","{'accuracy': 0.7003154574132492, 'precision': ...","{'accuracy': 0.6958990536277603, 'precision': ...","{'accuracy': 0.6839116719242903, 'precision': ...","{'accuracy': 0.6908517350157729, 'precision': ...","{'accuracy': 0.6883280757097792, 'precision': ...","{'accuracy': 0.6435331230283912, 'precision': ...","{'accuracy': 0.705993690851735, 'precision': 0...","{'accuracy': 0.7091482649842271, 'precision': ...","{'accuracy': 0.701577287066246, 'precision': 0...","{'accuracy': 0.6977917981072556, 'precision': ..."
1,"{'accuracy': 0.7214150347441567, 'precision': ...","{'accuracy': 0.7346809854706254, 'precision': ...","{'accuracy': 0.7195198989260897, 'precision': ...","{'accuracy': 0.6961465571699305, 'precision': ...","{'accuracy': 0.6999368288060644, 'precision': ...","{'accuracy': 0.6841440303221731, 'precision': ...","{'accuracy': 0.7024636765634871, 'precision': ...","{'accuracy': 0.6860391661402401, 'precision': ...","{'accuracy': 0.6266582438408086, 'precision': ...","{'accuracy': 0.6664560960202148, 'precision': ...","{'accuracy': 0.6614024005053696, 'precision': ...","{'accuracy': 0.7264687302590019, 'precision': ...","{'accuracy': 0.6746683512318383, 'precision': ..."
2,"{'accuracy': 0.7252053063802906, 'precision': ...","{'accuracy': 0.7245735944409349, 'precision': ...","{'accuracy': 0.7258370183196462, 'precision': ...","{'accuracy': 0.7214150347441567, 'precision': ...","{'accuracy': 0.7182564750473784, 'precision': ...","{'accuracy': 0.6993051168667088, 'precision': ...","{'accuracy': 0.687934301958307, 'precision': 0...","{'accuracy': 0.6582438408085913, 'precision': ...","{'accuracy': 0.52179406190777, 'precision': 0....","{'accuracy': 0.6247631080227416, 'precision': ...","{'accuracy': 0.6430827542640556, 'precision': ...","{'accuracy': 0.7264687302590019, 'precision': ...","{'accuracy': 0.6860391661402401, 'precision': ..."
3,"{'accuracy': 0.7125710675931776, 'precision': ...","{'accuracy': 0.6942514213518636, 'precision': ...","{'accuracy': 0.7106759317751106, 'precision': ...","{'accuracy': 0.6778269109286166, 'precision': ...","{'accuracy': 0.6929879974731522, 'precision': ...","{'accuracy': 0.660770688566014, 'precision': 0...","{'accuracy': 0.7068856601389766, 'precision': ...","{'accuracy': 0.6576121288692356, 'precision': ...","{'accuracy': 0.6828806064434618, 'precision': ...","{'accuracy': 0.6942514213518636, 'precision': ...","{'accuracy': 0.7068856601389766, 'precision': ...","{'accuracy': 0.7100442198357549, 'precision': ...","{'accuracy': 0.7005685407454201, 'precision': ..."
4,"{'accuracy': 0.6854074542008844, 'precision': ...","{'accuracy': 0.6626658243840808, 'precision': ...","{'accuracy': 0.6632975363234365, 'precision': ...","{'accuracy': 0.638660770688566, 'precision': 0...","{'accuracy': 0.6620341124447252, 'precision': ...","{'accuracy': 0.6531901452937461, 'precision': ...","{'accuracy': 0.6828806064434618, 'precision': ...","{'accuracy': 0.6506632975363235, 'precision': ...","{'accuracy': 0.5710675931775111, 'precision': ...","{'accuracy': 0.624131396083386, 'precision': 0...","{'accuracy': 0.6272899557801642, 'precision': ...","{'accuracy': 0.6784586228679722, 'precision': ...","{'accuracy': 0.6601389766266582, 'precision': ..."
5,"{'accuracy': 0.6677195198989261, 'precision': ...","{'accuracy': 0.6462413139608338, 'precision': ...","{'accuracy': 0.6070751737207833, 'precision': ...","{'accuracy': 0.6013897662665825, 'precision': ...","{'accuracy': 0.6203411244472521, 'precision': ...","{'accuracy': 0.6190777005685407, 'precision': ...","{'accuracy': 0.6563487049905243, 'precision': ...","{'accuracy': 0.6013897662665825, 'precision': ...","{'accuracy': 0.5969677826910929, 'precision': ...","{'accuracy': 0.6197094125078964, 'precision': ...","{'accuracy': 0.6601389766266582, 'precision': ...","{'accuracy': 0.6727732154137713, 'precision': ...","{'accuracy': 0.6304485154769425, 'precision': ..."


In [41]:
%%opts Bars [show_legend=False xrotation=0 width=650]

def plot_classifiers_metric(metric):
    metric_data = classifier_configurations_results_df.apply(lambda s: s.apply(lambda v: v[metric]))
    metric_mean = metric_data.mean().rename('mean').reset_index()
    metric_mean = metric_mean.rename({'level_0': 'classifier', 'level_1': 'configuration'}, axis='columns')
    metric_mean['configuration'] = [i % 4 for i in range(metric_mean['configuration'].shape[0])]
    
    return hv.Bars(metric_mean, kdims=['classifier', 'configuration'], vdims=['mean'], label=metric)

(plot_classifiers_metric('accuracy') +\
plot_classifiers_metric('precision') +\
plot_classifiers_metric('recall') +\
plot_classifiers_metric('f_measure')).cols(1)

In [16]:
%%opts Curve [width=230 height=230] (alpha=0.7 line_width=1)
%%opts Curve.Luck (line_dash='dashed' alpha=0.7)
%%opts Curve.Mean (alpha=1 line_width=2)
%%opts Overlay [legend_position='bottom_right']

def classifier_rocs(name):
    roc_data = classifier_configurations_results_df[name].apply(lambda s: s.apply(lambda v: v['roc']))
    curve_overlays = [hv.Overlay(
                [
                    *[hv.Curve((data['fpr'], data['tpr']))#, label=f'auc: {data["area"]:.2f}')
                      for i, data in enumerate(roc_data[c])],
                    hv.Curve(([0, 1], [0, 1]), label='luck').relabel(group='Luck'),
                    hv.Curve(
                        (roc_data[c].apply(lambda v: v['fpr']).mean(),
                         roc_data[c].apply(lambda v: v['tpr']).mean()
                        ),
                        label=f'avg. auc: {roc_data[c].apply(lambda v: v["area"]).mean():.2f}'
                    ).relabel(group='Mean')
                ],
                label=c
            ).redim.label(x='false positive rate', y='true positive rate')
            for c in roc_data.columns]
    return hv.Layout(curve_overlays, label=name)

hv.Layout([classifier_rocs(name) for name in classifiers_order])

In [17]:
%%opts HeatMap [width=230 height=230 tools=['hover'] colorbar=False toolbar='above' xrotation=30]

def classifier_confusion_matrices(name):
    cm_data = classifier_configurations_results_df[name].apply(lambda s: s.apply(lambda v: v['cm']))
    cm_mean = cm_data.apply(lambda s: [np.mean(s, axis=0)]).to_frame().T
    return hv.Layout(
        [hv.Overlay(
            [
                hv.HeatMap(
                    pd.DataFrame(
                        cm_mean[c][0][0] / cm_mean[c][0][0].sum(axis=1)[:, np.newaxis],
                        index=['true negative', 'true positive'],
                        columns=['pred negative', 'pred positive']
                    ).stack().rename('count').swaplevel(0, 1).reset_index()
                ).redim.label(level_0='predicted', level_1='true'),
                hv.Labels(
                    pd.DataFrame(
                        cm_mean[c][0][0] / cm_mean[c][0][0].sum(axis=1)[:, np.newaxis],
                        index=['true negative', 'true positive'],
                        columns=['pred negative', 'pred positive']
                    ).stack().rename('count').apply(lambda v: f'{v:.2f}').swaplevel(0, 1).reset_index()
                )
            ],
            label=c
        )
         for c in cm_mean.columns],
        label=name
    )

hv.Layout([classifier_confusion_matrices(name) for name in classifiers_order])