# Metadata analyses 

First, we import the required libraries.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Show floats with three fixed decimals so pandas never falls back to scientific notation.
pd.set_option('display.float_format', '{:.3f}'.format)

#comment next line to not use MLFlow for cleaning data
from MLFlow_Classification import *
from MLFlow_Utils import *

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


### Number of patients by hospital: ICU, MV and death

In [2]:
# Load the patient-level dataset; the CSV is semicolon-separated.
df_iacov_model_lean_14hosp = pd.read_csv(
    "df_iacov_model_lean_14hosp.csv",
    sep=";",
)

In [3]:
# Column dtypes and non-null counts: shows which variables have missing values.
df_iacov_model_lean_14hosp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6046 entries, 0 to 6045
Data columns (total 30 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   city_hospital    6046 non-null   object 
 1   age              6046 non-null   float64
 2   male             6046 non-null   float64
 3   race             6046 non-null   object 
 4   heart_rate       5237 non-null   float64
 5   resp_rate        4589 non-null   float64
 6   sys_press        4710 non-null   float64
 7   dias_press       4721 non-null   float64
 8   mean_press       4680 non-null   float64
 9   temp             4873 non-null   float64
 10  hemoglobin       4142 non-null   float64
 11  platelets        5302 non-null   float64
 12  hematocrit       3696 non-null   float64
 13  red_cells_count  3362 non-null   float64
 14  hcm              4855 non-null   float64
 15  rdw              4854 non-null   float64
 16  mcv              4861 non-null   float64
 17  leukocytes    

In [4]:
# Count rows per value of the `excluir` flag.
df_iacov_model_lean_14hosp['excluir'].value_counts()      

excluir
False    6046
Name: count, dtype: int64

In [5]:
# Relative frequency of each value of `male` (normalize=True returns proportions, not counts).
df_iacov_model_lean_14hosp['male'].value_counts(normalize = True)

male
1.000   0.539
0.000   0.461
Name: proportion, dtype: float64

In [6]:
# Relative frequency of each `race` category.
df_iacov_model_lean_14hosp['race'].value_counts(normalize = True)
# Absolute counts instead of proportions:
#df_iacov_model_lean_14hosp['race'].value_counts()

race
N/a       0.558
Branco    0.289
Pardo     0.125
Preto     0.026
Amarelo   0.002
Name: proportion, dtype: float64

In [7]:
# Mean of `age` over the whole dataset.
df_iacov_model_lean_14hosp['age'].mean()

57.52102381739993

In [8]:
# Standard deviation of `age` over the whole dataset.
df_iacov_model_lean_14hosp['age'].std()

17.869007644631505

In [9]:
# Number of missing values per column, restricted to the HC_USP rows.
df_iacov_model_lean_14hosp[df_iacov_model_lean_14hosp['city_hospital'] == 'HC_USP'].isna().sum()

city_hospital         0
age                   0
male                  0
race                  0
heart_rate            7
resp_rate            37
sys_press            11
dias_press           10
mean_press           11
temp                  7
hemoglobin         1287
platelets           110
hematocrit         1286
red_cells_count    1500
hcm                 108
rdw                 109
mcv                 108
leukocytes          108
neutrophil          110
lymphocytes         111
basophils           110
eosinophils        1500
monocytes           110
crp                 319
death                 0
icu                   0
mv                    0
state                 0
region                0
excluir               0
dtype: int64

In [10]:
# Hospital x ICU contingency table; margins=True appends the "All" totals.
crosstab_hospital_icu = pd.crosstab(
    index=df_iacov_model_lean_14hosp['city_hospital'],
    columns=df_iacov_model_lean_14hosp['icu'],
    margins=True,
)
crosstab_hospital_icu

icu,0,1,All
city_hospital,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AMAZONAS_HUGV,13,34,47
CEARA_HUWC,42,31,73
CEARA_UNIMED,660,185,845
HC_USP,452,1048,1500
HEVV,32,24,56
HMV_POA_02,319,137,456
HOSPPORTUGUES_SALVADOR,1114,245,1359
HOSPSANTACATARINABLUMENAU,127,21,148
HOSPSANTAJULIA_MANAUS,172,75,247
HOSPSAOFRANCISCO_MOGIGUACU,97,27,124


In [11]:
# Hospital x MV contingency table; margins=True appends the "All" totals.
crosstab_hospital_mv = pd.crosstab(
    index=df_iacov_model_lean_14hosp['city_hospital'],
    columns=df_iacov_model_lean_14hosp['mv'],
    margins=True,
)
crosstab_hospital_mv

mv,0,1,All
city_hospital,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AMAZONAS_HUGV,20,27,47
CEARA_HUWC,41,32,73
CEARA_UNIMED,716,129,845
HC_USP,835,665,1500
HEVV,43,13,56
HMV_POA_02,371,85,456
HOSPPORTUGUES_SALVADOR,1240,119,1359
HOSPSANTACATARINABLUMENAU,137,11,148
HOSPSANTAJULIA_MANAUS,201,46,247
HOSPSAOFRANCISCO_MOGIGUACU,104,20,124


In [12]:
# Hospital x death contingency table; margins=True appends the "All" totals.
crosstab_hospital_death = pd.crosstab(
    index=df_iacov_model_lean_14hosp['city_hospital'],
    columns=df_iacov_model_lean_14hosp['death'],
    margins=True,
)
crosstab_hospital_death

death,0,1,All
city_hospital,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AMAZONAS_HUGV,31,16,47
CEARA_HUWC,52,21,73
CEARA_UNIMED,735,110,845
HC_USP,963,537,1500
HEVV,43,13,56
HMV_POA_02,409,47,456
HOSPPORTUGUES_SALVADOR,1265,94,1359
HOSPSANTACATARINABLUMENAU,138,10,148
HOSPSANTAJULIA_MANAUS,209,38,247
HOSPSAOFRANCISCO_MOGIGUACU,107,17,124


In [14]:
# Numeric columns summarised per hospital further down, grouped by kind of measurement.
numeric = [
    'age',
    # vital signs
    'heart_rate', 'resp_rate', 'sys_press', 'dias_press', 'mean_press', 'temp',
    # blood count and red-cell indices
    'hemoglobin', 'platelets', 'hematocrit', 'red_cells_count', 'hcm', 'rdw', 'mcv',
    # white-cell counts
    'leukocytes', 'neutrophil', 'lymphocytes', 'basophils', 'eosinophils', 'monocytes',
    'crp',
]

In [15]:
# Distinct hospital identifiers, used as the grouping categories below.
hospital = df_iacov_model_lean_14hosp.loc[:, 'city_hospital'].unique()
hospital

array(['AMAZONAS_HUGV', 'CEARA_HUWC', 'CEARA_UNIMED', 'HEVV', 'HC_USP',
       'HOSPPORTUGUES_SALVADOR', 'HOSPSANTACATARINABLUMENAU',
       'HOSPSANTAJULIA_MANAUS', 'HOSPSAOFRANCISCO_MOGIGUACU', 'PELOTAS',
       'PERNAMBUCO_FULL', 'RIO_DE_JANEIRO_UNIMED', 'HRL_2021_02',
       'HMV_POA_02'], dtype=object)

In [16]:
# Show every column and widen the console so the per-hospital tables are not truncated.
for option_name, option_value in {'display.max_columns': None, 'display.width': 1000}.items():
    pd.set_option(option_name, option_value)

In [17]:
from scipy.stats import shapiro


def _shapiro_pvalue(values):
    """Return the Shapiro-Wilk p-value of ``values`` using non-missing observations only.

    The columns summarised here contain NaN. Passing them to ``shapiro`` as-is
    does not give a usable p-value (the result is NaN or otherwise meaningless,
    depending on the SciPy version), so missing entries are dropped first.
    ``shapiro`` needs at least three observations; with fewer, NaN is returned
    instead of letting the call raise and abort the whole loop.
    """
    observed = values.dropna()
    if len(observed) < 3:
        return np.nan
    return shapiro(observed)[1]


# For each hospital, extend describe() with skewness, kurtosis, coefficient of
# variation, Shapiro-Wilk p-value and the share of missing values per column.
for cat in hospital:
    cat_dd = df_iacov_model_lean_14hosp[df_iacov_model_lean_14hosp['city_hospital'] == cat].drop(columns=['city_hospital'])

    cat_desc = cat_dd[numeric].describe()

    cat_skew = cat_dd[numeric].skew()
    cat_desc.loc['skewness'] = cat_skew

    cat_kurt = cat_dd[numeric].kurt()
    cat_desc.loc['kurtosis'] = cat_kurt

    # Coefficient of variation: standard deviation relative to the mean.
    cat_cv = cat_dd[numeric].std()/cat_dd[numeric].mean()
    cat_desc.loc['cv'] = cat_cv

    # Normality p-value per column, computed on observed (non-NaN) values.
    cat_normality = cat_dd[numeric].apply(_shapiro_pvalue)
    cat_desc.loc['shapiro'] = cat_normality

    # Fraction of this hospital's rows that are missing in each column.
    cat_missing = cat_dd[numeric].isnull().sum()/cat_dd.shape[0]
    cat_desc.loc['missing'] = cat_missing

    print("Categoria:", cat)
    display(cat_desc)

Categoria: AMAZONAS_HUGV


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,47.0,45.0,41.0,44.0,44.0,42.0,35.0,46.0,45.0,46.0,44.0,43.0,44.0,44.0,46.0,44.0,44.0,44.0,44.0,44.0,36.0
mean,57.149,91.222,24.463,133.318,77.932,94.713,36.466,10.763,257936.444,33.48,3.917,27.857,13.086,85.069,12467.826,10782.227,938.841,55.205,72.841,654.182,124.639
std,15.605,21.151,8.05,24.338,14.792,14.418,0.906,2.898,148993.424,7.75,0.872,2.01,2.181,5.498,6138.18,6054.574,644.627,42.397,137.864,367.239,90.968
min,22.0,57.0,12.0,79.0,56.0,59.354,33.6,3.2,28300.0,20.1,2.39,21.13,10.33,67.13,663.0,2645.0,142.0,0.0,0.0,73.0,0.1
25%,46.5,80.0,19.0,119.25,68.75,87.227,36.0,8.445,163700.0,26.425,3.335,27.0,11.42,82.468,8371.0,6143.75,530.75,27.25,2.0,405.75,25.325
50%,61.0,89.0,23.0,130.0,76.0,96.148,36.5,11.4,223900.0,34.905,4.045,28.2,12.265,85.425,11640.0,9897.5,759.0,48.5,12.0,638.5,127.0
75%,68.0,102.0,28.0,150.25,80.25,102.921,36.85,12.992,331600.0,39.26,4.543,29.145,14.275,87.893,15357.5,13433.25,1214.25,69.25,73.5,865.5,201.25
max,83.0,172.0,57.0,180.0,120.0,139.98,38.3,16.45,724000.0,49.39,5.75,31.3,20.56,95.42,31220.0,29215.0,3327.0,253.0,750.0,1699.0,272.2
skewness,-0.711,1.369,1.963,-0.063,1.248,0.361,-0.477,-0.165,1.119,0.027,-0.081,-1.158,1.324,-0.775,0.856,1.083,2.067,2.529,3.311,0.804,-0.043
kurtosis,-0.45,3.695,6.261,-0.443,1.983,1.546,2.414,-0.387,1.516,-0.963,-0.748,2.081,1.904,2.037,1.134,1.16,5.514,10.144,13.379,0.809,-1.623


Categoria: CEARA_HUWC


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,73.0,73.0,73.0,73.0,73.0,73.0,73.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,71.0,70.0
mean,56.151,86.0,19.603,120.397,72.438,88.425,36.04,9.994,188963.472,30.391,3.523,28.535,14.758,86.658,11913.292,7142.403,1237.111,31.972,140.458,478.803,7.154
std,16.138,18.879,3.996,25.159,16.33,18.385,0.93,2.646,145655.013,7.705,0.89,3.032,2.832,7.587,25247.809,7073.948,827.328,38.087,243.909,319.119,5.543
min,22.0,41.0,12.0,70.0,40.0,50.0,34.5,5.0,7160.0,16.6,1.99,20.5,10.1,64.6,1023.0,649.0,117.0,0.0,0.0,37.0,0.05
25%,42.0,75.0,17.0,101.0,60.0,77.0,35.4,7.8,95980.0,23.725,2.75,26.8,12.7,82.1,4094.75,2696.75,570.25,0.0,0.0,261.0,2.393
50%,58.0,87.0,19.0,117.0,71.0,87.0,36.0,9.8,157050.0,30.25,3.61,28.8,13.9,86.45,7892.5,5487.5,990.0,22.0,39.0,453.0,5.575
75%,69.0,94.0,20.0,135.0,81.0,98.0,36.5,11.825,239275.0,36.35,4.238,30.325,17.025,90.95,12020.0,9792.0,1759.0,52.25,132.5,552.5,11.94
max,86.0,140.0,33.0,189.0,120.0,142.0,39.0,18.1,826600.0,54.8,6.27,35.6,24.0,103.6,214500.0,47636.0,4290.0,181.0,1005.0,1960.0,17.97
skewness,-0.415,0.681,0.994,0.557,0.457,0.527,0.542,0.454,1.879,0.461,0.323,-0.353,0.902,-0.298,7.52,3.112,1.053,1.448,2.199,1.89,0.499
kurtosis,-0.638,1.063,1.239,-0.032,0.708,0.544,0.614,-0.034,5.024,0.122,-0.273,0.782,0.559,0.658,60.484,14.61,1.288,2.505,3.902,5.946,-1.072


Categoria: CEARA_UNIMED


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,845.0,794.0,787.0,791.0,802.0,801.0,802.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,199.0,199.0,768.0,768.0,768.0,778.0
mean,62.599,81.084,18.717,120.932,72.742,88.235,36.287,12.773,449322.917,38.525,2.393,28.524,13.977,86.15,7750.286,3611.714,957.809,3.579,40.948,439.374,10.464
std,17.706,18.56,5.068,27.392,18.083,20.883,1.326,1.726,64176.276,4.78,0.988,1.944,1.403,5.568,8167.18,1961.759,570.125,8.975,79.27,240.224,8.377
min,18.0,25.0,8.0,21.0,12.0,18.332,30.6,6.2,233000.0,20.6,0.29,18.0,11.7,53.8,40.0,33.0,12.0,0.0,0.0,1.003,0.08
25%,48.0,70.0,16.0,110.0,65.0,82.323,36.0,11.7,406000.0,35.5,1.73,27.6,13.1,83.1,4472.5,2459.5,639.5,0.0,0.0,272.75,3.72
50%,65.0,80.0,18.0,120.0,77.5,91.312,36.2,12.8,451500.0,38.6,2.215,28.6,13.7,86.3,6910.0,3725.0,903.0,0.0,0.0,427.5,8.41
75%,76.0,89.75,20.0,135.5,80.0,99.98,36.5,14.1,494250.0,42.0,2.85,29.7,14.6,89.7,9727.5,4826.0,1259.5,0.0,61.0,608.0,15.27
max,100.0,176.0,72.0,214.0,141.0,152.316,63.0,17.3,649000.0,53.3,8.28,35.5,26.0,104.9,174640.0,9022.0,3688.0,80.0,914.0,995.0,44.92
skewness,-0.257,0.627,2.886,-0.607,-0.615,-0.856,10.316,-0.373,-0.139,-0.323,1.439,-0.98,2.366,-0.653,12.518,0.086,0.944,4.27,5.119,0.099,1.039
kurtosis,-0.816,2.09,17.942,1.562,1.282,1.417,206.856,0.294,0.273,0.269,4.081,4.26,11.61,2.995,237.669,-0.039,2.417,24.67,43.541,-0.529,0.863


Categoria: HEVV


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,56.0,52.0,43.0,46.0,46.0,4.0,27.0,56.0,56.0,56.0,56.0,56.0,56.0,56.0,56.0,56.0,56.0,0.0,56.0,56.0,56.0
mean,66.966,90.038,20.116,122.717,74.457,87.25,36.722,11.018,201.107,33.775,3.884,29.058,14.605,87.719,10.932,8.336,4.973,,0.211,54.296,52.957
std,17.651,18.998,4.822,29.282,15.711,19.619,1.365,3.022,104.426,7.97,0.978,2.972,2.28,7.764,6.501,4.419,9.582,,0.945,400.795,32.646
min,23.2,50.0,13.0,10.0,37.0,70.0,34.2,1.4,7.0,13.3,1.33,21.97,11.4,68.371,0.76,0.0,0.0,,0.0,0.0,0.5
25%,55.5,79.5,18.0,107.75,63.5,76.0,35.65,8.75,135.5,28.5,3.348,26.523,13.1,83.467,7.627,5.3,0.843,,0.0,0.245,22.025
50%,70.5,88.0,20.0,123.5,74.0,82.0,36.6,11.7,176.5,35.6,4.13,29.755,14.2,88.36,10.32,8.2,1.194,,0.0,0.559,64.35
75%,79.35,99.25,21.5,140.0,81.5,93.25,37.9,12.925,252.0,39.5,4.527,30.958,15.15,93.051,13.3,10.892,2.543,,0.085,0.825,81.65
max,97.0,144.0,40.0,180.0,120.0,115.0,39.5,15.7,549.0,47.8,5.69,34.97,23.1,104.9,42.0,21.035,49.0,,7.0,3000.0,97.9
skewness,-0.781,0.755,1.905,-1.045,0.443,1.363,0.291,-1.012,1.175,-0.778,-0.596,-0.181,2.024,-0.353,2.086,0.608,3.09,,6.998,7.483,-0.281
kurtosis,0.04,1.12,6.087,4.081,1.107,1.998,-0.75,0.753,2.288,0.074,-0.113,-0.33,4.743,0.112,8.651,0.803,10.206,,50.825,55.999,-1.554


Categoria: HC_USP


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,1500.0,1493.0,1463.0,1489.0,1490.0,1489.0,1493.0,213.0,1390.0,214.0,0.0,1392.0,1391.0,1392.0,1392.0,1390.0,1389.0,1390.0,0.0,1390.0,1181.0
mean,59.305,87.086,24.476,119.4,71.702,87.549,36.366,12.34,234923.741,37.683,,28.843,14.216,85.803,9782.917,7977.892,997.984,0.022,,0.51,1.553
std,15.995,18.503,7.383,22.559,14.456,15.57,0.888,2.538,104514.476,8.062,,2.38,2.168,6.311,10866.328,4711.526,1314.463,0.081,,0.453,1.126
min,18.0,13.0,11.0,10.0,6.0,7.665,32.4,5.0,6000.0,1.0,,16.9,11.1,56.9,290.0,240.0,30.0,0.0,,0.0,0.015
25%,48.0,74.0,20.0,106.0,61.0,77.649,35.9,11.0,166000.0,33.0,,27.6,12.9,82.4,6100.0,4682.5,560.0,0.01,,0.27,0.655
50%,61.0,86.0,23.0,120.0,70.0,86.98,36.4,13.0,217000.0,39.0,,29.0,13.7,85.8,8555.0,7060.0,840.0,0.01,,0.42,1.268
75%,71.0,98.0,28.0,132.0,80.0,96.65,36.9,14.0,288750.0,43.0,,30.3,14.7,89.4,11962.5,10220.0,1240.0,0.02,,0.64,2.276
max,98.0,191.0,108.0,200.0,129.0,141.654,39.1,19.0,992000.0,58.0,,46.9,28.2,128.2,365920.0,36170.0,42120.0,2.86,,10.98,6.432
skewness,-0.295,0.32,3.055,-0.417,-0.184,-0.331,-0.111,-0.137,1.2,-0.551,,-0.243,2.536,0.161,25.57,1.416,23.628,30.882,,10.093,0.866
kurtosis,-0.55,1.189,23.987,3.08,2.503,2.775,0.916,0.016,3.496,1.517,,3.924,9.449,3.458,830.924,3.279,708.368,1072.175,,208.871,0.385


Categoria: HOSPPORTUGUES_SALVADOR


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,1359.0,1087.0,570.0,574.0,574.0,574.0,821.0,928.0,928.0,928.0,928.0,928.0,928.0,928.0,928.0,928.0,928.0,928.0,928.0,928.0,737.0
mean,49.422,86.479,20.192,127.987,76.781,93.564,36.363,12.91,233440.733,39.6,4.511,29.063,13.606,88.501,7196.136,4951.301,1389.867,13.809,75.188,609.546,80.849
std,18.203,12.298,2.611,15.694,8.485,9.175,0.661,1.97,89291.782,5.088,0.65,2.178,1.811,6.24,5669.551,3270.254,793.39,18.541,128.945,375.459,96.9
min,18.0,46.0,14.0,89.0,46.0,64.5,34.2,6.5,57500.0,19.5,1.86,17.65,10.9,59.0,1370.0,301.0,93.0,0.0,0.0,49.0,5.0
25%,34.0,78.0,18.625,120.0,70.25,89.0,36.0,12.038,176000.0,36.875,4.147,27.8,12.6,84.775,4857.5,2806.75,827.375,0.0,0.0,384.0,17.15
50%,47.0,86.0,20.0,125.25,79.5,93.0,36.3,13.1,219000.0,40.1,4.575,29.25,13.2,88.7,6320.0,4020.0,1259.0,10.0,31.0,558.0,40.3
75%,62.5,97.0,20.0,139.0,80.0,98.0,36.7,14.3,275125.0,43.1,4.95,30.4,14.1,92.125,8262.5,6286.125,1748.0,18.0,87.0,757.75,89.2
max,99.0,130.5,35.0,209.5,106.0,136.5,39.45,18.1,788000.0,52.4,6.455,38.15,30.0,112.8,143370.0,26730.0,7362.5,222.0,1346.5,7243.0,708.3
skewness,0.405,0.186,1.789,0.789,-0.334,0.204,0.531,-0.759,1.36,-0.658,-0.572,-0.584,3.11,-0.341,15.518,2.207,1.72,3.789,3.743,6.552,1.918
kurtosis,-0.725,0.176,3.993,1.417,1.26,1.503,1.271,0.609,3.748,0.761,0.838,2.88,16.057,1.936,360.14,8.102,6.889,25.492,20.058,105.054,4.215


Categoria: HOSPSANTACATARINABLUMENAU


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,148.0,142.0,145.0,147.0,147.0,147.0,146.0,148.0,148.0,148.0,148.0,148.0,148.0,148.0,148.0,148.0,148.0,148.0,148.0,147.0,106.0
mean,49.622,87.056,19.048,126.109,78.116,94.116,36.762,13.549,207166.892,40.914,4.725,29.132,13.553,87.671,5524.978,3793.953,1300.002,36.293,61.674,546.472,39.955
std,17.794,14.191,2.222,20.948,12.388,14.156,0.826,1.519,63811.029,5.325,0.966,1.958,1.671,5.798,2794.234,2131.793,665.029,38.281,103.626,289.83,52.512
min,18.0,51.0,16.0,18.0,37.0,30.0,34.5,6.4,45700.0,20.8,3.08,17.8,3.0,56.3,2.5,3.869,233.0,0.0,0.0,57.0,0.2
25%,38.0,78.0,18.0,114.0,70.0,86.0,36.3,12.875,163750.0,38.475,4.348,28.475,12.9,85.2,3900.0,2269.125,840.975,15.15,9.1,342.85,4.88
50%,47.5,87.0,18.0,124.0,77.0,93.0,36.6,13.7,202000.0,41.5,4.65,29.25,13.4,88.35,5250.0,3275.0,1182.5,26.75,33.9,489.6,18.015
75%,62.0,96.0,20.0,137.0,85.5,102.0,37.1,14.5,241500.0,44.1,4.98,30.1,14.0,91.1,6877.5,4956.75,1663.9,42.925,64.5,699.4,45.25
max,95.0,130.0,30.0,196.0,112.0,137.0,39.8,16.7,428000.0,71.9,14.5,33.9,26.0,100.9,15500.0,11733.0,4351.0,259.2,691.0,1688.4,201.3
skewness,0.339,0.339,2.081,-0.085,0.014,-0.161,0.524,-1.074,0.872,0.412,7.107,-2.648,1.454,-2.694,0.418,1.178,1.208,2.839,4.113,1.249,1.709
kurtosis,-0.656,0.593,7.489,5.371,0.78,2.6,0.776,2.932,1.888,9.015,71.577,13.802,30.985,12.613,0.968,1.624,2.794,11.001,20.046,2.189,1.919


Categoria: HOSPSANTAJULIA_MANAUS


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,247.0,207.0,189.0,208.0,208.0,190.0,190.0,196.0,191.0,196.0,194.0,196.0,196.0,196.0,191.0,196.0,196.0,9.0,186.0,182.0,166.0
mean,56.628,82.208,21.011,123.529,76.548,79.266,36.271,12.166,285958.115,36.953,4.3,27.683,13.64,86.223,4843.382,7726.531,1459.184,0.444,1.57,652.747,8.988
std,17.069,15.32,3.638,14.274,10.964,32.431,0.632,1.773,113679.088,4.805,0.602,3.941,1.448,5.691,4678.693,1003.231,879.141,0.527,1.18,229.504,9.279
min,18.0,36.0,14.0,72.0,10.0,1.033,34.0,4.92,59000.0,23.7,2.62,10.6,11.7,63.72,68.0,4100.0,100.0,0.0,1.0,200.0,0.5
25%,43.0,72.5,19.0,118.75,70.0,80.0,36.0,10.975,207500.0,33.575,3.905,27.345,12.8,82.748,796.0,7200.0,800.0,0.0,1.0,500.0,1.9
50%,56.0,80.0,20.0,120.0,80.0,90.0,36.1,12.4,264000.0,37.35,4.365,28.36,13.2,86.155,3960.0,7900.0,1200.0,0.0,1.0,600.0,5.9
75%,69.0,90.0,22.0,130.0,80.0,96.925,36.6,13.4,336500.0,40.4,4.738,29.503,14.0,88.845,7670.0,8400.0,1900.0,1.0,2.0,800.0,14.95
max,95.0,144.0,42.0,190.0,114.0,137.0,39.0,15.8,594000.0,49.4,5.58,39.69,24.3,119.47,20230.0,9400.0,5000.0,1.0,12.0,1500.0,45.0
skewness,0.106,0.638,2.292,1.005,-0.834,-1.515,0.433,-0.633,0.756,-0.225,-0.46,-2.534,3.104,0.868,1.12,-1.054,1.349,0.271,4.78,0.71,1.401
kurtosis,-0.71,1.481,8.755,3.532,6.367,0.985,3.105,0.868,0.146,-0.18,-0.041,8.313,16.207,6.423,0.74,1.311,2.35,-2.571,34.795,0.59,1.471


Categoria: HOSPSAOFRANCISCO_MOGIGUACU


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,124.0,124.0,124.0,124.0,124.0,16.0,124.0,120.0,121.0,0.0,120.0,104.0,104.0,104.0,119.0,0.0,117.0,51.0,53.0,119.0,120.0
mean,53.306,83.621,20.121,130.25,81.887,92.438,36.573,13.048,233661.157,,4.512,30.946,15.0,83.501,152.015,,211.828,17.745,114.679,428.738,64.457
std,15.528,12.407,2.987,15.644,10.894,12.479,0.74,2.686,83439.954,,1.097,7.968,9.911,15.579,1210.643,,335.631,18.136,165.559,287.281,73.352
min,26.0,51.0,13.0,97.0,54.0,65.0,34.9,1.5,80000.0,,2.1,13.3,11.3,8.15,2.52,,1.0,0.0,0.0,1.01,0.4
25%,41.0,77.0,18.0,120.0,80.0,87.25,36.1,12.075,170000.0,,4.022,29.2,12.9,83.6,6.155,,1.352,10.0,20.0,196.0,7.8
50%,50.5,82.0,19.5,130.0,80.0,93.5,36.4,13.55,224000.0,,4.565,30.3,13.35,87.3,8.27,,2.05,10.0,80.0,400.0,37.25
75%,64.0,91.0,21.0,140.0,90.0,99.5,37.0,14.7,286000.0,,4.9,31.5,14.0,90.575,10.705,,506.0,20.0,130.0,669.0,99.15
max,89.0,120.0,36.0,188.0,120.0,117.0,39.2,17.8,484000.0,,13.9,88.1,86.1,98.5,12460.0,,1380.0,110.0,905.0,1358.0,310.8
skewness,0.522,0.352,2.957,0.7,0.02,-0.537,0.867,-1.775,0.637,,5.063,5.781,6.946,-3.141,9.46,,1.293,3.679,3.041,0.366,1.453
kurtosis,-0.381,0.969,12.346,1.473,0.685,1.112,1.231,4.722,-0.046,,44.964,39.133,47.935,10.358,93.688,,0.442,15.37,10.665,-0.479,1.605


Categoria: PELOTAS


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,91.0,89.0,86.0,86.0,86.0,3.0,82.0,90.0,90.0,90.0,0.0,90.0,89.0,90.0,89.0,0.0,88.0,90.0,0.0,89.0,88.0
mean,60.659,83.719,21.849,128.151,76.14,105.667,36.522,12.189,218055.556,36.577,,29.139,13.689,87.72,1728.921,,1411.364,0.022,,448.315,128.68
std,16.055,17.075,4.697,21.211,14.817,10.214,1.248,2.479,99171.439,7.081,,2.301,2.127,6.07,4656.026,,1486.292,0.148,,228.191,98.943
min,20.0,52.0,12.0,85.0,44.0,94.0,33.6,6.6,15000.0,19.3,,22.1,11.0,73.8,172.0,,100.0,0.0,,100.0,1.3
25%,52.0,71.0,18.0,112.0,66.5,102.0,35.7,10.2,145750.0,31.425,,28.1,12.4,84.775,684.0,,600.0,0.0,,300.0,45.975
50%,65.0,82.0,21.0,124.0,76.5,110.0,36.3,12.6,196500.0,37.7,,29.35,13.2,86.55,930.0,,1050.0,0.0,,400.0,109.25
75%,71.5,94.0,24.0,140.0,80.75,111.5,37.5,13.8,282750.0,40.375,,30.3,14.0,90.9,1372.0,,1725.0,0.0,,600.0,189.3
max,89.0,156.0,38.0,200.0,120.0,113.0,39.2,17.5,504000.0,57.4,,35.5,23.9,110.6,40704.0,,9400.0,1.0,,1100.0,377.9
skewness,-0.43,0.895,1.046,0.739,0.516,-1.565,0.2,-0.398,0.672,-0.051,,-0.431,2.247,0.677,7.475,,3.605,6.593,,0.568,0.72
kurtosis,-0.318,2.398,1.791,1.028,0.261,,-0.256,-0.462,0.311,0.545,,1.972,6.577,1.702,59.3,,16.833,42.408,,-0.023,-0.439


Categoria: PERNAMBUCO_FULL


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,112.0,112.0,103.0,111.0,111.0,0.0,80.0,112.0,101.0,109.0,101.0,70.0,73.0,74.0,106.0,53.0,46.0,60.0,60.0,56.0,63.0
mean,56.991,90.893,22.738,126.55,78.117,,37.33,11.108,250437.505,34.748,3.851,29.625,14.171,89.381,10483.123,5785.094,1140.915,19.05,190.085,577.771,19.981
std,17.359,15.166,5.336,21.009,15.622,,0.982,2.582,118927.823,7.331,0.631,2.908,2.212,9.466,8190.874,3931.886,1076.505,37.993,634.709,395.951,35.548
min,19.0,60.0,14.0,82.0,45.0,,35.2,3.5,188.0,10.6,2.51,21.0,11.2,63.3,73.0,0.9,1.0,0.0,0.0,0.3,0.6
25%,44.75,80.0,18.0,114.0,66.0,,36.5,9.425,181000.0,30.5,3.47,28.1,12.7,85.6,6285.0,2960.0,662.5,0.0,7.5,261.5,6.6
50%,58.0,90.0,22.0,120.0,80.0,,37.2,11.2,246000.0,35.5,3.7,29.8,13.2,89.55,9170.0,5343.0,810.0,2.0,60.0,567.0,12.0
75%,69.25,100.25,27.0,140.0,90.0,,38.2,13.025,296000.0,39.0,4.2,31.6,15.0,94.0,13550.0,8200.0,1100.5,12.25,126.0,812.5,20.45
max,95.0,125.0,39.0,180.0,120.0,,39.2,15.9,686000.0,48.0,5.72,37.8,22.3,118.7,77580.0,14540.0,5800.0,139.0,4450.0,1580.0,261.0
skewness,0.115,0.174,0.751,0.364,0.219,,0.069,-0.509,0.995,-0.713,0.572,-0.882,1.308,-0.314,5.308,0.51,2.954,2.219,5.919,0.37,5.525
kurtosis,-0.544,-0.715,0.101,0.007,-0.277,,-1.048,0.185,2.008,0.856,0.256,2.343,1.5,2.151,42.621,-0.435,10.177,3.693,37.142,-0.494,35.27


Categoria: RIO_DE_JANEIRO_UNIMED


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,449.0,433.0,407.0,429.0,429.0,429.0,421.0,420.0,420.0,97.0,420.0,420.0,420.0,420.0,420.0,420.0,390.0,390.0,390.0,390.0,417.0
mean,66.165,83.748,20.263,130.753,77.159,95.531,36.337,12.742,209400.0,30.931,4.256,30.019,12.781,91.614,7633.095,5497.595,1245.797,0.009,0.082,438.205,14.41
std,17.043,15.184,4.616,20.556,13.039,13.925,1.173,2.031,90804.792,7.616,0.677,2.132,1.753,5.503,4715.66,3794.408,716.446,0.016,0.122,272.843,11.918
min,22.0,49.0,5.0,54.0,31.0,37.0,21.4,5.0,16000.0,15.1,1.68,20.9,9.6,67.7,600.0,30.0,284.0,0.0,0.0,30.0,0.5
25%,55.0,73.0,18.0,118.0,69.0,87.0,36.0,11.7,149000.0,25.6,3.908,28.875,11.4,88.4,4700.0,3077.5,795.25,0.0,0.0,260.0,5.0
50%,68.0,82.0,20.0,130.0,77.0,95.0,36.4,12.9,198000.0,30.3,4.305,30.25,12.4,91.65,6400.0,4415.0,1052.5,0.0,0.06,380.0,12.9
75%,80.0,92.0,21.0,142.0,86.0,104.0,36.7,14.1,258500.0,37.2,4.652,31.2,13.825,94.7,8725.0,6705.0,1513.25,0.01,0.11,570.0,20.5
max,97.0,168.0,63.0,195.0,127.0,144.0,38.8,18.1,823000.0,51.3,6.03,36.2,19.9,108.2,36100.0,25120.0,6318.0,0.18,1.58,2520.0,66.9
skewness,-0.476,1.082,3.253,0.157,0.109,0.026,-7.33,-0.593,1.398,0.206,-0.581,-0.551,0.863,-0.041,2.273,1.917,2.398,5.008,6.3,2.228,1.113
kurtosis,-0.486,3.525,22.43,0.615,0.995,1.084,86.101,0.957,5.503,-0.511,1.217,1.412,0.734,1.262,7.192,4.743,9.861,42.87,65.036,10.611,1.263


Categoria: HRL_2021_02


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,539.0,532.0,504.0,534.0,533.0,532.0,526.0,517.0,516.0,516.0,511.0,515.0,512.0,516.0,514.0,11.0,513.0,521.0,519.0,510.0,425.0
mean,57.985,88.267,24.387,128.204,76.82,93.257,36.853,13.169,241.072,40.231,4.392,30.041,13.649,91.633,11.0,2241.511,157.518,3.191,5.013,210.1,138.229
std,15.781,21.06,8.155,24.791,15.809,17.208,14.795,2.392,95.068,7.704,0.722,4.435,1.934,12.251,8.607,4074.253,398.443,18.62,28.498,411.057,108.432
min,19.0,20.0,8.0,13.8,10.0,30.0,16.5,3.3,12.0,4.2,0.93,10.0,9.7,9.07,1.4,0.0,0.0,0.0,0.0,0.0,0.4
25%,46.0,75.0,19.0,113.25,68.0,83.0,35.8,11.9,176.75,36.075,4.015,27.99,12.7,85.975,6.9,3.96,4.7,0.0,0.0,4.0,57.0
50%,57.0,87.0,22.0,128.0,78.0,94.0,36.2,13.4,231.5,41.0,4.47,30.16,13.6,91.8,9.8,77.0,12.5,0.0,0.0,9.85,108.2
75%,68.0,99.0,29.0,140.75,85.0,103.0,36.7,14.7,290.0,44.825,4.83,32.27,14.4,97.268,12.895,2105.75,25.1,0.0,0.0,275.0,193.3
max,99.0,233.0,95.0,242.0,131.0,166.0,372.0,30.6,660.0,77.0,7.51,86.4,33.0,201.4,134.0,10120.0,4698.0,240.0,372.0,4600.0,641.6
skewness,0.136,0.911,1.85,0.231,-0.277,0.203,22.241,0.029,0.824,-0.275,-0.586,3.279,3.818,1.511,8.325,1.614,5.209,9.008,8.222,4.536,1.195
kurtosis,-0.534,4.336,10.636,2.306,2.028,1.781,504.422,6.22,1.465,4.164,2.187,52.312,35.393,31.356,103.493,1.051,42.919,96.464,80.189,33.99,1.646


Categoria: HMV_POA_02


Unnamed: 0,age,heart_rate,resp_rate,sys_press,dias_press,mean_press,temp,hemoglobin,platelets,hematocrit,red_cells_count,hcm,rdw,mcv,leukocytes,neutrophil,lymphocytes,basophils,eosinophils,monocytes,crp
count,456.0,54.0,54.0,54.0,54.0,380.0,53.0,456.0,456.0,456.0,0.0,53.0,53.0,53.0,456.0,455.0,456.0,455.0,455.0,455.0,420.0
mean,60.114,86.778,19.944,131.185,74.278,13.286,36.17,13.398,196635.789,39.262,,30.498,13.674,88.423,7471.897,5045.565,1191.513,16.637,35.187,607.622,6.658
std,17.929,20.344,3.708,20.042,11.931,33.088,5.137,1.934,83172.262,5.059,,1.533,1.008,4.104,13590.283,3300.365,811.233,18.331,67.657,364.06,7.069
min,19.0,50.0,0.0,100.0,49.0,0.0,0.0,6.7,120.0,20.5,,24.0,12.3,73.9,4.86,320.0,0.0,0.0,0.0,20.0,0.03
25%,45.75,73.75,19.0,116.25,64.25,0.0,36.1,12.2,143000.0,36.3,,29.8,12.8,86.4,4477.5,2740.0,777.5,10.0,0.0,360.0,1.538
50%,60.0,84.5,20.0,125.5,74.0,0.0,36.8,13.6,182500.0,39.8,,30.6,13.5,88.8,5955.0,4120.0,1035.0,10.0,10.0,550.0,3.885
75%,74.0,96.0,20.0,139.75,83.0,0.0,37.4,14.7,231000.0,42.7,,31.3,14.3,91.4,8162.5,6305.0,1410.0,20.0,40.0,750.0,9.598
max,105.0,186.0,30.0,193.0,101.0,127.0,39.3,18.1,769000.0,52.3,,33.7,17.5,97.8,287000.0,26120.0,8140.0,160.0,770.0,3320.0,36.51
skewness,-0.031,2.278,-2.301,0.913,0.221,2.166,-6.96,-0.569,1.809,-0.548,,-1.308,1.087,-0.597,19.229,1.953,3.765,3.952,4.695,1.966,1.655
kurtosis,-0.813,9.727,16.451,0.664,-0.646,2.93,49.885,0.377,7.042,0.607,,5.511,2.509,1.929,395.577,5.78,21.716,21.611,35.603,8.426,2.668


In [None]:
#!pip install openpyxl

## Loading the metadata to compare Local Training vs. the other strategy

In [18]:
# Load the hospital-level metadata table; the CSV is semicolon-separated.
metadados = pd.read_csv(
    "metadados_csv.csv",
    sep=";",
)

In [19]:
# (rows, columns) of the metadata table.
metadados.shape

(14, 24)

In [20]:
# Column dtypes and non-null counts of the metadata table.
metadados.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 24 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ORDEM                14 non-null     int64  
 1   Hospital_name        14 non-null     object 
 2   Hospital             14 non-null     object 
 3   Region_paper         14 non-null     int64  
 4   n                    14 non-null     int64  
 5   Best_for_all         14 non-null     int64  
 6   prop_ICU             14 non-null     float64
 7   shannon_ICU          14 non-null     float64
 8   prop_MV              14 non-null     float64
 9   shannon_MV           14 non-null     float64
 10  prop_death           14 non-null     float64
 11  shannon_death        14 non-null     float64
 12  skewness_mean        14 non-null     float64
 13  std_skewness         14 non-null     float64
 14  kurtosis_mean        14 non-null     float64
 15  std_kurtosis         14 non-null     float

In [21]:
def _metadata_columns(outcome):
    """Return the metadata columns compared for one outcome ('ICU', 'MV' or 'death').

    The three lists differ only in the outcome-specific `prop_*` and `shannon_*` columns.
    """
    return [
        'n',
        'prop_missing',
        'prop_' + outcome,
        'shannon_' + outcome,
        'cv_mean',
        'skewness_mean',
        'std_skewness',
        'kurtosis_mean',
        'std_kurtosis',
    ]


to_metadata_analisys_ICU = _metadata_columns('ICU')
to_metadata_analisys_MV = _metadata_columns('MV')
to_metadata_analisys_death = _metadata_columns('death')

### Best for ICU - Local Training vs Other

In [22]:
# Median and interquartile range (Q3 - Q1) of each metadata column per Best_ICU group,
# transposed so that metadata columns become rows and groups become columns.
_grouped_ICU = metadados.groupby('Best_ICU')[to_metadata_analisys_ICU]
median_by_strategy_ICU = _grouped_ICU.median().T
_q3_ICU = _grouped_ICU.quantile(0.75)
_q1_ICU = _grouped_ICU.quantile(0.25)
IQR_by_strategy_ICU = (_q3_ICU - _q1_ICU).T

In [23]:
# Render every median as a two-decimal string for a compact text table.
# Formatting goes column by column through Series.map because DataFrame.applymap
# is deprecated since pandas 2.1; this form works on older and newer pandas alike.
median_by_strategy_ICU_formatted = median_by_strategy_ICU.apply(
    lambda column: column.map("{:.2f}".format)
)
print(median_by_strategy_ICU_formatted)

Best_ICU           0       1
n              73.00  449.00
prop_missing    0.08    0.22
prop_ICU        0.51    0.30
shannon_ICU     0.68    0.61
cv_mean         0.55    0.57
skewness_mean   1.16    1.67
std_skewness    1.70    3.10
kurtosis_mean   4.67   15.04
std_kurtosis   13.24   24.69


In [24]:
# Render every IQR as a two-decimal string for a compact text table.
# Formatting goes column by column through Series.map because DataFrame.applymap
# is deprecated since pandas 2.1; this form works on older and newer pandas alike.
IQR_by_strategy_ICU_formatted = IQR_by_strategy_ICU.apply(
    lambda column: column.map("{:.2f}".format)
)
print(IQR_by_strategy_ICU_formatted)

Best_ICU           0       1
n              22.00  556.00
prop_missing    0.10    0.13
prop_ICU        0.15    0.41
shannon_ICU     0.05    0.12
cv_mean         0.18    0.33
skewness_mean   0.29    0.92
std_skewness    0.60    2.00
kurtosis_mean   2.72   19.73
std_kurtosis    6.63   67.29


In [25]:
# Split the hospitals into the two groups compared by the Mann-Whitney test:
# Best_ICU == 1 goes to the "local training" frame, Best_ICU == 0 to the "other" frame.
metadados_localtraining_ICU = metadados[metadados["Best_ICU"] == 1]
metadados_other_ICU = metadados[metadados["Best_ICU"] == 0]

In [27]:
# (rows, columns) of the Best_ICU == 1 group.
metadados_localtraining_ICU.shape

(11, 24)

In [28]:
# (rows, columns) of the Best_ICU == 0 group.
metadados_other_ICU.shape

(3, 24)

In [29]:
# Wilcoxon rank-sum test for two independent samples (Mann-Whitney U).
from scipy.stats import mannwhitneyu


def _mannwhitney_ICU(column):
    """Compare `column` between the local-training and the other ICU group.

    Returns the (statistic, p-value) pair produced by `mannwhitneyu`.
    """
    return mannwhitneyu(metadados_localtraining_ICU[column], metadados_other_ICU[column])


stat_n_ICU, p_n_ICU = _mannwhitney_ICU('n')
stat_prop_missing_ICU, p_prop_missing_ICU = _mannwhitney_ICU('prop_missing')
stat_prop_ICU, p_prop_ICU = _mannwhitney_ICU('prop_ICU')
stat_shannon_ICU, p_shannon_ICU = _mannwhitney_ICU('shannon_ICU')
stat_cv_mean_ICU, p_cv_mean_ICU = _mannwhitney_ICU('cv_mean')
stat_skewness_mean_ICU, p_skewness_mean_ICU = _mannwhitney_ICU('skewness_mean')
stat_std_skewness_ICU, p_std_skewness_ICU = _mannwhitney_ICU('std_skewness')
stat_kurtosis_mean_ICU, p_kurtosis_mean_ICU = _mannwhitney_ICU('kurtosis_mean')
stat_std_kurtosis_ICU, p_std_kurtosis_ICU = _mannwhitney_ICU('std_kurtosis')

In [30]:
# Print the ICU Mann-Whitney p-values rounded to three decimals, one per line,
# in this order: n, prop_missing, prop_ICU, shannon_ICU, cv_mean,
# skewness_mean, std_skewness, kurtosis_mean, std_kurtosis.
_p_values_ICU = (
    p_n_ICU,
    p_prop_missing_ICU,
    p_prop_ICU,
    p_shannon_ICU,
    p_cv_mean_ICU,
    p_skewness_mean_ICU,
    p_std_skewness_ICU,
    p_kurtosis_mean_ICU,
    p_std_kurtosis_ICU,
)
for _p_value in _p_values_ICU:
    print(np.round(_p_value, 3))

0.022
0.126
0.368
0.17
0.885
0.368
0.06
0.06
0.088


### Best for MV - Local Training vs Other

In [31]:
# Summarise the MV metadata columns per value of `Best_MV`: the median and the
# interquartile range (75th minus 25th percentile) of every analysed column.
# Both tables are transposed so the metadata columns become rows.
_grouped_by_best_MV = metadados.groupby('Best_MV')[to_metadata_analisys_MV]

median_by_strategy_MV = _grouped_by_best_MV.median().T

_upper_quartile_MV = _grouped_by_best_MV.quantile(0.75)
_lower_quartile_MV = _grouped_by_best_MV.quantile(0.25)
IQR_by_strategy_MV = (_upper_quartile_MV - _lower_quartile_MV).T

In [32]:
# Render every MV median as a string with two decimals and print the table.
# `DataFrame.map` is the element-wise replacement for `DataFrame.applymap`,
# which has been deprecated since pandas 2.1 (the deprecation warning is hidden
# by the blanket `warnings.filterwarnings("ignore")` at the top of the notebook).
median_by_strategy_MV_formatted = median_by_strategy_MV.map("{:.2f}".format)
print(median_by_strategy_MV_formatted)

Best_MV            0       1
n              82.00  348.00
prop_missing    0.08    0.22
prop_MV         0.50    0.19
shannon_MV      0.68    0.48
cv_mean         0.66    0.54
skewness_mean   1.22    1.46
std_skewness    2.03    2.86
kurtosis_mean   6.26   13.86
std_kurtosis   15.11   24.04


In [33]:
# Render every MV interquartile range as a string with two decimals and print
# the table. `DataFrame.map` is the element-wise replacement for
# `DataFrame.applymap`, which has been deprecated since pandas 2.1 (the
# deprecation warning is hidden by the blanket `warnings.filterwarnings("ignore")`
# at the top of the notebook).
IQR_by_strategy_MV_formatted = IQR_by_strategy_MV.map("{:.2f}".format)
print(IQR_by_strategy_MV_formatted)

Best_MV             0       1
n              136.50  617.75
prop_missing     0.06    0.12
prop_MV          0.15    0.18
shannon_MV       0.01    0.20
cv_mean          0.34    0.21
skewness_mean    0.77    0.91
std_skewness     1.52    1.46
kurtosis_mean   13.91   19.04
std_kurtosis    29.24   60.15


In [34]:
# Split `metadados` into the two samples compared by the Mann-Whitney test:
# rows with Best_MV == 1 (the "local training" group) and rows with
# Best_MV == 0 (the "other" group).
_is_localtraining_MV = metadados["Best_MV"] == 1
_is_other_MV = metadados["Best_MV"] == 0

metadados_localtraining_MV = metadados[_is_localtraining_MV]
metadados_other_MV = metadados[_is_other_MV]

In [35]:
# (rows, columns) of the subset where Best_MV == 1; the row count is the size
# of the first sample passed to the Mann-Whitney tests.
metadados_localtraining_MV.shape

(10, 24)

In [36]:
# (rows, columns) of the subset where Best_MV == 0; the row count is the size
# of the second sample passed to the Mann-Whitney tests.
metadados_other_MV.shape

(4, 24)

In [37]:
# Wilcoxon rank-sum test for two independent samples (Mann-Whitney U)
from scipy.stats import mannwhitneyu


def _mannwhitney_MV(column):
    """Compare `column` between the Best_MV == 1 and Best_MV == 0 subsets.

    Runs `scipy.stats.mannwhitneyu` with its default settings, passing the
    local-training sample first and the "other" sample second, and returns the
    result, which unpacks as `(statistic, pvalue)`.
    """
    return mannwhitneyu(metadados_localtraining_MV[column], metadados_other_MV[column])


# One test per analysed metadata column; each `p_*_MV` is printed in a later cell.
stat_n_MV, p_n_MV = _mannwhitney_MV('n')
stat_prop_missing_MV, p_prop_missing_MV = _mannwhitney_MV('prop_missing')
stat_prop_MV, p_prop_MV = _mannwhitney_MV('prop_MV')
stat_shannon_MV, p_shannon_MV = _mannwhitney_MV('shannon_MV')
stat_cv_mean_MV, p_cv_mean_MV = _mannwhitney_MV('cv_mean')
stat_skewness_mean_MV, p_skewness_mean_MV = _mannwhitney_MV('skewness_mean')
stat_std_skewness_MV, p_std_skewness_MV = _mannwhitney_MV('std_skewness')
stat_kurtosis_mean_MV, p_kurtosis_mean_MV = _mannwhitney_MV('kurtosis_mean')
stat_std_kurtosis_MV, p_std_kurtosis_MV = _mannwhitney_MV('std_kurtosis')

In [39]:
# Print the MV Mann-Whitney p-values rounded to three decimals, one per line,
# in this order: n, prop_missing, prop_MV, shannon_MV, cv_mean,
# skewness_mean, std_skewness, kurtosis_mean, std_kurtosis.
_p_values_MV = (
    p_n_MV,
    p_prop_missing_MV,
    p_prop_MV,
    p_shannon_MV,
    p_cv_mean_MV,
    p_skewness_mean_MV,
    p_std_skewness_MV,
    p_kurtosis_mean_MV,
    p_std_kurtosis_MV,
)
for _p_value in _p_values_MV:
    print(np.round(_p_value, 3))

0.142
0.054
0.024
0.106
0.539
0.945
0.374
0.374
0.454


### Best for Death - Local Training vs Other

In [40]:
# Summarise the death metadata columns per value of `Best_Death`: the median
# and the interquartile range (75th minus 25th percentile) of every analysed
# column. Both tables are transposed so the metadata columns become rows.
_grouped_by_best_death = metadados.groupby('Best_Death')[to_metadata_analisys_death]

median_by_strategy_death = _grouped_by_best_death.median().T

_upper_quartile_death = _grouped_by_best_death.quantile(0.75)
_lower_quartile_death = _grouped_by_best_death.quantile(0.25)
IQR_by_strategy_death = (_upper_quartile_death - _lower_quartile_death).T

In [41]:
# Render every death median as a string with two decimals and print the table.
# `DataFrame.map` is the element-wise replacement for `DataFrame.applymap`,
# which has been deprecated since pandas 2.1 (the deprecation warning is hidden
# by the blanket `warnings.filterwarnings("ignore")` at the top of the notebook).
median_by_strategy_death_formatted = median_by_strategy_death.map("{:.2f}".format)
print(median_by_strategy_death_formatted)

Best_Death         0       1
n              91.00  449.00
prop_missing    0.09    0.22
prop_death      0.34    0.14
shannon_death   0.64    0.40
cv_mean         0.55    0.57
skewness_mean   1.16    1.67
std_skewness    1.70    3.10
kurtosis_mean   4.67   15.04
std_kurtosis   13.24   24.69


In [42]:
# Render every death interquartile range as a string with two decimals and
# print the table. `DataFrame.map` is the element-wise replacement for
# `DataFrame.applymap`, which has been deprecated since pandas 2.1 (the
# deprecation warning is hidden by the blanket `warnings.filterwarnings("ignore")`
# at the top of the notebook).
IQR_by_strategy_death_formatted = IQR_by_strategy_death.map("{:.2f}".format)
print(IQR_by_strategy_death_formatted)

Best_Death          0       1
n              174.00  721.00
prop_missing     0.14    0.13
prop_death       0.09    0.13
shannon_death    0.06    0.21
cv_mean          0.37    0.21
skewness_mean    0.59    0.77
std_skewness     0.77    1.25
kurtosis_mean    3.39   17.40
std_kurtosis     8.84   63.48


In [43]:
# Split `metadados` into the two samples compared by the Mann-Whitney test:
# rows with Best_Death == 1 (the "local training" group) and rows with
# Best_Death == 0 (the "other" group).
_is_localtraining_death = metadados["Best_Death"] == 1
_is_other_death = metadados["Best_Death"] == 0

metadados_localtraining_death = metadados[_is_localtraining_death]
metadados_other_death = metadados[_is_other_death]

In [44]:
# (rows, columns) of the subset where Best_Death == 1; the row count is the
# size of the first sample passed to the Mann-Whitney tests.
metadados_localtraining_death.shape

(9, 24)

In [45]:
# (rows, columns) of the subset where Best_Death == 0; the row count is the
# size of the second sample passed to the Mann-Whitney tests.
metadados_other_death.shape

(5, 24)

In [46]:
# Wilcoxon rank-sum test for two independent samples (Mann-Whitney U)
from scipy.stats import mannwhitneyu


def _mannwhitney_death(column):
    """Compare `column` between the Best_Death == 1 and Best_Death == 0 subsets.

    Runs `scipy.stats.mannwhitneyu` with its default settings, passing the
    local-training sample first and the "other" sample second, and returns the
    result, which unpacks as `(statistic, pvalue)`.
    """
    return mannwhitneyu(metadados_localtraining_death[column], metadados_other_death[column])


# One test per analysed metadata column; each `p_*_death` is printed in a later cell.
stat_n_death, p_n_death = _mannwhitney_death('n')
stat_prop_missing_death, p_prop_missing_death = _mannwhitney_death('prop_missing')
stat_prop_death, p_prop_death = _mannwhitney_death('prop_death')
stat_shannon_death, p_shannon_death = _mannwhitney_death('shannon_death')
stat_cv_mean_death, p_cv_mean_death = _mannwhitney_death('cv_mean')
stat_skewness_mean_death, p_skewness_mean_death = _mannwhitney_death('skewness_mean')
stat_std_skewness_death, p_std_skewness_death = _mannwhitney_death('std_skewness')
stat_kurtosis_mean_death, p_kurtosis_mean_death = _mannwhitney_death('kurtosis_mean')
stat_std_kurtosis_death, p_std_kurtosis_death = _mannwhitney_death('std_kurtosis')

In [47]:
# Print the death Mann-Whitney p-values rounded to three decimals, one per
# line, in this order: n, prop_missing, prop_death, shannon_death, cv_mean,
# skewness_mean, std_skewness, kurtosis_mean, std_kurtosis.
_p_values_death = (
    p_n_death,
    p_prop_missing_death,
    p_prop_death,
    p_shannon_death,
    p_cv_mean_death,
    p_skewness_mean_death,
    p_std_skewness_death,
    p_kurtosis_mean_death,
    p_std_kurtosis_death,
)
for _p_value in _p_values_death:
    print(np.round(_p_value, 3))

0.19
0.19
0.06
0.06
0.898
0.364
0.112
0.112
0.147
