In [1]:
import pandas as pd
import numpy as np
import os

## Lendo datasets

In [48]:
# Load the pipe-delimited, ISO-8859-1 encoded Fleury exam-results file, then
# keep an independent copy restricted to the eight columns listed here.
fleury = pd.read_csv(
    '../Dados/Fleury/Grupo_Fleury_Dataset_Covid19_Resultados_Exames.csv',
    sep='|',
    encoding="ISO-8859-1",
)
fleury = fleury.loc[:, [
    'ID_PACIENTE', 'DT_COLETA', 'DE_ORIGEM', 'DE_EXAME',
    'DE_ANALITO', 'DE_RESULTADO', 'CD_UNIDADE', 'DE_VALOR_REFERENCIA',
]].copy()

In [49]:
# Swap missing reference ranges and units for the literal text 'NaN'.
# Both columns are used further down as a string-match mask and as group-by
# keys, so they must not contain real missing values.
for column in ('DE_VALOR_REFERENCIA', 'CD_UNIDADE'):
    fleury[column] = fleury[column].replace(np.nan, 'NaN')

In [50]:
# Rows whose reference range contains at least one digit have their result
# replaced by the placeholder 'Number'.
# The pattern is a raw string so that `\d` reaches the regex engine untouched
# instead of being parsed as an (invalid) Python string escape sequence.
has_numeric_reference = fleury['DE_VALOR_REFERENCIA'].str.contains(r'\d+', regex=True)
fleury.loc[has_numeric_reference, 'DE_RESULTADO'] = 'Number'

In [51]:
# Preview the first five rows of the frame.
fleury.head(n=5)

Unnamed: 0,ID_PACIENTE,DT_COLETA,DE_ORIGEM,DE_EXAME,DE_ANALITO,DE_RESULTADO,CD_UNIDADE,DE_VALOR_REFERENCIA
0,1AD07C7A1E4D80B608DD7A650766CCF0,19/03/2020,LAB,"NOVO CORONAVÍRUS 2019 (SARS-CoV-2), DETECÇÃO P...","Covid 19, Detecção por PCR",NÃO DETECTADO,,Não detectado
1,1AD07C7A1E4D80B608DD7A650766CCF0,19/03/2020,LAB,"NOVO CORONAVÍRUS 2019 (SARS-CoV-2), DETECÇÃO P...","Covid 19, Material",raspado de nasofaringe,,
2,7B10786D64F27D7732503D915A38DE6F,30/12/2019,LAB,"HEMOGRAMA, sangue total",Hemoglobina,Number,g/dL,"13,5 a 17,5"
3,7B10786D64F27D7732503D915A38DE6F,30/12/2019,LAB,"HEMOGRAMA, sangue total",VCM,Number,fL,"81,0 a 95,0"
4,7B10786D64F27D7732503D915A38DE6F,30/12/2019,LAB,"HEMOGRAMA, sangue total",Leucócitos,Number,/mm3,3.500 a 10.500


In [52]:
def check_float(string):
    """Collapse any value that parses as a float into the label 'Number'.

    Parameters
    ----------
    string : object
        Value to classify; anything accepted by ``float()`` counts as numeric.

    Returns
    -------
    object
        The text ``'Number'`` when ``float(string)`` succeeds, otherwise the
        input value itself, unchanged.

    Notes
    -----
    ``float()`` also accepts NaN/infinity (both as floats and as spellings
    such as ``'nan'`` or ``'inf'``), so those inputs are labelled ``'Number'``
    as well.
    """
    try:
        float(string)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a number"; anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
        return string
    return 'Number'

In [54]:
# Turn commas into dots so that comma-decimal values can be parsed by float().
# NOTE(review): presumably results use a decimal comma like the reference
# ranges do; the replacement touches every comma, including those inside
# free-text results. TODO confirm this is acceptable.
# `regex=False` makes the literal replacement explicit, independent of the
# pandas version's default for `regex`.
fleury['DE_RESULTADO'] = fleury['DE_RESULTADO'].str.replace(',', '.', regex=False)
# Every value that parses as a float becomes the placeholder 'Number'.
fleury['DE_RESULTADO'] = fleury['DE_RESULTADO'].apply(check_float)

## Vários exames de COVID-19

In [55]:
# Distinct exam names that contain any of the three case-sensitive fragments
# 'COV', 'Corona' or 'CORONAV'.
mentions_cov = fleury['DE_EXAME'].str.contains('COV')
mentions_corona = fleury['DE_EXAME'].str.contains('Corona')
mentions_coronav = fleury['DE_EXAME'].str.contains('CORONAV')
different_covid_exams = fleury.loc[
    mentions_cov | mentions_corona | mentions_coronav, 'DE_EXAME'
].unique()

In [56]:
# Print each distinct COVID-related exam name on its own line.
for exam in different_covid_exams:
    print(exam)

NOVO CORONAVÍRUS 2019 (SARS-CoV-2), DETECÇÃO POR PCR
COVID19, ANTICORPOS IgG, soro
COVID19, ANTICORPOS IgM, soro
COVID19, ANTICORPOS IgA, soro
SARS-COV-2, ANTICORPOS IgG, soro


In [57]:
# Number of distinct COVID-related exam names found above.
len(different_covid_exams)

5

In [58]:
# Helper column of ones: summing it per group gives the number of rows in
# that group.
fleury['count'] = 1

# Keep only the COVID-related exams and count the rows for every distinct
# combination of exam, analyte, result, unit and reference value.
fleury_cov = (
    fleury[fleury['DE_EXAME'].isin(different_covid_exams)]
    .copy()
    .groupby(['DE_EXAME', 'DE_ANALITO', 'DE_RESULTADO', 'CD_UNIDADE', 'DE_VALOR_REFERENCIA'])
    .agg({'count': 'sum'})
)

In [59]:
# Write the grouped counts to an Excel workbook in the current working directory.
fleury_cov.to_excel(excel_writer='claudio_covid_exams_num.xlsx')

## Pivot Table 

In [23]:
# One row per (patient, collection date), one column per analyte; when a cell
# has several results the maximum is kept.
# NOTE(review): `full` is not defined in any cell visible here and this cell's
# execution count is out of order, so it presumably relies on state from an
# earlier session. TODO confirm where `full` is built before a Restart & Run All.
# The aggregation is given as the string 'max' rather than `np.max`: pandas
# already dispatches `np.max` to its own group-wise max and newer versions
# deprecate passing the NumPy callable.
pivot = full.pivot_table(
    index=['ID_PACIENTE', 'DT_COLETA'],
    columns='DE_ANALITO',
    values='DE_RESULTADO',
    aggfunc='max',
)
pivot

Unnamed: 0_level_0,DE_ANALITO,183,25 Hidroxi - vitamina D,250H - Vitamina D Total,25OH-Vitamina D Total,ACTH,ALT (TGP),AST (TGO),Adenovirus,AgHBe,AgHBs,...,pH (gasometria venosa),pH venoso,"pH, urina",pO2 (gasometria venosa),pO2 venoso,"Ácido ascorbico, plasma",Ácido fólico,Ácido fólico - Eletroquimioluminescência,Ácido Úrico,"Ácido Úrico, soro"
ID_PACIENTE,DT_COLETA,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
024120776B6797AEBDFC12C0301B1A8F,06/04/2020,,,,,,,,,,,...,,,,,,,,,,
027B6329D6AC1CD1B5DED1DCF87AD204,30/03/2020,,,,,,,,,,,...,,,,,,,,,,
02B32473EF55DFB7EB96C68FA353A606,01/06/2020 00:00:00,,,,,,,,,,,...,,,,,,,,,,
058CA32F0DC54A4D0A52242409D85A79,17/04/2020,,,,,,,,,,,...,,,,,,,,,,
06CBA5955242CD9F383B0E050FF5C21A,09/04/2020 00:00:00,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FC98B7A6A6D62C1EC4FB2A1EB1092C68,21/04/2020,,,,,,,,,,,...,,,,,,,,,,
FCCEB133D939375813ADDCE9931350B6,29/03/2020 00:00:00,,,,,,,,,,,...,,,,,,,,,,
FDC916AB5986E6A1D05C6D674D55AAEA,05/04/2020 00:00:00,,,,,,,,,,,...,,,,,,,,,,
FEA4AEEE2118866F62941EB73850FAD8,14/04/2020,,,,,,,,,,,...,,,,,,,,,,
