In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import re
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', None)
%matplotlib inline


In [2]:
# import files
df_inp = pd.read_pickle("./hospiedata/hospice_input_table.pkl")
df_out = pd.read_pickle("./hospiedata/hospice_outcome.pkl")

In [3]:
# split and clean columns
tmp = df_inp["Author_Insert"].str.split(" - ", expand=True)
df_inp["Author_Name"] = tmp[0]
df_inp["Author_ID"] = tmp[1]

In [4]:
# drop columns
df_inp = df_inp.drop(["Author_Insert"], axis=1)

In [5]:
col_names = [' N/A', 'Unnamed: 0_x', 'IDANAMNESI_INF', 'IDEHR', 'Author_Name', 'Author_ID',
       'TimeStamp_Insert', 'IDAccess', 'EHRType', 'PatientID',
       'IDDigitalSignDocument', 'Non_Rilevabile_x', 'Note_Non_Rilevabile_x',
       'nutritional', 'cognitivo_percettivo', 'sonno_riposo', 'perc_salute',
       'elimination', 'Perception', 'rapporti_fam', 'persone_vicine',
       'Caregiver', 'Religion', 'Note_Elim_urinaria', 'Unnamed: 0_y',
       'IDANAMNESI_MED', 'Non_Rilevabile_y', 'Note_Non_Rilevabile_y',
       'opt_consapevolezza', 'diagnosis', 'Unnamed: 0_x.1',
       'IDANAMNESI_SOCIALE', 'Patient', 'FamigliaAltro', 'Paziente_T',
       'FamigliaAltro_T', 'Non_Rilevabile_x.1', 'Note_Non_Rilevabile_x.1',
       'opt_Problemi', 'Note_I', 'ds_note_timori', 'chk_contr_sintomi',
       'chk_competenza', 'opt_paziente_a', 'opt_famiglia_a', 'opt_adeguatezza',
       'ds_note_ad', 'opt_paziente_solo', 'ds_note_con',
       'opt_presente_assente', 'Presenza_minori', 'Caregiver_principale',
       'opt_capacita', 'ds_familiari_coinv', 'opt_necessario', 'opt_presente',
       'opt_risorse_ec', 'opt_paziente_psi', 'opt_Ins_vol', 'ds_note_prio',
       'opt_paziente_ad', 'opt_caregiver_ad', 'opt_esenzione',
       'opt_inv_civile', 'invalidita_perc', 'ds_codice_es', 'Needs',
       'Domestic partnership', 'Fragility', 'opt_disponibilita_f',
       'opt_indennita_acc', 'opt_legge', 'opt_famiglia_psi',
       'opt_disponibilit_paz', 'Unnamed: 0_x.2', 'IDDIAGNOSI_CROSSOU',
       'Non_Rilevabile_x.2', 'ds_ICD', 'dt_Data_diagnosi', 'Unnamed: 0_y.1',
       'IDDIAGNOSI_ICD', 'Non_Rilevabile_y.1', 'Note_Non_Rilevabile_y.1',
       'I_ICD', 'II_ICD', 'III_ICD', 'IV_ICD', 'V_ICD', 'VI_ICD', 'I_Anno',
       'II_Anno', 'III_Anno', 'IV_Anno', 'They go', 'I_Mese']
df_inp = df_inp[col_names]
df_inp.head()

Unnamed: 0,N/A,Unnamed: 0_x,IDANAMNESI_INF,IDEHR,Author_Name,Author_ID,TimeStamp_Insert,IDAccess,EHRType,PatientID,...,III_ICD,IV_ICD,V_ICD,VI_ICD,I_Anno,II_Anno,III_Anno,IV_Anno,They go,I_Mese
0,0,7.0,3.0,1030,ROJAS H. NOELIA I.,RJSNSB81E57Z611R,2015-01-15 13:45:43.950,,EHR,96,...,,,,,,,,,,
1,1,8.0,4.0,1034,MANENTI ELENA,MNNLNE78E63A794M,2015-01-15 14:46:30.080,,EHR,100,...,,,,,,,,,,
2,2,13.0,6.0,1037,DE OLD ROSELLA,DVCRLL64B64F205S,2015-01-19 12:31:16.060,,EHR,103,...,,,,,,,,,,
3,3,14.0,7.0,1260,ESPINOZA C. JULIO C.,SPNJCS71M24Z611L,2015-01-19 12:32:21.630,41.0,AMB,101,...,,,,,,,,,,
4,4,15.0,8.0,44,ESPINOZA C. JULIO C.,SPNJCS71M24Z611L,2015-01-19 13:13:54.630,45.0,AMB,15,...,,,,,,,,,,


In [6]:
df_inp['nutritional'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16898
1,nausea # 0,377
2,"# 0 nausea, emesis # 1",197
3,NR,138
4,emesis # 1,84


In [7]:
df_tmp = df_inp['nutritional']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.replace('NR', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['nutritional'] = df_upd
df_upd.head(15)

Unnamed: 0,nutritional
0,
1,
2,
3,
4,
5,
6,
7,0
8,0;1
9,


In [8]:
df_inp['nutritional'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,17038
1,0,377
2,0;1,199
3,1,84
4,2,68


In [9]:
df_inp['cognitivo_percettivo'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,15318
1,uncontrolled pain # 0,906
2,ideo-motor slowdown # 4,271
3,NR,138
4,uncontrolled pain # 0; slowdown ideo-motor # 4,116


In [10]:
# use regex to capture relevant information
df_tmp = df_inp['cognitivo_percettivo']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.replace('NR', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['cognitivo_percettivo'] = df_upd
df_upd.head(20)

Unnamed: 0,cognitivo_percettivo
0,0
1,
2,0
3,0
4,
5,1;2
6,
7,0;3;4
8,
9,0


In [11]:
df_inp['cognitivo_percettivo'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,15457
1,0,906
2,4,271
3,0;4,116
4,6,110


In [12]:
df_inp['sonno_riposo'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15616
1,daytime sleepiness # 1,1204
2,Insomnia # 0,854
3,"Insomnia # 0, # 1 daytime sleepiness",170
4,NR,138
5,Insomnia # 0; \r daytime sleepiness # 1\r\n,3
6,,1


In [13]:
df_tmp = df_inp['sonno_riposo']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.replace('NR', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['sonno_riposo'] = df_upd
df_inp['sonno_riposo'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15755
1,1,1204
2,0,854
3,0;1,173


In [14]:
df_inp['perc_salute'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,14115
1,perdit√ † Performance # 0; increased dell'affa...,408
2,perdit√ † performance # 0; perdit√ † weight # ...,402
3,perdit√ † Performance # 0,368
4,perdit√ † Performance # 0; perdit√ weight † # ...,297


In [15]:
df_tmp = df_inp['perc_salute']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.replace('NR', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['perc_salute'] = df_upd
df_inp['perc_salute'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,14253
1,0;2;3,411
2,0;1;2;3,403
3,0,368
4,0;1;2;3;4,297


In [16]:
df_inp['elimination'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16559
1,constipated bowel # 1,1086
2,alvo accelerated # 0,188
3,NR,138
4,alvo accelerated # 0; constipated bowel # 1,13
5,,2


In [17]:
df_tmp = df_inp['elimination']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.replace('NR', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['elimination'] = df_upd
df_inp['elimination'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16699
1,1,1086
2,0,188
3,0;1,13


In [18]:
df_inp['Perception'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,14905
1,concern for health # 0,704
2,Apathy # 1,273
3,NR,138
4,concern for health # 0; apathy # 1,85


In [19]:
df_tmp = df_inp['Perception']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.replace('NR', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['Perception'] = df_upd
df_inp['Perception'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,15043
1,0,704
2,1,273
3,0;1,85
4,0;4,71


In [20]:
df_inp['rapporti_fam'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14714
1,is # 0,2964
2,NR,138
3,no # 1,113
4,na # 2,57


In [21]:
df_tmp = df_inp['rapporti_fam']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.replace('NR', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['rapporti_fam'] = df_upd
df_inp['rapporti_fam'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14852
1,0.0,2964
2,1.0,113
3,2.0,57


In [22]:
df_inp['persone_vicine'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16853
1,NR,138
2,son,79
3,sons,46
4,caregiver,39


In [23]:
df_tmp = df_inp['persone_vicine']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.replace('NR', np.nan)

df_upd = df_tmp.to_frame()
df_inp['persone_vicine'] = df_upd
df_inp['persone_vicine'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16991
1,son,79
2,sons,46
3,caregiver,39
4,daughter,39


In [24]:
df_inp['Caregiver'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,14613
1,wife,437
2,daughter,239
3,NR,138
4,husband,134


In [25]:
df_tmp = df_inp['Caregiver']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.replace('NR', np.nan)
df_tmp = df_tmp.replace('(daughter)')
# df_upd = df_tmp.to_frame()
# df_upd.head()
df_tmp.value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()
# df_inp['Caregiver'] = df_upd
# df_inp['Caregiver'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14751
1,wife,437
2,daughter,239
3,husband,134
4,son,101


In [26]:
df_inp['Religion'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15403
1,Catholic # 0,2391
2,NR,138
3,agnostic # 1,25
4,jew # 3,15
5,Muslim # 4,9
6,Buddhist # 5,3
7,Protestant # 2,2


In [27]:
df_tmp = df_inp['Religion']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['Religion'] = df_upd
df_inp['Religion'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15403
1,0.0,2391
2,,138
3,1.0,25
4,3.0,15
5,4.0,9
6,5.0,3
7,2.0,2


In [28]:
df_inp['Note_Elim_urinaria'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16813
1,NR,125
2,diuresis active,102
3,continent,44
4,Regular,30


In [29]:
df_inp['Unnamed: 0_y'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,13176
1,2364.0,1
2,2368.0,1
3,12169.0,1
4,10563.0,1


In [30]:
df_inp['IDANAMNESI_MED'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,13176
1,3646.0,1
2,3648.0,1
3,3649.0,1
4,3650.0,1


In [31]:
df_inp['Non_Rilevabile_y'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,13176
1,0.0,4809
2,,1


In [32]:
df_inp['Non_Rilevabile_y'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,13176
1,0.0,4809
2,,1


In [33]:
df_inp['Note_Non_Rilevabile_y'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,13176
1,NR,4809
2,,1


In [34]:
df_inp['opt_consapevolezza'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16036
1,Awareness of diagnosis but no prognosis # 2,636
2,Total absence of diagnosis and prognosis aware...,392
3,Awareness of diagnosis and prognosis overestim...,324
4,Full Awareness of diagnosis and prognosis # 5,305
5,There are elements of evaluation # 7,190
6,Awareness uncertain despite the topic emerged ...,81
7,Awareness of terminalit√ † but not the diagnos...,21
8,,1


In [35]:
df_tmp = df_inp['opt_consapevolezza']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_consapevolezza'] = df_upd
df_inp['opt_consapevolezza'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16037
1,2.0,636
2,1.0,392
3,3.0,324
4,5.0,305
5,7.0,190
6,6.0,81
7,4.0,21


In [36]:
df_inp['diagnosis'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,13214
1,1993 asportazione nodulo mammario sx positivo ...,7
2,2007 diagnosi di adenocarcinoma della prostata...,7
3,Adenocarcinoma of the sigma (03/2015) mts lymp...,6
4,"paziente affetta da secondarismi polmonari, li...",5


In [37]:
df_inp['Unnamed: 0_x'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,13909
1,2567.0,1
2,1953.0,1
3,1235.0,1
4,2221.0,1


In [38]:
df_inp['IDANAMNESI_SOCIALE'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,14693
1,1363.0,1
2,1379.0,1
3,1375.0,1
4,1374.0,1


In [39]:
df_inp['Patient'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14721
1,Si#1,2467
2,No#0,695
3,Parziale#2,102
4,,1


In [40]:
df_tmp = df_inp['Patient']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['Patient'] = df_upd
df_inp['Patient'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14722
1,1.0,2467
2,0.0,695
3,2.0,102


In [41]:
df_inp['FamigliaAltro'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14769
1,Si#1,3182
2,No#0,24
3,Parziale#2,10
4,,1


In [43]:
df_tmp = df_inp['FamigliaAltro']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['FamigliaAltro'] = df_upd
df_inp['FamigliaAltro'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14770
1,1.0,3182
2,0.0,24
3,2.0,10


In [44]:
df_inp['Paziente_T'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14723
1,No#0,2050
2,Parziale#2,620
3,Si#1,592
4,,1


In [45]:
df_tmp = df_inp['Paziente_T']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['Paziente_T'] = df_upd
df_inp['Paziente_T'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14724
1,0.0,2050
2,2.0,620
3,1.0,592


In [46]:
df_inp['FamigliaAltro_T'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14774
1,Si#1,2820
2,Parziale#2,234
3,No#0,157
4,,1


In [47]:
df_tmp = df_inp['FamigliaAltro_T']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['FamigliaAltro_T'] = df_upd
df_inp['FamigliaAltro_T'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14775
1,1.0,2820
2,2.0,234
3,0.0,157


In [48]:
df_inp['Non_Rilevabile_x'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,13909
1,0.0,3939
2,1.0,138


In [49]:
df_inp['Note_Non_Rilevabile_x'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,13909
1,NR,3939
2,pcs stupor,2
3,pcs confused and very slow.,1
4,soporosa patient. Care giver her son Carlo Alb...,1


In [50]:
df_inp['opt_Problemi'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14775
1,No#0,2260
2,Si#1,950
3,,1


In [51]:
df_tmp = df_inp['opt_Problemi']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_Problemi'] = df_upd
df_inp['opt_Problemi'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14776
1,0.0,2260
2,1.0,950


In [52]:
df_inp['Note_I'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,15876
1,"La paziente conosce la sua situazione clinica,...",4
2,La pz sa della diagnosi ma non della prognosi ...,3
3,"Paziente e moglie consapevoli della diagnosi, ...",3
4,La pz non sa nulla della sua situazione clinic...,3


In [53]:
df_inp['ds_note_timori'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16636
1,La famiglia vorrebbe il controllo dei sintomi,6
2,La famiglia vorrebbe il controllo dei sintomi.,5
3,La figlia vorrebbe il controllo dei sintomi in...,4
4,La famiglia vorrebbe il controllo dei sintomi:...,3


In [54]:
df_inp['chk_contr_sintomi'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15157
1,controllo sintomi#0,2828
2,,1


In [55]:
df_tmp = df_inp['chk_contr_sintomi']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['chk_contr_sintomi'] = df_upd
df_inp['chk_contr_sintomi'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15158
1,0.0,2828


In [56]:
df_inp['chk_competenza'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16901
1,competenza/capacit√† assistenziale caregiver#0,1084
2,,1


In [57]:
df_tmp = df_inp['chk_competenza']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['chk_competenza'] = df_upd
df_inp['chk_competenza'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16902
1,0.0,1084


In [58]:
df_inp['opt_paziente_a'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14723
1,Indefinite#2,2060
2,Congruenti#1,977
3,Sovradimensionate#0,225
4,,1


In [59]:
df_tmp = df_inp['opt_paziente_a']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_paziente_a'] = df_upd
df_inp['opt_paziente_a'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14724
1,2.0,2060
2,1.0,977
3,0.0,225


In [60]:
df_inp['opt_famiglia_a'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14774
1,Congruenti#1,2964
2,Indefinite#2,170
3,Sovradimensionate#0,77
4,,1


In [61]:
df_tmp = df_inp['opt_famiglia_a']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_famiglia_a'] = df_upd
df_inp['opt_famiglia_a'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14775
1,1.0,2964
2,2.0,170
3,0.0,77


In [62]:
df_inp['opt_adeguatezza'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14736
1,Si#1,1571
2,Da valutare#2,1269
3,No#0,409
4,,1


In [63]:
df_tmp = df_inp['opt_adeguatezza']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_adeguatezza'] = df_upd
df_inp['opt_adeguatezza'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14737
1,1.0,1571
2,2.0,1269
3,0.0,409


In [64]:
df_inp['ds_note_ad'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16640
1,Attualmente vive con la figlia che si √® tempo...,5
2,"l'unico care-giver √® il figlio, in difficolt√...",4
3,La figlia ed il genero si stanno organizzando ...,3
4,Da valutare la tenuta emotiva della famiglia.,3


In [65]:
df_inp['opt_paziente_solo'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14733
1,No#0,2667
2,Si#1,585
3,,1


In [66]:
df_tmp = df_inp['opt_paziente_solo']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_paziente_solo'] = df_upd
df_inp['opt_paziente_solo'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14734
1,0.0,2667
2,1.0,585


In [67]:
df_inp['ds_note_con'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,15507
1,Il pz vive con la coniuge,12
2,La pz vive con il coniuge,10
3,Il pz. vive con la moglie,5
4,"Vive con la figlia Cezarina di 24 aa, che si √...",4


In [68]:
df_inp['opt_presente_assente'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14782
1,Presente#1,2970
2,Assente#0,228
3,Ostacolante#2,4
4,,1
5,"inoltre ha chiarimento affermato """"vorrei che ...",1


In [69]:
df_tmp = df_inp['opt_presente_assente']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_presente_assente'] = df_upd
df_inp['opt_presente_assente'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14783
1,1.0,2970
2,0.0,228
3,2.0,4
4,,1


In [70]:
df_inp['Presenza_minori'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15284
1,No#0,2515
2,Si#1,185
3,,1
4,Presente#1,1


In [71]:
df_tmp = df_inp['Presenza_minori']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['Presenza_minori'] = df_upd
df_inp['Presenza_minori'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15285
1,0.0,2515
2,1.0,186


In [72]:
df_inp['Caregiver_principale'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,14934
1,spouse,274
2,wife,250
3,caregiver,207
4,daughter,128


In [73]:
df_inp['opt_capacita'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15404
1,Incrementabile#1,1396
2,Adeguato#0,737
3,Non incrementabile#2,447
4,,1
5,Caregiver,1


In [74]:
df_tmp = df_inp['opt_capacita']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_capacita'] = df_upd
df_inp['opt_capacita'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15405
1,1.0,1396
2,0.0,737
3,2.0,447
4,,1


In [75]:
df_inp['ds_familiari_coinv'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16023
1,sons,179
2,daughter,104
3,son,66
4,daughters,56


In [76]:
df_inp['opt_necessario'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15435
1,Si#1,1432
2,No#0,1117
3,,1
4,Figlia e genero,1


In [77]:
df_tmp = df_inp['opt_necessario']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_necessario'] = df_upd
df_inp['opt_necessario'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15436
1,1.0,1432
2,0.0,1117
3,,1


In [78]:
df_inp['opt_presente'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15803
1,No#0,1499
2,Si#1,683
3,,1


In [79]:
df_tmp = df_inp['opt_presente']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_presente'] = df_upd
df_inp['opt_presente'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15804
1,0.0,1499
2,1.0,683


In [80]:
df_inp['opt_risorse_ec'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14872
1,Adeguate#1,2402
2,Da valutare#2,608
3,Non adeguate#0,102
4,,1
5,La badante √® necessaria ma √® gi√† presente s...,1


In [81]:
df_tmp = df_inp['opt_risorse_ec']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_risorse_ec'] = df_upd
df_inp['opt_risorse_ec'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,14873
1,1.0,2402
2,2.0,608
3,0.0,102
4,24.0,1


In [82]:
df_inp['opt_paziente_psi'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15592
1,No#0,2273
2,Si#1,120
3,,1


In [83]:
df_tmp = df_inp['opt_paziente_psi']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_paziente_psi'] = df_upd
df_inp['opt_paziente_psi'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15593
1,0.0,2273
2,1.0,120


In [84]:
df_inp['opt_Ins_vol'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15610
1,No#0,2094
2,Si#1,280
3,Non √® necessario un aiuto da un punto di vist...,1
4,,1


In [85]:
df_tmp = df_inp['opt_Ins_vol']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_Ins_vol'] = df_upd
df_inp['opt_Ins_vol'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15611
1,0.0,2094
2,1.0,280
3,,1


In [86]:
df_inp['ds_note_prio'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16638
1,Il bisogno espresso √® a livello clinico assis...,107
2,Il bisogno espresso √® a livello clinico assis...,9
3,Il bisogno espresso √® a livello clinico/assis...,5
4,Visto l'attuale grado di autonomia e il progra...,4


In [87]:
df_inp['opt_paziente_ad'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15825
1,Totale#2,1315
2,Parziale#1,674
3,Problematica#0,170
4,,1
5,Il bisogno espresso √® a livello clinico/assis...,1


In [88]:
df_tmp = df_inp['opt_paziente_ad']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_paziente_ad'] = df_upd
df_inp['opt_paziente_ad'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15826
1,2.0,1315
2,1.0,674
3,0.0,170
4,,1


In [89]:
df_inp['opt_caregiver_ad'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15847
1,Totale#2,2066
2,Parziale#1,54
3,Problematica#0,18
4,,1


In [90]:
df_tmp = df_inp['opt_caregiver_ad']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_caregiver_ad'] = df_upd
df_inp['opt_caregiver_ad'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15848
1,2.0,2066
2,1.0,54
3,0.0,18


In [91]:
df_inp['opt_esenzione'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16014
1,Si#1,1672
2,No#0,299
3,,1


In [92]:
df_tmp = df_inp['opt_esenzione']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_esenzione'] = df_upd
df_inp['opt_esenzione'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16015
1,1.0,1672
2,0.0,299


In [93]:
df_inp['opt_inv_civile'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15194
1,Si#1,1359
2,No#0,953
3,in fase di accertamento#2,479
4,,1


In [94]:
df_tmp = df_inp['opt_inv_civile']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_inv_civile'] = df_upd
df_inp['opt_inv_civile'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15195
1,1.0,1359
2,0.0,953
3,2.0,479


In [95]:
df_inp['invalidita_perc'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16714
1,100.0,1205
2,70.0,23
3,80.0,15
4,75.0,8


In [96]:
df_inp['ds_codice_es'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16337
1,48,1251
2,IC14,105
3,E01,92
4,IC13,53


In [97]:
df_inp['Needs'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,14767
1,Clinici#0,2392
2,Clinici#0;Sociali#1,560
3,Clinici#0;Psicologici#2,171
4,Clinici#0;Sociali#1;Psicologici#2,51


In [98]:
df_tmp = df_inp['Needs']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['Needs'] = df_upd
df_inp['Needs'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,14768
1,0,2392
2,0;1,560
3,0;2,171
4,0;1;2,51


In [99]:
df_inp['Domestic partnership'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,15271
1,Coniuge/Convivente#0,1321
2,Badante#1,358
3,Coniuge/Convivente#0;Figli#2,268
4,Figli#2,257


In [100]:
df_tmp = df_inp['Domestic partnership']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['Domestic partnership'] = df_upd
df_inp['Domestic partnership'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,15272
1,0,1322
2,1,358
3,0;2,268
4,2,257


In [101]:
df_inp['Fragility'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15433
1,sovraccarico assistenziale#4,1094
2,nessuna#0,1003
3,fisica#1,172
4,psico-fisica#3,165
5,psichica#2,117
6,,1
7,Badante#1,1


In [102]:
df_tmp = df_inp['Fragility']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['Fragility'] = df_upd
df_inp['Fragility'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15434
1,4.0,1094
2,0.0,1003
3,1.0,173
4,3.0,165
5,2.0,117


In [103]:
df_inp['opt_disponibilita_f'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16053
1,Si#1,775
2,Da verificare#2,689
3,No#0,468
4,,1


In [104]:
df_tmp = df_inp['opt_disponibilita_f']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_disponibilita_f'] = df_upd
df_inp['opt_disponibilita_f'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16054
1,1.0,775
2,2.0,689
3,0.0,468


In [105]:
df_inp['opt_indennita_acc'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15863
1,No#0,1212
2,Si#1,606
3,in fase di accertamento#2,304
4,,1


In [106]:
df_tmp = df_inp['opt_indennita_acc']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_indennita_acc'] = df_upd
df_inp['opt_indennita_acc'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15864
1,0.0,1212
2,1.0,606
3,2.0,304


In [107]:
df_inp['opt_legge'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16302
1,No#0,1248
2,Si#1,281
3,in fase di accertamento#2,154
4,,1


In [108]:
df_tmp = df_inp['opt_legge']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_legge'] = df_upd
df_inp['opt_legge'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16303
1,0.0,1248
2,1.0,281
3,2.0,154


In [109]:
df_inp['opt_famiglia_psi'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15581
1,No#0,2035
2,S√¨#1,369
3,,1


In [110]:
df_tmp = df_inp['opt_famiglia_psi']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_famiglia_psi'] = df_upd
df_inp['opt_famiglia_psi'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,15582
1,0.0,2035
2,1.0,369


In [111]:
df_inp['opt_disponibilit_paz'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16051
1,Da verificare#2,824
2,No#0,580
3,Si#1,530
4,,1


In [112]:
df_tmp = df_inp['opt_disponibilit_paz']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_upd = df_tmp.to_frame()
df_inp['opt_disponibilit_paz'] = df_upd
df_inp['opt_disponibilit_paz'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16052
1,2.0,824
2,0.0,580
3,1.0,530


In [113]:
df_inp['Unnamed: 0_x'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,13909
1,2567.0,1
2,1953.0,1
3,1235.0,1
4,2221.0,1


In [114]:
df_inp['IDDIAGNOSI_CROSSOU'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,15761
1,739.0,1
2,745.0,1
3,744.0,1
4,743.0,1


In [115]:
df_inp['Non_Rilevabile_x'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,13909
1,0.0,3939
2,1.0,138


In [116]:
df_inp['ds_ICD'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,15833
1,V667 - Trattamento per cure palliative#2402=0,177
2,V667 Trattamento per cure palliative,128
3,V604 - Mancanza di un familiare capace di pres...,106
4,1970 - Tumori maligni secondari del polmone#21...,72


In [117]:
df_inp['dt_Data_diagnosi'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,17390
1,2014-12-31 00:00:00,18
2,2015-01-15 00:00:00,15
3,2014-11-07 00:00:00,14
4,2014-09-01 00:00:00,11


In [118]:
df_inp['Unnamed: 0_y'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,13176
1,2364.0,1
2,2368.0,1
3,12169.0,1
4,10563.0,1


In [119]:
df_inp['IDDIAGNOSI_ICD'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,14439
1,1177.0,1
2,1188.0,1
3,1187.0,1
4,1186.0,1


In [120]:
df_inp['Non_Rilevabile_y'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,13176
1,0.0,4809
2,,1


In [121]:
df_inp['Note_Non_Rilevabile_y'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,13176
1,NR,4809
2,,1


In [122]:
df_inp['I_ICD'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,14469
1,1629 - Tumori maligni del bronco o polmone - n...,187
2,185 - Tumori maligni della prostata#2112,113
3,1550 - Tumori maligni primitivi del fegato#2048,109
4,1570 - Tumori maligni della testa del pancreas...,90


In [123]:
df_inp['II_ICD'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,14482
1,1970 - Tumori maligni secondari del polmone#2148,305
2,1977 - Tumori maligni secondari del fegato - s...,277
3,1985 - Tumori maligni secondari di osso e mido...,239
4,1962 - Tumori maligni secondari e non specific...,166


In [124]:
df_inp['III_ICD'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,14813
1,1970 - Tumori maligni secondari del polmone#2148,218
2,1985 - Tumori maligni secondari di osso e mido...,151
3,1977 - Tumori maligni secondari del fegato - s...,145
4,4011 - Ipertensione essenziale benigna#2333,143


In [125]:
df_inp['IV_ICD'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,15344
1,4011 - Ipertensione essenziale benigna#2333,121
2,1970 - Tumori maligni secondari del polmone#2148,97
3,V667 - Cure palliative#2402,92
4,1985 - Tumori maligni secondari di osso e mido...,83


In [126]:
df_inp['V_ICD'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16107
1,V667 - Cure palliative#2402,128
2,V667 - Cure palliative#2402=0,76
3,4011 - Ipertensione essenziale benigna#2333,66
4,V667 - Trattamento per cure palliative#2402=0,61


In [127]:
df_inp['VI_ICD'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16808
1,V667 - Cure palliative#2402=0,71
2,V667 - Cure palliative#2402,66
3,4011 - Ipertensione essenziale benigna#2333,61
4,V604 - Mancanza di un familiare capace di pres...,40


In [128]:
df_inp['I_Anno'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,15352
1,2015#55,688
2,2016#56,653
3,2014#54,330
4,2013#53,197


In [129]:
df_tmp = df_inp['I_Anno']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_tmp = df_tmp.str.extract('([0-9]+)', expand=True)
df_upd = df_tmp
df_inp['I_Anno'] = df_upd
df_inp['I_Anno'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,15353
1,2015.0,688
2,2016.0,653
3,2014.0,330
4,2013.0,197


In [130]:
df_inp['II_Anno'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16285
1,2016#56,542
2,2015#55,446
3,2014#54,176
4,2017#57,155


In [131]:
df_tmp = df_inp['II_Anno']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_tmp = df_tmp.str.extract('([0-9]+)', expand=True)
df_upd = df_tmp
df_inp['II_Anno'] = df_upd
df_inp['II_Anno'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16286
1,2016.0,542
2,2015.0,446
3,2014.0,176
4,2017.0,155


In [132]:
df_inp['III_Anno'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16702
1,2016#56,471
2,2015#55,312
3,2017#57,146
4,2014#54,102


In [133]:
df_tmp = df_inp['III_Anno']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_tmp = df_tmp.str.extract('([0-9]+)', expand=True)
df_upd = df_tmp
df_inp['III_Anno'] = df_upd
df_inp['III_Anno'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,16703
1,2016.0,471
2,2015.0,312
3,2017.0,146
4,2014.0,102


In [134]:
df_inp['IV_Anno'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,17050
1,2016#56,354
2,2015#55,198
3,2017#57,114
4,2014#54,67


In [135]:
df_tmp = df_inp['IV_Anno']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_tmp = df_tmp.str.extract('([0-9]+)', expand=True)
df_upd = df_tmp
df_inp['IV_Anno'] = df_upd
df_inp['IV_Anno'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,17051
1,2016.0,354
2,2015.0,198
3,2017.0,114
4,2014.0,67


In [136]:
df_inp['They go'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,17371
1,2016#56,249
2,2015#55,111
3,2017#57,84
4,2014#54,37


In [137]:
df_tmp = df_inp['They go']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_tmp = df_tmp.str.extract('([0-9]+)', expand=True)
df_upd = df_tmp
df_inp['They go'] = df_upd
df_inp['They go'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,17372
1,2016.0,249
2,2015.0,111
3,2017.0,84
4,2014.0,37


In [138]:
df_inp['I_Mese'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16453
1,02#02,168
2,01#01,140
3,11#11,139
4,03#03,138
5,10#10,135
6,05#05,130
7,12#12,127
8,06#06,118
9,07#07,115


In [139]:
df_tmp = df_inp['I_Mese']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_tmp = df_tmp.str.extract('([0-9]+)', expand=True)
df_upd = df_tmp
df_inp['I_Mese'] = df_upd
df_inp['I_Mese'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,16454
1,2.0,168
2,1.0,140
3,11.0,139
4,3.0,138
5,10.0,135
6,5.0,130
7,12.0,127
8,6.0,118
9,7.0,115


In [140]:
df_inp.to_pickle("./hospiedata/input_clean.pkl")

In [141]:
df_out.columns

Index(['IDDecesso', 'IDEHR', 'Author_Insert', 'TimeStamp_Insert', 'IDAccess',
       'EHRType', 'PatientID', 'IDDigitalSignDocument', 'Not detectable',
       'Note_Non_Rilevabile', 'Date', 'Now', 'Luogo_decesso', 'Note',
       'coupling', 'IDPAI_VIDAS', 'opt_problem', 'opt_obiettivo', 'ds_note',
       'opt_stato_problema', 'opt_interventi'],
      dtype='object')

In [142]:
tmp = df_out["Author_Insert"].str.split(" - ", expand=True)
df_out["Author_Name"] = tmp[0]
df_out["Author_ID"] = tmp[1]
df_out = df_out.drop(["Author_Insert"], axis=1)

In [143]:
col_names = ['IDDecesso', 'IDEHR', 'Author_Name', 'Author_ID', 'TimeStamp_Insert', 'IDAccess',
       'EHRType', 'PatientID', 'IDDigitalSignDocument', 'Not detectable',
       'Note_Non_Rilevabile', 'Date', 'Now', 'Luogo_decesso', 'Note',
       'coupling', 'IDPAI_VIDAS', 'opt_problem', 'opt_obiettivo', 'ds_note',
       'opt_stato_problema', 'opt_interventi']
df_out = df_out[col_names]
df_out.head()

Unnamed: 0,IDDecesso,IDEHR,Author_Name,Author_ID,TimeStamp_Insert,IDAccess,EHRType,PatientID,IDDigitalSignDocument,Not detectable,...,Now,Luogo_decesso,Note,coupling,IDPAI_VIDAS,opt_problem,opt_obiettivo,ds_note,opt_stato_problema,opt_interventi
0,1.0,4,Lonati Jade Carla,LNTGCR68S56F205H,2015-01-10 14:33:33,,EHR,59,1243,0.0,...,16:15:00,Vidas Hospice # 1,,,,,,,,
1,2.0,1022,Lonati Jade Carla,LNTGCR68S56F205H,2015-01-11 08:57:13,,EHR,90,1379,0.0,...,00:15:00,Vidas Hospice # 1,,,,,,,,
2,3.0,16,Visconti Giovanna,VSCGNN70T70F205E,2015-01-15 09:05:32,,EHR,71,3124,0.0,...,01:45:00,Vidas Hospice # 1,,,,,,,,
3,4.0,1025,Visconti Giovanna,VSCGNN70T70F205E,2015-01-15 09:08:39,,EHR,91,3125,0.0,...,06:44:00,Vidas Hospice # 1,,,,,,,,
4,5.0,6,Calamida Fabrizio,CLMFRZ71S19F205R,2015-01-20 15:23:36,,EHR,61,5897,0.0,...,14:50:00,Vidas Hospice # 1,,,,,,,,


In [144]:
df_out['Not detectable'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,105583
1,0.0,2253


In [145]:
df_out['Note_Non_Rilevabile'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,105583
1,NR,2253


In [146]:
df_out['Now'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,106243
1,06:00:00,34
2,07:30:00,18
3,03:00:00,18
4,18:30:00,18


In [147]:
df_out['Luogo_decesso'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,105604
1,# 2 Domicile,1242
2,Vidas Hospice # 1,944
3,Hospital # 3,29
4,PS # 6,14
5,Other hospice # 5,3


In [148]:
df_tmp = df_out['Luogo_decesso']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_tmp = df_tmp.str.extract('([0-9]+)', expand=True)
df_upd = df_tmp
df_out['Luogo_decesso'] = df_upd
df_out['Luogo_decesso'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,105604
1,2.0,1242
2,1.0,944
3,3.0,29
4,6.0,14
5,5.0,3


In [149]:
df_out['Note'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,107398
1,it is found in death.,11
2,Drawn to legal documents.,6
3,filling out death certificates,6
4,compiled certification,5


In [150]:
df_out['coupling'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,,107819
1,reached # 0,17


In [151]:
df_out['opt_problem'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,Impaired mobilit√ † / limitation of physical m...,28410
1,Alteration of comfort associated with chronic ...,19534
2,Alteration or risk of impairment of lung funct...,9999
3,Deficit in the care of s√® # 25,7226
4,Alteration hive # 33,7204


In [152]:
df_tmp = df_out['opt_problem']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_tmp = df_tmp.str.extract('([0-9]+)', expand=True)
df_upd = df_tmp
df_out['opt_problem'] = df_upd
df_out['opt_problem'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts')

Unnamed: 0,unique_values,counts
0,27.0,29897
1,29.0,20597
2,26.0,10632
3,25.0,9068
4,33.0,7430
5,,5556
6,34.0,5551
7,31.0,4269
8,30.0,3869
9,37.0,3372


In [153]:
df_out['opt_obiettivo'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,The patient riferir√ † ¬ † a satisfactory pain...,16727
1,The patient manterr√ † ¬ † ¬ † † mobilit√ the ...,11533
2,,5448
3,The patient does not presenter√ † ¬ † symptoms...,4862
4,The patient utilizzer√ † ¬ † aids designed to ...,4474


In [154]:
df_tmp = df_out['opt_obiettivo']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_tmp = df_tmp.str.extract('([0-9]+)', expand=True)
df_upd = df_tmp
df_out['opt_obiettivo'] = df_upd
df_out['opt_obiettivo'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,56.0,17674
1,49.0,11816
2,47.0,9876
3,,7029
4,45.0,6946


In [155]:
df_out['ds_note'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,81310
1,the patient does not presenter√ † ¬ † alterati...,1550
2,in monitoring,1318
3,to be monitored,788
4,Patient died.,271


In [156]:
df_out['opt_stato_problema'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,Open Problem # 1,88313
1,closed Problem # 2,11322
2,,7923
3,in monitoring,27
4,Pz resigned.,18


In [157]:
df_out['opt_interventi'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,,6643
1,PAI Implementation - Help the patient favoring...,4657
2,PAI Implementation - therapeutic upgrading # 441,3619
3,Implementation PAI - Evaluate given mobilit√ †...,2104
4,PAI Implementation - Evaluate given mobilit√ †...,1850


In [158]:
df_tmp = df_out['opt_interventi']
df_tmp = df_tmp.replace(' N/A', np.nan)
df_tmp = df_tmp.str.findall('([0-9]+)')
df_tmp = df_tmp.apply(lambda x: x if isinstance(x, float) else ';'.join(x))
df_tmp = df_tmp.str.extract('([0-9]+)', expand=True)
df_upd = df_tmp
df_out['opt_interventi'] = df_upd
df_out['opt_interventi'].value_counts(dropna=False).rename_axis('unique_values').reset_index(name='counts').head()

Unnamed: 0,unique_values,counts
0,441.0,10179
1,,6643
2,369.0,5624
3,368.0,5565
4,292.0,3841


In [159]:
df_out.head()

Unnamed: 0,IDDecesso,IDEHR,Author_Name,Author_ID,TimeStamp_Insert,IDAccess,EHRType,PatientID,IDDigitalSignDocument,Not detectable,...,Now,Luogo_decesso,Note,coupling,IDPAI_VIDAS,opt_problem,opt_obiettivo,ds_note,opt_stato_problema,opt_interventi
0,1.0,4,Lonati Jade Carla,LNTGCR68S56F205H,2015-01-10 14:33:33,,EHR,59,1243,0.0,...,16:15:00,1,,,,,,,,
1,2.0,1022,Lonati Jade Carla,LNTGCR68S56F205H,2015-01-11 08:57:13,,EHR,90,1379,0.0,...,00:15:00,1,,,,,,,,
2,3.0,16,Visconti Giovanna,VSCGNN70T70F205E,2015-01-15 09:05:32,,EHR,71,3124,0.0,...,01:45:00,1,,,,,,,,
3,4.0,1025,Visconti Giovanna,VSCGNN70T70F205E,2015-01-15 09:08:39,,EHR,91,3125,0.0,...,06:44:00,1,,,,,,,,
4,5.0,6,Calamida Fabrizio,CLMFRZ71S19F205R,2015-01-20 15:23:36,,EHR,61,5897,0.0,...,14:50:00,1,,,,,,,,


In [160]:
df_out.to_pickle("./hospiedata/outcome_clean.pkl")