In [1]:
import pandas as pd
from utils import get_path, group_cols
from datatable import dt, f, by

# Descrição

Neste notebook iremos formar a tabela de deslocamento, com as contagens por origem e destino em cada escala (região, UF, região de saúde, município e CNES).

# Tabelas base

## CNES

In [2]:
# Maps column names of the CNES file to the names used in this notebook.
# The load cell renames with this dict and keeps only the mapped columns.
cols_cnes = {
  'CNES': 'identificador',
  'Nome Fantasia': 'nome',
}

In [3]:
path_cnes = get_path('LOCALIDADES', 'cnes.csv.gzip')
# Read the CNES code as text so identifiers keep their leading zeros.
cnes_pd = pd.read_csv(path_cnes, dtype={'CNES': str})
# Select with an explicit list of labels (not a dict_values view) and keep the
# pandas object under its own name, so `df_cnes` is only ever a datatable Frame.
cnes_pd = cnes_pd.rename(columns=cols_cnes)[list(cols_cnes.values())]
df_cnes = dt.Frame(cnes_pd)
df_cnes

Unnamed: 0_level_0,identificador,nome
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪
0,0002232,HOSPITAL E MATERNIDADE SANTA IZABEL
1,3151794,MATERNIDADE DE REFERENCIA ANA BRAGA
2,2323397,MATERNIDADE DONA EVANGELINA ROSA
3,2311682,HOSPITAL SANTA HELENA
4,0026794,HOSPITAL SOFIA FELDMAN
5,2020068,SES AP HOSPITAL DA MULHER
6,6627595,INSTITUTO DA MULHER DONA LINDU
7,2799278,HOSPITAL INACIA PINTO DOS SANTOS
8,0010480,HOSPITAL REGIONAL DE CEILANDIA
9,2077388,HOSPITAL AMPARO MATERNAL


# Gerando tabela

In [4]:
def get_count_df(path_proc, ref):
  """Sum `count` per (bienio, parto_normal, codigo, criticidade) for one endpoint.

  Parameters
  ----------
  path_proc
    Path of the file read with `dt.fread`. It must provide the columns
    `bienio`, `parto_normal`, `criticidade`, `count` and the column named
    by `ref`.
  ref
    Name of the file column exposed as `codigo` in the result (the notebook
    calls this with 'origem' and 'destino'). The same value is stored in the
    `referencial` column.

  Returns
  -------
  dt.Frame
    Columns `bienio`, `parto_normal`, `codigo`, `referencial`,
    `criticidade`, `count`, sorted by codigo, criticidade, parto_normal,
    bienio.
  """
  key_cols = ('bienio', 'parto_normal', 'codigo', 'criticidade')
  # The `...: None` entry is fread's dict form for dropping every column
  # that is not listed explicitly.
  cols_proc = {
    'bienio': 'bienio', 'parto_normal': 'parto_normal',
    ref: 'codigo', 'criticidade': 'criticidade',
    'count': 'count', ...: None}
  df_proc = dt.fread(path_proc, columns=cols_proc)
  # Group by explicit names instead of "every column but the last", so the
  # result does not depend on where `count` sits in the input file.
  df_counts = df_proc[:, dt.sum(f.count), by(key_cols)]
  df_counts = df_counts.sort(f.codigo, f.criticidade, f.parto_normal, f.bienio)
  df_counts['referencial'] = ref
  cols_order = ['bienio', 'parto_normal', 'codigo', 'referencial', 'criticidade', 'count']
  return df_counts[:, cols_order]

In [5]:
path_proc = get_path('GESTANTES', 'procs_bienio.csv.gzip')
refs = ['origem', 'destino']
# One aggregated frame per endpoint column, stacked vertically in `refs` order.
frames_by_ref = [get_count_df(path_proc, ref) for ref in refs]
df_count = dt.rbind(*frames_by_ref)
df_count

Unnamed: 0_level_0,bienio,parto_normal,codigo,referencial,criticidade,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,CO/DF/53001/530010,origem,0,20906
1,1,0,CO/DF/53001/530010,origem,0,19949
2,0,1,CO/DF/53001/530010,origem,0,40969
3,1,1,CO/DF/53001/530010,origem,0,33309
4,0,0,CO/DF/53001/530010,origem,3,4
5,1,0,CO/DF/53001/530010,origem,3,41
6,0,1,CO/DF/53001/530010,origem,3,12
7,1,1,CO/DF/53001/530010,origem,3,42
8,0,0,CO/DF/53001/530010,origem,4,10
9,1,0,CO/DF/53001/530010,origem,4,69


In [6]:
# NOTE(review): dead code. This is a commented-out draft that aggregated a
# per-criticidade count next to a total count. It references `df_proc` and
# `ref`, which exist only as locals/parameters of `get_count_df`, so it would
# not run as-is if uncommented. Consider deleting this cell.
# df_proc_total = df_proc[:, f[:].remove(f.criticidade)]
# df_proc_total = df_proc_total[:, dt.sum(f.count), by(df_proc_total.names[:-1])]
# df_proc_total.names = {'count': 'count_total'}
# dfs_critic = list()
# for i in range(5):
#   df_proc_critic = df_proc[f.criticidade == i, :]
#   df_proc_critic = df_proc_critic[:, dt.sum(f.count), by(df_proc_critic.names[:-1])]
#   df_proc_critic.names = {'count': 'count_critic'}
#   dfs_critic.append(df_proc_critic)
# df_critic = dt.rbind(*dfs_critic)
# print(df_critic.names)
# df_critic.key = df_critic.names[:3]
# df_counts = df_proc_total[:, :, dt.join(df_critic)]
# df_counts['count_critic'] = dt.Frame([
#   0 if c == None else c for c in df_counts['count_critic'].to_list()[0]])
# df_counts['referencial'] = ref
# cols_order = ['bienio', 'parto_normal', 'codigo', 'referencial', 'criticidade', 'count_critic', 'count_total']
# df_counts = df_counts[:, cols_order]

## Infos

In [7]:
def rank_locs(df_counts, path_locs, scale):
  """Aggregate the endpoint counts at one geographic scale.

  Rows with `referencial == 'destino'` are used for scale 'cnes'; every
  other scale uses the `referencial == 'origem'` rows.

  Parameters
  ----------
  df_counts
    Frame with columns `bienio`, `parto_normal`, `codigo`, `referencial`,
    `criticidade`, `count`. `codigo` is a '/'-separated path.
  path_locs
    Path of the localities file read with `dt.fread`. Its `codigo` column
    is the join key and `nome_<scale>` is exposed as `nome`. For scale
    'cnes' the lookup's `nome` column is set to ''.
  scale
    One of 'regiao', 'uf', 'regiao_saude', 'municipio', 'cnes'.

  Returns
  -------
  dt.Frame
    Columns `bienio`, `parto_normal`, `codigo`, `escala`, `nome`,
    `identificador`, `criticidade`, `count`. `codigo` is truncated to the
    scale's number of path segments, `identificador` is its last segment,
    and `count` is summed over rows that share all other columns. Sorted by
    codigo, criticidade, parto_normal, bienio.

  Raises
  ------
  KeyError
    If `scale` is not one of the supported values.
  """
  # Number of leading codigo segments kept at each scale. Looked up first so
  # an unsupported scale fails before any file is read.
  depth_by_scale = {'cnes': 5, 'municipio': 4, 'regiao_saude': 3, 'uf': 2, 'regiao': 1}
  depth = depth_by_scale[scale]

  ref = 'destino' if scale == 'cnes' else 'origem'
  cols_locs = {'codigo': 'codigo', f'nome_{scale}': 'nome', ...: None}
  df_locs = dt.fread(path_locs, columns=cols_locs)
  if scale == 'cnes':
    df_locs['nome'] = ''
  df_locs.key = 'codigo'

  df_count = df_counts[f.referencial == ref, f[:].remove(f.referencial)]
  df_count['escala'] = scale
  # Join on the full codigo before it is truncated below.
  df_count = df_count[:, :, dt.join(df_locs)]

  # Pull the codes out once; derive both the truncated path and its last segment.
  full_codes = df_count['codigo'].to_list()[0]
  scale_codes = ['/'.join(c.split('/')[:depth]) for c in full_codes]
  df_count['codigo'] = dt.Frame(scale_codes)
  df_count['identificador'] = dt.Frame([c.split('/')[-1] for c in scale_codes])

  # Explicit grouping keys (this is also the output column order) instead of
  # positional slices of `df_count.names`.
  key_cols = (
    'bienio', 'parto_normal', 'codigo', 'escala',
    'nome', 'identificador', 'criticidade')
  df_count = df_count[:, [dt.sum(f.count)], by(key_cols)]
  df_count = df_count.sort(f.codigo, f.criticidade, f.parto_normal, f.bienio)
  return df_count

In [8]:
path_locs = get_path('LOCALIDADES', 'localidades.csv.gzip')
scales = ['regiao', 'uf', 'regiao_saude', 'municipio', 'cnes']
# One aggregated frame per scale, stacked vertically in `scales` order.
frames_by_scale = [rank_locs(df_count, path_locs, scale) for scale in scales]
df_counts = dt.rbind(*frames_by_scale)
df_counts

Unnamed: 0_level_0,bienio,parto_normal,codigo,escala,nome,identificador,criticidade,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,CO,regiao,Centro-Oeste,CO,0,85068
1,1,0,CO,regiao,Centro-Oeste,CO,0,100995
2,0,1,CO,regiao,Centro-Oeste,CO,0,141604
3,1,1,CO,regiao,Centro-Oeste,CO,0,118221
4,0,0,CO,regiao,Centro-Oeste,CO,1,8372
5,1,0,CO,regiao,Centro-Oeste,CO,1,13732
6,0,1,CO,regiao,Centro-Oeste,CO,1,13243
7,1,1,CO,regiao,Centro-Oeste,CO,1,16144
8,0,0,CO,regiao,Centro-Oeste,CO,2,3465
9,1,0,CO,regiao,Centro-Oeste,CO,2,3834


### Nome dos CNES

In [9]:
# Key the CNES lookup on its code so it can be the right-hand side of a join.
df_cnes.key = 'identificador'
is_cnes = f.escala == 'cnes'
# Identifier of every CNES-scale row, joined with the establishment name.
df_id_cnes = df_counts[is_cnes, ['identificador']][:, :, dt.join(df_cnes)]
df_id_cnes

Unnamed: 0_level_0,identificador,nome
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪
0,0010464,HRAN
1,0010464,HRAN
2,0010464,HRAN
3,0010464,HRAN
4,0010464,HRAN
5,0010464,HRAN
6,0010464,HRAN
7,0010464,HRAN
8,0010464,HRAN
9,0010464,HRAN


In [10]:
# Overwrite identificador/nome on the CNES-scale rows with the joined values.
# `df_id_cnes` was built from this same row filter, so the rows line up.
cnes_rows = f.escala == 'cnes'
df_counts[cnes_rows, ['identificador', 'nome']] = df_id_cnes
df_counts

Unnamed: 0_level_0,bienio,parto_normal,codigo,escala,nome,identificador,criticidade,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,CO,regiao,Centro-Oeste,CO,0,85068
1,1,0,CO,regiao,Centro-Oeste,CO,0,100995
2,0,1,CO,regiao,Centro-Oeste,CO,0,141604
3,1,1,CO,regiao,Centro-Oeste,CO,0,118221
4,0,0,CO,regiao,Centro-Oeste,CO,1,8372
5,1,0,CO,regiao,Centro-Oeste,CO,1,13732
6,0,1,CO,regiao,Centro-Oeste,CO,1,13243
7,1,1,CO,regiao,Centro-Oeste,CO,1,16144
8,0,0,CO,regiao,Centro-Oeste,CO,2,3465
9,1,0,CO,regiao,Centro-Oeste,CO,2,3834


### Percentual

In [11]:
# Denominator for `percent`: the sum of `count` over all criticidade levels of
# one entity within a (bienio, parto_normal) cell. `escala` is part of the key
# so that equal identifiers at two different scales can never be pooled into
# one total.
total_keys = ('bienio', 'parto_normal', 'escala', 'identificador')
df_total = df_counts[:, dt.sum(f.count), by(total_keys)]
df_total.names = {'count': 'total'}
df_total.key = total_keys
df_counts = df_counts[:, :, dt.join(df_total)]
# dt.update adds the new column to df_counts in place.
df_counts[:, dt.update(percent = f.count / f.total * 100)]
df_counts

Unnamed: 0_level_0,bienio,parto_normal,codigo,escala,nome,identificador,criticidade,count,total,percent
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,CO,regiao,Centro-Oeste,CO,0,85068,100035,85.0382
1,1,0,CO,regiao,Centro-Oeste,CO,0,100995,124971,80.8147
2,0,1,CO,regiao,Centro-Oeste,CO,0,141604,167105,84.7395
3,1,1,CO,regiao,Centro-Oeste,CO,0,118221,151310,78.1317
4,0,0,CO,regiao,Centro-Oeste,CO,1,8372,100035,8.36907
5,1,0,CO,regiao,Centro-Oeste,CO,1,13732,124971,10.9881
6,0,1,CO,regiao,Centro-Oeste,CO,1,13243,167105,7.92496
7,1,1,CO,regiao,Centro-Oeste,CO,1,16144,151310,10.6695
8,0,0,CO,regiao,Centro-Oeste,CO,2,3465,100035,3.46379
9,1,0,CO,regiao,Centro-Oeste,CO,2,3834,124971,3.06791


### Ordem das colunas

In [12]:
# Column layout applied to `df_counts` before it is exported.
cols_order = [
  'parto_normal',
  'bienio',
  'escala',
  'codigo',
  'identificador',
  'nome',
  'criticidade',
  'count',
  'total',
  'percent',
]

In [13]:
# Reorder the columns; selecting by name list keeps all rows untouched.
df_counts = df_counts[:, cols_order]
df_counts

Unnamed: 0_level_0,parto_normal,bienio,escala,codigo,identificador,nome,criticidade,count,total,percent
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,regiao,CO,CO,Centro-Oeste,0,85068,100035,85.0382
1,0,1,regiao,CO,CO,Centro-Oeste,0,100995,124971,80.8147
2,1,0,regiao,CO,CO,Centro-Oeste,0,141604,167105,84.7395
3,1,1,regiao,CO,CO,Centro-Oeste,0,118221,151310,78.1317
4,0,0,regiao,CO,CO,Centro-Oeste,1,8372,100035,8.36907
5,0,1,regiao,CO,CO,Centro-Oeste,1,13732,124971,10.9881
6,1,0,regiao,CO,CO,Centro-Oeste,1,13243,167105,7.92496
7,1,1,regiao,CO,CO,Centro-Oeste,1,16144,151310,10.6695
8,0,0,regiao,CO,CO,Centro-Oeste,2,3465,100035,3.46379
9,0,1,regiao,CO,CO,Centro-Oeste,2,3834,124971,3.06791


# Exportando tabela

In [14]:
# Write the final table to disk.
# NOTE(review): the `.gzip` suffix suggests a compressed file, but no
# compression option is passed to to_csv here — confirm what is actually written.
output_counts = get_path('GESTANTES', 'contagens.csv.gzip')
df_counts.to_csv(output_counts)

In [15]:
# Read the exported file back to inspect what was written.
dt.fread(output_counts)

Unnamed: 0_level_0,parto_normal,bienio,escala,codigo,identificador,nome,criticidade,count,total,percent
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,regiao,CO,CO,Centro-Oeste,0,85068,100035,85.0382
1,0,1,regiao,CO,CO,Centro-Oeste,0,100995,124971,80.8147
2,1,0,regiao,CO,CO,Centro-Oeste,0,141604,167105,84.7395
3,1,1,regiao,CO,CO,Centro-Oeste,0,118221,151310,78.1317
4,0,0,regiao,CO,CO,Centro-Oeste,1,8372,100035,8.36907
5,0,1,regiao,CO,CO,Centro-Oeste,1,13732,124971,10.9881
6,1,0,regiao,CO,CO,Centro-Oeste,1,13243,167105,7.92496
7,1,1,regiao,CO,CO,Centro-Oeste,1,16144,151310,10.6695
8,0,0,regiao,CO,CO,Centro-Oeste,2,3465,100035,3.46379
9,0,1,regiao,CO,CO,Centro-Oeste,2,3834,124971,3.06791


In [18]:
# Reload the export with pandas. `identificador` is read as text so values
# such as '0010464' keep their leading zeros.
df_cnt = pd.read_csv(output_counts, dtype={'identificador': str})

In [19]:
# Number of rows per scale in the exported table.
df_cnt['escala'].value_counts()

municipio       53557
cnes            31299
regiao_saude     7579
uf                530
regiao            100
Name: escala, dtype: int64

## Brasil (regsau)

In [21]:
# Country-wide subset: keep only the three coarsest scales.
in_bra_scales = df_cnt['escala'].isin({'regiao', 'uf', 'regiao_saude'})
df_bra_regsau = df_cnt[in_bra_scales]
df_bra_regsau

Unnamed: 0,parto_normal,bienio,escala,codigo,identificador,nome,criticidade,count,total,percent
0,0,0,regiao,CO,CO,Centro-Oeste,0,85068,100035,85.038237
1,0,1,regiao,CO,CO,Centro-Oeste,0,100995,124971,80.814749
2,1,0,regiao,CO,CO,Centro-Oeste,0,141604,167105,84.739535
3,1,1,regiao,CO,CO,Centro-Oeste,0,118221,151310,78.131650
4,0,0,regiao,CO,CO,Centro-Oeste,1,8372,100035,8.369071
...,...,...,...,...,...,...,...,...,...,...
8204,0,0,regiao_saude,SL/SC/42016,42016,Laguna,3,1,1221,0.081900
8205,0,1,regiao_saude,SL/SC/42016,42016,Laguna,3,1,1805,0.055402
8206,0,1,regiao_saude,SL/SC/42016,42016,Laguna,4,2,1805,0.110803
8207,1,0,regiao_saude,SL/SC/42016,42016,Laguna,4,1,1636,0.061125


In [22]:
# Derive the Excel path from the CSV path by swapping the suffix, then write
# the subset without the pandas index column.
output_bra_regsau = output_counts.replace('.csv.gzip', '_bra_regsau.xlsx')
df_bra_regsau.to_excel(output_bra_regsau, index=False)

## RJ

In [16]:
# Rows whose codigo path contains the '/RJ' segment. Region-scale codes have
# no '/' at all, so only UF-level and finer rows can match.
is_rj = df_cnt['codigo'].str.contains('/RJ')
df_rj = df_cnt[is_rj]
df_rj

Unnamed: 0,parto_normal,bienio,escala,codigo,identificador,nome,criticidade,count,total,percent
530,0,0,uf,SD/RJ,RJ,Rio de Janeiro,0,31475,39323,80.042214
531,0,1,uf,SD/RJ,RJ,Rio de Janeiro,0,64571,82583,78.189216
532,1,0,uf,SD/RJ,RJ,Rio de Janeiro,0,126306,148327,85.153748
533,1,1,uf,SD/RJ,RJ,Rio de Janeiro,0,116113,148010,78.449429
534,0,0,uf,SD/RJ,RJ,Rio de Janeiro,1,7153,39323,18.190372
...,...,...,...,...,...,...,...,...,...,...
84206,0,0,cnes,SD/RJ/33009/330590/3584968,3584968,HOSPITAL FRANCISCO LIMONGI,0,4,4,100.000000
84207,1,0,cnes,SD/RJ/33009/330590/3584968,3584968,HOSPITAL FRANCISCO LIMONGI,0,39,39,100.000000
84208,1,1,cnes,SD/RJ/33009/330590/3584968,3584968,HOSPITAL FRANCISCO LIMONGI,0,6,8,75.000000
84209,1,1,cnes,SD/RJ/33009/330590/3584968,3584968,HOSPITAL FRANCISCO LIMONGI,1,1,8,12.500000


In [17]:
# Derive the Excel path from the CSV path by swapping the suffix, then write
# the RJ subset without the pandas index column.
output_rj = output_counts.replace('.csv.gzip', '_rj.xlsx')
df_rj.to_excel(output_rj, index=False)