In [1]:
from utils import get_path, group_cols
from datatable import dt, f

# Descrição

Neste notebook iremos formar a tabela de deslocamentos, com origem e destino.

# Tabelas base

## Procedimentos

### Colunas da tabela

In [2]:
# Column selection handed to `dt.fread(..., columns=...)`.
# Each kept column maps to itself, i.e. it is loaded without being renamed.
# The final `...: None` entry is the fread convention for dropping every
# column that is not listed explicitly.
# Columns currently excluded from the selection: 'cnes', 'idade'.
cols_deslc = {
  **{col: col for col in (
    'ano',
    'parto_normal',
    'hosp_cod_municipio',
    'hosp_regiao_saude',
    'cod_municipio',
    'regiao_saude',
    'count',
  )},
  ...: None,
}

### Linhas da tabela

In [3]:
def filter_deslc(df, anos=(2010, 2011, 2018, 2019)):
  """Keep only the rows of `df` whose `ano` column is one of `anos`.

  Args:
    df: datatable Frame containing an `ano` column.
    anos: iterable of years to keep. The default keeps 2010, 2011, 2018 and
      2019, which is what this notebook needs.

  Returns:
    The result of `df[mask, :]`: the selected rows with every column of `df`.

  Raises:
    ValueError: if `anos` is empty, since no row filter can be built.
  """
  anos = tuple(anos)
  if not anos:
    raise ValueError('anos must contain at least one year')

  # OR together one `f.ano == year` expression per requested year, in the
  # same left-to-right order as a hand-written `a | b | c | d` chain.
  keep_row = f.ano == anos[0]
  for ano in anos[1:]:
    keep_row = keep_row | (f.ano == ano)
  return df[keep_row, :]

### Tabela carregada

In [4]:
# Path of the procedures file, resolved through the project helper `get_path`.
path_deslc = get_path('GESTANTES', 'procedimentos.csv.gzip')

# Read only the columns selected in `cols_deslc`, then keep just the rows
# accepted by `filter_deslc`.
df_deslc = filter_deslc(dt.fread(path_deslc, columns=cols_deslc))
df_deslc

Unnamed: 0_level_0,ano,parto_normal,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,count
Unnamed: 0_level_1,▪▪▪▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,2010,1,355030,3501,355030,3501,362
1,2010,1,355030,3501,355030,3501,333
2,2010,1,355030,3501,355030,3501,331
3,2010,1,355030,3501,355030,3501,327
4,2010,1,355030,3501,355030,3501,318
5,2010,1,355030,3501,355030,3501,303
6,2010,1,355030,3501,355030,3501,297
7,2010,1,355030,3501,355030,3501,296
8,2010,1,355030,3501,355030,3501,295
9,2010,1,355030,3501,355030,3501,292


### Atualizando variável ano (biênio)

Valor da variável `bienio`:

- `0 (False) = 2010/11`
- `1 (True) = 2018/19`

In [5]:
# Boolean flag: True for rows from 2018 or 2019, False for any other year.
is_bienio_2018_19 = (f.ano == 2018) | (f.ano == 2019)

# Overwrite `ano` in place with the flag, then rename the column to `bienio`.
# NOTE(review): this mutates `df_deslc`; re-running the cell without reloading
# the table presumably fails because `ano` no longer exists — confirm.
df_deslc[:, dt.update(ano=is_bienio_2018_19)]
df_deslc.names = {'ano': 'bienio'}
df_deslc

Unnamed: 0_level_0,bienio,parto_normal,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,0,1,355030,3501,355030,3501,362
1,0,1,355030,3501,355030,3501,333
2,0,1,355030,3501,355030,3501,331
3,0,1,355030,3501,355030,3501,327
4,0,1,355030,3501,355030,3501,318
5,0,1,355030,3501,355030,3501,303
6,0,1,355030,3501,355030,3501,297
7,0,1,355030,3501,355030,3501,296
8,0,1,355030,3501,355030,3501,295
9,0,1,355030,3501,355030,3501,292


### Tabela agrupada

In [6]:
# Grouping keys: every column except the last one (`count`).
key_cols = df_deslc.names[:-1]

# `group_cols` is a project helper; judging by the displayed output it
# aggregates `count` per key combination — TODO confirm in utils.
# The grouped result is then ordered by `bienio` and by descending `count`.
df_deslc = group_cols(df_deslc, key_cols).sort(f.bienio, -f.count)
df_deslc

Unnamed: 0_level_0,bienio,parto_normal,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,1,355030,3501,355030,3501,2368
1,0,0,355030,3501,355030,3501,2020
2,0,1,330455,3305,330455,3305,1210
3,0,0,330455,3305,330455,3305,872
4,0,0,230440,2301,230440,2301,727
5,0,1,230440,2301,230440,2301,717
6,0,1,150140,1501,150140,1501,639
7,0,0,150140,1501,150140,1501,634
8,0,0,520870,5201,520870,5201,621
9,0,1,130260,1301,130260,1301,616


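Number of rows in the grouped table, depending on the columns kept:

- with all columns (including `idade` and `cnes`): `3_355_691` rows
- without `idade`: `461_276` rows
- without `idade` and `cnes`: `98_055` rows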

# Exportando tabela

In [7]:
# Destination of the displacement table, resolved through `get_path`.
output_deslc = get_path('GESTANTES', 'deslocamentos.csv.gzip')

# Write the grouped frame to disk as CSV.
# NOTE(review): to_csv infers compression from the file name; the `.gzip`
# suffix may not be recognised as gzip — confirm the file is compressed.
df_deslc.to_csv(path=output_deslc)

In [8]:
# Read the exported file back and verify the round trip instead of relying on
# a visual check: column names and shape must match the frame just written.
df_roundtrip = dt.fread(output_deslc)
assert df_roundtrip.names == df_deslc.names, 'exported columns differ from df_deslc'
assert df_roundtrip.shape == df_deslc.shape, 'exported shape differs from df_deslc'
df_roundtrip

Unnamed: 0_level_0,bienio,parto_normal,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,0,1,355030,3501,355030,3501,2368
1,0,0,355030,3501,355030,3501,2020
2,0,1,330455,3305,330455,3305,1210
3,0,0,330455,3305,330455,3305,872
4,0,0,230440,2301,230440,2301,727
5,0,1,230440,2301,230440,2301,717
6,0,1,150140,1501,150140,1501,639
7,0,0,150140,1501,150140,1501,634
8,0,0,520870,5201,520870,5201,621
9,0,1,130260,1301,130260,1301,616
