In [1]:
import pandas as pd
from utils import get_path, group_cols
from datatable import dt, f

# Descrição

Neste notebook iremos montar a tabela de deslocamentos, com o município de origem (residência) e o município de destino (hospital) de cada procedimento.

# Tabelas base

## Tabela Procedimentos

### Colunas da tabela

In [2]:
# Source column -> name used in this notebook. The Ellipsis key is mapped to
# None so that every column not listed here is left out when the file is read.
cols_deslc = dict(
  ano='ano',
  parto_normal='parto_normal',
  cod_municipio='origem',
  hosp_cod_municipio='destino',
  cnes='hosp_cnes',
  count='count',
)
cols_deslc[...] = None

### Linhas da tabela

In [3]:
def filter_deslc(df, years=(2010, 2011, 2018, 2019)):
  """Return the rows of `df` whose `ano` column equals one of `years`.

  Args:
    df: datatable Frame that has an `ano` column.
    years: iterable of year values to keep. The default keeps the four years
      that make up the two biennia compared in this notebook (2010/11 and
      2018/19), so calling `filter_deslc(df)` behaves as before.

  Returns:
    A new Frame with only the matching rows; all columns are kept. An empty
    `years` selects nothing and yields a zero-row Frame.
  """
  years = tuple(years)
  if not years:
    # No year requested: return an empty slice instead of failing while
    # building the filter expression.
    return df[:0, :]
  # Build `(ano == y0) | (ano == y1) | ...` as a datatable f-expression.
  keep = f.ano == years[0]
  for year in years[1:]:
    keep = keep | (f.ano == year)
  return df[keep, :]

### Carregando tabela

In [4]:
# Read only the mapped columns of the procedures file and immediately keep
# just the years selected by filter_deslc.
path_deslc = get_path('GESTANTES', 'procedimentos.csv.gzip')
df_deslc = filter_deslc(dt.fread(path_deslc, columns=cols_deslc))
df_deslc

Unnamed: 0_level_0,ano,parto_normal,hosp_cnes,destino,origem,count
Unnamed: 0_level_1,▪▪▪▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,2010,1,2077701,355030,355030,4358
1,2010,1,2077388,355030,355030,4207
2,2010,1,3151794,130260,130260,4145
3,2010,1,10480,530010,530010,4099
4,2010,1,2079186,355030,355030,4059
5,2010,1,3956369,292740,292740,3935
6,2010,1,26794,310620,310620,3797
7,2010,1,2232,280030,280030,3533
8,2010,1,2270609,330455,330455,3386
9,2010,1,2040069,351880,351880,3213


### Atualizando variável ano (biênio)

Valor da variável `bienio`:

- `0 (False) = 2010/11`
- `1 (True) = 2018/19`

In [5]:
# Overwrite `ano` in place with a boolean flag (True for 2018/2019, False for
# the remaining years kept by the filter) and then rename it to `bienio`.
# NOTE: this cell mutates df_deslc; once `ano` has been renamed it cannot be
# re-run without reloading the table first.
is_2018_19 = (f.ano == 2018) | (f.ano == 2019)
df_deslc[:, dt.update(ano=is_2018_19)]
df_deslc.names = {'ano': 'bienio'}
df_deslc

Unnamed: 0_level_0,bienio,parto_normal,hosp_cnes,destino,origem,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,0,1,2077701,355030,355030,4358
1,0,1,2077388,355030,355030,4207
2,0,1,3151794,130260,130260,4145
3,0,1,10480,530010,530010,4099
4,0,1,2079186,355030,355030,4059
5,0,1,3956369,292740,292740,3935
6,0,1,26794,310620,310620,3797
7,0,1,2232,280030,280030,3533
8,0,1,2270609,330455,330455,3386
9,0,1,2040069,351880,351880,3213


### Tabela agrupada

In [6]:
# Collapse rows that share the same key columns. Every column except the last
# one (`count`, per the table displayed above) is handed to the project helper
# `group_cols` as grouping keys; presumably the helper sums `count` within each
# group — TODO confirm against utils.group_cols.
# NOTE(review): the key list is positional (`names[:-1]`), so it relies on
# `count` being the last column of the frame.
df_deslc = group_cols(df_deslc, df_deslc.names[:-1])
df_deslc

Unnamed: 0_level_0,bienio,parto_normal,hosp_cnes,destino,origem,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,27,260290,260290,1
1,0,0,27,260290,260720,1
2,0,0,94,260290,260140,2
3,0,0,94,260290,260290,2
4,0,0,94,260290,260345,2
5,0,0,94,260290,260400,1
6,0,0,94,260290,260520,2
7,0,0,94,260290,260540,2
8,0,0,94,260290,260640,2
9,0,0,94,260290,260680,1


Número de linhas da tabela agrupada, conforme as colunas mantidas:

- with all columns: `3_355_691` rows
- without age: `461_276` rows
- without age and cnes: `98_055` rows

## Tabela Municípios

### Colunas da tabela

In [7]:
# Columns of the municipalities file used for the municipality of residence
# (origin): source column -> name in this notebook. The Ellipsis key mapped to
# None leaves every other column out of the load.
# NOTE(review): `municipio` is requested here but does not appear in the frame
# displayed after loading — presumably the file has no column with that exact
# name; verify.
cols_mun = dict(
  cod_municipio='origem',
  regiao='regiao',
  uf='uf',
  cod_regiao_saude='regiao_saude',
  nome_regiao_saude='nome_regiao_saude',
  municipio='municipio',
  capital='capital',
  latitude='latitude',
  longitude='longitude',
  socioecon='socioecon',
)
cols_mun[...] = None

In [8]:
# Same municipalities file, this time for the hospital's municipality
# (destination): the kept columns get a `hosp_` prefix so they do not clash
# with the residence columns after both joins.
cols_mun_hosp = dict(
  cod_municipio='destino',
  uf='hosp_uf',
  cod_regiao_saude='hosp_regiao_saude',
  latitude='hosp_latitude',
  longitude='hosp_longitude',
)
cols_mun_hosp[...] = None

### Carregando tabela

In [9]:
# Municipalities table as seen from the residence side: only the columns
# mapped in cols_mun are read, already renamed (`cod_municipio` -> `origem`).
path_mun = get_path('GESTANTES', 'municipios.csv.gzip')
df_mun = dt.fread(path_mun, columns=cols_mun)
df_mun

Unnamed: 0_level_0,origem,capital,regiao_saude,nome_regiao_saude,uf,regiao,socioecon,latitude,longitude
Unnamed: 0_level_1,▪▪▪▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,520005,0,52001,Central,GO,Centro-Oeste,5,−16.7573,−49.4412
1,310010,0,31052,Patrocínio Monte Carmelo,MG,Sudeste,2,−18.4831,−47.3916
2,520010,0,52011,Pirineus,GO,Centro-Oeste,3,−16.197,−48.7057
3,310020,0,31067,Sete Lagoas,MG,Sudeste,3,−19.1551,−45.4444
4,150010,0,15011,Tocantins,PA,Norte,1,−1.72183,−48.8788
5,230010,0,23019,19ª Região Brejo Santo,CE,Nordeste,1,−7.34588,−39.0416
6,290010,0,29023,Seabra,BA,Nordeste,1,−13.2488,−41.6619
7,290020,0,29017,Paulo Afonso,BA,Nordeste,1,−8.72073,−39.1162
8,410010,0,41018,18ª RS Cornélio Procópio,PR,Sul,3,−23.3049,−50.3133
9,420005,0,42008,Meio Oeste,SC,Sul,3,−27.6126,−51.0233


In [10]:
# Second read of the same file, now with the hospital-side mapping
# (`cod_municipio` -> `destino`, remaining columns prefixed with `hosp_`).
df_mun_hosp = dt.fread(path_mun, columns=cols_mun_hosp)
df_mun_hosp

Unnamed: 0_level_0,destino,hosp_regiao_saude,hosp_uf,hosp_latitude,hosp_longitude
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,520005,52001,GO,−16.7573,−49.4412
1,310010,31052,MG,−18.4831,−47.3916
2,520010,52011,GO,−16.197,−48.7057
3,310020,31067,MG,−19.1551,−45.4444
4,150010,15011,PA,−1.72183,−48.8788
5,230010,23019,CE,−7.34588,−39.0416
6,290010,29023,BA,−13.2488,−41.6619
7,290020,29017,BA,−8.72073,−39.1162
8,410010,41018,PR,−23.3049,−50.3133
9,420005,42008,SC,−27.6126,−51.0233


### Abreviando valores coluna Região

In [11]:
# Full region name -> two-letter abbreviation used in the output tables.
regioes = dict([
  ('Norte', 'NT'),
  ('Nordeste', 'ND'),
  ('Centro-Oeste', 'CO'),
  ('Sudeste', 'SD'),
  ('Sul', 'SL'),
])

In [12]:
# Abbreviate the region names: take the `regiao` column as its own
# single-column frame, replace the values in place using the `regioes`
# mapping (the return value of `replace` is not used), and write the column
# back into df_mun.
reg = df_mun[:, 'regiao']
reg.replace(regioes)
df_mun['regiao'] = reg
df_mun

Unnamed: 0_level_0,origem,capital,regiao_saude,nome_regiao_saude,uf,regiao,socioecon,latitude,longitude
Unnamed: 0_level_1,▪▪▪▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,520005,0,52001,Central,GO,CO,5,−16.7573,−49.4412
1,310010,0,31052,Patrocínio Monte Carmelo,MG,SD,2,−18.4831,−47.3916
2,520010,0,52011,Pirineus,GO,CO,3,−16.197,−48.7057
3,310020,0,31067,Sete Lagoas,MG,SD,3,−19.1551,−45.4444
4,150010,0,15011,Tocantins,PA,NT,1,−1.72183,−48.8788
5,230010,0,23019,19ª Região Brejo Santo,CE,ND,1,−7.34588,−39.0416
6,290010,0,29023,Seabra,BA,ND,1,−13.2488,−41.6619
7,290020,0,29017,Paulo Afonso,BA,ND,1,−8.72073,−39.1162
8,410010,0,41018,18ª RS Cornélio Procópio,PR,SL,3,−23.3049,−50.3133
9,420005,0,42008,Meio Oeste,SC,SL,3,−27.6126,−51.0233


## Tabela Distâncias

### Carregando tabela

In [13]:
# Load the origin/destination distance table. Despite the `output_` prefix,
# this path is only read from in this notebook.
output_dist = get_path('GESTANTES', 'distancias.csv.gzip')
df_dist = dt.fread(output_dist)
# Drop the last column of the file by position; the columns that remain are
# origem, destino, distancia and tempo (see the table displayed below).
# NOTE(review): which column is being discarded is not visible here — confirm.
df_dist = df_dist[:, df_dist.names[:-1]]
df_dist

Unnamed: 0_level_0,origem,destino,distancia,tempo
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,355030,355030,0,0
1,351880,355030,19.7358,0.407306
2,330455,330455,0,0
3,352310,355030,42.4207,0.667611
4,330350,330455,36.809,0.511
5,330170,330455,20.8512,0.317583
6,530010,530010,0,0
7,351570,355030,39.7046,0.739528
8,230440,230440,0,0
9,330045,330455,31.2666,0.469667


# Gerando tabela

## Juntando com outras

### Juntando com Distâncias

In [14]:
# Key the distance table on the (origem, destino) pair so it can be used as
# the right-hand side of a datatable join, then attach `distancia` and `tempo`
# to every displacement row. datatable's join keeps all left-hand rows; rows
# without a match get NA in the joined columns.
df_dist.key = ['origem', 'destino']
df_deslc = df_deslc[:, :, dt.join(df_dist)]
# The keyed distance table (not the joined result) is what gets displayed.
df_dist

origem,destino,distancia,tempo
▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
110001,110001,0,0
110001,110002,309.05,6.16906
110001,110004,81.2011,1.91775
110001,110006,353.417,5.53989
110001,110009,145.678,2.346
110001,110012,130.77,2.54781
110001,110013,357.511,7.13067
110001,110015,172.926,3.18425
110001,110018,114.061,1.87139
110001,110020,505.856,9.16514


### Juntando com Municípios (res)

In [15]:
# Attach the residence-municipality attributes (region, UF, health region,
# coordinates, ...) by joining on `origem`.
df_mun.key = 'origem'
df_deslc = df_deslc[:, :, dt.join(df_mun)]
# The keyed municipalities table (not the joined result) is what gets displayed.
df_mun

origem,capital,regiao_saude,nome_regiao_saude,uf,regiao,socioecon,latitude,longitude
▪▪▪▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
110001,0,11005,Zona da Mata,RO,NT,1,−11.9283,−61.9953
110002,0,11001,Vale do Jamari,RO,NT,1,−9.90571,−63.0325
110003,0,11006,Cone Sul,RO,NT,2,−13.4945,−60.552
110004,0,11002,Café,RO,NT,3,−11.4343,−61.4562
110005,0,11006,Cone Sul,RO,NT,2,−13.187,−60.8168
110006,0,11006,Cone Sul,RO,NT,2,−13.1174,−60.5454
110007,0,11006,Cone Sul,RO,NT,2,−12.9551,−60.8947
110008,0,11007,Vale do Guaporé,RO,NT,1,−12.4367,−64.228
110009,0,11002,Café,RO,NT,3,−11.5266,−61.0252
110010,0,11004,Madeira-Mamoré,RO,NT,3,−10.7889,−65.3296


### Juntando com Municípios (hosp)

In [16]:
# Attach the hospital-municipality attributes (the `hosp_*` columns) by
# joining on `destino`; the fully joined table is displayed.
df_mun_hosp.key = 'destino'
df_deslc = df_deslc[:, :, dt.join(df_mun_hosp)]
df_deslc

Unnamed: 0_level_0,bienio,parto_normal,hosp_cnes,destino,origem,count,distancia,tempo,capital,regiao_saude,…,longitude,hosp_regiao_saude,hosp_uf,hosp_latitude,hosp_longitude
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪,▪▪▪▪,Unnamed: 11_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,27,260290,260290,1,0,0,0,26010,…,−35.0253,26010,PE,−8.28218,−35.0253
1,0,0,27,260290,260720,1,16.8139,0.319111,0,26010,…,−35.0609,26010,PE,−8.28218,−35.0253
2,0,0,94,260290,260140,2,73.9977,1.34642,0,26008,…,−35.1832,26010,PE,−8.28218,−35.0253
3,0,0,94,260290,260290,2,0,0,0,26010,…,−35.0253,26010,PE,−8.28218,−35.0253
4,0,0,94,260290,260345,2,42.4065,0.630306,0,26010,…,−34.9782,26010,PE,−8.28218,−35.0253
5,0,0,94,260290,260400,1,81.5093,1.15006,0,26006,…,−35.2514,26010,PE,−8.28218,−35.0253
6,0,0,94,260290,260520,2,27.9907,0.378972,0,26008,…,−35.2241,26010,PE,−8.28218,−35.0253
7,0,0,94,260290,260540,2,103.117,1.53569,0,26006,…,−35.3801,26010,PE,−8.28218,−35.0253
8,0,0,94,260290,260640,2,104,1.41492,0,26003,…,−35.5675,26010,PE,−8.28218,−35.0253
9,0,0,94,260290,260680,1,62.8655,0.96025,0,26010,…,−34.9013,26010,PE,−8.28218,−35.0253


## Criando colunas

In [17]:
# Derived flags: whether the delivery took place outside the municipality of
# residence, and whether it took place outside the residence health region.
df_deslc['mun_diff'] = f.origem != f.destino
df_deslc['regsau_diff'] = f.regiao_saude != f.hosp_regiao_saude
df_deslc

Unnamed: 0_level_0,bienio,parto_normal,hosp_cnes,destino,origem,count,distancia,tempo,capital,regiao_saude,…,hosp_uf,hosp_latitude,hosp_longitude,mun_diff,regsau_diff
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪,▪▪▪▪,Unnamed: 11_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪,▪
0,0,0,27,260290,260290,1,0,0,0,26010,…,PE,−8.28218,−35.0253,0,0
1,0,0,27,260290,260720,1,16.8139,0.319111,0,26010,…,PE,−8.28218,−35.0253,1,0
2,0,0,94,260290,260140,2,73.9977,1.34642,0,26008,…,PE,−8.28218,−35.0253,1,1
3,0,0,94,260290,260290,2,0,0,0,26010,…,PE,−8.28218,−35.0253,0,0
4,0,0,94,260290,260345,2,42.4065,0.630306,0,26010,…,PE,−8.28218,−35.0253,1,0
5,0,0,94,260290,260400,1,81.5093,1.15006,0,26006,…,PE,−8.28218,−35.0253,1,1
6,0,0,94,260290,260520,2,27.9907,0.378972,0,26008,…,PE,−8.28218,−35.0253,1,1
7,0,0,94,260290,260540,2,103.117,1.53569,0,26006,…,PE,−8.28218,−35.0253,1,1
8,0,0,94,260290,260640,2,104,1.41492,0,26003,…,PE,−8.28218,−35.0253,1,1
9,0,0,94,260290,260680,1,62.8655,0.96025,0,26010,…,PE,−8.28218,−35.0253,1,0


## Removendo valores NA

In [18]:
# Keep only rows where both the residence and the hospital coordinates are
# present. `!= None` is intentional: these are datatable f-expressions, so the
# comparison builds a row filter rather than being evaluated by Python.
# Only the four coordinate columns are checked here.
has_coords = (
  (f.latitude != None)
  & (f.longitude != None)
  & (f.hosp_latitude != None)
  & (f.hosp_longitude != None)
)
df_deslc = df_deslc[has_coords, :]
df_deslc

Unnamed: 0_level_0,bienio,parto_normal,hosp_cnes,destino,origem,count,distancia,tempo,capital,regiao_saude,…,hosp_uf,hosp_latitude,hosp_longitude,mun_diff,regsau_diff
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪,▪▪▪▪,Unnamed: 11_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪,▪
0,0,0,27,260290,260290,1,0,0,0,26010,…,PE,−8.28218,−35.0253,0,0
1,0,0,27,260290,260720,1,16.8139,0.319111,0,26010,…,PE,−8.28218,−35.0253,1,0
2,0,0,94,260290,260140,2,73.9977,1.34642,0,26008,…,PE,−8.28218,−35.0253,1,1
3,0,0,94,260290,260290,2,0,0,0,26010,…,PE,−8.28218,−35.0253,0,0
4,0,0,94,260290,260345,2,42.4065,0.630306,0,26010,…,PE,−8.28218,−35.0253,1,0
5,0,0,94,260290,260400,1,81.5093,1.15006,0,26006,…,PE,−8.28218,−35.0253,1,1
6,0,0,94,260290,260520,2,27.9907,0.378972,0,26008,…,PE,−8.28218,−35.0253,1,1
7,0,0,94,260290,260540,2,103.117,1.53569,0,26006,…,PE,−8.28218,−35.0253,1,1
8,0,0,94,260290,260640,2,104,1.41492,0,26003,…,PE,−8.28218,−35.0253,1,1
9,0,0,94,260290,260680,1,62.8655,0.96025,0,26010,…,PE,−8.28218,−35.0253,1,0


## Ordenando

### Colunas

In [19]:
# Final column order of the displacement table, assembled group by group.
# (nome_uf, macroregiao_saude, microregiao_saude and municipio are
# intentionally not part of the output.)
cols_order = (
  # procedure
  ['bienio', 'parto_normal']
  # displacement
  + ['origem', 'destino', 'mun_diff', 'regsau_diff', 'distancia', 'tempo']
  # residence location
  + ['regiao', 'uf', 'regiao_saude', 'nome_regiao_saude']
  + ['capital', 'socioecon', 'latitude', 'longitude']
  # hospital location
  + ['hosp_latitude', 'hosp_longitude', 'hosp_cnes', 'hosp_regiao_saude', 'hosp_uf']
  # count
  + ['count']
)

In [20]:
# Select the columns in the order given by cols_order; `count` ends up last,
# which the later aggregations (`names[:-1]`) rely on.
df_deslc = df_deslc[:, cols_order]
df_deslc

Unnamed: 0_level_0,bienio,parto_normal,origem,destino,mun_diff,regsau_diff,distancia,tempo,regiao,uf,…,hosp_longitude,hosp_cnes,hosp_regiao_saude,hosp_uf,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,Unnamed: 11_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,260290,260290,0,0,0,0,ND,PE,…,−35.0253,27,26010,PE,1
1,0,0,260720,260290,1,0,16.8139,0.319111,ND,PE,…,−35.0253,27,26010,PE,1
2,0,0,260140,260290,1,1,73.9977,1.34642,ND,PE,…,−35.0253,94,26010,PE,2
3,0,0,260290,260290,0,0,0,0,ND,PE,…,−35.0253,94,26010,PE,2
4,0,0,260345,260290,1,0,42.4065,0.630306,ND,PE,…,−35.0253,94,26010,PE,2
5,0,0,260400,260290,1,1,81.5093,1.15006,ND,PE,…,−35.0253,94,26010,PE,1
6,0,0,260520,260290,1,1,27.9907,0.378972,ND,PE,…,−35.0253,94,26010,PE,2
7,0,0,260540,260290,1,1,103.117,1.53569,ND,PE,…,−35.0253,94,26010,PE,2
8,0,0,260640,260290,1,1,104,1.41492,ND,PE,…,−35.0253,94,26010,PE,2
9,0,0,260680,260290,1,0,62.8655,0.96025,ND,PE,…,−35.0253,94,26010,PE,1


### Linhas

In [21]:
# Order rows by biennium, delivery type, then origin and destination municipality.
df_deslc = df_deslc.sort(f.bienio, f.parto_normal, f.origem, f.destino)
df_deslc

Unnamed: 0_level_0,bienio,parto_normal,origem,destino,mun_diff,regsau_diff,distancia,tempo,regiao,uf,…,hosp_longitude,hosp_cnes,hosp_regiao_saude,hosp_uf,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,Unnamed: 11_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,110001,110001,0,0,0,0,NT,RO,…,−61.9953,2679477,11005,RO,2
1,0,0,110001,110012,1,1,130.77,2.54781,NT,RO,…,−61.9322,2495279,11003,RO,2
2,0,0,110001,110013,1,1,357.511,7.13067,NT,RO,…,−61.9818,2808617,11001,RO,1
3,0,0,110001,110018,1,1,114.061,1.87139,NT,RO,…,−61.198,2496534,11002,RO,1
4,0,0,110001,110020,1,1,505.856,9.16514,NT,RO,…,−63.8999,4001303,11004,RO,1
5,0,0,110001,110028,1,0,48.6099,0.835667,NT,RO,…,−61.7714,2495228,11005,RO,2
6,0,0,110001,351110,1,1,2196.35,30.8736,NT,RO,…,−48.977,2089327,35151,SP,1
7,0,0,110002,110002,0,0,0,0,NT,RO,…,−63.0325,2494299,11001,RO,2
8,0,0,110002,110011,1,1,95.4727,2.33883,NT,RO,…,−62.4788,2808609,11003,RO,1
9,0,0,110002,110012,1,1,181.364,3.735,NT,RO,…,−61.9322,2495279,11003,RO,1


# Exportando tabela

In [22]:
# Write the full displacement table (one row per biennium / delivery type /
# origin / destination / hospital) to disk.
# NOTE(review): the file name ends in `.csv.gzip`; whether `to_csv` actually
# compresses for that suffix is not visible here — confirm.
output_deslc = get_path('GESTANTES', 'deslocamentos.csv.gzip')
df_deslc.to_csv(output_deslc)

In [23]:
# Sanity check: read the exported file back and display it.
dt.fread(output_deslc)

Unnamed: 0_level_0,bienio,parto_normal,origem,destino,mun_diff,regsau_diff,distancia,tempo,regiao,uf,…,hosp_longitude,hosp_cnes,hosp_regiao_saude,hosp_uf,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,Unnamed: 11_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,0,0,110001,110001,0,0,0,0,NT,RO,…,−61.9953,2679477,11005,RO,2
1,0,0,110001,110012,1,1,130.77,2.54781,NT,RO,…,−61.9322,2495279,11003,RO,2
2,0,0,110001,110013,1,1,357.511,7.13067,NT,RO,…,−61.9818,2808617,11001,RO,1
3,0,0,110001,110018,1,1,114.061,1.87139,NT,RO,…,−61.198,2496534,11002,RO,1
4,0,0,110001,110020,1,1,505.856,9.16514,NT,RO,…,−63.8999,4001303,11004,RO,1
5,0,0,110001,110028,1,0,48.6099,0.835667,NT,RO,…,−61.7714,2495228,11005,RO,2
6,0,0,110001,351110,1,1,2196.35,30.8736,NT,RO,…,−48.977,2089327,35151,SP,1
7,0,0,110002,110002,0,0,0,0,NT,RO,…,−63.0325,2494299,11001,RO,2
8,0,0,110002,110011,1,1,95.4727,2.33883,NT,RO,…,−62.4788,2808609,11003,RO,1
9,0,0,110002,110012,1,1,181.364,3.735,NT,RO,…,−61.9322,2495279,11003,RO,1


# Tabelas Filtradas

## Tabela referência

In [24]:
# Reference table for the filtered exports below, reloaded from the file
# written above (this replaces the in-memory df_deslc).
df_deslc = dt.fread(output_deslc)

## Brasil (sem CNES)

In [25]:
# Country-wide table without the hospital identifier: drop `hosp_cnes`, then
# sum `count` over all remaining columns. The grouping keys are every column
# but the last one, which is `count`.
deslc_sem_cnes = df_deslc[:, f[:].remove(f.hosp_cnes)]
chaves_brasil = deslc_sem_cnes.names[:-1]
df_deslc_brasil = deslc_sem_cnes[:, dt.sum(f.count), dt.by(chaves_brasil)]
df_deslc_brasil

Unnamed: 0_level_0,bienio,parto_normal,origem,destino,mun_diff,regsau_diff,distancia,tempo,regiao,uf,…,hosp_latitude,hosp_longitude,hosp_regiao_saude,hosp_uf,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,Unnamed: 11_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,110001,110001,0,0,0,0,NT,RO,…,−11.9283,−61.9953,11005,RO,2
1,0,0,110001,110012,1,1,130.77,2.54781,NT,RO,…,−10.8777,−61.9322,11003,RO,2
2,0,0,110001,110013,1,1,357.511,7.13067,NT,RO,…,−9.44363,−61.9818,11001,RO,1
3,0,0,110001,110018,1,1,114.061,1.87139,NT,RO,…,−11.672,−61.198,11002,RO,1
4,0,0,110001,110020,1,1,505.856,9.16514,NT,RO,…,−8.76077,−63.8999,11004,RO,1
5,0,0,110001,110028,1,0,48.6099,0.835667,NT,RO,…,−11.7271,−61.7714,11005,RO,2
6,0,0,110001,351110,1,1,2196.35,30.8736,NT,RO,…,−21.1314,−48.977,35151,SP,1
7,0,0,110002,110002,0,0,0,0,NT,RO,…,−9.90571,−63.0325,11001,RO,2
8,0,0,110002,110011,1,1,95.4727,2.33883,NT,RO,…,−10.4318,−62.4788,11003,RO,1
9,0,0,110002,110012,1,1,181.364,3.735,NT,RO,…,−10.8777,−61.9322,11003,RO,1


In [26]:
# Export the country-wide table twice: as CSV and as an Excel workbook with
# the same base name. The workbook is produced by reading the CSV back with
# pandas, without the index column.
output_deslc_brasil = get_path('GESTANTES', 'deslocamentos_brasil.csv.gzip')
df_deslc_brasil.to_csv(output_deslc_brasil)
xlsx_deslc_brasil = output_deslc_brasil.replace('csv.gzip', 'xlsx')
pd.read_csv(output_deslc_brasil).to_excel(xlsx_deslc_brasil, index=False)

## RJ

### Com CNES

In [27]:
# Rio de Janeiro subset, selected by the UF of residence (`uf`), so deliveries
# that happened in other states (`hosp_uf` != 'RJ') are still included.
# The hospital identifier `hosp_cnes` is kept.
df_deslc_rj_cnes = df_deslc[(f.uf == 'RJ'), :]
df_deslc_rj_cnes

Unnamed: 0_level_0,bienio,parto_normal,origem,destino,mun_diff,regsau_diff,distancia,tempo,regiao,uf,…,hosp_longitude,hosp_cnes,hosp_regiao_saude,hosp_uf,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,Unnamed: 11_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,0,0,330010,320245,1,1,614.613,7.66281,SD,RJ,…,−41.5087,2448203,32002,ES,1
1,0,0,330010,330010,0,0,0,0,SD,RJ,…,−44.3196,2280868,33001,RJ,2
2,0,0,330010,330010,0,0,0,0,SD,RJ,…,−44.3196,2281384,33001,RJ,2
3,0,0,330010,351840,1,1,222.988,3.47778,SD,RJ,…,−45.1938,2081512,35172,SP,1
4,0,0,330020,330020,0,0,0,0,SD,RJ,…,−42.3326,2273098,33002,RJ,2
5,0,0,330020,330070,1,0,43.1787,0.650056,SD,RJ,…,−42.0286,5903394,33002,RJ,2
6,0,0,330020,330455,1,1,118.052,1.55372,SD,RJ,…,−43.2003,2270390,33005,RJ,1
7,0,0,330020,330455,1,1,118.052,1.55372,SD,RJ,…,−43.2003,2273438,33005,RJ,1
8,0,0,330020,330520,1,0,31.0431,0.417139,SD,RJ,…,−42.1026,2280477,33002,RJ,1
9,0,0,330020,330550,1,0,22.8379,0.348,SD,RJ,…,−42.5099,2274299,33002,RJ,1


In [28]:
# Export the RJ table (with CNES) as CSV and as an Excel workbook with the
# same base name; the workbook is built by reading the CSV back with pandas.
output_rj_cnes = get_path('GESTANTES', 'deslocamentos_rj_cnes.csv.gzip')
df_deslc_rj_cnes.to_csv(output_rj_cnes)
xlsx_rj_cnes = output_rj_cnes.replace('csv.gzip', 'xlsx')
pd.read_csv(output_rj_cnes).to_excel(xlsx_rj_cnes, index=False)

### Sem CNES

In [29]:
# RJ table without the hospital identifier: drop `hosp_cnes` and sum `count`
# over all remaining columns (every column but the last one, `count`, is a
# grouping key).
rj_sem_cnes = df_deslc_rj_cnes[:, f[:].remove(f.hosp_cnes)]
chaves_rj = rj_sem_cnes.names[:-1]
df_deslc_rj = rj_sem_cnes[:, dt.sum(f.count), dt.by(chaves_rj)]
df_deslc_rj

Unnamed: 0_level_0,bienio,parto_normal,origem,destino,mun_diff,regsau_diff,distancia,tempo,regiao,uf,…,hosp_latitude,hosp_longitude,hosp_regiao_saude,hosp_uf,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,Unnamed: 11_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,330010,320245,1,1,614.613,7.66281,SD,RJ,…,−20.2347,−41.5087,32002,ES,1
1,0,0,330010,330010,0,0,0,0,SD,RJ,…,−23.0011,−44.3196,33001,RJ,4
2,0,0,330010,351840,1,1,222.988,3.47778,SD,RJ,…,−22.8075,−45.1938,35172,SP,1
3,0,0,330020,330020,0,0,0,0,SD,RJ,…,−22.8697,−42.3326,33002,RJ,2
4,0,0,330020,330070,1,0,43.1787,0.650056,SD,RJ,…,−22.8894,−42.0286,33002,RJ,2
5,0,0,330020,330455,1,1,118.052,1.55372,SD,RJ,…,−22.9129,−43.2003,33005,RJ,2
6,0,0,330020,330520,1,0,31.0431,0.417139,SD,RJ,…,−22.8429,−42.1026,33002,RJ,1
7,0,0,330020,330550,1,0,22.8379,0.348,SD,RJ,…,−22.9292,−42.5099,33002,RJ,1
8,0,0,330022,330600,1,0,25.1655,0.377083,SD,RJ,…,−22.1165,−43.2185,33003,RJ,2
9,0,0,330023,330023,0,0,0,0,SD,RJ,…,−22.7528,−41.8846,33002,RJ,1


In [30]:
# Export the RJ table (without CNES) as CSV and as an Excel workbook with the
# same base name; the workbook is built by reading the CSV back with pandas.
output_rj = get_path('GESTANTES', 'deslocamentos_rj.csv.gzip')
df_deslc_rj.to_csv(output_rj)
xlsx_rj = output_rj.replace('csv.gzip', 'xlsx')
pd.read_csv(output_rj).to_excel(xlsx_rj, index=False)