In [1]:
import pandas as pd
from utils import get_path, group_cols
from datatable import dt, f, by

# Descrição

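Neste notebook iremos formar a tabela de ranking dos municípios (`rank_municipios`), com o total de procedimentos, a quantidade de procedimentos críticos e o percentual de críticos por município de origem.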

# Tabelas base

## Tabela Localidades

### Colunas

In [2]:
# Columns to read from the localities file, mapped to the names used in this
# notebook (only `nome_municipio` is renamed, to `nome`).
cols_locs = dict(
  codigo='codigo',
  nome_municipio='nome',
  socioecon='socioecon',
  capital='capital',
)
# The Ellipsis key stands for every column not listed above; it is mapped to
# None so that `fread` keeps only the four listed columns.
cols_locs[...] = None

### Carregando tabela

In [3]:
# Resolve the localities file through the project helper `get_path`, then read
# it with the column selection/renaming defined in `cols_locs`.
path_locs = get_path('LOCALIDADES', 'localidades.csv.gzip')
df_locs = dt.fread(path_locs, columns=cols_locs)
# Bare expression: display the loaded frame as the cell output.
df_locs

Unnamed: 0_level_0,codigo,nome,capital,socioecon
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪,▪▪▪▪
0,CO/GO/52001/520005,Abadia de Goiás,0,5
1,SD/MG/31052/310010,Abadia dos Dourados,0,2
2,CO/GO/52011/520010,Abadiânia,0,3
3,SD/MG/31067/310020,Abaeté,0,3
4,NT/PA/15011/150010,Abaetetuba,0,1
5,ND/CE/23019/230010,Abaiara,0,1
6,ND/BA/29023/290010,Abaíra,0,1
7,ND/BA/29017/290020,Abaré,0,1
8,SL/PR/41018/410010,Abatiá,0,3
9,SL/SC/42008/420005,Abdon Batista,0,3


## Tabela Procedimentos

### Colunas

In [4]:
# Columns to read from the procedures file, mapped to the names used in this
# notebook. `origem` is renamed to `codigo` so that it matches the column name
# used by the localities table.
cols_proc = dict(
  bienio='bienio',
  parto_normal='parto_normal',
  origem='codigo',
  criticidade='criticidade',
  count='count',
)
# The Ellipsis key stands for every column not listed above; it is mapped to
# None so that `fread` keeps only the listed columns.
cols_proc[...] = None

### Carregando tabela

In [5]:
# Resolve the procedures file through the project helper `get_path`, then read
# it with the column selection/renaming defined in `cols_proc`.
path_proc = get_path('GESTANTES', 'procs_bienio.csv.gzip')
df_proc = dt.fread(path_proc, columns=cols_proc)
# Bare expression: display the loaded frame as the cell output.
df_proc

Unnamed: 0_level_0,bienio,parto_normal,codigo,criticidade,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,0,0,CO/DF/53001/530010,0,260
1,0,0,CO/DF/53001/530010,0,282
2,0,0,CO/DF/53001/530010,0,3323
3,0,0,CO/DF/53001/530010,0,3368
4,0,0,CO/DF/53001/530010,0,964
5,0,0,CO/DF/53001/530010,0,832
6,0,0,CO/DF/53001/530010,0,1831
7,0,0,CO/DF/53001/530010,0,3707
8,0,0,CO/DF/53001/530010,0,728
9,0,0,CO/DF/53001/530010,0,1


### Definindo criticidade

In [6]:
# Turn `criticidade` into a flag: true for any non-zero value, false for zero.
# `dt.update` rewrites the column of `df_proc` in place, so nothing is assigned.
is_critic = f.criticidade != 0
df_proc[:, dt.update(criticidade=is_critic)]
df_proc

Unnamed: 0_level_0,bienio,parto_normal,codigo,criticidade,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪,▪▪▪▪
0,0,0,CO/DF/53001/530010,0,260
1,0,0,CO/DF/53001/530010,0,282
2,0,0,CO/DF/53001/530010,0,3323
3,0,0,CO/DF/53001/530010,0,3368
4,0,0,CO/DF/53001/530010,0,964
5,0,0,CO/DF/53001/530010,0,832
6,0,0,CO/DF/53001/530010,0,1831
7,0,0,CO/DF/53001/530010,0,3707
8,0,0,CO/DF/53001/530010,0,728
9,0,0,CO/DF/53001/530010,0,1


### Tabela agregada

In [7]:
# Collapse rows that share the same grouping values by summing `count`.
# The grouping keys are every column except `count`, selected by name instead
# of by position (`names[:-1]`), so the aggregation does not depend on `count`
# happening to be the last column of the file as read.
group_keys = tuple(name for name in df_proc.names if name != 'count')
df_proc = df_proc[:, dt.sum(f.count), by(group_keys)]
df_proc

Unnamed: 0_level_0,bienio,parto_normal,codigo,criticidade,count
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪,▪▪▪▪▪▪▪▪
0,0,0,CO/DF/53001/530010,0,20906
1,0,0,CO/DF/53001/530010,1,14
2,0,0,CO/GO/52001/520005,1,42
3,0,0,CO/GO/52001/520130,0,47
4,0,0,CO/GO/52001/520130,1,34
5,0,0,CO/GO/52001/520160,0,1
6,0,0,CO/GO/52001/520160,1,6
7,0,0,CO/GO/52001/520280,1,14
8,0,0,CO/GO/52001/520360,1,9
9,0,0,CO/GO/52001/520460,1,17


### Total e Críticos

In [8]:
# Total procedures per group, regardless of criticality: drop the
# `criticidade` column, then sum `count` over the columns that remain.
procs_no_flag = df_proc[:, f[:].remove(f.criticidade)]
# `count` is the last column here, so everything before it is a grouping key.
total_keys = procs_no_flag.names[:-1]
df_proc_total = procs_no_flag[:, dt.sum(f.count), by(total_keys)]
# Rename the summed column so it can sit next to the critical count later.
df_proc_total.names = {'count': 'count_total'}
df_proc_total

Unnamed: 0_level_0,bienio,parto_normal,codigo,count_total
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,CO/DF/53001/530010,20920
1,0,0,CO/GO/52001/520005,42
2,0,0,CO/GO/52001/520130,81
3,0,0,CO/GO/52001/520160,7
4,0,0,CO/GO/52001/520280,14
5,0,0,CO/GO/52001/520360,9
6,0,0,CO/GO/52001/520460,17
7,0,0,CO/GO/52001/520520,36
8,0,0,CO/GO/52001/520680,4
9,0,0,CO/GO/52001/520870,9052


In [9]:
# Critical procedures per group: keep only rows flagged as critical, drop the
# flag column, then sum `count` over the columns that remain.
procs_critic_only = df_proc[f.criticidade == 1, f[:].remove(f.criticidade)]
# `count` is the last column here, so everything before it is a grouping key.
critic_keys = procs_critic_only.names[:-1]
df_proc_critic = procs_critic_only[:, dt.sum(f.count), by(critic_keys)]
df_proc_critic.names = {'count': 'count_critic'}
# Key the frame on the grouping columns; the join in a later cell uses this
# frame as its right-hand side.
df_proc_critic.key = critic_keys
df_proc_critic

bienio,parto_normal,codigo,count_critic
▪,▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,CO/DF/53001/530010,14
0,0,CO/GO/52001/520005,42
0,0,CO/GO/52001/520130,34
0,0,CO/GO/52001/520160,6
0,0,CO/GO/52001/520280,14
0,0,CO/GO/52001/520360,9
0,0,CO/GO/52001/520460,17
0,0,CO/GO/52001/520520,21
0,0,CO/GO/52001/520680,4
0,0,CO/GO/52001/520870,188


# Gerando tabela

## Coluna `count` para `total` e `critic`

In [10]:
# Attach `count_critic` to every row of the totals. The join matches on the
# key columns of `df_proc_critic`; groups with no critical procedure have no
# match and come back with a missing `count_critic`.
df_rank = df_proc_total[:, :, dt.join(df_proc_critic)]
# Fill those missing values with 0 directly in the frame. This replaces a
# Python-level list round-trip that compared with `== None` and rebuilt the
# whole column (changing its storage type along the way).
df_rank[dt.math.isna(f.count_critic), 'count_critic'] = 0
df_rank

Unnamed: 0_level_0,bienio,parto_normal,codigo,count_total,count_critic
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪
0,0,0,CO/DF/53001/530010,20920,14
1,0,0,CO/GO/52001/520005,42,42
2,0,0,CO/GO/52001/520130,81,34
3,0,0,CO/GO/52001/520160,7,6
4,0,0,CO/GO/52001/520280,14,14
5,0,0,CO/GO/52001/520360,9,9
6,0,0,CO/GO/52001/520460,17,17
7,0,0,CO/GO/52001/520520,36,21
8,0,0,CO/GO/52001/520680,4,4
9,0,0,CO/GO/52001/520870,9052,188


## Coluna `critic_percent` (%)

In [11]:
# Share of critical procedures in each group, expressed as a percentage.
# `dt.update` adds the column to `df_rank` in place.
percent_expr = f.count_critic / f.count_total * 100
df_rank[:, dt.update(critic_percent=percent_expr)]
df_rank

Unnamed: 0_level_0,bienio,parto_normal,codigo,count_total,count_critic,critic_percent
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,CO/DF/53001/530010,20920,14,0.0669216
1,0,0,CO/GO/52001/520005,42,42,100
2,0,0,CO/GO/52001/520130,81,34,41.9753
3,0,0,CO/GO/52001/520160,7,6,85.7143
4,0,0,CO/GO/52001/520280,14,14,100
5,0,0,CO/GO/52001/520360,9,9,100
6,0,0,CO/GO/52001/520460,17,17,100
7,0,0,CO/GO/52001/520520,36,21,58.3333
8,0,0,CO/GO/52001/520680,4,4,100
9,0,0,CO/GO/52001/520870,9052,188,2.07689


## Informações da localidade

In [12]:
# Key the localities on `codigo` so they can be used as the right-hand side of
# a join, then attach the locality columns to each ranking row.
# NOTE(review): `df_rank` is overwritten with the joined result, so re-running
# this cell on its own joins the locality columns a second time — confirm the
# notebook is always executed top to bottom.
df_locs.key = 'codigo'
locs_join = dt.join(df_locs)
df_rank = df_rank[:, :, locs_join]
df_rank

Unnamed: 0_level_0,bienio,parto_normal,codigo,count_total,count_critic,critic_percent,nome,capital,socioecon
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪,▪▪▪▪
0,0,0,CO/DF/53001/530010,20920,14,0.0669216,Brasília,1,5
1,0,0,CO/GO/52001/520005,42,42,100,Abadia de Goiás,0,5
2,0,0,CO/GO/52001/520130,81,34,41.9753,Anicuns,0,5
3,0,0,CO/GO/52001/520160,7,6,85.7143,Araçu,0,5
4,0,0,CO/GO/52001/520280,14,14,100,Avelinópolis,0,5
5,0,0,CO/GO/52001/520360,9,9,100,Brazabrantes,0,5
6,0,0,CO/GO/52001/520460,17,17,100,Campestre de Goiás,0,5
7,0,0,CO/GO/52001/520520,36,21,58.3333,Caturaí,0,5
8,0,0,CO/GO/52001/520680,4,4,100,Damolândia,0,5
9,0,0,CO/GO/52001/520870,9052,188,2.07689,Goiânia,1,5


## Ordenando colunas

In [13]:
# Final column layout: grouping keys first, then the locality attributes,
# then the computed metrics.
key_cols = ['bienio', 'parto_normal', 'codigo']
locality_cols = ['nome', 'capital', 'socioecon']
metric_cols = ['count_total', 'count_critic', 'critic_percent']
cols_order = key_cols + locality_cols + metric_cols

In [14]:
# Select the columns in the order given by `cols_order` (all rows kept).
df_rank = df_rank[:, cols_order]
# Bare expression: display the reordered frame as the cell output.
df_rank

Unnamed: 0_level_0,bienio,parto_normal,codigo,nome,capital,socioecon,count_total,count_critic,critic_percent
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,CO/DF/53001/530010,Brasília,1,5,20920,14,0.0669216
1,0,0,CO/GO/52001/520005,Abadia de Goiás,0,5,42,42,100
2,0,0,CO/GO/52001/520130,Anicuns,0,5,81,34,41.9753
3,0,0,CO/GO/52001/520160,Araçu,0,5,7,6,85.7143
4,0,0,CO/GO/52001/520280,Avelinópolis,0,5,14,14,100
5,0,0,CO/GO/52001/520360,Brazabrantes,0,5,9,9,100
6,0,0,CO/GO/52001/520460,Campestre de Goiás,0,5,17,17,100
7,0,0,CO/GO/52001/520520,Caturaí,0,5,36,21,58.3333
8,0,0,CO/GO/52001/520680,Damolândia,0,5,4,4,100
9,0,0,CO/GO/52001/520870,Goiânia,1,5,9052,188,2.07689


## Ordenando linhas

In [15]:
# Order rows by `bienio` and `parto_normal` ascending, then by highest critical
# percentage first, with ties broken by the larger critical count. Negating a
# column expression requests descending order for that column.
rank_order = (f.bienio, f.parto_normal, -f.critic_percent, -f.count_critic)
df_rank = df_rank.sort(*rank_order)
df_rank

Unnamed: 0_level_0,bienio,parto_normal,codigo,nome,capital,socioecon,count_total,count_critic,critic_percent
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,SL/SC/42007/421190,Palhoça,0,5,1016,1016,100
1,0,0,ND/SE/28006/280480,Nossa Senhora do Socorro,0,1,988,988,100
2,0,0,ND/AL/27001/270770,Rio Largo,0,3,858,858,100
3,0,0,SL/PR/41002/410040,Almirante Tamandaré,0,5,762,762,100
4,0,0,ND/AL/27001/270470,Marechal Deodoro,0,3,730,730,100
5,0,0,ND/PE/26010/260680,Igarassu,0,5,658,658,100
6,0,0,SD/SP/35014/354730,Santana de Parnaíba,0,5,620,620,100
7,0,0,ND/PE/26008/260040,Água Preta,0,1,619,619,100
8,0,0,SD/ES/32002/320510,Viana,0,5,617,617,100
9,0,0,SL/RS/43009/430930,Guaíba,0,3,543,543,100


# Exportando tabela

In [16]:
# Resolve the output location through the project helper and write the
# ranking as CSV.
# NOTE(review): the file name ends in `.csv.gzip`, but no compression is
# requested explicitly; whether the file is actually gzip-compressed depends on
# how `to_csv` infers compression from the extension — confirm. A later cell
# reads this same path with `pd.read_csv`, so any change here must keep the two
# consistent.
output_rank = get_path('GESTANTES', 'rank_municipios.csv.gzip')
df_rank.to_csv(output_rank)

In [17]:
# Read the exported file back and display it, as a visual check of the export.
dt.fread(output_rank)

Unnamed: 0_level_0,bienio,parto_normal,codigo,nome,capital,socioecon,count_total,count_critic,critic_percent
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,SL/SC/42007/421190,Palhoça,0,5,1016,1016,100
1,0,0,ND/SE/28006/280480,Nossa Senhora do Socorro,0,1,988,988,100
2,0,0,ND/AL/27001/270770,Rio Largo,0,3,858,858,100
3,0,0,SL/PR/41002/410040,Almirante Tamandaré,0,5,762,762,100
4,0,0,ND/AL/27001/270470,Marechal Deodoro,0,3,730,730,100
5,0,0,ND/PE/26010/260680,Igarassu,0,5,658,658,100
6,0,0,SD/SP/35014/354730,Santana de Parnaíba,0,5,620,620,100
7,0,0,ND/PE/26008/260040,Água Preta,0,1,619,619,100
8,0,0,SD/ES/32002/320510,Viana,0,5,617,617,100
9,0,0,SL/RS/43009/430930,Guaíba,0,3,543,543,100


In [18]:
# Also export the ranking as an Excel workbook next to the CSV: reload the CSV
# with pandas and write it out without the index column. The Excel path is the
# CSV path with 'csv.gzip' replaced by 'xlsx'.
rank_pandas = pd.read_csv(output_rank)
excel_path = output_rank.replace('csv.gzip', 'xlsx')
rank_pandas.to_excel(excel_path, index=False)