In [1]:
import pandas as pd
from utils import get_path, group_cols
from datatable import dt, f, by

# Descrição

Neste notebook iremos formar a tabela de deslocamento, com origem e destino, agregada por escala geográfica (região, UF, região de saúde, município e CNES).

# Gerando tabela

In [2]:
def rank_locs(path_count, path_locs, scale):
  """Aggregate the count table at one geographic scale and attach location names.

  Parameters
  ----------
  path_count
    CSV read with ``dt.fread``. The code uses its ``referencial``, ``codigo``,
    ``count_total``, ``count_critic``, ``parto_normal`` and ``bienio`` columns.
  path_locs
    CSV read with ``dt.fread``, providing ``codigo`` and, for every scale
    except ``'cnes'``, a ``nome_<scale>`` column.
  scale
    One of ``'regiao'``, ``'uf'``, ``'regiao_saude'``, ``'municipio'``, ``'cnes'``.

  Returns
  -------
  datatable Frame
    One row per distinct combination of the non-count columns (including the
    truncated ``codigo``, ``escala`` and ``nome``), followed by the summed
    ``count_total`` and ``count_critic`` and the derived ``critic_percent``,
    sorted by ``codigo``, ``parto_normal``, ``bienio``.

  Raises
  ------
  KeyError
    If ``scale`` is not one of the known scales.
  """
  # Number of leading '/'-separated segments of `codigo` kept at each scale.
  # Looked up first so an unknown scale fails before any file is read.
  depth = {'cnes': 5, 'municipio': 4, 'regiao_saude': 3, 'uf': 2, 'regiao': 1}[scale]

  # 'cnes' rows are taken from the 'destino' referential, every other scale from 'origem'.
  ref = 'destino' if scale == 'cnes' else 'origem'

  # Location lookup: keep `codigo` and the scale-specific name (renamed to 'nome');
  # the `...: None` entry asks fread to drop all remaining columns.
  cols_locs = {'codigo': 'codigo', f'nome_{scale}': 'nome', ...: None}
  df_locs = dt.fread(path_locs, columns=cols_locs)
  if scale == 'cnes':
    # At the 'cnes' scale the name column is set to an empty string for every row.
    df_locs['nome'] = ''
  df_locs.key = 'codigo'  # a keyed frame is required on the right side of dt.join

  # Counts: keep only the rows of the chosen referential and drop that column.
  df_count = dt.fread(path_count)
  df_count = df_count[f.referencial == ref, f[:].remove(f.referencial)]
  df_count['escala'] = scale
  # The join uses the full code, so it has to happen before the code is truncated.
  df_count = df_count[:, :, dt.join(df_locs)]

  # Truncate every code to the depth of the requested scale.
  codigos = df_count['codigo'].to_list()[0]
  df_count['codigo'] = dt.Frame(['/'.join(c.split('/')[:depth]) for c in codigos])

  # Group by every column except the two counters, selected by name so that the
  # result does not depend on the column order of the input file.
  count_cols = ('count_total', 'count_critic')
  key_cols = tuple(c for c in df_count.names if c not in count_cols)
  df_count = df_count[:, [dt.sum(f.count_total), dt.sum(f.count_critic)], by(key_cols)]

  # Share of critical cases, in percent, added in place.
  df_count[:, dt.update(critic_percent = f.count_critic / f.count_total * 100)]
  df_count = df_count.sort(f.codigo, f.parto_normal, f.bienio)
  return df_count

In [3]:
# Input files: the location-name table and the per-location counts.
path_locs = get_path('LOCALIDADES', 'localidades.csv.gzip')
path_rank = get_path('GESTANTES', 'contagens.csv.gzip')

# Scales to aggregate at; the stacked table keeps this order.
scales = ['regiao', 'uf', 'regiao_saude', 'municipio', 'cnes']

# One aggregated frame per scale, stacked row-wise into a single table.
df_counts = dt.rbind(
  *(rank_locs(path_rank, path_locs, scale) for scale in scales)
)
df_counts

Unnamed: 0_level_0,bienio,parto_normal,codigo,escala,nome,count_total,count_critic,critic_percent
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,CO,regiao,Centro-Oeste,100035,14967,14.9618
1,1,0,CO,regiao,Centro-Oeste,124971,23976,19.1853
2,0,1,CO,regiao,Centro-Oeste,167105,25501,15.2605
3,1,1,CO,regiao,Centro-Oeste,151310,33089,21.8683
4,0,0,ND,regiao,Nordeste,413100,163597,39.6023
5,1,0,ND,regiao,Nordeste,428756,181313,42.2882
6,0,1,ND,regiao,Nordeste,817947,250112,30.578
7,1,1,ND,regiao,Nordeste,634648,222477,35.0552
8,0,0,NT,regiao,Norte,146771,26529,18.0751
9,1,0,NT,regiao,Norte,172823,33069,19.1346


# Exportando tabela

In [4]:
# Destination of the stacked table, resolved through the project's get_path helper.
output_rank = get_path('GESTANTES', 'ranks.csv.gzip')
# NOTE(review): the file name ends in '.gzip' rather than '.gz' — confirm whether
# to_csv actually compresses the output for this extension.
df_counts.to_csv(output_rank)

In [5]:
# Read the exported file back and display it as a check on what was written.
dt.fread(output_rank)

Unnamed: 0_level_0,bienio,parto_normal,codigo,escala,nome,count_total,count_critic,critic_percent
Unnamed: 0_level_1,▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,CO,regiao,Centro-Oeste,100035,14967,14.9618
1,1,0,CO,regiao,Centro-Oeste,124971,23976,19.1853
2,0,1,CO,regiao,Centro-Oeste,167105,25501,15.2605
3,1,1,CO,regiao,Centro-Oeste,151310,33089,21.8683
4,0,0,ND,regiao,Nordeste,413100,163597,39.6023
5,1,0,ND,regiao,Nordeste,428756,181313,42.2882
6,0,1,ND,regiao,Nordeste,817947,250112,30.578
7,1,1,ND,regiao,Nordeste,634648,222477,35.0552
8,0,0,NT,regiao,Norte,146771,26529,18.0751
9,1,0,NT,regiao,Norte,172823,33069,19.1346


In [6]:
# Mirror the exported CSV as an Excel workbook stored next to it.
ranks_pd = pd.read_csv(output_rank)
excel_path = output_rank.replace('csv.gzip', 'xlsx')
ranks_pd.to_excel(excel_path, index=False)