In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from zipfile import ZipFile
from datatable import dt, f, by
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn3
from plotly import express as px, io as pio

pd.options.plotting.backend = 'plotly'
pio.renderers.default = 'plotly_mimetype+notebook_connected'

# Bases

## SIH

In [2]:
path_sihsus = f'{Path.home()}/Databases/SIHSUS/SIHSUS.csv.gzip'
df_sihsus = pd.read_csv(path_sihsus, low_memory=False)
df_sihsus

Unnamed: 0,ano,cnes,hosp_municipio,res_municipio,hosp_regiao_saude,res_regiao_saude,tipo_parto,idade,periodo
0,2010,2001020,120005,120005,1202,1202,normal,23,antes
1,2010,2000733,120040,120040,1201,1201,normal,28,antes
2,2010,2000733,120040,120040,1201,1201,normal,38,antes
3,2010,2000733,120040,120040,1201,1201,normal,16,antes
4,2010,2000733,120040,120040,1201,1201,normal,26,antes
...,...,...,...,...,...,...,...,...,...
16816986,2019,2765640,170930,171840,1704,1704,cesariano,24,depois
16816987,2019,2765640,170930,171670,1704,1704,cesariano,27,depois
16816988,2019,2765640,170930,171670,1704,1704,cesariano,22,depois
16816989,2019,2765640,170930,170930,1704,1704,cesariano,25,depois


In [3]:
df_sihsus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16816991 entries, 0 to 16816990
Data columns (total 9 columns):
 #   Column             Dtype 
---  ------             ----- 
 0   ano                int64 
 1   cnes               int64 
 2   hosp_municipio     int64 
 3   res_municipio      int64 
 4   hosp_regiao_saude  int64 
 5   res_regiao_saude   int64 
 6   tipo_parto         object
 7   idade              int64 
 8   periodo            object
dtypes: int64(7), object(2)
memory usage: 1.1+ GB


## SINASC

In [4]:
path_sinasc_sus = f'{Path.home()}/Databases/SINASC/sinasc_sus.csv.gzip'
# df_sinasc = pd.read_csv(path_sinasc_sus, low_memory=False)
# df_sinasc

In [5]:
# df_sinasc.info()

## Municípios

In [6]:
path_muns = f'{Path.home()}/Databases/MUNICIPIOS/municipios.csv.gzip'
df_muns = pd.read_csv(path_muns)
df_muns

Unnamed: 0,cod_ibge,regiao,uf,cod_uf,nome_uf,municipio,nome,capital,fronteira,amazonia,macroregiao_saude,regiao_saude,microregiao_saude,latitude,longitude,altitude,area
0,110000,Norte,RO,11,RONDONIA,Município ignorado - RO,MUNICIPIO IGNORADO - RO,False,False,False,1100,1100,11000,0.000000,0.000000,0.0,0.000000
1,110001,Norte,RO,11,RONDONIA,Alta Floresta D'Oeste,ALTA FLORESTA D'OESTE,False,True,True,1190,1102,11900,-11.929000,-61.995998,350.0,7066.702148
2,110002,Norte,RO,11,RONDONIA,Ariquemes,ARIQUEMES,False,False,True,1190,1104,11900,-9.913000,-63.041000,142.0,4426.558105
3,110003,Norte,RO,11,RONDONIA,Cabixi,CABIXI,False,True,True,1190,1103,11900,-13.492000,-60.544998,230.0,1314.354980
4,110004,Norte,RO,11,RONDONIA,Cacoal,CACOAL,False,False,True,1190,1102,11900,-11.438000,-61.448002,200.0,3792.637939
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4789,522200,Centro-Oeste,GO,52,GOIAS,Vianópolis,VIANOPOLIS,False,False,False,5201,5202,52900,-16.742001,-48.515999,1002.0,954.278992
4790,522205,Centro-Oeste,GO,52,GOIAS,Vicentinópolis,VICENTINOPOLIS,False,False,False,5201,5202,52900,-17.735001,-49.806000,646.0,737.250977
4791,522220,Centro-Oeste,GO,52,GOIAS,Vila Boa,VILA BOA,False,False,False,5202,5206,52900,-15.038000,-47.058998,0.0,1060.170044
4792,522230,Centro-Oeste,GO,52,GOIAS,Vila Propício,VILA PROPICIO,False,False,False,5203,5212,52900,-15.457000,-48.889000,744.0,2181.574951


In [7]:
df_muns.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4794 entries, 0 to 4793
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   cod_ibge           4794 non-null   int64  
 1   regiao             4794 non-null   object 
 2   uf                 4794 non-null   object 
 3   cod_uf             4794 non-null   int64  
 4   nome_uf            4794 non-null   object 
 5   municipio          4794 non-null   object 
 6   nome               4794 non-null   object 
 7   capital            4794 non-null   bool   
 8   fronteira          4794 non-null   bool   
 9   amazonia           4794 non-null   bool   
 10  macroregiao_saude  4794 non-null   int64  
 11  regiao_saude       4794 non-null   int64  
 12  microregiao_saude  4794 non-null   int64  
 13  latitude           4790 non-null   float64
 14  longitude          4790 non-null   float64
 15  altitude           4790 non-null   float64
 16  area               4790 

## Distâncias

In [8]:
path_dist = f'{Path.home()}/Databases/DISTANCIAS/matriz_distancias.zip'
df_dist = pd.read_csv(path_dist, low_memory=False)
df_dist

Unnamed: 0,origem,destino,distancia,tempo
0,110001,110001,0.000000,0.000000
1,110001,110002,309.050000,6.169056
2,110001,110003,399.499700,6.289056
3,110001,110004,81.201103,1.917750
4,110001,110005,391.704300,6.136361
...,...,...,...,...
31024895,530010,522200,161.728900,2.356000
31024896,530010,522205,382.708800,5.479556
31024897,530010,522220,161.603400,2.248944
31024898,530010,522230,190.000900,3.216083


In [9]:
df_dist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31024900 entries, 0 to 31024899
Data columns (total 4 columns):
 #   Column     Dtype  
---  ------     -----  
 0   origem     int64  
 1   destino    int64  
 2   distancia  float64
 3   tempo      float64
dtypes: float64(2), int64(2)
memory usage: 946.8 MB


## Indicador SocioEconômico

In [10]:
path_socioeco = f'{Path.home()}/Databases/HEALTH/health_regions.csv'
df_socioeco = pd.read_csv(path_socioeco)
df_socioeco

Unnamed: 0,UF,Município,Cód IBGE,Cód Região de Saúde,Nome da Região de Saúde,Grupo
0,AC,Acrelândia,120001,12002,Baixo Acre e Purus,2
1,AC,Assis Brasil,120005,12001,Alto Acre,1
2,AC,Brasiléia,120010,12001,Alto Acre,1
3,AC,Bujari,120013,12002,Baixo Acre e Purus,2
4,AC,Capixaba,120017,12002,Baixo Acre e Purus,2
...,...,...,...,...,...,...
5566,TO,Tocantinópolis,172120,17002,Bico do Papagaio,1
5567,TO,Tupirama,172125,17004,Cerrado Tocantins Araguaia,2
5568,TO,Tupiratins,172130,17004,Cerrado Tocantins Araguaia,2
5569,TO,Wanderlândia,172208,17001,Médio Norte Araguaia,3


In [11]:
df_socioeco[['Cód IBGE', 'Grupo']]

Unnamed: 0,Cód IBGE,Grupo
0,120001,2
1,120005,1
2,120010,1
3,120013,2
4,120017,2
...,...,...
5566,172120,1
5567,172125,2
5568,172130,2
5569,172208,3


# Tabela Deslocamento

In [12]:
def merge_lat_lon(df_left, df_right, left_on, right_on):
  prefix = left_on.split('_')[0]
  df = pd.merge(df_left, df_right, how='left', left_on=left_on, right_on=right_on)
  df = df.rename(columns={
    'latitude': f'{prefix}_latitude',
    'longitude': f'{prefix}_longitude'})
  df = df.drop(right_on, axis=1)
  return df

In [13]:
cols = ['periodo', 'tipo_parto', 'res_municipio', 'hosp_municipio', 'res_regiao_saude', 'hosp_regiao_saude', 'cnes']
df_deslc = df_sihsus[cols] # df_sinasc
df_deslc = df_deslc[df_deslc['periodo'] != 'durante']
df_deslc = df_deslc.groupby(
  cols, as_index=False
).size().sort_values('size', ascending=False, ignore_index=True)
df_deslc = df_deslc.rename(columns={'size': 'nascimentos'})
df_deslc

Unnamed: 0,periodo,tipo_parto,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,cnes,nascimentos
0,antes,normal,130260,130260,1301,1301,3151794,9330
1,antes,normal,355030,355030,3501,3501,2077701,8989
2,antes,normal,355030,355030,3501,3501,2077388,8722
3,antes,normal,355030,355030,3501,3501,2079186,8594
4,depois,normal,130260,130260,1301,1301,3151794,8524
...,...,...,...,...,...,...,...,...
124774,antes,normal,310920,310620,3114,3101,26972,1
124775,depois,cesariano,520725,521020,5214,5204,2519593,1
124776,antes,normal,310910,316370,3110,3111,2764814,1
124777,depois,cesariano,520725,521565,5214,5204,3485536,1


In [14]:
df_deslc['periodo'].value_counts()

depois    65116
antes     59663
Name: periodo, dtype: int64

In [15]:
df_deslc['mun_diff'] = df_deslc['res_municipio'] != df_deslc['hosp_municipio']
df_deslc['regsau_diff'] = df_deslc['res_regiao_saude'] != df_deslc['hosp_regiao_saude']
df_deslc

Unnamed: 0,periodo,tipo_parto,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,cnes,nascimentos,mun_diff,regsau_diff
0,antes,normal,130260,130260,1301,1301,3151794,9330,False,False
1,antes,normal,355030,355030,3501,3501,2077701,8989,False,False
2,antes,normal,355030,355030,3501,3501,2077388,8722,False,False
3,antes,normal,355030,355030,3501,3501,2079186,8594,False,False
4,depois,normal,130260,130260,1301,1301,3151794,8524,False,False
...,...,...,...,...,...,...,...,...,...,...
124774,antes,normal,310920,310620,3114,3101,26972,1,True,True
124775,depois,cesariano,520725,521020,5214,5204,2519593,1,True,True
124776,antes,normal,310910,316370,3110,3111,2764814,1,True,True
124777,depois,cesariano,520725,521565,5214,5204,3485536,1,True,True


In [16]:
res_cols = [
  'cod_ibge',
  'regiao',
  'uf',
  'capital',
  'fronteira',
  'amazonia',
  'latitude',
  'longitude',]

hosp_cols = [
  'cod_ibge',
  'latitude',
  'longitude',]

In [17]:
df_deslc = merge_lat_lon(df_deslc, df_muns[res_cols], 'res_municipio', 'cod_ibge')
df_deslc = merge_lat_lon(df_deslc, df_muns[hosp_cols], 'hosp_municipio', 'cod_ibge')
df_deslc

Unnamed: 0,periodo,tipo_parto,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,cnes,nascimentos,mun_diff,regsau_diff,regiao,uf,capital,fronteira,amazonia,res_latitude,res_longitude,hosp_latitude,hosp_longitude
0,antes,normal,130260,130260,1301,1301,3151794,9330,False,False,Norte,AM,True,False,True,-3.102000,-60.025002,-3.102000,-60.025002
1,antes,normal,355030,355030,3501,3501,2077701,8989,False,False,Sudeste,SP,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002
2,antes,normal,355030,355030,3501,3501,2077388,8722,False,False,Sudeste,SP,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002
3,antes,normal,355030,355030,3501,3501,2079186,8594,False,False,Sudeste,SP,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002
4,depois,normal,130260,130260,1301,1301,3151794,8524,False,False,Norte,AM,True,False,True,-3.102000,-60.025002,-3.102000,-60.025002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124776,antes,normal,310920,310620,3114,3101,26972,1,True,True,Sudeste,MG,False,False,False,-17.872999,-44.180000,-19.816999,-43.956001
124777,depois,cesariano,520725,521020,5214,5204,2519593,1,True,True,Centro-Oeste,GO,False,False,False,-16.719999,-52.319000,-16.441999,-51.118000
124778,antes,normal,310910,316370,3110,3111,2764814,1,True,True,Sudeste,MG,False,False,False,-22.441000,-46.351002,-22.115999,-45.054001
124779,depois,cesariano,520725,521565,5214,5204,3485536,1,True,True,Centro-Oeste,GO,False,False,False,-16.719999,-52.319000,-16.733000,-51.533001


In [18]:
df_deslc = pd.merge(df_deslc, df_dist, how='left', left_on=['res_municipio', 'hosp_municipio'], right_on=['origem', 'destino'])
df_deslc = df_deslc.drop(['origem', 'destino'], axis=1)
df_deslc

Unnamed: 0,periodo,tipo_parto,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,cnes,nascimentos,mun_diff,regsau_diff,...,uf,capital,fronteira,amazonia,res_latitude,res_longitude,hosp_latitude,hosp_longitude,distancia,tempo
0,antes,normal,130260,130260,1301,1301,3151794,9330,False,False,...,AM,True,False,True,-3.102000,-60.025002,-3.102000,-60.025002,0.0000,0.000000
1,antes,normal,355030,355030,3501,3501,2077701,8989,False,False,...,SP,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000
2,antes,normal,355030,355030,3501,3501,2077388,8722,False,False,...,SP,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000
3,antes,normal,355030,355030,3501,3501,2079186,8594,False,False,...,SP,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000
4,depois,normal,130260,130260,1301,1301,3151794,8524,False,False,...,AM,True,False,True,-3.102000,-60.025002,-3.102000,-60.025002,0.0000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124776,antes,normal,310920,310620,3114,3101,26972,1,True,True,...,MG,False,False,False,-17.872999,-44.180000,-19.816999,-43.956001,286.4915,3.550611
124777,depois,cesariano,520725,521020,5214,5204,2519593,1,True,True,...,GO,False,False,False,-16.719999,-52.319000,-16.441999,-51.118000,172.7509,2.706389
124778,antes,normal,310910,316370,3110,3111,2764814,1,True,True,...,MG,False,False,False,-22.441000,-46.351002,-22.115999,-45.054001,196.7931,3.693056
124779,depois,cesariano,520725,521565,5214,5204,3485536,1,True,True,...,GO,False,False,False,-16.719999,-52.319000,-16.733000,-51.533001,107.2883,1.703556


In [19]:
df_deslc = pd.merge(df_deslc, df_socioeco[['Cód IBGE', 'Grupo']], how='left', left_on='res_municipio', right_on='Cód IBGE')
df_deslc = df_deslc.rename(columns={'Grupo': 'socioeconomico'})
df_deslc = df_deslc.drop(['Cód IBGE'], axis=1)
df_deslc

Unnamed: 0,periodo,tipo_parto,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,cnes,nascimentos,mun_diff,regsau_diff,...,capital,fronteira,amazonia,res_latitude,res_longitude,hosp_latitude,hosp_longitude,distancia,tempo,socioeconomico
0,antes,normal,130260,130260,1301,1301,3151794,9330,False,False,...,True,False,True,-3.102000,-60.025002,-3.102000,-60.025002,0.0000,0.000000,4
1,antes,normal,355030,355030,3501,3501,2077701,8989,False,False,...,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000,5
2,antes,normal,355030,355030,3501,3501,2077388,8722,False,False,...,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000,5
3,antes,normal,355030,355030,3501,3501,2079186,8594,False,False,...,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000,5
4,depois,normal,130260,130260,1301,1301,3151794,8524,False,False,...,True,False,True,-3.102000,-60.025002,-3.102000,-60.025002,0.0000,0.000000,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124776,antes,normal,310920,310620,3114,3101,26972,1,True,True,...,False,False,False,-17.872999,-44.180000,-19.816999,-43.956001,286.4915,3.550611,1
124777,depois,cesariano,520725,521020,5214,5204,2519593,1,True,True,...,False,False,False,-16.719999,-52.319000,-16.441999,-51.118000,172.7509,2.706389,3
124778,antes,normal,310910,316370,3110,3111,2764814,1,True,True,...,False,False,False,-22.441000,-46.351002,-22.115999,-45.054001,196.7931,3.693056,3
124779,depois,cesariano,520725,521565,5214,5204,3485536,1,True,True,...,False,False,False,-16.719999,-52.319000,-16.733000,-51.533001,107.2883,1.703556,3


## Tabela Final

In [20]:
path_deslc = f'{Path.home()}/Databases/GESTANTES/deslocamento_sih.csv.gzip'
df_deslc.to_csv(path_deslc, index=False)

In [21]:
pd.read_csv(path_deslc, low_memory=False)

Unnamed: 0,periodo,tipo_parto,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,cnes,nascimentos,mun_diff,regsau_diff,...,capital,fronteira,amazonia,res_latitude,res_longitude,hosp_latitude,hosp_longitude,distancia,tempo,socioeconomico
0,antes,normal,130260,130260,1301,1301,3151794,9330,False,False,...,True,False,True,-3.102000,-60.025002,-3.102000,-60.025002,0.0000,0.000000,4
1,antes,normal,355030,355030,3501,3501,2077701,8989,False,False,...,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000,5
2,antes,normal,355030,355030,3501,3501,2077388,8722,False,False,...,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000,5
3,antes,normal,355030,355030,3501,3501,2079186,8594,False,False,...,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000,5
4,depois,normal,130260,130260,1301,1301,3151794,8524,False,False,...,True,False,True,-3.102000,-60.025002,-3.102000,-60.025002,0.0000,0.000000,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124776,antes,normal,310920,310620,3114,3101,26972,1,True,True,...,False,False,False,-17.872999,-44.180000,-19.816999,-43.956001,286.4915,3.550611,1
124777,depois,cesariano,520725,521020,5214,5204,2519593,1,True,True,...,False,False,False,-16.719999,-52.319000,-16.441999,-51.118000,172.7509,2.706389,3
124778,antes,normal,310910,316370,3110,3111,2764814,1,True,True,...,False,False,False,-22.441000,-46.351002,-22.115999,-45.054001,196.7931,3.693056,3
124779,depois,cesariano,520725,521565,5214,5204,3485536,1,True,True,...,False,False,False,-16.719999,-52.319000,-16.733000,-51.533001,107.2883,1.703556,3


In [23]:
df_deslc = df_deslc[[
  'periodo', 'tipo_parto', 'cnes',
  'res_municipio', 'hosp_municipio',
  'res_regiao_saude', 'hosp_regiao_saude',
  'mun_diff', 'regsau_diff',
  'regiao', 'uf',
  'capital', 'fronteira', 'amazonia', 'socioeconomico',
  'res_latitude', 'res_longitude',
  'hosp_latitude', 'hosp_longitude',
  'distancia', 'tempo',
  'nascimentos']]
df_deslc

Unnamed: 0,periodo,tipo_parto,cnes,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,mun_diff,regsau_diff,regiao,...,fronteira,amazonia,socioeconomico,res_latitude,res_longitude,hosp_latitude,hosp_longitude,distancia,tempo,nascimentos
0,antes,normal,3151794,130260,130260,1301,1301,False,False,Norte,...,False,True,4,-3.102000,-60.025002,-3.102000,-60.025002,0.0000,0.000000,9330
1,antes,normal,2077701,355030,355030,3501,3501,False,False,Sudeste,...,False,False,5,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000,8989
2,antes,normal,2077388,355030,355030,3501,3501,False,False,Sudeste,...,False,False,5,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000,8722
3,antes,normal,2079186,355030,355030,3501,3501,False,False,Sudeste,...,False,False,5,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000,8594
4,depois,normal,3151794,130260,130260,1301,1301,False,False,Norte,...,False,True,4,-3.102000,-60.025002,-3.102000,-60.025002,0.0000,0.000000,8524
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124776,antes,normal,26972,310920,310620,3114,3101,True,True,Sudeste,...,False,False,1,-17.872999,-44.180000,-19.816999,-43.956001,286.4915,3.550611,1
124777,depois,cesariano,2519593,520725,521020,5214,5204,True,True,Centro-Oeste,...,False,False,3,-16.719999,-52.319000,-16.441999,-51.118000,172.7509,2.706389,1
124778,antes,normal,2764814,310910,316370,3110,3111,True,True,Sudeste,...,False,False,3,-22.441000,-46.351002,-22.115999,-45.054001,196.7931,3.693056,1
124779,depois,cesariano,3485536,520725,521565,5214,5204,True,True,Centro-Oeste,...,False,False,3,-16.719999,-52.319000,-16.733000,-51.533001,107.2883,1.703556,1


In [24]:
df_deslc.to_excel(path_deslc.replace('csv.gzip', 'xlsx'))

## RJ

In [25]:
df_deslc_rj = df_deslc[df_deslc['uf'] == 'RJ']
df_deslc_rj

Unnamed: 0,periodo,tipo_parto,cnes,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,mun_diff,regsau_diff,regiao,...,fronteira,amazonia,socioeconomico,res_latitude,res_longitude,hosp_latitude,hosp_longitude,distancia,tempo,nascimentos
11,antes,normal,2280248,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5,-22.903000,-43.208,-22.903000,-43.208000,0.0000,0.000000,6738
18,antes,normal,2270609,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5,-22.903000,-43.208,-22.903000,-43.208000,0.0000,0.000000,6250
21,depois,normal,7027397,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5,-22.903000,-43.208,-22.903000,-43.208000,0.0000,0.000000,5991
22,depois,normal,2270609,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5,-22.903000,-43.208,-22.903000,-43.208000,0.0000,0.000000,5986
24,depois,normal,2798662,330350,330350,3305,3305,False,False,Sudeste,...,False,False,5,-22.759001,-43.451,-22.759001,-43.451000,0.0000,0.000000,5973
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121177,antes,normal,2296241,330455,330430,3305,3302,True,True,Sudeste,...,False,False,5,-22.903000,-43.208,-22.708000,-42.625999,75.5213,0.979028,1
121178,antes,normal,5042488,330455,330330,3305,3305,True,False,Sudeste,...,False,False,5,-22.903000,-43.208,-22.882999,-43.104000,17.8948,0.306417,1
121180,antes,normal,6146376,330455,330185,3305,3305,True,False,Sudeste,...,False,False,5,-22.903000,-43.208,-22.537001,-42.981998,73.8015,0.981694,1
121181,antes,normal,2280868,330455,330010,3305,3301,True,True,Sudeste,...,False,False,5,-22.903000,-43.208,-23.007000,-44.318001,154.9166,2.006583,1


In [26]:
df_deslc_rj.to_excel(path_deslc.replace('.csv.gzip', '_rj.xlsx'), index=False)

In [27]:
df_deslc_rj['periodo'].value_counts()

depois    2127
antes     1337
Name: periodo, dtype: int64

In [29]:
df_deslc_rj['tipo_parto'].value_counts()

normal       1887
cesariano    1577
Name: tipo_parto, dtype: int64