In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from zipfile import ZipFile
from datatable import dt, f, by
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn3
from plotly import express as px, io as pio

# Make DataFrame.plot / Series.plot draw with plotly instead of the default backend.
pd.set_option('plotting.backend', 'plotly')
# Default renderer used whenever a plotly figure is displayed in this notebook.
pio.renderers.default = 'plotly_mimetype+notebook_connected'

# Fontes de dados

## Procedimentos SIH

In [None]:
# Location of the procedures table that carries an `idade` column
# (see the preview a few cells below).
output_proc = f'{Path.home()}/Databases/GESTANTES/procedimentos_age.csv.gzip'
# fread parses the file with datatable; the result is converted to pandas right
# away because the later cell that adds `periodo` uses the pandas API
# (`Series.apply` / column assignment), which a datatable Frame does not offer.
df_proc = dt.fread(output_proc).to_pandas()

In [2]:
# Load the SIH procedures table from the user's home directory.
path_sihsus = f'{Path.home()}/Databases/GESTANTES/procedimentos.csv.gzip'
df_sihsus = pd.read_csv(path_sihsus)
# Display the frame (pandas truncates the middle rows in the rendered output).
df_sihsus

Unnamed: 0,ano,periodo,tipo_parto,cnes,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,count
0,2010,antes,normal,2077701,355030,3501,355030,3501,4358
1,2010,antes,normal,2077388,355030,3501,355030,3501,4207
2,2010,antes,normal,3151794,130260,1301,130260,1301,4145
3,2010,antes,normal,2079186,355030,3501,355030,3501,4059
4,2010,antes,normal,3956369,292740,2901,292740,2901,3935
...,...,...,...,...,...,...,...,...,...
461271,2019,depois,cesariano,9923837,270430,2701,355640,3520,1
461272,2019,depois,cesariano,9923837,270430,2701,420910,4213,1
461273,2019,depois,cesariano,9923837,270430,2701,510285,5109,1
461274,2019,depois,cesariano,9923837,270430,2701,510622,5114,1


In [3]:
# Column dtypes, non-null counts and memory footprint of the procedures table.
df_sihsus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 461276 entries, 0 to 461275
Data columns (total 9 columns):
 #   Column              Non-Null Count   Dtype 
---  ------              --------------   ----- 
 0   ano                 461276 non-null  int64 
 1   periodo             461276 non-null  object
 2   tipo_parto          461276 non-null  object
 3   cnes                461276 non-null  int64 
 4   hosp_cod_municipio  461276 non-null  int64 
 5   hosp_regiao_saude   461276 non-null  int64 
 6   cod_municipio       461276 non-null  int64 
 7   regiao_saude        461276 non-null  int64 
 8   count               461276 non-null  int64 
dtypes: int64(7), object(2)
memory usage: 31.7+ MB


### Criando coluna periodo

In [None]:
# Label every row with its study period, derived from the year:
#   2010-2011 -> 'antes', 2018-2019 -> 'depois', any other year -> 'durante'.
# Vectorised with isin + np.select instead of a per-row Python lambda, since the
# frame holds millions of rows; the resulting labels are the same.
df_proc['periodo'] = np.select(
  [df_proc['ano'].isin([2010, 2011]), df_proc['ano'].isin([2018, 2019])],
  ['antes', 'depois'],
  default='durante')
df_proc

Unnamed: 0,ano,tipo_parto,cnes,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,idade,count,periodo
0,2011,normal,6627595,130260,1301,130260,1301,22,519,antes
1,2012,normal,6627595,130260,1301,130260,1301,27,449,durante
2,2011,normal,6627595,130260,1301,130260,1301,21,429,antes
3,2012,normal,2077388,355030,3501,355030,3501,20,404,durante
4,2011,cesariano,6627595,130260,1301,130260,1301,22,402,antes
...,...,...,...,...,...,...,...,...,...,...
3355686,2019,cesariano,9923837,270430,2701,355640,3520,29,1,depois
3355687,2019,cesariano,9923837,270430,2701,420910,4213,24,1,depois
3355688,2019,cesariano,9923837,270430,2701,510285,5109,28,1,depois
3355689,2019,cesariano,9923837,270430,2701,510622,5114,29,1,depois


## Municípios

In [4]:
# Load the municipality reference table (region, UF, flags, health regions,
# coordinates, ... - see the columns in the preview below).
path_muns = f'{Path.home()}/Databases/GESTANTES/municipios.csv.gzip'
df_muns = pd.read_csv(path_muns)
df_muns

Unnamed: 0,cod_municipio,regiao,uf,cod_uf,nome_uf,municipio,nome,capital,fronteira,amazonia,macroregiao_saude,regiao_saude,microregiao_saude,latitude,longitude,altitude,area,count
0,355030,Sudeste,SP,35,SAO PAULO,São Paulo,SAO PAULO,S,N,N,3590,3501,35900,-23.5480,-46.6360,760.0,1522.986,806352
1,330455,Sudeste,RJ,33,RIO DE JANEIRO,Rio de Janeiro,RIO DE JANEIRO,S,N,N,3390,3305,33900,-22.9030,-43.2080,2.0,1182.296,368216
2,130260,Norte,AM,13,AMAZONAS,Manaus,MANAUS,S,N,S,1390,1301,13900,-3.1020,-60.0250,92.0,11401.058,297036
3,230440,Nordeste,CE,23,CEARA,Fortaleza,FORTALEZA,S,N,N,2301,2301,23900,-3.7170,-38.5430,21.0,313.140,203370
4,292740,Nordeste,BA,29,BAHIA,Salvador,SALVADOR,S,N,N,2902,2901,29021,-12.9710,-38.5110,8.0,706.799,169924
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5564,520020,Centro-Oeste,GO,52,GOIAS,Água Limpa,AGUA LIMPA,N,N,N,5205,5216,52900,-18.0740,-48.7620,682.0,452.856,25
5565,510120,Centro-Oeste,MT,51,MATO GROSSO,Araguainha,ARAGUAINHA,N,N,S,5190,5107,51900,-16.8560,-53.0330,462.0,688.676,23
5566,350640,Sudeste,SP,35,SAO PAULO,Bilac,BILAC,N,N,N,3590,3506,35900,-21.4030,-50.4710,431.0,157.278,18
5567,500627,Centro-Oeste,MS,50,MATO GROSSO DO SUL,Paraíso das Águas,PARAISO DAS AGUAS,N,N,N,0,0,0,-19.0216,-53.0116,0.0,5032.469,15


In [5]:
# Column dtypes and non-null counts of the municipality table
# (the coordinate/altitude columns have a few missing values).
df_muns.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5569 entries, 0 to 5568
Data columns (total 18 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   cod_municipio      5569 non-null   int64  
 1   regiao             5569 non-null   object 
 2   uf                 5569 non-null   object 
 3   cod_uf             5569 non-null   int64  
 4   nome_uf            5569 non-null   object 
 5   municipio          5569 non-null   object 
 6   nome               5569 non-null   object 
 7   capital            5569 non-null   object 
 8   fronteira          5569 non-null   object 
 9   amazonia           5569 non-null   object 
 10  macroregiao_saude  5569 non-null   int64  
 11  regiao_saude       5569 non-null   int64  
 12  microregiao_saude  5569 non-null   int64  
 13  latitude           5565 non-null   float64
 14  longitude          5565 non-null   float64
 15  altitude           5565 non-null   float64
 16  area               5565 

## Distâncias

In [6]:
# Load the origin/destination matrix with distance and travel time between
# pairs of municipality codes. This is the largest input of the notebook
# (about 31 million rows, ~950 MB in memory per the info() output below).
path_dist = f'{Path.home()}/Databases/DISTANCIAS/matriz_distancias.zip'
# low_memory=False makes pandas parse the file in one pass instead of in chunks.
df_dist = pd.read_csv(path_dist, low_memory=False)
df_dist

Unnamed: 0,origem,destino,distancia,tempo
0,110001,110001,0.000000,0.000000
1,110001,110002,309.050000,6.169056
2,110001,110003,399.499700,6.289056
3,110001,110004,81.201103,1.917750
4,110001,110005,391.704300,6.136361
...,...,...,...,...
31024895,530010,522200,161.728900,2.356000
31024896,530010,522205,382.708800,5.479556
31024897,530010,522220,161.603400,2.248944
31024898,530010,522230,190.000900,3.216083


In [7]:
# Column dtypes and memory footprint of the distance matrix.
df_dist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31024900 entries, 0 to 31024899
Data columns (total 4 columns):
 #   Column     Dtype  
---  ------     -----  
 0   origem     int64  
 1   destino    int64  
 2   distancia  float64
 3   tempo      float64
dtypes: float64(2), int64(2)
memory usage: 946.8 MB


## Indicador Socioeconômico

In [8]:
# Load the health-regions table; its `Grupo` column is later attached to the
# displacement table under the name `socioeconomico`.
path_socioeco = f'{Path.home()}/Databases/HEALTH/health_regions.csv'
df_socioeco = pd.read_csv(path_socioeco)
df_socioeco

Unnamed: 0,UF,Município,Cód IBGE,Cód Região de Saúde,Nome da Região de Saúde,Grupo
0,AC,Acrelândia,120001,12002,Baixo Acre e Purus,2
1,AC,Assis Brasil,120005,12001,Alto Acre,1
2,AC,Brasiléia,120010,12001,Alto Acre,1
3,AC,Bujari,120013,12002,Baixo Acre e Purus,2
4,AC,Capixaba,120017,12002,Baixo Acre e Purus,2
...,...,...,...,...,...,...
5566,TO,Tocantinópolis,172120,17002,Bico do Papagaio,1
5567,TO,Tupirama,172125,17004,Cerrado Tocantins Araguaia,2
5568,TO,Tupiratins,172130,17004,Cerrado Tocantins Araguaia,2
5569,TO,Wanderlândia,172208,17001,Médio Norte Araguaia,3


In [9]:
# Preview the only two columns used further down: the IBGE municipality code
# (join key) and the group value.
df_socioeco[['Cód IBGE', 'Grupo']]

Unnamed: 0,Cód IBGE,Grupo
0,120001,2
1,120005,1
2,120010,1
3,120013,2
4,120017,2
...,...,...
5566,172120,1
5567,172125,2
5568,172130,2
5569,172208,3


# Tabela Deslocamento

In [72]:
# Every column except the first one (`ano`): the year is dropped so that rows
# can later be aggregated per period.
cols_sih = df_sihsus.columns[1:]
# Keep only the rows whose period is not 'durante', restricted to those columns.
df_deslc = df_sihsus.loc[df_sihsus['periodo'].ne('durante'), cols_sih]
df_deslc

Unnamed: 0,periodo,tipo_parto,cnes,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,count
0,antes,normal,2077701,355030,3501,355030,3501,4358
1,antes,normal,2077388,355030,3501,355030,3501,4207
2,antes,normal,3151794,130260,1301,130260,1301,4145
3,antes,normal,2079186,355030,3501,355030,3501,4059
4,antes,normal,3956369,292740,2901,292740,2901,3935
...,...,...,...,...,...,...,...,...
461271,depois,cesariano,9923837,270430,2701,355640,3520,1
461272,depois,cesariano,9923837,270430,2701,420910,4213,1
461273,depois,cesariano,9923837,270430,2701,510285,5109,1
461274,depois,cesariano,9923837,270430,2701,510622,5114,1


In [73]:
# Collapse the yearly rows: group by every column except the trailing `count`,
# add the counts up, and list the largest totals first with a fresh 0..n-1 index.
df_deslc = (
  df_deslc
  .groupby(cols_sih[:-1].tolist(), as_index=False)
  .sum()
  .sort_values(by='count', ascending=False, ignore_index=True)
)
df_deslc

Unnamed: 0,periodo,tipo_parto,cnes,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,count
0,antes,normal,3151794,130260,1301,130260,1301,9330
1,antes,normal,2077701,355030,3501,355030,3501,8989
2,antes,normal,2077388,355030,3501,355030,3501,8722
3,antes,normal,2079186,355030,3501,355030,3501,8594
4,depois,normal,3151794,130260,1301,130260,1301,8524
...,...,...,...,...,...,...,...,...
124774,depois,cesariano,2101432,315340,3107,314800,3107,1
124775,depois,cesariano,2101777,313630,3107,310020,3114,1
124776,depois,cesariano,2101777,313630,3107,311580,3125,1
124777,depois,cesariano,2101777,313630,3107,311680,3104,1


In [74]:
# Number of aggregated rows per period (only 'antes' and 'depois' remain).
df_deslc['periodo'].value_counts()

depois    65116
antes     59663
Name: periodo, dtype: int64

In [75]:
# Boolean flags: True when the row's municipality / health region differs from
# the hospital's municipality / health region.
df_deslc = df_deslc.assign(
  mun_diff=df_deslc['cod_municipio'].ne(df_deslc['hosp_cod_municipio']),
  regsau_diff=df_deslc['regiao_saude'].ne(df_deslc['hosp_regiao_saude']),
)
df_deslc

Unnamed: 0,periodo,tipo_parto,cnes,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,count,mun_diff,regsau_diff
0,antes,normal,3151794,130260,1301,130260,1301,9330,False,False
1,antes,normal,2077701,355030,3501,355030,3501,8989,False,False
2,antes,normal,2077388,355030,3501,355030,3501,8722,False,False
3,antes,normal,2079186,355030,3501,355030,3501,8594,False,False
4,depois,normal,3151794,130260,1301,130260,1301,8524,False,False
...,...,...,...,...,...,...,...,...,...,...
124774,depois,cesariano,2101432,315340,3107,314800,3107,1,True,False
124775,depois,cesariano,2101777,313630,3107,310020,3114,1,True,True
124776,depois,cesariano,2101777,313630,3107,311580,3125,1,True,True
124777,depois,cesariano,2101777,313630,3107,311680,3104,1,True,True


In [76]:
# Municipality-table columns joined on the row's own `cod_municipio`:
# the key, the descriptive attributes, and the coordinates.
res_cols = [
  'cod_municipio',
  'regiao', 'uf',
  'capital', 'fronteira', 'amazonia',
  'latitude', 'longitude',
]

# Municipality-table columns joined on the hospital's municipality:
# only the key and the coordinates.
hosp_cols = ['cod_municipio', 'latitude', 'longitude']

In [77]:
def merge_lat_lon(df_left, df_right, left_on, right_on):
  """Left-join ``df_right`` onto ``df_left`` and tidy the resulting column names.

  Args:
    df_left: Frame to enrich; all of its rows are kept (left join).
    df_right: Lookup frame that supplies the extra columns.
    left_on: Key column in ``df_left``. If the text before its first
      underscore is ``'hosp'``, the joined ``latitude`` / ``longitude``
      columns are renamed to ``hosp_latitude`` / ``hosp_longitude``.
    right_on: Key column in ``df_right``.

  Returns:
    The merged DataFrame. A ``cod_municipio_x`` column (the suffix pandas
    gives the left-hand ``cod_municipio`` when both frames carry that column
    but are joined on different keys) is renamed to ``right_on``; a
    ``cod_municipio_y`` column, if produced, is left in place.
  """
  # Build the full rename map first, then apply it in one pass.
  renames = {'cod_municipio_x': right_on}
  key_prefix = left_on.partition('_')[0]
  if key_prefix == 'hosp':
    renames['latitude'] = f'{key_prefix}_latitude'
    renames['longitude'] = f'{key_prefix}_longitude'

  merged = pd.merge(left=df_left, right=df_right, how='left', left_on=left_on, right_on=right_on)
  return merged.rename(columns=renames)

In [78]:
# Hospital side: look up the coordinates of the hospital's municipality
# (returned as hosp_latitude / hosp_longitude).
df_deslc_hosp = merge_lat_lon(
  df_deslc, df_muns[hosp_cols], left_on='hosp_cod_municipio', right_on='cod_municipio')
# Row's own municipality: attach region, UF, flags and coordinates.
df_deslc_res = merge_lat_lon(
  df_deslc, df_muns[res_cols], left_on='cod_municipio', right_on='cod_municipio')
# Both results are left joins of the same frame, so the hospital coordinates
# are copied across by index alignment.
for coord_col in ('hosp_latitude', 'hosp_longitude'):
  df_deslc_res[coord_col] = df_deslc_hosp[coord_col]
df_deslc = df_deslc_res
df_deslc

Unnamed: 0,periodo,tipo_parto,cnes,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,count,mun_diff,regsau_diff,regiao,uf,capital,fronteira,amazonia,latitude,longitude,hosp_latitude,hosp_longitude
0,antes,normal,3151794,130260,1301,130260,1301,9330,False,False,Norte,AM,S,N,S,-3.102,-60.025,-3.102,-60.025
1,antes,normal,2077701,355030,3501,355030,3501,8989,False,False,Sudeste,SP,S,N,N,-23.548,-46.636,-23.548,-46.636
2,antes,normal,2077388,355030,3501,355030,3501,8722,False,False,Sudeste,SP,S,N,N,-23.548,-46.636,-23.548,-46.636
3,antes,normal,2079186,355030,3501,355030,3501,8594,False,False,Sudeste,SP,S,N,N,-23.548,-46.636,-23.548,-46.636
4,depois,normal,3151794,130260,1301,130260,1301,8524,False,False,Norte,AM,S,N,S,-3.102,-60.025,-3.102,-60.025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124774,depois,cesariano,2101432,315340,3107,314800,3107,1,True,False,Sudeste,MG,N,N,N,-18.579,-46.518,-18.418,-46.418
124775,depois,cesariano,2101777,313630,3107,310020,3114,1,True,True,Sudeste,MG,N,N,N,-19.160,-45.446,-17.743,-46.173
124776,depois,cesariano,2101777,313630,3107,311580,3125,1,True,True,Sudeste,MG,N,N,N,-18.584,-49.199,-17.743,-46.173
124777,depois,cesariano,2101777,313630,3107,311680,3104,1,True,True,Sudeste,MG,N,N,N,-18.234,-42.840,-17.743,-46.173


In [79]:
# Attach distance and travel time for each (municipality -> hospital municipality)
# pair, then discard the matrix's own key columns.
df_deslc = (
  df_deslc
  .merge(
    df_dist,
    how='left',
    left_on=['cod_municipio', 'hosp_cod_municipio'],
    right_on=['origem', 'destino'],
  )
  .drop(columns=['origem', 'destino'])
)
df_deslc

Unnamed: 0,periodo,tipo_parto,cnes,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,count,mun_diff,regsau_diff,...,uf,capital,fronteira,amazonia,latitude,longitude,hosp_latitude,hosp_longitude,distancia,tempo
0,antes,normal,3151794,130260,1301,130260,1301,9330,False,False,...,AM,S,N,S,-3.102,-60.025,-3.102,-60.025,0.0000,0.000000
1,antes,normal,2077701,355030,3501,355030,3501,8989,False,False,...,SP,S,N,N,-23.548,-46.636,-23.548,-46.636,0.0000,0.000000
2,antes,normal,2077388,355030,3501,355030,3501,8722,False,False,...,SP,S,N,N,-23.548,-46.636,-23.548,-46.636,0.0000,0.000000
3,antes,normal,2079186,355030,3501,355030,3501,8594,False,False,...,SP,S,N,N,-23.548,-46.636,-23.548,-46.636,0.0000,0.000000
4,depois,normal,3151794,130260,1301,130260,1301,8524,False,False,...,AM,S,N,S,-3.102,-60.025,-3.102,-60.025,0.0000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124774,depois,cesariano,2101432,315340,3107,314800,3107,1,True,False,...,MG,N,N,N,-18.579,-46.518,-18.418,-46.418,28.4474,0.531889
124775,depois,cesariano,2101777,313630,3107,310020,3114,1,True,True,...,MG,N,N,N,-19.160,-45.446,-17.743,-46.173,373.7197,4.552083
124776,depois,cesariano,2101777,313630,3107,311580,3125,1,True,True,...,MG,N,N,N,-18.584,-49.199,-17.743,-46.173,555.0323,6.626444
124777,depois,cesariano,2101777,313630,3107,311680,3104,1,True,True,...,MG,N,N,N,-18.234,-42.840,-17.743,-46.173,656.4867,7.842056


In [80]:
# Attach the `Grupo` value of each row's municipality (matched on the IBGE code),
# expose it as `socioeconomico`, and drop the duplicate key column.
df_deslc = (
  df_deslc
  .merge(
    df_socioeco[['Cód IBGE', 'Grupo']],
    how='left',
    left_on='cod_municipio',
    right_on='Cód IBGE',
  )
  .rename(columns={'Grupo': 'socioeconomico'})
  .drop(columns=['Cód IBGE'])
)
df_deslc

Unnamed: 0,periodo,tipo_parto,cnes,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,count,mun_diff,regsau_diff,...,capital,fronteira,amazonia,latitude,longitude,hosp_latitude,hosp_longitude,distancia,tempo,socioeconomico
0,antes,normal,3151794,130260,1301,130260,1301,9330,False,False,...,S,N,S,-3.102,-60.025,-3.102,-60.025,0.0000,0.000000,4
1,antes,normal,2077701,355030,3501,355030,3501,8989,False,False,...,S,N,N,-23.548,-46.636,-23.548,-46.636,0.0000,0.000000,5
2,antes,normal,2077388,355030,3501,355030,3501,8722,False,False,...,S,N,N,-23.548,-46.636,-23.548,-46.636,0.0000,0.000000,5
3,antes,normal,2079186,355030,3501,355030,3501,8594,False,False,...,S,N,N,-23.548,-46.636,-23.548,-46.636,0.0000,0.000000,5
4,depois,normal,3151794,130260,1301,130260,1301,8524,False,False,...,S,N,S,-3.102,-60.025,-3.102,-60.025,0.0000,0.000000,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124774,depois,cesariano,2101432,315340,3107,314800,3107,1,True,False,...,N,N,N,-18.579,-46.518,-18.418,-46.418,28.4474,0.531889,3
124775,depois,cesariano,2101777,313630,3107,310020,3114,1,True,True,...,N,N,N,-19.160,-45.446,-17.743,-46.173,373.7197,4.552083,3
124776,depois,cesariano,2101777,313630,3107,311580,3125,1,True,True,...,N,N,N,-18.584,-49.199,-17.743,-46.173,555.0323,6.626444,5
124777,depois,cesariano,2101777,313630,3107,311680,3104,1,True,True,...,N,N,N,-18.234,-42.840,-17.743,-46.173,656.4867,7.842056,3


## Tabela Final

In [81]:
# Persist the displacement table next to the other GESTANTES inputs.
# `path_deslc` is also the base name from which the Excel paths are derived later.
# NOTE(review): pandas infers compression from the file suffix, and '.gzip' does
# not appear to be one of the recognised suffixes, so this is presumably written
# as plain, uncompressed CSV despite the name - confirm before adding
# compression='gzip', because the read-back below (and any other reader) relies
# on the current on-disk format.
path_deslc = f'{Path.home()}/Databases/GESTANTES/deslocamento.csv.gzip'
df_deslc.to_csv(path_deslc, index=False)

In [82]:
# Sanity check: read the file that was just written and display it.
pd.read_csv(path_deslc, low_memory=False)

Unnamed: 0,periodo,tipo_parto,cnes,hosp_cod_municipio,hosp_regiao_saude,cod_municipio,regiao_saude,count,mun_diff,regsau_diff,...,capital,fronteira,amazonia,latitude,longitude,hosp_latitude,hosp_longitude,distancia,tempo,socioeconomico
0,antes,normal,3151794,130260,1301,130260,1301,9330,False,False,...,S,N,S,-3.102,-60.025,-3.102,-60.025,0.0000,0.000000,4
1,antes,normal,2077701,355030,3501,355030,3501,8989,False,False,...,S,N,N,-23.548,-46.636,-23.548,-46.636,0.0000,0.000000,5
2,antes,normal,2077388,355030,3501,355030,3501,8722,False,False,...,S,N,N,-23.548,-46.636,-23.548,-46.636,0.0000,0.000000,5
3,antes,normal,2079186,355030,3501,355030,3501,8594,False,False,...,S,N,N,-23.548,-46.636,-23.548,-46.636,0.0000,0.000000,5
4,depois,normal,3151794,130260,1301,130260,1301,8524,False,False,...,S,N,S,-3.102,-60.025,-3.102,-60.025,0.0000,0.000000,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124774,depois,cesariano,2101432,315340,3107,314800,3107,1,True,False,...,N,N,N,-18.579,-46.518,-18.418,-46.418,28.4474,0.531889,3
124775,depois,cesariano,2101777,313630,3107,310020,3114,1,True,True,...,N,N,N,-19.160,-45.446,-17.743,-46.173,373.7197,4.552083,3
124776,depois,cesariano,2101777,313630,3107,311580,3125,1,True,True,...,N,N,N,-18.584,-49.199,-17.743,-46.173,555.0323,6.626444,5
124777,depois,cesariano,2101777,313630,3107,311680,3104,1,True,True,...,N,N,N,-18.234,-42.840,-17.743,-46.173,656.4867,7.842056,3


In [83]:
# Put the columns in presentation order: identifiers, location keys, flags,
# municipality attributes, coordinates, distance/time, and the count last.
df_deslc = df_deslc.loc[:, [
  'periodo', 'tipo_parto', 'cnes',
  'cod_municipio', 'hosp_cod_municipio',
  'regiao_saude', 'hosp_regiao_saude',
  'mun_diff', 'regsau_diff',
  'regiao', 'uf',
  'capital', 'fronteira', 'amazonia', 'socioeconomico',
  'latitude', 'longitude',
  'hosp_latitude', 'hosp_longitude',
  'distancia', 'tempo',
  'count',
]]
df_deslc

Unnamed: 0,periodo,tipo_parto,cnes,cod_municipio,hosp_cod_municipio,regiao_saude,hosp_regiao_saude,mun_diff,regsau_diff,regiao,...,fronteira,amazonia,socioeconomico,latitude,longitude,hosp_latitude,hosp_longitude,distancia,tempo,count
0,antes,normal,3151794,130260,130260,1301,1301,False,False,Norte,...,N,S,4,-3.102,-60.025,-3.102,-60.025,0.0000,0.000000,9330
1,antes,normal,2077701,355030,355030,3501,3501,False,False,Sudeste,...,N,N,5,-23.548,-46.636,-23.548,-46.636,0.0000,0.000000,8989
2,antes,normal,2077388,355030,355030,3501,3501,False,False,Sudeste,...,N,N,5,-23.548,-46.636,-23.548,-46.636,0.0000,0.000000,8722
3,antes,normal,2079186,355030,355030,3501,3501,False,False,Sudeste,...,N,N,5,-23.548,-46.636,-23.548,-46.636,0.0000,0.000000,8594
4,depois,normal,3151794,130260,130260,1301,1301,False,False,Norte,...,N,S,4,-3.102,-60.025,-3.102,-60.025,0.0000,0.000000,8524
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124774,depois,cesariano,2101432,314800,315340,3107,3107,True,False,Sudeste,...,N,N,3,-18.579,-46.518,-18.418,-46.418,28.4474,0.531889,1
124775,depois,cesariano,2101777,310020,313630,3114,3107,True,True,Sudeste,...,N,N,3,-19.160,-45.446,-17.743,-46.173,373.7197,4.552083,1
124776,depois,cesariano,2101777,311580,313630,3125,3107,True,True,Sudeste,...,N,N,5,-18.584,-49.199,-17.743,-46.173,555.0323,6.626444,1
124777,depois,cesariano,2101777,311680,313630,3104,3107,True,True,Sudeste,...,N,N,3,-18.234,-42.840,-17.743,-46.173,656.4867,7.842056,1


In [84]:
# Export the reordered table to Excel under the same base name ('.xlsx' suffix).
# index=False keeps the meaningless 0..n-1 row index out of the sheet, matching
# the CSV export above and the RJ Excel export below.
df_deslc.to_excel(path_deslc.replace('csv.gzip', 'xlsx'), index=False)

## RJ

In [25]:
# Subset of the displacement table whose `uf` is Rio de Janeiro.
df_deslc_rj = df_deslc.loc[df_deslc['uf'].eq('RJ')]
df_deslc_rj

Unnamed: 0,periodo,tipo_parto,cnes,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,mun_diff,regsau_diff,regiao,...,fronteira,amazonia,socioeconomico,res_latitude,res_longitude,hosp_latitude,hosp_longitude,distancia,tempo,nascimentos
11,antes,normal,2280248,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5,-22.903000,-43.208,-22.903000,-43.208000,0.0000,0.000000,6738
18,antes,normal,2270609,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5,-22.903000,-43.208,-22.903000,-43.208000,0.0000,0.000000,6250
21,depois,normal,7027397,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5,-22.903000,-43.208,-22.903000,-43.208000,0.0000,0.000000,5991
22,depois,normal,2270609,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5,-22.903000,-43.208,-22.903000,-43.208000,0.0000,0.000000,5986
24,depois,normal,2798662,330350,330350,3305,3305,False,False,Sudeste,...,False,False,5,-22.759001,-43.451,-22.759001,-43.451000,0.0000,0.000000,5973
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121177,antes,normal,2296241,330455,330430,3305,3302,True,True,Sudeste,...,False,False,5,-22.903000,-43.208,-22.708000,-42.625999,75.5213,0.979028,1
121178,antes,normal,5042488,330455,330330,3305,3305,True,False,Sudeste,...,False,False,5,-22.903000,-43.208,-22.882999,-43.104000,17.8948,0.306417,1
121180,antes,normal,6146376,330455,330185,3305,3305,True,False,Sudeste,...,False,False,5,-22.903000,-43.208,-22.537001,-42.981998,73.8015,0.981694,1
121181,antes,normal,2280868,330455,330010,3305,3301,True,True,Sudeste,...,False,False,5,-22.903000,-43.208,-23.007000,-44.318001,154.9166,2.006583,1


In [26]:
# Export the RJ subset to '<base>_rj.xlsx' next to the main file, without the row index.
df_deslc_rj.to_excel(path_deslc.replace('.csv.gzip', '_rj.xlsx'), index=False)

In [27]:
# Number of RJ rows per period.
df_deslc_rj['periodo'].value_counts()

depois    2127
antes     1337
Name: periodo, dtype: int64

In [29]:
# Number of RJ rows per delivery type.
df_deslc_rj['tipo_parto'].value_counts()

normal       1887
cesariano    1577
Name: tipo_parto, dtype: int64