In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from zipfile import ZipFile
from datatable import dt, f, by
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn3
from plotly import express as px, io as pio

pd.options.plotting.backend = 'plotly'
pio.renderers.default = 'plotly_mimetype+notebook_connected'

# Bases

## SINASC

In [2]:
path_sinasc_sus = f'{Path.home()}/Databases/SINASC/sinasc_sus.csv.gzip'
df_sinasc = pd.read_csv(path_sinasc_sus, low_memory=False)
df_sinasc

Unnamed: 0,ano,cnes,hosp_municipio,res_municipio,hosp_regiao_saude,res_regiao_saude,parto_normal,n_gestados,n_pre_natal,idade,nivel_escolaridade,raca_cor,nasc_raca_cor,sexo_fem,nasc_peso,nasc_apgar1,nasc_apgar5,periodo
0,2010,2798484,110030,120040,1103,1201,True,1,4,20,3,,Branca,False,3550.0,8.0,9.0,antes
1,2010,5701929,120001,120001,1201,1201,True,1,3,21,4,,Parda,True,3000.0,9.0,10.0,antes
2,2010,5701929,120001,120001,1201,1201,True,1,3,31,3,,Parda,True,3000.0,9.0,10.0,antes
3,2010,5701929,120001,120001,1201,1201,True,1,4,23,3,,Parda,False,3900.0,7.0,9.0,antes
4,2010,5701929,120001,120001,1201,1201,True,1,4,26,4,,Parda,True,3250.0,7.0,10.0,antes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22773577,2019,2361787,520110,170730,5211,1710,False,1,3,18,3,Parda,Parda,True,2840.0,8.0,9.0,depois
22773578,2019,2338564,520870,171110,5201,1704,False,1,4,27,4,Parda,Parda,True,3082.0,8.0,9.0,depois
22773579,2019,10537,530010,170240,5301,1709,False,1,3,36,5,,Ignorado,,2870.0,8.0,9.0,depois
22773580,2019,5717515,530010,170610,5301,1710,False,1,3,21,4,Parda,Parda,False,2554.0,8.0,9.0,depois


In [3]:
df_sinasc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22773582 entries, 0 to 22773581
Data columns (total 18 columns):
 #   Column              Dtype  
---  ------              -----  
 0   ano                 int64  
 1   cnes                int64  
 2   hosp_municipio      int64  
 3   res_municipio       int64  
 4   hosp_regiao_saude   int64  
 5   res_regiao_saude    int64  
 6   parto_normal        bool   
 7   n_gestados          int64  
 8   n_pre_natal         int64  
 9   idade               int64  
 10  nivel_escolaridade  int64  
 11  raca_cor            object 
 12  nasc_raca_cor       object 
 13  sexo_fem            object 
 14  nasc_peso           float64
 15  nasc_apgar1         float64
 16  nasc_apgar5         float64
 17  periodo             object 
dtypes: bool(1), float64(3), int64(10), object(4)
memory usage: 2.9+ GB


## Municípios

In [4]:
path_muns = f'{Path.home()}/Databases/MUNICIPIOS/municipios.csv.gzip'
df_muns = pd.read_csv(path_muns)
df_muns

Unnamed: 0,cod_ibge,regiao,uf,cod_uf,nome_uf,municipio,nome,capital,fronteira,amazonia,macroregiao_saude,regiao_saude,microregiao_saude,latitude,longitude,altitude,area
0,110000,Norte,RO,11,RONDONIA,Município ignorado - RO,MUNICIPIO IGNORADO - RO,False,False,False,1100,1100,11000,0.000000,0.000000,0.0,0.000000
1,110001,Norte,RO,11,RONDONIA,Alta Floresta D'Oeste,ALTA FLORESTA D'OESTE,False,True,True,1190,1102,11900,-11.929000,-61.995998,350.0,7066.702148
2,110002,Norte,RO,11,RONDONIA,Ariquemes,ARIQUEMES,False,False,True,1190,1104,11900,-9.913000,-63.041000,142.0,4426.558105
3,110003,Norte,RO,11,RONDONIA,Cabixi,CABIXI,False,True,True,1190,1103,11900,-13.492000,-60.544998,230.0,1314.354980
4,110004,Norte,RO,11,RONDONIA,Cacoal,CACOAL,False,False,True,1190,1102,11900,-11.438000,-61.448002,200.0,3792.637939
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4789,522200,Centro-Oeste,GO,52,GOIAS,Vianópolis,VIANOPOLIS,False,False,False,5201,5202,52900,-16.742001,-48.515999,1002.0,954.278992
4790,522205,Centro-Oeste,GO,52,GOIAS,Vicentinópolis,VICENTINOPOLIS,False,False,False,5201,5202,52900,-17.735001,-49.806000,646.0,737.250977
4791,522220,Centro-Oeste,GO,52,GOIAS,Vila Boa,VILA BOA,False,False,False,5202,5206,52900,-15.038000,-47.058998,0.0,1060.170044
4792,522230,Centro-Oeste,GO,52,GOIAS,Vila Propício,VILA PROPICIO,False,False,False,5203,5212,52900,-15.457000,-48.889000,744.0,2181.574951


In [5]:
df_muns.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4794 entries, 0 to 4793
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   cod_ibge           4794 non-null   int64  
 1   regiao             4794 non-null   object 
 2   uf                 4794 non-null   object 
 3   cod_uf             4794 non-null   int64  
 4   nome_uf            4794 non-null   object 
 5   municipio          4794 non-null   object 
 6   nome               4794 non-null   object 
 7   capital            4794 non-null   bool   
 8   fronteira          4794 non-null   bool   
 9   amazonia           4794 non-null   bool   
 10  macroregiao_saude  4794 non-null   int64  
 11  regiao_saude       4794 non-null   int64  
 12  microregiao_saude  4794 non-null   int64  
 13  latitude           4790 non-null   float64
 14  longitude          4790 non-null   float64
 15  altitude           4790 non-null   float64
 16  area               4790 

## Distâncias

In [6]:
path_dist = f'{Path.home()}/Databases/DISTANCIAS/matriz_distancias.zip'
df_dist = pd.read_csv(path_dist, low_memory=False)
df_dist

Unnamed: 0,origem,destino,distancia,tempo
0,110001,110001,0.000000,0.000000
1,110001,110002,309.050000,6.169056
2,110001,110003,399.499700,6.289056
3,110001,110004,81.201103,1.917750
4,110001,110005,391.704300,6.136361
...,...,...,...,...
31024895,530010,522200,161.728900,2.356000
31024896,530010,522205,382.708800,5.479556
31024897,530010,522220,161.603400,2.248944
31024898,530010,522230,190.000900,3.216083


In [7]:
df_dist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31024900 entries, 0 to 31024899
Data columns (total 4 columns):
 #   Column     Dtype  
---  ------     -----  
 0   origem     int64  
 1   destino    int64  
 2   distancia  float64
 3   tempo      float64
dtypes: float64(2), int64(2)
memory usage: 946.8 MB


## Indicador SocioEconômico

In [8]:
path_socioeco = f'{Path.home()}/Databases/HEALTH/health_regions.csv'
df_socioeco = pd.read_csv(path_socioeco)
df_socioeco

Unnamed: 0,UF,Município,Cód IBGE,Cód Região de Saúde,Nome da Região de Saúde,Grupo
0,AC,Acrelândia,120001,12002,Baixo Acre e Purus,2
1,AC,Assis Brasil,120005,12001,Alto Acre,1
2,AC,Brasiléia,120010,12001,Alto Acre,1
3,AC,Bujari,120013,12002,Baixo Acre e Purus,2
4,AC,Capixaba,120017,12002,Baixo Acre e Purus,2
...,...,...,...,...,...,...
5566,TO,Tocantinópolis,172120,17002,Bico do Papagaio,1
5567,TO,Tupirama,172125,17004,Cerrado Tocantins Araguaia,2
5568,TO,Tupiratins,172130,17004,Cerrado Tocantins Araguaia,2
5569,TO,Wanderlândia,172208,17001,Médio Norte Araguaia,3


In [9]:
df_socioeco[['Cód IBGE', 'Grupo']]

Unnamed: 0,Cód IBGE,Grupo
0,120001,2
1,120005,1
2,120010,1
3,120013,2
4,120017,2
...,...,...
5566,172120,1
5567,172125,2
5568,172130,2
5569,172208,3


# Tabela Deslocamento

In [10]:
def merge_lat_lon(df_left, df_right, left_on, right_on):
  prefix = left_on.split('_')[0]
  df = pd.merge(df_left, df_right, how='left', left_on=left_on, right_on=right_on)
  df = df.rename(columns={
    'latitude': f'{prefix}_latitude',
    'longitude': f'{prefix}_longitude'})
  df = df.drop(right_on, axis=1)
  return df

In [11]:
cols = ['periodo', 'parto_normal', 'res_municipio', 'hosp_municipio', 'res_regiao_saude', 'hosp_regiao_saude', 'cnes']
df_deslc = df_sinasc[cols]
df_deslc = df_deslc[df_deslc['periodo'] != 'durante']
df_deslc = df_deslc.groupby(
  cols, as_index=False
).size().sort_values('size', ascending=False, ignore_index=True)
df_deslc = df_deslc.rename(columns={'size': 'nascimentos'})
df_deslc

Unnamed: 0,periodo,parto_normal,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,cnes,nascimentos
0,antes,True,130260,130260,1301,1301,3151794,11011
1,antes,True,355030,355030,3501,3501,2077388,10426
2,depois,False,500270,500270,5011,5011,9768,10012
3,depois,True,140010,140010,1401,1401,2566168,9647
4,antes,True,292740,292740,2901,2901,3956369,9109
...,...,...,...,...,...,...,...,...
169160,antes,True,510622,510267,5114,5107,2396106,1
169161,antes,True,510621,510340,5104,5101,2659107,1
169162,antes,True,510621,510340,5104,5101,2311682,1
169163,antes,True,510621,510025,5104,5104,2471345,1


In [12]:
df_deslc['periodo'].value_counts()

antes     85023
depois    84142
Name: periodo, dtype: int64

In [13]:
df_deslc['mun_diff'] = df_deslc['res_municipio'] != df_deslc['hosp_municipio']
df_deslc['regsau_diff'] = df_deslc['res_regiao_saude'] != df_deslc['hosp_regiao_saude']
df_deslc

Unnamed: 0,periodo,parto_normal,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,cnes,nascimentos,mun_diff,regsau_diff
0,antes,True,130260,130260,1301,1301,3151794,11011,False,False
1,antes,True,355030,355030,3501,3501,2077388,10426,False,False
2,depois,False,500270,500270,5011,5011,9768,10012,False,False
3,depois,True,140010,140010,1401,1401,2566168,9647,False,False
4,antes,True,292740,292740,2901,2901,3956369,9109,False,False
...,...,...,...,...,...,...,...,...,...,...
169160,antes,True,510622,510267,5114,5107,2396106,1,True,True
169161,antes,True,510621,510340,5104,5101,2659107,1,True,True
169162,antes,True,510621,510340,5104,5101,2311682,1,True,True
169163,antes,True,510621,510025,5104,5104,2471345,1,True,False


In [14]:
res_cols = [
  'cod_ibge',
  'regiao',
  'uf',
  'capital',
  'fronteira',
  'amazonia',
  'latitude',
  'longitude',]

hosp_cols = [
  'cod_ibge',
  'latitude',
  'longitude',]

In [15]:
df_deslc = merge_lat_lon(df_deslc, df_muns[res_cols], 'res_municipio', 'cod_ibge')
df_deslc = merge_lat_lon(df_deslc, df_muns[hosp_cols], 'hosp_municipio', 'cod_ibge')
df_deslc

Unnamed: 0,periodo,parto_normal,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,cnes,nascimentos,mun_diff,regsau_diff,regiao,uf,capital,fronteira,amazonia,res_latitude,res_longitude,hosp_latitude,hosp_longitude
0,antes,True,130260,130260,1301,1301,3151794,11011,False,False,Norte,AM,True,False,True,-3.102000,-60.025002,-3.102000,-60.025002
1,antes,True,355030,355030,3501,3501,2077388,10426,False,False,Sudeste,SP,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002
2,depois,False,500270,500270,5011,5011,9768,10012,False,False,Centro-Oeste,MS,True,False,False,-20.443001,-54.646000,-20.443001,-54.646000
3,depois,True,140010,140010,1401,1401,2566168,9647,False,False,Norte,RR,True,True,True,2.820000,-60.673000,2.820000,-60.673000
4,antes,True,292740,292740,2901,2901,3956369,9109,False,False,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169163,antes,True,510622,510267,5114,5107,2396106,1,True,True,Centro-Oeste,MT,False,False,True,-13.838000,-56.084000,-15.547000,-55.168999
169164,antes,True,510621,510340,5104,5101,2659107,1,True,True,Centro-Oeste,MT,False,False,True,-10.558000,-55.952999,-15.596000,-56.097000
169165,antes,True,510621,510340,5104,5101,2311682,1,True,True,Centro-Oeste,MT,False,False,True,-10.558000,-55.952999,-15.596000,-56.097000
169166,antes,True,510621,510025,5104,5104,2471345,1,True,False,Centro-Oeste,MT,False,False,True,-10.558000,-55.952999,-9.876000,-56.085999


In [16]:
df_deslc = pd.merge(df_deslc, df_dist, how='left', left_on=['res_municipio', 'hosp_municipio'], right_on=['origem', 'destino'])
df_deslc = df_deslc.drop(['origem', 'destino'], axis=1)
df_deslc

Unnamed: 0,periodo,parto_normal,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,cnes,nascimentos,mun_diff,regsau_diff,...,uf,capital,fronteira,amazonia,res_latitude,res_longitude,hosp_latitude,hosp_longitude,distancia,tempo
0,antes,True,130260,130260,1301,1301,3151794,11011,False,False,...,AM,True,False,True,-3.102000,-60.025002,-3.102000,-60.025002,0.0000,0.000000
1,antes,True,355030,355030,3501,3501,2077388,10426,False,False,...,SP,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000
2,depois,False,500270,500270,5011,5011,9768,10012,False,False,...,MS,True,False,False,-20.443001,-54.646000,-20.443001,-54.646000,0.0000,0.000000
3,depois,True,140010,140010,1401,1401,2566168,9647,False,False,...,RR,True,True,True,2.820000,-60.673000,2.820000,-60.673000,0.0000,0.000000
4,antes,True,292740,292740,2901,2901,3956369,9109,False,False,...,,,,,,,,,0.0000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169163,antes,True,510622,510267,5114,5107,2396106,1,True,True,...,MT,False,False,True,-13.838000,-56.084000,-15.547000,-55.168999,391.9461,4.769250
169164,antes,True,510621,510340,5104,5101,2659107,1,True,True,...,MT,False,False,True,-10.558000,-55.952999,-15.596000,-56.097000,701.7827,9.122222
169165,antes,True,510621,510340,5104,5101,2311682,1,True,True,...,MT,False,False,True,-10.558000,-55.952999,-15.596000,-56.097000,701.7827,9.122222
169166,antes,True,510621,510025,5104,5104,2471345,1,True,False,...,MT,False,False,True,-10.558000,-55.952999,-9.876000,-56.085999,117.8282,1.871250


In [17]:
df_deslc = pd.merge(df_deslc, df_socioeco[['Cód IBGE', 'Grupo']], how='left', left_on='res_municipio', right_on='Cód IBGE')
df_deslc = df_deslc.rename(columns={'Grupo': 'socioeconomico'})
df_deslc = df_deslc.drop(['Cód IBGE'], axis=1)
df_deslc

Unnamed: 0,periodo,parto_normal,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,cnes,nascimentos,mun_diff,regsau_diff,...,capital,fronteira,amazonia,res_latitude,res_longitude,hosp_latitude,hosp_longitude,distancia,tempo,socioeconomico
0,antes,True,130260,130260,1301,1301,3151794,11011,False,False,...,True,False,True,-3.102000,-60.025002,-3.102000,-60.025002,0.0000,0.000000,4.0
1,antes,True,355030,355030,3501,3501,2077388,10426,False,False,...,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000,5.0
2,depois,False,500270,500270,5011,5011,9768,10012,False,False,...,True,False,False,-20.443001,-54.646000,-20.443001,-54.646000,0.0000,0.000000,4.0
3,depois,True,140010,140010,1401,1401,2566168,9647,False,False,...,True,True,True,2.820000,-60.673000,2.820000,-60.673000,0.0000,0.000000,2.0
4,antes,True,292740,292740,2901,2901,3956369,9109,False,False,...,,,,,,,,0.0000,0.000000,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169163,antes,True,510622,510267,5114,5107,2396106,1,True,True,...,False,False,True,-13.838000,-56.084000,-15.547000,-55.168999,391.9461,4.769250,2.0
169164,antes,True,510621,510340,5104,5101,2659107,1,True,True,...,False,False,True,-10.558000,-55.952999,-15.596000,-56.097000,701.7827,9.122222,2.0
169165,antes,True,510621,510340,5104,5101,2311682,1,True,True,...,False,False,True,-10.558000,-55.952999,-15.596000,-56.097000,701.7827,9.122222,2.0
169166,antes,True,510621,510025,5104,5104,2471345,1,True,False,...,False,False,True,-10.558000,-55.952999,-9.876000,-56.085999,117.8282,1.871250,2.0


## Tabela Final

In [18]:
path_deslc = f'{Path.home()}/Databases/GESTANTES/deslocamento.csv.gzip'
df_deslc.to_csv(path_deslc, index=False)

In [19]:
pd.read_csv(path_deslc, low_memory=False)


Columns (12,13,14) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0,periodo,parto_normal,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,cnes,nascimentos,mun_diff,regsau_diff,...,capital,fronteira,amazonia,res_latitude,res_longitude,hosp_latitude,hosp_longitude,distancia,tempo,socioeconomico
0,antes,True,130260,130260,1301,1301,3151794,11011,False,False,...,True,False,True,-3.102000,-60.025002,-3.102000,-60.025002,0.0000,0.000000,4.0
1,antes,True,355030,355030,3501,3501,2077388,10426,False,False,...,True,False,False,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000,5.0
2,depois,False,500270,500270,5011,5011,9768,10012,False,False,...,True,False,False,-20.443001,-54.646000,-20.443001,-54.646000,0.0000,0.000000,4.0
3,depois,True,140010,140010,1401,1401,2566168,9647,False,False,...,True,True,True,2.820000,-60.673000,2.820000,-60.673000,0.0000,0.000000,2.0
4,antes,True,292740,292740,2901,2901,3956369,9109,False,False,...,,,,,,,,0.0000,0.000000,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169163,antes,True,510622,510267,5114,5107,2396106,1,True,True,...,False,False,True,-13.838000,-56.084000,-15.547000,-55.168999,391.9461,4.769250,2.0
169164,antes,True,510621,510340,5104,5101,2659107,1,True,True,...,False,False,True,-10.558000,-55.952999,-15.596000,-56.097000,701.7827,9.122222,2.0
169165,antes,True,510621,510340,5104,5101,2311682,1,True,True,...,False,False,True,-10.558000,-55.952999,-15.596000,-56.097000,701.7827,9.122222,2.0
169166,antes,True,510621,510025,5104,5104,2471345,1,True,False,...,False,False,True,-10.558000,-55.952999,-9.876000,-56.085999,117.8282,1.871250,2.0


In [28]:
df_deslc = df_deslc[[
  'periodo', 'parto_normal', 'cnes',
  'res_municipio', 'hosp_municipio',
  'res_regiao_saude', 'hosp_regiao_saude',
  'mun_diff', 'regsau_diff',
  'regiao', 'uf',
  'capital', 'fronteira', 'amazonia', 'socioeconomico',
  'res_latitude', 'res_longitude',
  'hosp_latitude', 'hosp_longitude',
  'distancia', 'tempo',
  'nascimentos']]
df_deslc

Unnamed: 0,periodo,parto_normal,cnes,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,mun_diff,regsau_diff,regiao,...,fronteira,amazonia,socioeconomico,res_latitude,res_longitude,hosp_latitude,hosp_longitude,distancia,tempo,nascimentos
0,antes,True,3151794,130260,130260,1301,1301,False,False,Norte,...,False,True,4.0,-3.102000,-60.025002,-3.102000,-60.025002,0.0000,0.000000,11011
1,antes,True,2077388,355030,355030,3501,3501,False,False,Sudeste,...,False,False,5.0,-23.548000,-46.636002,-23.548000,-46.636002,0.0000,0.000000,10426
2,depois,False,9768,500270,500270,5011,5011,False,False,Centro-Oeste,...,False,False,4.0,-20.443001,-54.646000,-20.443001,-54.646000,0.0000,0.000000,10012
3,depois,True,2566168,140010,140010,1401,1401,False,False,Norte,...,True,True,2.0,2.820000,-60.673000,2.820000,-60.673000,0.0000,0.000000,9647
4,antes,True,3956369,292740,292740,2901,2901,False,False,,...,,,5.0,,,,,0.0000,0.000000,9109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169163,antes,True,2396106,510622,510267,5114,5107,True,True,Centro-Oeste,...,False,True,2.0,-13.838000,-56.084000,-15.547000,-55.168999,391.9461,4.769250,1
169164,antes,True,2659107,510621,510340,5104,5101,True,True,Centro-Oeste,...,False,True,2.0,-10.558000,-55.952999,-15.596000,-56.097000,701.7827,9.122222,1
169165,antes,True,2311682,510621,510340,5104,5101,True,True,Centro-Oeste,...,False,True,2.0,-10.558000,-55.952999,-15.596000,-56.097000,701.7827,9.122222,1
169166,antes,True,2471345,510621,510025,5104,5104,True,False,Centro-Oeste,...,False,True,2.0,-10.558000,-55.952999,-9.876000,-56.085999,117.8282,1.871250,1


In [29]:
df_deslc.to_csv(path_deslc, index=False)
df_deslc.to_excel(path_deslc.replace('csv.gzip', 'xlsx'))

## RJ

In [30]:
df_deslc_rj = df_deslc[df_deslc['uf'] == 'RJ']
df_deslc_rj

Unnamed: 0,periodo,parto_normal,cnes,res_municipio,hosp_municipio,res_regiao_saude,hosp_regiao_saude,mun_diff,regsau_diff,regiao,...,fronteira,amazonia,socioeconomico,res_latitude,res_longitude,hosp_latitude,hosp_longitude,distancia,tempo,nascimentos
12,antes,True,2295407,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5.0,-22.903000,-43.208000,-22.903000,-43.208000,0.0000,0.000000,7914
16,depois,True,2270609,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5.0,-22.903000,-43.208000,-22.903000,-43.208000,0.0000,0.000000,7303
19,antes,True,2280248,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5.0,-22.903000,-43.208000,-22.903000,-43.208000,0.0000,0.000000,7121
25,antes,True,2270609,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5.0,-22.903000,-43.208000,-22.903000,-43.208000,0.0000,0.000000,6855
28,depois,True,2280248,330455,330455,3305,3305,False,False,Sudeste,...,False,False,5.0,-22.903000,-43.208000,-22.903000,-43.208000,0.0000,0.000000,6568
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159578,antes,True,2288109,330555,330260,3305,3305,True,False,Sudeste,...,False,False,5.0,-22.743999,-43.708000,-22.959999,-44.041000,49.0472,0.669389,1
159579,antes,True,2281589,330555,330227,3305,3305,True,False,Sudeste,...,False,False,5.0,-22.743999,-43.708000,-22.643000,-43.653000,18.2023,0.390139,1
159580,antes,True,2270714,330550,330455,3302,3305,True,True,Sudeste,...,False,False,4.0,-22.920000,-42.509998,-22.903000,-43.208000,88.6640,1.501028,1
159581,antes,True,2269783,330550,330455,3302,3305,True,True,Sudeste,...,False,False,4.0,-22.920000,-42.509998,-22.903000,-43.208000,88.6640,1.501028,1


In [31]:
df_deslc_rj.to_excel(path_deslc.replace('.csv.gzip', '_rj.xlsx'), index=False)

In [32]:
df_deslc_rj['periodo'].value_counts()

antes     2602
depois    2455
Name: periodo, dtype: int64