In [2]:
import pandas as pd
import numpy as np
from datetime import datetime

df = pd.read_csv('../data/external/vacinacao-se.csv', sep=';', parse_dates=['paciente_datanascimento', 'vacina_dataaplicacao'])
age_groups = [(0, 19), (20, 39), (40, 59), (60, 200)]

In [3]:
df = df.assign(vacinacao_completa=(df.vacina_descricao_dose == '2ª Dose').where((df.vacina_fabricante_nome != 'JANSSEN'), True))
df_municipios = df['estabelecimento_municipio_nome'].drop_duplicates().to_frame('municipios')

In [4]:
dfs = []
for i in range(1, 13):
    date = datetime.strptime('01-' + str(i) + '-2021', '%d-%m-%Y')
    
    df_ages = df_municipios
    for group in age_groups:
        temp1_F = df[   (df['paciente_idade'] > group[0]) &
                        (df['paciente_idade'] < group[1]) &
                        (df['vacina_dataaplicacao'] < date) &
                        (df['vacinacao_completa'] == False) &
                        (df['paciente_enumsexobiologico'] == 'F')].groupby(['estabelecimento_municipio_nome']).size().to_frame('vacinados_parcial_F_' + str(group[1]))
        df_ages = df_ages.merge(temp1_F, how='left', left_on='municipios', right_on='estabelecimento_municipio_nome').fillna(0)

        temp1_M = df[   (df['paciente_idade'] > group[0]) &
                        (df['paciente_idade'] < group[1]) &
                        (df['vacina_dataaplicacao'] < date) &
                        (df['vacinacao_completa'] == False) &
                        (df['paciente_enumsexobiologico'] == 'M')].groupby(['estabelecimento_municipio_nome']).size().to_frame('vacinados_parcial_M_' + str(group[1]))
        df_ages = df_ages.merge(temp1_M, how='left', left_on='municipios', right_on='estabelecimento_municipio_nome').fillna(0)

        temp2_F = df[   (df['paciente_idade'] > group[0]) &
                        (df['paciente_idade'] < group[1]) &
                        (df['vacina_dataaplicacao'] < date) &
                        (df['vacinacao_completa'] == True) &
                        (df['paciente_enumsexobiologico'] == 'F')].groupby(['estabelecimento_municipio_nome']).size().to_frame('vacinados_completo_F_' + str(group[1]))
        df_ages = df_ages.merge(temp2_F, how='left', left_on='municipios', right_on='estabelecimento_municipio_nome').fillna(0)

        temp2_M = df[   (df['paciente_idade'] > group[0]) &
                        (df['paciente_idade'] < group[1]) &
                        (df['vacina_dataaplicacao'] < date) &
                        (df['vacinacao_completa'] == True) &
                        (df['paciente_enumsexobiologico'] == 'M')].groupby(['estabelecimento_municipio_nome']).size().to_frame('vacinados_completo_M_' + str(group[1]))
        df_ages = df_ages.merge(temp2_M, how='left', left_on='municipios', right_on='estabelecimento_municipio_nome').fillna(0)

    df_ages['date'] = date

    dfs.append(df_ages)
final_df = pd.concat(dfs)

In [5]:
final_df

Unnamed: 0,municipios,vacinados_parcial_F_19,vacinados_parcial_M_19,vacinados_completo_F_19,vacinados_completo_M_19,vacinados_parcial_F_39,vacinados_parcial_M_39,vacinados_completo_F_39,vacinados_completo_M_39,vacinados_parcial_F_59,vacinados_parcial_M_59,vacinados_completo_F_59,vacinados_completo_M_59,vacinados_parcial_F_200,vacinados_parcial_M_200,vacinados_completo_F_200,vacinados_completo_M_200,date
0,SALGADO,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-01-01
1,LAGARTO,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-01-01
2,PORTO DA FOLHA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-01-01
3,LARANJEIRAS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-01-01
4,ARACAJU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,CUMBE,147.0,145.0,3.0,11.0,484.0,460.0,289.0,248.0,479.0,443.0,382.0,349.0,358.0,271.0,274.0,218.0,2021-12-01
71,DIVINA PASTORA,266.0,245.0,14.0,17.0,632.0,549.0,422.0,322.0,552.0,505.0,487.0,439.0,297.0,276.0,245.0,227.0,2021-12-01
72,TELHA,126.0,145.0,9.0,3.0,429.0,363.0,253.0,208.0,359.0,362.0,310.0,287.0,179.0,158.0,141.0,130.0,2021-12-01
73,CEDRO DE SAO JOAO,227.0,217.0,6.0,4.0,755.0,575.0,401.0,274.0,641.0,610.0,549.0,494.0,487.0,387.0,415.0,339.0,2021-12-01


In [7]:
final_df.to_csv('../data/processed/final_vacinacao.csv', index=False)