# Reproducir datos

In [1]:
import os

import pandas as pd

In [2]:
# Remote CSV with vaccination counts (read with a two-row column header).
URL_VACUNAS = (
    "https://github.com/sociedatos/covid19-bo-vacunas_por_departamento"
    "/raw/master/vaccinations.csv"
)

# Remote CSV with 2021 population figures, one column per department.
URL_HABITANTES = (
    "https://raw.githubusercontent.com/pr0nstar/covid19-pruebas"
    "/master/data/bolivia.population.depts.2021.csv"
)

# Column labels in the vaccination file that must be renamed so they match
# the labels used in DEPT_ORDER.
COLUMNS_TO_RENAME = dict(Potosi='Potosí')

# Column order used for every table and CSV: national total first,
# followed by the nine departments.
DEPT_ORDER = [
    'Bolivia',
    'La Paz',
    'Cochabamba',
    'Santa Cruz',
    'Oruro',
    'Potosí',
    'Chuquisaca',
    'Tarija',
    'Beni',
    'Pando',
]

In [3]:
def prepare_data():
    """
    Download the vaccination and population tables and derive four views.

    Returns a list ``[acumulado, diario, percapita, pervacunable]``. Each
    item is a DataFrame indexed by ``fecha`` with a ``dosis`` column plus
    the columns listed in ``DEPT_ORDER``:

    - ``acumulado``: the reshaped source counts, with a ``Bolivia`` total.
    - ``diario``: row-to-row differences of those counts within each dose
      type; rows containing any missing value are dropped.
    - ``percapita``: counts divided by each column's summed population.
    - ``pervacunable``: counts divided by the population summed over the
      rows of the population table from position 5 onward.
    """

    # Vaccination counts: two header rows, dates in the first column.
    dosis_crudas = pd.read_csv(
        URL_VACUNAS, header=[0, 1], parse_dates=[0], index_col=[0]
    ).rename(columns=COLUMNS_TO_RENAME)

    # Population table: add the national total, then fix the column order.
    poblacion = pd.read_csv(URL_HABITANTES, index_col=[0])
    poblacion = poblacion.assign(Bolivia=poblacion.sum(axis=1))[DEPT_ORDER]

    # Move the first header level into the index so that the remaining
    # columns are dose types, then relabel them.
    # NOTE(review): the labels are assigned by position, so this assumes the
    # source yields exactly these five dose columns in this order -- confirm.
    por_dosis = dosis_crudas.stack(level=[0])
    por_dosis.columns = ['anuales', 'primeras bidosis', 'segundas bidosis', 'terceras', 'unicas']

    # Derived dose categories.
    dosis_unicas = por_dosis['unicas']
    por_dosis['al menos una'] = por_dosis['primeras bidosis'] + dosis_unicas
    por_dosis['segunda o unica'] = por_dosis['segundas bidosis'] + dosis_unicas

    # Swap axes: dose types go to the index, regions become the columns.
    por_region = por_dosis.unstack().stack(level=[0])
    por_region.index = por_region.index.set_names(['fecha', 'dosis'])
    por_region['Bolivia'] = por_region.sum(axis=1)
    por_region = por_region[DEPT_ORDER]

    # Denominators for the two rate views.
    poblacion_total = poblacion.sum()
    # NOTE(review): rows 5+ are presumably the vaccine-eligible groups of the
    # population file -- confirm against that file's layout.
    poblacion_vacunable = poblacion.iloc[5:].sum()

    vistas = [
        por_region,
        por_region.groupby(level=['dosis']).diff().dropna(),
        por_region.div(poblacion_total),
        por_region.div(poblacion_vacunable),
    ]

    # Turn the `dosis` index level into a regular column in every view.
    return [vista.reset_index(level=1) for vista in vistas]

In [4]:
def save_data(df, sufix, directory='datos', columns=None):
    """
    Save one csv file per dose type found in column `dosis` of `df`.

    Filenames are formatted to be compatible with those specified
    previously by dquintani: `<directory>/<dose name>_<sufix>.csv`.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a `dosis` column and one column per region.
    sufix : str
        Last component of every filename (e.g. 'diario').
    directory : str, default 'datos'
        Output directory; it is created when it does not exist.
    columns : list of str, optional
        Columns to write, in order. Defaults to `DEPT_ORDER`.

    Raises
    ------
    KeyError
        If `dosis` holds a value with no filename mapping, or if one of
        `columns` is missing from `df`.

    Notes
    -----
    Missing values are written as 0. Values are written as integers only
    when every value of the view is a whole number; fractional views
    (per-capita rates) keep their decimals instead of being truncated to 0.
    """

    # Map dose types and their filename components
    filename_dosis = {
        'al menos una': 'al_menos_una_dosis',
        'primeras bidosis': 'primeras_bidosis',
        'segunda o unica': 'segunda_o_unica',
        'segundas bidosis': 'segundas_bidosis',
        'terceras': 'dosis_refuerzo1',
        'unicas': 'unicas',
        'anuales': 'dosis_refuerzo2_anuales'
    }

    if columns is None:
        columns = DEPT_ORDER
    columns = list(columns)

    # A fresh checkout may not have the output directory yet.
    os.makedirs(directory, exist_ok=True)

    # Decide once for the whole view, so that every file written for the
    # same sufix uses the same number format.
    is_whole = bool((df[columns].fillna(0) % 1 == 0).all().all())

    # For each dose type, format its filename and save data
    for dosis, dfi in df.groupby('dosis'):
        filename = '{}/{}_{}.csv'.format(directory, filename_dosis[dosis], sufix)
        out = dfi[columns].fillna(0)
        if is_whole:
            out = out.astype(int)
        out.to_csv(filename)

In [5]:
# Download the source tables and build the four views returned by
# prepare_data(), in the order in which it returns them.
acumulado, diario, percapita, pervacunable = prepare_data()

## Datos

In [6]:
# Reshaped source counts per date and dose type (treated as cumulative totals).
acumulado

Unnamed: 0_level_0,dosis,Bolivia,La Paz,Cochabamba,Santa Cruz,Oruro,Potosí,Chuquisaca,Tarija,Beni,Pando
fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-02-02,al menos una,1589.0,12.0,278.0,647.0,26.0,138.0,122.0,90.0,6.0,270.0
2021-02-02,primeras bidosis,1589.0,12.0,278.0,647.0,26.0,138.0,122.0,90.0,6.0,270.0
2021-02-02,segunda o unica,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-02-02,segundas bidosis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-02-02,terceras,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
2022-06-08,primeras bidosis,6213152.0,1511357.0,1197294.0,1778665.0,323863.0,376145.0,358579.0,363447.0,227957.0,75845.0
2022-06-08,segunda o unica,5981423.0,1453543.0,1158172.0,1769438.0,297040.0,366784.0,346261.0,347152.0,186202.0,56831.0
2022-06-08,segundas bidosis,4982771.0,1175463.0,986508.0,1427075.0,264383.0,290271.0,307023.0,309196.0,166021.0,56831.0
2022-06-08,terceras,1578531.0,371626.0,336770.0,428678.0,70395.0,89223.0,111686.0,108682.0,42112.0,19359.0


In [7]:
# Row-to-row differences of the cumulative counts within each dose type.
diario

Unnamed: 0_level_0,dosis,Bolivia,La Paz,Cochabamba,Santa Cruz,Oruro,Potosí,Chuquisaca,Tarija,Beni,Pando
fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-02-03,al menos una,1971.0,349.0,409.0,617.0,80.0,65.0,181.0,110.0,42.0,118.0
2021-02-03,primeras bidosis,1971.0,349.0,409.0,617.0,80.0,65.0,181.0,110.0,42.0,118.0
2021-02-03,segunda o unica,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-02-03,segundas bidosis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-02-03,terceras,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
2022-06-08,primeras bidosis,1357.0,262.0,263.0,581.0,43.0,42.0,61.0,45.0,55.0,5.0
2022-06-08,segunda o unica,1786.0,398.0,352.0,703.0,68.0,80.0,57.0,71.0,50.0,7.0
2022-06-08,segundas bidosis,1786.0,398.0,352.0,703.0,68.0,80.0,57.0,71.0,50.0,7.0
2022-06-08,terceras,8216.0,1650.0,1833.0,3389.0,172.0,241.0,398.0,379.0,134.0,20.0


In [8]:
# Cumulative counts divided by each column's summed population.
percapita

Unnamed: 0_level_0,dosis,Bolivia,La Paz,Cochabamba,Santa Cruz,Oruro,Potosí,Chuquisaca,Tarija,Beni,Pando
fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-02-02,al menos una,0.000134,0.000004,0.000133,0.000192,0.000047,0.000152,0.000187,0.000152,0.000012,0.001702
2021-02-02,primeras bidosis,0.000134,0.000004,0.000133,0.000192,0.000047,0.000152,0.000187,0.000152,0.000012,0.001702
2021-02-02,segunda o unica,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2021-02-02,segundas bidosis,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2021-02-02,terceras,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
2022-06-08,primeras bidosis,0.524673,0.499822,0.573711,0.528833,0.590412,0.414400,0.548257,0.614109,0.449535,0.477987
2022-06-08,segunda o unica,0.505104,0.480702,0.554964,0.526090,0.541513,0.404087,0.529423,0.586576,0.367194,0.358158
2022-06-08,segundas bidosis,0.420773,0.388738,0.472708,0.424298,0.481978,0.319792,0.469429,0.522442,0.327396,0.358158
2022-06-08,terceras,0.133300,0.122901,0.161371,0.127455,0.128332,0.098297,0.170765,0.183638,0.083046,0.122003


In [9]:
# Cumulative counts divided by the population summed from row 5 onward of the
# population table (presumably the vaccine-eligible groups -- confirm).
pervacunable

Unnamed: 0_level_0,dosis,Bolivia,La Paz,Cochabamba,Santa Cruz,Oruro,Potosí,Chuquisaca,Tarija,Beni,Pando
fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-02-02,al menos una,0.000149,0.000004,0.000148,0.000215,0.000052,0.000170,0.000208,0.000168,0.000014,0.001941
2021-02-02,primeras bidosis,0.000149,0.000004,0.000148,0.000215,0.000052,0.000170,0.000208,0.000168,0.000014,0.001941
2021-02-02,segunda o unica,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2021-02-02,segundas bidosis,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2021-02-02,terceras,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
2022-06-08,primeras bidosis,0.583101,0.549067,0.636553,0.591757,0.647924,0.463810,0.610118,0.678675,0.514649,0.545220
2022-06-08,segunda o unica,0.561354,0.528064,0.615753,0.588687,0.594262,0.452268,0.589159,0.648247,0.420380,0.408536
2022-06-08,segundas bidosis,0.467631,0.427039,0.524486,0.474784,0.528928,0.357922,0.522396,0.577371,0.374819,0.408536
2022-06-08,terceras,0.148144,0.135010,0.179047,0.142620,0.140833,0.110018,0.190032,0.202945,0.095074,0.139164


## Serialización

In [10]:
# Write one CSV per dose type for each view into the default `datos`
# directory; the second argument becomes the filename suffix.
save_data(diario, 'diario')
save_data(acumulado, 'acumulado')
save_data(percapita, 'percapita')
save_data(pervacunable, 'pervacunable')