In [1]:
# libraries 
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta

# Root folder that contains one sub-directory per country (relative path).
data_dir = "../../basins/"

# Target countries for which vaccination scenario files are generated.
countries = [
    'Sri Lanka',
    'El Salvador',
    'Morocco',
    'Bolivia',
    'Honduras',
    'Philippines',
    'Indonesia',
    'Pakistan',
    'Rwanda',
    'Bangladesh',
    'Kyrgyzstan',
    'Egypt',
    'Mozambique',
    'Afghanistan',
    'Angola',
    'Ghana',
    'Zambia',
    "Côte d'Ivoire",
    'Kenya',
    'Uganda',
]

# Population of each reference (scenario) entity. get_doses_scenario uses this
# value as N when the rollout is NOT rescaled to the target country.
pop_countries_scenarios = {
    "Italy": 59.55 * 10**6,
    "United Kingdom": 67.22 * 10**6,
    "United States": 329.5 * 10**6,
    'European Union': 447007596,
    "Israel": 9.217 * 10**6,
}


def get_doses_scenario(df_dose, country, country_scenario, rescale, column):
    """Turn a per-hundred vaccination share into daily and cumulative dose counts.

    The rows of ``df_dose`` whose ``Entity`` equals ``country_scenario`` are
    resampled to daily frequency, gaps are linearly interpolated, and the
    per-hundred share is multiplied by a population size N.

    Parameters
    ----------
    df_dose : pandas.DataFrame
        Table with at least the columns ``Entity``, ``Day`` and ``column``.
    country : str
        Target country. Only used when ``rescale`` is true, to locate
        ``<data_dir>/<country>/demographic/Nk_10.csv``.
    country_scenario : str
        ``Entity`` value whose rollout curve is used.
    rescale : bool
        If true, N is the sum of the ``value`` column of the target country's
        ``Nk_10.csv``. Otherwise N is ``pop_countries_scenarios[country_scenario]``.
    column : str
        Name of the per-hundred column to convert.
        ``"people_vaccinated_per_hundred"`` produces ``one_dose_*`` columns;
        any other name produces ``two_dose_*`` columns.

    Returns
    -------
    pandas.DataFrame
        Daily frame indexed by ``Day`` with three columns: ``column``
        (interpolated), ``<dose>_dose_cumulative`` and ``<dose>_dose_daily``.
        Empty when ``country_scenario`` has no rows in ``df_dose``.

    Raises
    ------
    KeyError
        If ``rescale`` is false and ``country_scenario`` is not a key of
        ``pop_countries_scenarios``.
    """
    if rescale:
        demography_path = os.path.join(data_dir, country, 'demographic/Nk_10.csv')
        N = pd.read_csv(demography_path)["value"].sum()
    else:
        N = pop_countries_scenarios[country_scenario]

    # Build the daily series as one chained pipeline so the filtered slice of
    # the caller's frame is never mutated in place.
    scenario_rows = df_dose.loc[df_dose["Entity"] == country_scenario, ["Day", column]]
    df_country = (
        scenario_rows
        .assign(Day=pd.to_datetime(scenario_rows["Day"]))
        .set_index("Day")
        .resample("D")[[column]]
        .mean()
        .interpolate(method='linear')
    )

    dose = "one" if column == "people_vaccinated_per_hundred" else "two"

    cumulative = df_country[column] / 100 * N
    df_country[dose + "_dose_cumulative"] = cumulative
    # The first day has no predecessor, so its increment is back-filled from
    # the second day. Series.bfill() replaces the deprecated
    # fillna(method='bfill') spelling.
    df_country[dose + "_dose_daily"] = cumulative.diff().bfill()

    return df_country


def get_doses_country_scenario(country, country_scenario, rescale):
    """Combine first- and second-dose series for one country/scenario pair.

    Calls ``get_doses_scenario`` once on the module-level ``one_dose`` table
    (column ``people_vaccinated_per_hundred``) and once on ``two_dose``
    (column ``people_fully_vaccinated_per_hundred``), then outer-merges the
    two results on ``Day``.

    Parameters
    ----------
    country : str
        Target country, forwarded to ``get_doses_scenario``.
    country_scenario : str
        Reference entity whose rollout is used.
    rescale : bool
        Forwarded to ``get_doses_scenario`` to select the population size.

    Returns
    -------
    pandas.DataFrame
        Merged frame holding the columns of both series. Days present in only
        one series get 0.0 in the other series' columns.
    """
    one_dose_country = get_doses_scenario(
        one_dose, country, country_scenario, rescale, "people_vaccinated_per_hundred"
    )
    two_dose_country = get_doses_scenario(
        two_dose, country, country_scenario, rescale, "people_fully_vaccinated_per_hundred"
    )

    dose_country = pd.merge(
        one_dose_country, two_dose_country, on='Day', how='outer'
    ).fillna(value=0.0)

    if len(dose_country) == 0:
        # Rows are selected by Entity == country_scenario, so an empty result
        # means the scenario entity is absent from the data; report that name
        # rather than the target country.
        print('Country not found:', country_scenario)
    return dose_country


# Load the two share tables read by get_doses_country_scenario:
#   one_dose -> column people_vaccinated_per_hundred
#   two_dose -> column people_fully_vaccinated_per_hundred
two_dose = pd.read_csv("./share-people-fully-vaccinated-covid.csv")
one_dose = pd.read_csv("./share-people-vaccinated-covid.csv")

# Preview the first rows of the fully-vaccinated table.
two_dose.head()

Unnamed: 0,Entity,Code,Day,people_fully_vaccinated_per_hundred,145609-annotations
0,Afghanistan,AFG,2021-05-11,0.14,
1,Afghanistan,AFG,2021-05-20,0.19,
2,Afghanistan,AFG,2021-05-24,0.24,
3,Afghanistan,AFG,2021-05-26,0.28,
4,Afghanistan,AFG,2021-05-27,0.29,


In [2]:
# One entry per output file: (scenario entity, rescale flag, path relative to
# the country folder). Entries are listed in the order the files are written.
# The European Union and Israel scenarios are exported in rescaled form only.
scenario_outputs = [
    ("Italy", False, 'vaccinations/vaccinations_ita.csv'),
    ("Italy", True, 'vaccinations/vaccinations_ita_rescale.csv'),
    ("United Kingdom", False, 'vaccinations/vaccinations_uk.csv'),
    ("United Kingdom", True, 'vaccinations/vaccinations_uk_rescale.csv'),
    ("United States", False, 'vaccinations/vaccinations_us.csv'),
    ("United States", True, 'vaccinations/vaccinations_us_rescale.csv'),
    ("European Union", True, 'vaccinations/vaccinations_eu_rescale.csv'),
    ("Israel", True, 'vaccinations/vaccinations_isrl_rescale.csv'),
]

# For every target country, build each scenario's dose table and write it as
# CSV under <data_dir>/<country>/vaccinations/.
for country in countries:
    for country_scenario, rescale, relative_path in scenario_outputs:
        df_scenario = get_doses_country_scenario(country, country_scenario, rescale)
        df_scenario.to_csv(os.path.join(data_dir, country, relative_path))