# COVID-19 Vaccine Rollout Score
Prompt: Construct a score for how well countries are doing at their vaccine rollout for COVID-19.

In [30]:
# Dependencies
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import scipy.stats as st
from sklearn import linear_model
from sklearn.metrics import r2_score

In [3]:
# Load the raw vaccination records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='../data/country_vaccinations.csv')
# Preview the first rows
df.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.0,0.0,,,Oxford/AstraZeneca,Government of Afghanistan,http://www.xinhuanet.com/english/asiapacific/2...
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,35.0,Oxford/AstraZeneca,Government of Afghanistan,http://www.xinhuanet.com/english/asiapacific/2...
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,35.0,Oxford/AstraZeneca,Government of Afghanistan,http://www.xinhuanet.com/english/asiapacific/2...
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,35.0,Oxford/AstraZeneca,Government of Afghanistan,http://www.xinhuanet.com/english/asiapacific/2...
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,35.0,Oxford/AstraZeneca,Government of Afghanistan,http://www.xinhuanet.com/english/asiapacific/2...


## Total Vaccines Distributed and People Fully Vaccinated

In [18]:
# Latest reporting date for each country.
# 'date' is selected BEFORE aggregating so max() is computed on that single
# column only, instead of on every column of the dataset (including the
# free-text ones) and then throwing the rest away.
df_countries = df.groupby('country')['date'].max().reset_index()
df_countries.head()

Unnamed: 0,country,date
0,Afghanistan,2021-03-16
1,Albania,2021-03-27
2,Algeria,2021-02-19
3,Andorra,2021-03-19
4,Angola,2021-03-24


In [12]:
# Reduce the dataset to one row per country (its most recent date) by
# inner-joining the full table to the (country, last date) pairs on both keys
df_last_date = df.merge(df_countries, how='inner', on=['country', 'date'])
df_last_date.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-03-16,54000.0,54000.0,,,2862.0,0.14,0.14,,74.0,Oxford/AstraZeneca,Government of Afghanistan,http://www.xinhuanet.com/english/asiapacific/2...
1,Albania,ALB,2021-03-27,64075.0,,,1827.0,3113.0,2.23,,,1082.0,Pfizer/BioNTech,Ministry of Health,https://coronavirus.al/lajme/covid19-ministria...
2,Algeria,DZA,2021-02-19,75000.0,,,,3748.0,0.17,,,85.0,Sputnik V,Ministry of Health,https://www.echoroukonline.com/%d9%84%d9%82%d8...
3,Andorra,AND,2021-03-19,9288.0,,,,428.0,12.02,,,5539.0,Pfizer/BioNTech,Government of Andorra,https://www.govern.ad/comunicats/item/12558-el...
4,Angola,AGO,2021-03-24,87022.0,87022.0,,,6044.0,0.26,0.26,,184.0,Oxford/AstraZeneca,Ministry of Health,https://www.angop.ao/en/noticias/saude/covid-1...


In [17]:
# Keep the two per-hundred coverage columns alongside the country name
df_vax_distr = df_last_date.loc[
    :,
    ['country', 'total_vaccinations_per_hundred', 'people_fully_vaccinated_per_hundred'],
]
# Show the 20 rows with the highest people_fully_vaccinated_per_hundred
df_vax_distr.sort_values('people_fully_vaccinated_per_hundred', ascending=False).head(20)

Unnamed: 0,country,total_vaccinations_per_hundred,people_fully_vaccinated_per_hundred
48,Gibraltar,171.1,79.26
65,Israel,114.7,54.4
125,Seychelles,98.15,33.95
24,Cayman Islands,67.04,23.52
88,Monaco,46.07,21.23
18,Bermuda,54.93,21.12
25,Chile,50.45,17.04
148,United States,41.91,14.99
12,Bahrain,42.55,14.45
124,Serbia,33.55,13.47


## Rate of Vaccination

In [41]:
# Index the records by date and order them chronologically
df_dates = df.set_index('date').sort_values(by='date')
# Turn the date labels into real timestamps so they support date arithmetic
df_dates.index = pd.to_datetime(df_dates.index)
# Number of days elapsed since the first (earliest) date in the sorted index
df_dates = df_dates.assign(
    days_from_start=(df_dates.index - df_dates.index[0]).days
)
df_dates

Unnamed: 0_level_0,country,iso_code,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website,days_from_start
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-12-13,Scotland,OWID_SCT,19009.0,19009.0,,,,0.35,0.35,,,"Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/health...,0
2020-12-13,United Kingdom,GBR,86465.0,86465.0,,,,0.13,0.13,,,"Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/health...,0
2020-12-13,Wales,OWID_WLS,8257.0,8257.0,,,,0.26,0.26,,,"Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/health...,0
2020-12-13,Northern Ireland,OWID_NIR,3623.0,3623.0,,,,0.19,0.19,,,"Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/health...,0
2020-12-13,England,OWID_ENG,55576.0,55576.0,,,,0.10,0.10,,,"Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/health...,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-27,Latvia,LVA,126292.0,104379.0,21913.0,492.0,3372.0,6.70,5.53,1.16,1788.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",National Health Service,https://data.gov.lv/dati/eng/dataset/covid19-v...,104
2021-03-27,Turkey,TUR,14652659.0,8217124.0,6435535.0,161412.0,231844.0,17.37,9.74,7.63,2749.0,"Pfizer/BioNTech, Sinovac",COVID-19 Vaccine Information Platform,https://covid19asi.saglik.gov.tr/,104
2021-03-27,Italy,ITA,9210893.0,6290823.0,2920070.0,175240.0,206937.0,15.23,10.40,4.83,3423.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",Extraordinary commissioner for the Covid-19 em...,https://github.com/italia/covid19-opendata-vac...,104
2021-03-27,Indonesia,IDN,10425690.0,7190663.0,3235027.0,12866.0,370048.0,3.81,2.63,1.18,1353.0,Sinovac,Ministry of Health,https://www.kemkes.go.id/article/view/21030300...,104


In [65]:
def determine_slope(df, country, column_1, column_2):
    """Fit a straight line of ``column_2`` against ``column_1`` for one country.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'country' column plus ``column_1`` and ``column_2``.
    country : str
        Value of the 'country' column selecting the rows to fit.
    column_1 : str
        Name of the column used as the x axis.
    column_2 : str
        Name of the column used as the y axis.

    Returns
    -------
    dict
        ``{'country': country, 'r2_score': ..., 'rate': ...}`` where ``rate``
        is the fitted slope and ``r2_score`` is the R^2 of the fitted line
        evaluated on the same points. Both are 0 when fewer than two complete
        (x, y) observations exist for the country.
    """
    # Rows for this country, keeping only complete (x, y) pairs. A missing value
    # in either column cannot be used by the regression, so both are filtered.
    observations = (
        df.loc[df['country'] == country, [column_1, column_2]]
        .dropna(subset=[column_1, column_2])
    )

    # A line needs at least two points; otherwise return the neutral record
    if len(observations) < 2:
        return {'country': country, 'r2_score': 0, 'rate': 0}

    # scikit-learn expects a 2-D feature matrix, hence the single-column reshape
    x = observations[column_1].to_numpy().reshape(-1, 1)
    y = observations[column_2]

    # Fit the linear model and score it on the points it was trained on
    regr = linear_model.LinearRegression()
    regr.fit(x, y)
    y_pred = regr.predict(x)

    return {'country': country, 'r2_score': r2_score(y, y_pred), 'rate': regr.coef_[0]}

In [66]:
# Distinct country names, in order of first appearance in df_dates
countries = df_dates['country'].unique().tolist()
# One regression record per country, with people_fully_vaccinated_per_hundred as the y axis
all_coefs = list(map(
    lambda name: determine_slope(df_dates, name, 'days_from_start', 'people_fully_vaccinated_per_hundred'),
    countries,
))

In [70]:
# Tabulate the per-country regression records (one row per country)
df_coefs_fully = pd.DataFrame(data=all_coefs)
df_coefs_fully

Unnamed: 0,country,r2_score,rate
0,Scotland,0.797383,0.060308
1,United Kingdom,0.704878,0.038576
2,Wales,0.784124,0.164320
3,Northern Ireland,0.694973,0.047005
4,England,0.647956,0.030076
...,...,...,...
149,Malawi,0.000000,0.000000
150,Bahamas,0.000000,0.000000
151,Namibia,0.000000,0.000000
152,Guinea,0.000000,0.000000


In [71]:
# Countries with a positive fitted rate but a poor linear fit (R^2 below 0.5).
# The mask is built from df_coefs_fully itself so that it matches the frame
# being filtered and the cell runs on a fresh kernel.
df_coefs_fully.loc[(df_coefs_fully['r2_score'] < .5) & (df_coefs_fully['rate'] > 0)]

Unnamed: 0,country,r2_score,rate


In [72]:
# Repeat the per-country fit with people_vaccinated_per_hundred as the y axis
people_coefs = [
    determine_slope(df_dates, name, 'days_from_start', 'people_vaccinated_per_hundred')
    for name in countries
]
# Tabulate the records (one row per country)
df_coefs_people = pd.DataFrame(data=people_coefs)
df_coefs_people

Unnamed: 0,country,r2_score,rate
0,Scotland,0.964396,0.515474
1,United Kingdom,0.979414,0.509600
2,Wales,0.967615,0.505771
3,Northern Ireland,0.978591,0.453379
4,England,0.980338,0.520777
...,...,...,...
149,Malawi,0.870942,0.044074
150,Bahamas,0.000000,0.000000
151,Namibia,1.000000,0.001429
152,Guinea,0.000000,0.000000


In [74]:
# Rows whose fitted rate is positive while R^2 is below 0.5
df_coefs_people.loc[
    df_coefs_people['r2_score'].lt(.5) & df_coefs_people['rate'].gt(0)
]

Unnamed: 0,country,r2_score,rate
