## Data Description

For this task, I used data from the **Our World in Data (OWID)** repository and the **United Nations World Population Prospects**. Both the vaccination data and world population data were collected from OWID github repository which references the UN World Population Prosects dataset for its population data. The vaccination data is updated daily thus running this script/ notebook daily should output different results

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read in the vaccinations data from OWID github repo
vaccinations = pd.read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv", error_bad_lines=False)

In [3]:
vaccinations.head()

Unnamed: 0,location,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.0,0.0,,
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,35.0
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,35.0
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,35.0
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,35.0


In [4]:
# Convert the data to datetime format
vaccinations['date'] = pd.to_datetime(vaccinations['date'])

In [5]:
# Group data by location and get the id of the most recent updated date entry
idx_max = vaccinations.groupby(['location'])['date'].idxmax()

In [6]:
# Filter the dataframe to remain with the most recent vaccination entry
latest_vaccination_data = vaccinations.loc[idx_max].reset_index(drop=True)

In [7]:
latest_vaccination_data.head()

Unnamed: 0,location,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million
0,Afghanistan,AFG,2021-04-22,240000.0,240000.0,,,8000.0,0.62,0.62,,206.0
1,Africa,OWID_AFR,2021-04-28,17593073.0,12566761.0,4819465.0,388919.0,151841.0,1.31,0.94,0.36,113.0
2,Albania,ALB,2021-04-28,433628.0,,,16773.0,8793.0,15.07,,,3055.0
3,Algeria,DZA,2021-02-19,75000.0,,,,3748.0,0.17,,,85.0
4,Andorra,AND,2021-04-19,26414.0,21733.0,4681.0,,1050.0,34.19,28.13,6.06,13590.0


In [8]:
# Read in the population data from OWID repo
population_data_2020 = pd.read_csv('https://raw.githubusercontent.com/owid/covid-19-data/master/scripts/input/un/population_2020.csv')

In [9]:
population_data_2020.head()

Unnamed: 0,entity,iso_code,year,population
0,Afghanistan,AFG,2020,38928341
1,Albania,ALB,2020,2877800
2,Algeria,DZA,2020,43851043
3,American Samoa,ASM,2020,55197
4,Andorra,AND,2020,77265


In [10]:
# Merge the vaccinations and the population data based on the country
vaccination_population = pd.merge(latest_vaccination_data, population_data_2020, how='left', left_on='location', right_on='entity')

In [11]:
vaccination_population.head()

Unnamed: 0,location,iso_code_x,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,entity,iso_code_y,year,population
0,Afghanistan,AFG,2021-04-22,240000.0,240000.0,,,8000.0,0.62,0.62,,206.0,Afghanistan,AFG,2020.0,38928340.0
1,Africa,OWID_AFR,2021-04-28,17593073.0,12566761.0,4819465.0,388919.0,151841.0,1.31,0.94,0.36,113.0,Africa,OWID_AFR,2020.0,1340598000.0
2,Albania,ALB,2021-04-28,433628.0,,,16773.0,8793.0,15.07,,,3055.0,Albania,ALB,2020.0,2877800.0
3,Algeria,DZA,2021-02-19,75000.0,,,,3748.0,0.17,,,85.0,Algeria,DZA,2020.0,43851040.0
4,Andorra,AND,2021-04-19,26414.0,21733.0,4681.0,,1050.0,34.19,28.13,6.06,13590.0,Andorra,AND,2020.0,77265.0


In [12]:
# Calculate the percentage population that is vaccinated i.e. has received at least one vaccine dose
vaccination_population['% population vaccinated'] = vaccination_population['people_vaccinated'] / vaccination_population['population'] * 100

In [13]:
# Display countries with the portion of the population vaccinated and sorted
countries = vaccination_population.sort_values(by='% population vaccinated', ascending=False)[['location', 'iso_code_x', 'date', 'people_vaccinated', 'population', '% population vaccinated']]

In [14]:
countries

Unnamed: 0,location,iso_code_x,date,people_vaccinated,population,% population vaccinated
70,Gibraltar,GIB,2021-04-28,37478.0,33691.0,111.240391
61,Falkland Islands,FLK,2021-04-14,2632.0,3483.0,75.567040
162,Seychelles,SYC,2021-04-26,66583.0,98340.0,67.706935
87,Isle of Man,IMN,2021-04-28,55945.0,85032.0,65.792878
88,Israel,ISR,2021-04-28,5399137.0,8655541.0,62.377811
...,...,...,...,...,...,...
158,Saudi Arabia,SAU,2021-04-28,,34813867.0,
159,Scotland,OWID_SCT,2021-04-28,2796810.0,,
180,Taiwan,TWN,2021-04-28,,23816775.0,
191,United Arab Emirates,ARE,2021-04-28,,9890400.0,


In [15]:
# Filter for the top 3 countries with the highest portion of the population vaccinated
countries[:3]

Unnamed: 0,location,iso_code_x,date,people_vaccinated,population,% population vaccinated
70,Gibraltar,GIB,2021-04-28,37478.0,33691.0,111.240391
61,Falkland Islands,FLK,2021-04-14,2632.0,3483.0,75.56704
162,Seychelles,SYC,2021-04-26,66583.0,98340.0,67.706935
