Import the libraries needed in this notebook

In [None]:
import numpy as np
import pandas as pd
import math
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
sns.set_theme(style='darkgrid')
import datetime
from collections import defaultdict

# for geospatial analysis
import geopandas as gpd
# Country polygons from the Natural Earth low-resolution dataset bundled with
# GeoPandas; used later as the base layer of the choropleth maps.
# NOTE(review): gpd.datasets is deprecated in recent GeoPandas releases —
# confirm the installed version still ships it.
world_shapes_path = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(world_shapes_path)

# Vaccination records, read from the notebook's input directory.
vaccinations_csv_path = '../input/covid-world-vaccination-progress/country_vaccinations.csv'
dataset = pd.read_csv(vaccinations_csv_path)

# dataset overview: first five rows
dataset.head()

General overview of the dataset

In [None]:
# Print column names, dtypes and non-null counts to see which columns have gaps
dataset.info()

Summary statistics of the dataset's numeric columns: count, mean, standard deviation, minimum, quartiles and maximum

In [None]:
# Descriptive statistics (by default only the numeric columns are summarised)
dataset.describe()

Data from the 'vaccines' column is collected and unique entries are stored in a variable

In [None]:

# Collect every distinct vaccine name that appears in the 'vaccines' column.
# Each entry is a comma-separated combination of vaccine names.
vaccine_groups = dataset['vaccines']

# dropna(): a missing entry is a float NaN, which has no string methods.
vaccine_combinations = vaccine_groups.dropna().unique()

vaccine_sep = []
for item in vaccine_combinations:
    # split(',') already yields a one-element list when there is no comma, so
    # no special case is needed. (The previous `item.find(',') == False` test
    # never detected "no comma": str.find returns -1 in that case, not False.)
    # strip() removes only the padding around each name, so names containing
    # spaces (e.g. 'Sputnik V') keep their original spelling.
    names = (name.strip() for name in item.split(','))
    vaccine_sep.extend(name for name in names if name)

# De-duplicate while keeping first-seen order; unique() returns a NumPy array.
vaccines_used = pd.Series(vaccine_sep).unique()

The list of vaccines being used by countries is now found

In [None]:

# Wrap the array of unique vaccine names in a Series so the notebook renders
# it as an indexed, one-name-per-row listing.
vaccines_used = pd.Series(vaccines_used)
vaccines_used

 The different vaccines being used by countries are analyzed

In [None]:
# One row per country, indexed by country name, holding the last non-null
# 'vaccines' and 'iso_code' values in file order (GroupBy.last skips nulls).
# The former `reset_index().set_index('country')` line was removed: its result
# was discarded, and the frame is already indexed by country.
vaccines_used_by_countries = dataset.groupby('country')[['vaccines', 'iso_code']].last()
vaccines_used_by_countries.head()

The most common sets of vaccines being used by countries are found

In [None]:
# Five most frequent vaccine combinations, counted once per country
vaccine_set_counts = vaccines_used_by_countries['vaccines'].value_counts()
vaccine_set_counts.head(5)

Merge the vaccinations dataset with the geopandas one to create choropleth maps

Missing data in this merged dataset is handled by replacing NaN with 'Data N/A'

In [None]:
# Outer join on the ISO-3 code so that map shapes without vaccination data,
# and vaccination rows without a map shape, are both kept.
chart_data = world.merge(
    vaccines_used_by_countries,
    how='outer',
    left_on='iso_a3',
    right_on='iso_code',
)

# Shapes with no matching vaccination row get an explicit placeholder label.
chart_data['vaccines'] = chart_data['vaccines'].fillna('Data N/A')

chart_data.info()

The two most clinically effective vaccines against COVID-19 are the mRNA vaccines, Pfizer/BioNTech and Moderna.
 
This is based on their published efficacy rates of around 90-95%

This map shows the primary vaccine being used by countries to inoculate their population.

In [None]:

def checkVaccineType(x):
    """Classify a vaccine-combination string into a single map category.

    Parameters
    ----------
    x : str
        Value from the 'vaccines' column, or the placeholder 'Data N/A'.

    Returns
    -------
    str
        'Pfizer/BioNTech or Moderna', 'Oxford/AstraZeneca', 'Sputnik V',
        'Sinopharm', 'Data N/A' or 'Other'. When several vaccines are
        present, the first matching rule in the order listed here wins.
    """
    # Ordered (substrings, label) rules; order encodes the priority.
    priority_rules = (
        (('Pfizer/BioNTech', 'Moderna'), 'Pfizer/BioNTech or Moderna'),
        (('Oxford/AstraZeneca',), 'Oxford/AstraZeneca'),
        (('Sputnik V',), 'Sputnik V'),
        (('Sinopharm',), 'Sinopharm'),
    )
    for substrings, label in priority_rules:
        # str.find returns -1 when the substring is absent
        if any(x.find(substring) >= 0 for substring in substrings):
            return label

    # No known vaccine matched: keep the missing-data placeholder distinct
    # from combinations made up only of other vaccines.
    return 'Data N/A' if x == 'Data N/A' else 'Other'

# Index of the five most common vaccine combinations across countries
top_5_vaccine_types = vaccines_used_by_countries['vaccines'].value_counts().head(5).index

# Reduce each country's vaccine combination to one display category
chart_data['Pfizer/Moderna'] = chart_data['vaccines'].map(checkVaccineType)

# Categorical choropleth: one colour per category, with a legend
ax = chart_data.plot(
    column='Pfizer/Moderna',
    legend=True,
    cmap='Paired',
    alpha=0.5,
    figsize=(15, 9),
)
title_font = {'fontsize': 18}
ax.set_title('Primary COVID-19 Vaccine being used by countries', fontdict=title_font)
ax.set_axis_off()

We can infer from this map that the availability of these vaccines (Pfizer and Moderna) is limited to first world countries

1. Most Asian and South American countries are using Oxford/AZ

2. Russia is using its homegrown Sputnik-V vaccine while China is using Sinopharm.

3. India is using Oxford/AstraZeneca and its homegrown Covaxin

4. The US is primarily using Pfizer/BioNTech and Moderna. Doses of Johnson&Johnson are also being distributed

In [None]:
# Vaccine combination recorded for India (row and column picked in one .loc call)
vaccines_used_by_countries.loc['India', 'vaccines']

In [None]:
# Vaccine combination recorded for the United States
vaccines_used_by_countries.loc['United States', 'vaccines']

Data of vaccine doses administered by country is now found from the dataset

In [None]:
# Last non-null 'total_vaccinations' value per country, in file order
doses_by_country = dataset.groupby('country')['total_vaccinations']
vaccine_doses_administered = doses_by_country.last()
vaccine_doses_administered.head(5)

The US has administered more than 100M vaccine doses as of March 11, 2021.

In [None]:
# Latest total-doses figure for the United States
vaccine_doses_administered.loc['United States']

India has administered around 28M vaccine doses as of March 11, 2021

In [None]:
# Latest total-doses figure for India
vaccine_doses_administered.loc['India']

Countries are now ranked by vaccine doses administered, highest first; the top five are shown

In [None]:
# Rank countries by total doses, largest first (sort_values places NaN last)
top_countries_doses = vaccine_doses_administered.sort_values(
    ascending=False,
)
top_countries_doses.head(5)

Remove 'England', 'Wales' and 'Scotland' from the index column as they are redundant when their data is a part of 'United Kingdom'

We now find the top 10 countries by most doses administered

In [None]:
# England, Wales and Scotland are already counted inside 'United Kingdom', so
# keeping them would list the same doses twice in the ranking.
# errors='ignore' keeps the cell running if one of the labels is absent from
# the data, instead of raising KeyError.
top_countries_doses_new = top_countries_doses.drop(
    labels=['England', 'Wales', 'Scotland'],
    errors='ignore',
)

# head(10): the accompanying text presents a top-10 ranking, and a bare
# head() would show only five rows.
top_countries_doses_new.head(10)

The top 10 countries by population fully vaccinated are:

In [None]:
# Last non-null 'people_fully_vaccinated' value per country, largest first.
# England, Wales and Scotland are dropped, as in the doses ranking, because
# their figures are part of 'United Kingdom' and would otherwise appear beside
# it. errors='ignore' tolerates data where those labels are absent.
top_10_countries_fully_vaccinated = (
    dataset.groupby('country')['people_fully_vaccinated']
    .last()
    .drop(labels=['England', 'Wales', 'Scotland'], errors='ignore')
    .sort_values(ascending=False)
)
top_10_countries_fully_vaccinated.head(10)

This bar chart shows the top 10 countries by population fully vaccinated

In [None]:
# Bar chart of the ten largest values; the x axis is the country index
ten_most_fully_vaccinated = top_10_countries_fully_vaccinated.head(10)
px.bar(
    ten_most_fully_vaccinated,
    y='people_fully_vaccinated',
    title='Top 10 countries by population fully vaccinated',
)

The population vaccinated per hundred people is taken from the 'people_vaccinated_per_hundred' column. Rather than ranking every country on this ratio, we take the ten countries with the most total vaccinations and order them by people vaccinated per hundred.

In [None]:
# Last non-null value per country for the ratio, the dose total and the ISO
# code (the ISO code is needed later to join against the world map).
# England, Wales and Scotland are dropped because they are part of
# 'United Kingdom'; errors='ignore' avoids a KeyError if a label is absent.
per_country_columns = ['people_vaccinated_per_hundred', 'total_vaccinations', 'iso_code']
vaccinations_per_100_people = (
    dataset.groupby('country')[per_country_columns]
    .last()
    .drop(labels=['England', 'Wales', 'Scotland'], errors='ignore')
)

# Ten countries with the most total vaccinations; rows missing any of the
# three values are discarded before the top ten are taken.
top_10_total_vaccinations = (
    vaccinations_per_100_people
    .sort_values(by='total_vaccinations', ascending=False)
    .dropna()
    .head(10)
)

# Re-order those ten by people vaccinated per hundred, highest first
top_10_per_hundred_vaccinations = top_10_total_vaccinations.sort_values(
    by='people_vaccinated_per_hundred',
    ascending=False,
)

top_10_per_hundred_vaccinations['people_vaccinated_per_hundred']

A bar chart showing the people vaccinated per hundred for these countries

In [None]:
# Series of people vaccinated per hundred for the ten selected countries
chart_data_per_hundred_vaccinations = top_10_per_hundred_vaccinations['people_vaccinated_per_hundred']

# Title corrected: the plotted column is 'people_vaccinated_per_hundred',
# not a fully-vaccinated figure, so the chart must not say "fully vaccinated".
px.bar(
    chart_data_per_hundred_vaccinations,
    y='people_vaccinated_per_hundred',
    title='Population vaccinated per 100 people',
)

Map showing the population vaccinated per hundred people (the 'people_vaccinated_per_hundred' column)

In [None]:
# Outer join on the ISO-3 code keeps map shapes that have no vaccination row;
# those are drawn with the 'missing' style below.
map_data_per_hundred_vaccinations = world.merge(
    vaccinations_per_100_people,
    how='outer',
    left_on='iso_a3',
    right_on='iso_code',
)

# Choropleth with values binned by the 'quantiles' classification scheme
ax = map_data_per_hundred_vaccinations.plot(
    column='people_vaccinated_per_hundred',
    legend=True,
    cmap='YlGn',
    figsize=(15, 9),
    scheme='quantiles',
    alpha=0.7,
    missing_kwds={'color': 'lightgrey', 'label': 'Data N/A'},
)

# Title corrected: the mapped column is 'people_vaccinated_per_hundred',
# not a fully-vaccinated figure.
ax.set_title('Population vaccinated per hundred people', fontdict={'fontsize': 18})
ax.set_axis_off()

The time series data of daily vaccinations in the US and India is found from the 'daily_vaccinations' column. Line plots for these countries are shown below

In [None]:
# Daily vaccination figures for the United States, keyed by the raw date
# string; rows without a daily figure are dropped.
us_rows = dataset.loc[dataset['country'] == 'United States', ['daily_vaccinations', 'date']]
usa_data = us_rows.set_index('date').dropna()

# Replace the string index with a UTC DatetimeIndex named 'Date' so the
# x axis is treated as time.
usa_data.index = pd.to_datetime(usa_data.index, utc=True).rename('Date')

px.line(
    usa_data,
    y='daily_vaccinations',
    title='Trend of vaccinations in the US',
)

Around 2.3M people were vaccinated in the US on Thursday, March 11, 2021. We can notice a steady positive trend in daily vaccinations

India vaccinated about 1.2M people on the same day. 

In [None]:
 
# Daily vaccination figures for India, keyed by the raw date string; rows
# without a daily figure are dropped.
india_rows = dataset.loc[dataset['country'] == 'India', ['daily_vaccinations', 'date']]
india_data = india_rows.set_index('date').dropna()

# Replace the string index with a UTC DatetimeIndex named 'Date' so the
# x axis is treated as time.
india_data.index = pd.to_datetime(india_data.index, utc=True).rename('Date')

px.line(
    india_data,
    y='daily_vaccinations',
    title='Trend of vaccinations in India',
)