# COVID-19 Vaccination Progress - Exploratory Data Analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
import plotly.express as px
import seaborn as sns

In [None]:
import os

# Walk the input directory tree and print the full path of every file in it.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

Let's take a look at the dataset. We start by importing it and displaying its contents.

In [None]:
# Load the vaccination CSV into a DataFrame.
covid_df = pd.read_csv(
    '/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv'
)

In [None]:
# Display the loaded frame as this cell's output.
covid_df

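Next, we preview the dataset with the rows that contain null values removed. Note that this is only a preview: the original `covid_df` keeps all of its rows for the analysis below.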

In [None]:
# Show only the rows that have no missing value in any column. The result is
# displayed but not assigned, so `covid_df` itself keeps every row.
covid_df.dropna(axis=0, how='any')

The dataset can be better understood when we look into the information present in the dataset.

In [None]:
# Print pandas' summary of the frame (columns, dtypes and non-null counts).
covid_df.info()

We will now look at the vaccines used in each country.

In [None]:
# Build one descriptive sentence per row (country + vaccines used) and store
# it in a new `country_vaccination` column.
sentence_prefix = 'Vaccination used in '
sentence_joiner = ' is - '
covid_df['country_vaccination'] = (
    sentence_prefix + covid_df['country'] + sentence_joiner + covid_df['vaccines']
)

# Print the first 20 distinct sentences.
distinct_sentences = covid_df['country_vaccination'].unique()
for sentence in distinct_sentences[:20]:
    print(sentence)

## Daily Vaccination


We will explore the daily vaccinations in different countries using data visualization libraries such as Matplotlib, Seaborn and Plotly. 

> Note the key differences in the graphs in terms of interactivity and visualization. 

In [None]:
#Matplotlib
# Per-country maximum of every column, ranked by peak daily vaccinations;
# only the 20 highest-ranked countries are kept.
country_max = covid_df.groupby('country').max()
cov_df_vac = country_max.sort_values('daily_vaccinations', ascending=False).head(20)

fig, ax = plt.subplots(figsize=(20, 10))

ax.bar(
    cov_df_vac.index,
    cov_df_vac['daily_vaccinations'],
    label='Daily vaccinations',
    color='#ffc64a',
    edgecolor='#3f4033',
)

# Rotate the country names so that they do not overlap.
plt.xticks(rotation=45)
ax.set_xlabel('Countries')
ax.set_ylabel('Daily vaccinations distributed')
ax.set_title('Countries vs Daily vaccinations')
ax.legend(prop={'size': 10})
plt.show()

In [None]:
#Seaborn - Daily vaccinations
# Per-country maximum of every column, ranked by peak daily vaccinations;
# only the 20 highest-ranked countries are kept.
cov_vac = covid_df.groupby('country').max().sort_values('daily_vaccinations', ascending = False)[0:20]

# catplot is a figure-level function: it creates its own figure (sized through
# `height`/`aspect`), so no plt.figure() call is needed beforehand.
# The plotted variables are passed by keyword because recent seaborn releases
# accept only `data` positionally; the country index is exposed as a regular
# column so it can be referenced by name.
sns.catplot(
    data=cov_vac.reset_index(),
    x='country',
    y='daily_vaccinations',
    kind='bar',
    palette='cool',
    ci=None,
    legend_out=True,
    aspect=2,
)
plt.xticks(rotation = 75)
plt.xlabel('Countries')
plt.ylabel('Daily Vaccinations')
plt.title('Countries with the highest daily vaccination rates')
# No legend is requested: the bars carry no labels, so there is nothing to list.
plt.show()

In [None]:
#Plotly - Daily Vaccinations

# Per-country maximum of every column, ranked by peak daily vaccinations;
# only the 20 highest-ranked countries are kept.
country_max = covid_df.groupby('country').max()
cov_vac = country_max.sort_values('daily_vaccinations', ascending=False).head(20)

# Bars are coloured by the same value that sets their height.
fig = px.bar(cov_vac, x=cov_vac.index, y='daily_vaccinations', color='daily_vaccinations')

# Centre the title horizontally near the top of the figure and label the axes.
title_settings = {'text': "Countries vs Daily Vaccinations", 'x': 0.5, 'y': 0.95}
fig.update_layout(
    title=title_settings,
    xaxis_title="Countries",
    yaxis_title="Daily Vaccinations",
)

fig.show()

In [None]:
#Global mapping
# Per-country maximum of every column, ordered by peak daily vaccinations.
country_max = covid_df.groupby('country').max()
daily_data = country_max.sort_values('daily_vaccinations', ascending=False)

# Countries are matched to map regions by name (the frame's index).
daily_map_options = dict(
    data_frame=daily_data,
    locations=daily_data.index,
    locationmode='country names',
    color='daily_vaccinations',
    title='Daily vaccinations - Worldwide',
    labels={'daily_vaccinations': 'Daily Vaccinations'},
    color_continuous_scale='peach',
    projection='natural earth',
)
fig_a = px.choropleth(**daily_map_options)
fig_a.show()

## Analysis of people vaccinated and people fully vaccinated worldwide

Observation: As per the statistics, the total number of people fully vaccinated is the number of people who have received the entire set of immunizations according to the immunization scheme (typically 2 doses).

Total number of people vaccinated - this is the number of people who have received at least one vaccine dose. It is made up of people who have received only one dose so far and a (smaller) number of people who have received all doses in the scheme.

> Using the time-series graph, we can compare the number of people fully vaccinated with the number of people vaccinated from 22 Jan 2021 to 03 Mar 2021.

In [None]:
# Reload the CSV with the `date` column parsed and used as the index so that
# rows can be selected by date range. Note: this rebinds `covid_df`, so the
# cells below work on this date-indexed frame.
covid_df = pd.read_csv('/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv', parse_dates=['date'],index_col= ['date']) 
covid_df.index = pd.to_datetime(covid_df.index)
df = covid_df.sort_index()

# Label-based slice on the sorted date index; both end dates are included.
df_sub3 = df.loc['2021-01-22':'2021-03-03']

# DataFrame.plot() opens a figure of its own unless it is handed an Axes, so
# the 10x10 figure is created explicitly and passed in via `ax`.
# Missing values are left in place (a bare dropna() call would not modify
# `df_sub3` anyway).
fig, ax = plt.subplots(figsize=(10, 10))
df_sub3[['people_vaccinated', 'people_fully_vaccinated']].plot(ax=ax)
ax.set_xlabel('date')
ax.set_ylabel('Vaccination count')
ax.set_title('Vaccination analysis')
ax.legend()
plt.show()


## Fully Vaccinated Count


We will explore the fully vaccinated count in different countries using data visualization libraries such as Matplotlib, Seaborn and Plotly. 

> Note the key differences in the graphs in terms of interactivity and visualization. 

In [None]:
#Matplotlib
# Per-country maximum of every column, ranked by people fully vaccinated;
# only the 20 highest-ranked countries are kept.
country_max = covid_df.groupby('country').max()
cov_vac = country_max.sort_values('people_fully_vaccinated', ascending=False).head(20)

fig, ax = plt.subplots(figsize=(20, 10))
ax.bar(
    cov_vac.index,
    cov_vac['people_fully_vaccinated'],
    label='People who have received the entire set of immunization according to the immunization scheme',
    color='#c7f5ba',
    edgecolor='#3f4033',
)
# Rotate the country names so that they do not overlap.
plt.xticks(rotation=45)
ax.set_xlabel('Countries')
ax.set_ylabel('Fully vaccinated')
ax.set_title('Countries vs Fully vaccinated people')
ax.legend(prop={'size': 10})
plt.show()

In [None]:
#Seaborn

# Per-country maximum of every column, ranked by people fully vaccinated;
# only the 20 highest-ranked countries are kept.
cov_vac = covid_df.groupby('country').max().sort_values('people_fully_vaccinated', ascending = False)[0:20]

# catplot is a figure-level function: it creates its own figure (sized through
# `height`/`aspect`), so no plt.figure() call is needed beforehand.
# The plotted variables are passed by keyword because recent seaborn releases
# accept only `data` positionally; the country index is exposed as a regular
# column so it can be referenced by name.
sns.catplot(
    data=cov_vac.reset_index(),
    x='country',
    y='people_fully_vaccinated',
    kind='bar',
    palette='Greens_r',
    ci=None,
    legend_out=True,
    aspect=2,
)
plt.xticks(rotation = 75)
plt.xlabel('Countries')
plt.ylabel('Fully vaccinated')
plt.title('Countries with the highest rate of fully vaccinated people')
# No legend is requested: the bars carry no labels, so there is nothing to list.
plt.show()

In [None]:
#Plotly

# Per-country maximum of every column, ranked by people fully vaccinated;
# only the 20 highest-ranked countries are kept.
country_max = covid_df.groupby('country').max()
cov_vac = country_max.sort_values('people_fully_vaccinated', ascending=False).head(20)

# Bars are coloured by the same value that sets their height.
fig = px.bar(
    cov_vac,
    x=cov_vac.index,
    y='people_fully_vaccinated',
    color='people_fully_vaccinated',
)

# Centre the title horizontally near the top of the figure and label the axes.
title_settings = {'text': "Countries vs fully vaccinated people", 'x': 0.5, 'y': 0.95}
fig.update_layout(
    title=title_settings,
    xaxis_title="Countries",
    yaxis_title="Fully Vaccinated",
)

fig.show()

In [None]:
#Global mapping
# Per-country maximum of every column, ordered by people fully vaccinated.
country_max = covid_df.groupby('country').max()
ful_data = country_max.sort_values('people_fully_vaccinated', ascending=False)

# Countries are matched to map regions by name (the frame's index).
fully_map_options = dict(
    data_frame=ful_data,
    locations=ful_data.index,
    locationmode='country names',
    color='people_fully_vaccinated',
    title='Fully Vaccinated : People who have received the entire set of immunization according to the immunization scheme',
    labels={'people_fully_vaccinated': "People fully vaccinated"},
    color_continuous_scale='teal',
    projection='natural earth',
)
fig_c = px.choropleth(**fully_map_options)
fig_c.show()

## Total number of people vaccinated 
This is the number of people who have received at least one vaccine dose. It is made up of people who have received only one dose so far and a (smaller) number of people who have received all doses in the scheme.

In [None]:
# Per-country maximum of every column, ordered by people vaccinated.
country_max = covid_df.groupby('country').max()
peo_data = country_max.sort_values('people_vaccinated', ascending=False)

# Countries are matched to map regions by name (the frame's index).
people_map_options = dict(
    data_frame=peo_data,
    locations=peo_data.index,
    locationmode='country names',
    color='people_vaccinated',
    title='People vaccinated : Those who have received the 1 set of immunization according to the immunization scheme',
    labels={'people_vaccinated': "People vaccinated"},
    color_continuous_scale='tealrose',
    projection='natural earth',
)
fig_b = px.choropleth(**people_map_options)
fig_b.show()

## Countries with the highest vaccination rates per hundred of the population 

In [None]:
# Per-country maximum of every column, ranked by people fully vaccinated per hundred.
country_max = covid_df.groupby('country').max()
vaccine_percent = country_max.sort_values(
    'people_fully_vaccinated_per_hundred', ascending=False
)

# Keep the ten highest-ranked countries that have no missing value in any column.
vac_p = vaccine_percent.dropna().head(10)

vac_p

In [None]:
fig, ax = plt.subplots(figsize=(20, 10))

# Both series are drawn at the same x positions, so the second set of bars is
# painted on top of the first.
ax.bar(
    vac_p.index,
    vac_p['people_vaccinated_per_hundred'],
    label='People vaccinated per hundred',
    color='#438599',
    edgecolor='#3f4033',
)
ax.bar(
    vac_p.index,
    vac_p['people_fully_vaccinated_per_hundred'],
    label='People fully vaccinated per hundred',
    color='#94dcf2',
    edgecolor='#3f4033',
)

# Rotate the country names so that they do not overlap.
plt.xticks(rotation=45)
ax.set_xlabel('Countries')
ax.set_ylabel('Vaccinations per hundred')
ax.set_title('Countries with the highest number of vaccination per hundred')
ax.legend()
plt.show()

Disclaimer: All content provided in this notebook is for educational purposes only. The information provided in this notebook is true and complete to the best of my knowledge at the time of publishing. Please check the dataset for the latest updated version for use. 
