In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the per-country vaccination records and preview the first rows.
vaccine = pd.read_csv(
    '/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv'
)
vaccine.head(5)

In [None]:
# Count the missing values in each column.
vaccine.isna().sum()

Within each of the pairs **(total_vaccinations,total_vaccinations_per_hundred)**, **(people_vaccinated,people_vaccinated_per_hundred)**, **(people_fully_vaccinated,people_fully_vaccinated_per_hundred)** and **(daily_vaccinations,daily_vaccinations_per_million)**, **both columns have the same number of missing values.**

**Let's analyze those missing features one by one.**

In [None]:
# Preview the rows that have no total_vaccinations value.
vaccine.loc[vaccine['total_vaccinations'].isna()].head()

In [None]:
# Keep only the rows that report a total_vaccinations value.
vaccine = vaccine.dropna(subset=['total_vaccinations'])

In [None]:
# Re-count the missing values per column after dropping rows.
vaccine.isna().sum()

In [None]:
# Build a separate frame holding only the rows where people_vaccinated is present;
# it is used to study how the partially-missing columns relate to each other.
vaccine_check = vaccine.dropna(subset=['people_vaccinated'])

In [None]:
# Preview the comparison frame.
vaccine_check.head(5)


As can be seen from our data, the values of the **total_vaccinations column are mostly the same as those of the people_vaccinated column**.
Likewise, the **values of total_vaccinations_per_hundred and people_vaccinated_per_hundred are very similar**.

In [None]:
#checking for correlation
import matplotlib.pyplot as plt
import seaborn as sns
plt.figure(figsize=(10,10))
# Correlation is only defined for numeric columns. Select them explicitly:
# pandas >= 2.0 raises on string columns instead of silently skipping them.
sns.heatmap(vaccine_check.select_dtypes(include='number').corr(),annot=True)

From the figure above, we can see that many of the features have either a **positive or a perfect** correlation.

Let's check the hypothesis that the **values of highly correlated features have same distribution** by using **two sided Kolmogorov-Smirnov test**.

In [None]:
#function to check hypothesis between two highly correlated features
def ktests(col1,col2,alpha=0.05):
    """Run a two-sided two-sample Kolmogorov-Smirnov test on two numeric samples.

    Missing values (NaN) are removed from each sample independently before the
    test, because NaNs passed to ``ks_2samp`` either produce a NaN p-value
    (which would silently fall into the "Accept" branch) or raise an error,
    depending on the SciPy version.

    Parameters
    ----------
    col1, col2 : array-like of numbers
        The two samples to compare (for example two DataFrame columns).
    alpha : float, default 0.05
        Significance level; the null hypothesis is rejected when p < alpha.

    Returns
    -------
    str
        "Reject null hypothesis" when p < alpha, otherwise
        "Accept null hypothesis". The p-value itself is printed.

    Raises
    ------
    ValueError
        If either sample has no non-missing values.
    """
    from scipy import stats
    sample1 = np.asarray(col1, dtype=float)
    sample2 = np.asarray(col2, dtype=float)
    sample1 = sample1[~np.isnan(sample1)]
    sample2 = sample2[~np.isnan(sample2)]
    if sample1.size == 0 or sample2.size == 0:
        raise ValueError("ktests needs at least one non-missing value in each column")
    # ks_2samp returns (KS statistic, p-value); only the p-value is used here.
    _statistic, pval = stats.ks_2samp(sample1, sample2)
    print(pval)
    if pval < alpha:
        return "Reject null hypothesis"
    return "Accept null hypothesis"

In [None]:
# Compare the distributions of total_vaccinations and people_vaccinated.
ktests(
    col1=vaccine_check['total_vaccinations'],
    col2=vaccine_check['people_vaccinated'],
)

In [None]:
# Estimate missing people_vaccinated values from total_vaccinations: shift each total by
# the absolute gap between the two column means (taken from vaccine_check) and take the
# absolute value of the result so the estimate is never negative.
diff = abs(vaccine_check['total_vaccinations'].mean() - vaccine_check['people_vaccinated'].mean())
estimated_people_vaccinated = (vaccine['total_vaccinations'] - diff).abs()
vaccine['people_vaccinated'] = vaccine['people_vaccinated'].fillna(estimated_people_vaccinated)

In [None]:
# Compare the two per-hundred columns.
ktests(
    col1=vaccine_check['total_vaccinations_per_hundred'],
    col2=vaccine_check['people_vaccinated_per_hundred'],
)

In [None]:
# Treat missing people_vaccinated_per_hundred values as 0.
vaccine = vaccine.fillna({'people_vaccinated_per_hundred': 0})

In [None]:
# Compare people_vaccinated with daily_vaccinations.
ktests(
    col1=vaccine_check['people_vaccinated'],
    col2=vaccine_check['daily_vaccinations'],
)

In [None]:
# Treat missing daily_vaccinations values as 0.
vaccine = vaccine.fillna({'daily_vaccinations': 0})

In [None]:
# Compare people_vaccinated_per_hundred with daily_vaccinations_per_million.
ktests(
    col1=vaccine_check['people_vaccinated_per_hundred'],
    col2=vaccine_check['daily_vaccinations_per_million'],
)

In [None]:
# Treat missing daily_vaccinations_per_million values as 0.
vaccine = vaccine.fillna({'daily_vaccinations_per_million': 0})

In [None]:
# Check which columns still contain missing values.
vaccine.isna().sum()

In [None]:
# Compare people_fully_vaccinated with total_vaccinations.
ktests(
    col1=vaccine_check['people_fully_vaccinated'],
    col2=vaccine_check['total_vaccinations'],
)

In [None]:
# Treat missing people_fully_vaccinated values as 0.
vaccine = vaccine.fillna({'people_fully_vaccinated': 0})

In [None]:
# Compare people_fully_vaccinated_per_hundred with total_vaccinations_per_hundred.
ktests(
    col1=vaccine_check['people_fully_vaccinated_per_hundred'],
    col2=vaccine_check['total_vaccinations_per_hundred'],
)

In [None]:
# Treat missing people_fully_vaccinated_per_hundred values as 0.
vaccine = vaccine.fillna({'people_fully_vaccinated_per_hundred': 0})

In [None]:
# Check which columns still contain missing values.
vaccine.isna().sum()

In [None]:
# Compare daily_vaccinations_raw with daily_vaccinations.
ktests(
    col1=vaccine_check['daily_vaccinations_raw'],
    col2=vaccine_check['daily_vaccinations'],
)

In [None]:
# Treat missing daily_vaccinations_raw values as 0.
vaccine = vaccine.fillna({'daily_vaccinations_raw': 0})

In [None]:
# Check which columns still contain missing values.
vaccine.isna().sum()

In [None]:
# Distinct country names among the rows that have no iso_code.
missing_iso_rows = vaccine['iso_code'].isna()
missing_countries = vaccine.loc[missing_iso_rows, 'country'].unique()
missing_countries

All these countries except **"Northern Cyprus"** are part of the **"United Kingdom"**, which has the iso_code GBR.

In [None]:
# Northern Cyprus has no iso_code in the data. Give it Cyprus's ISO-3 code ('CYP') so
# the ISO-3 choropleths can place it. Only the iso_code column is filled: a row-wide
# fillna would write the code into any other column that happened to be missing.
northern_cyprus_rows = vaccine['country'] == 'Northern Cyprus'
vaccine.loc[northern_cyprus_rows, 'iso_code'] = vaccine.loc[northern_cyprus_rows, 'iso_code'].fillna('CYP')

In [None]:
# Inspect the Northern Cyprus rows.
vaccine.loc[vaccine['country'].eq('Northern Cyprus')]

In [None]:
#filling the countries missing iso_codes with GBR
# Northern Cyprus is handled separately, so it is excluded here. Only the iso_code
# column is filled: a row-wide fillna would write 'GBR' into any other missing column.
gbr_countries = [name for name in missing_countries if name != 'Northern Cyprus']
gbr_rows = vaccine['country'].isin(gbr_countries)
vaccine.loc[gbr_rows, 'iso_code'] = vaccine.loc[gbr_rows, 'iso_code'].fillna('GBR')

In [None]:
# Final check of missing values per column.
vaccine.isna().sum()

**Therefore all missing values have been removed.**

In [None]:
#normalising country names (values of the country column)
# The UK constituent countries are folded into 'United Kingdom'. Ireland is a separate
# sovereign state with its own iso_code, so it is deliberately NOT renamed: relabelling
# it would mix Ireland's iso_code into the 'United Kingdom' group.
vaccine['country'] = vaccine['country'].replace({
    'Czechia':'Czech Republic',
    'Northern Cyprus':'Cyprus',
    'England':'United Kingdom',
    'Wales':'United Kingdom',
    'Northern Ireland':'United Kingdom',
    'Scotland':'United Kingdom'
})

In [None]:
# List the distinct country names after normalisation.
vaccine['country'].unique()

In [None]:
#dropping irrelevant columns
# Rebinding instead of inplace, plus errors='ignore', lets this cell be re-run without a
# KeyError once the columns are already gone. `axis` is omitted because `columns=`
# already says what is being dropped.
vaccine = vaccine.drop(columns=['source_name','source_website'], errors='ignore')

In [None]:
# Preview the cleaned frame.
vaccine.head(5)

In [None]:
#importing necessary libraries for visualization
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pf
# Initialise plotly's offline notebook mode before any figure is shown.
pf.init_notebook_mode()

In [None]:
# Highest reported total_vaccinations for each value of the vaccines column, largest first.
vaccine_counts = (
    vaccine.groupby('vaccines')['total_vaccinations']
    .max()
    .sort_values(ascending=False)
)
fig = px.bar(
    vaccine_counts,
    x='total_vaccinations',
    y=vaccine_counts.index,
    color=vaccine_counts.index,
    title="Most Popular Vaccines Around The World",
    labels={'vaccines': 'Vaccines', 'total_vaccinations': 'Total Vaccinations'},
    color_discrete_sequence=px.colors.qualitative.Vivid,
)
fig.show()

It can be seen that ***Pfizer/BioNTech*** is the most popular vaccine used around the world, followed by **Sinovac** and **Oxford/AstraZeneca**.

In [None]:
# One marker trace per distinct value of the vaccines column: total_vaccinations over date.
title = "Total Vaccinations on daily basis by vaccines"

fig = go.Figure()
for vacc in vaccine['vaccines'].unique():
    data_vaccine = vaccine.loc[vaccine['vaccines'] == vacc]
    trace = go.Scatter(
        name=vacc,
        x=data_vaccine['date'],
        y=data_vaccine['total_vaccinations'],
        mode="markers",
        hovertemplate="Date: %{x}<br>Value: %{y}",
    )
    fig.add_trace(trace)

fig.update_layout(title={"text": title})
fig.show()

# ***WHICH COUNTRY USES WHICH VACCINE?***

In [None]:
# Per country, take the max() of the vaccines and iso_code columns
# (for string columns this is the lexicographically largest value).
vaccine_country = vaccine.groupby('country')[['vaccines', 'iso_code']].max()
fig = px.choropleth(
    vaccine_country,
    locations='iso_code',
    locationmode='ISO-3',
    color='vaccines',
    hover_name=vaccine_country.index,
    title='Distribution of Vaccines around the World',
)
# Centre the title and lay the legend out horizontally.
fig.update_layout(title={'x': 0.5}, legend={'orientation': 'h'})
fig.show()

* *We can see the large violet area of Sputnik V vaccine, but it is used only in Russia and in Argentina.*

* *Most European, Middle Eastern and North American countries use Pfizer/BioNTech.*

* *CNBG, Sinovac is the second most used vaccination scheme, but it is used only in China.*

* *The Oxford/AstraZeneca vaccine is distributed in United Kingdom, Brazil, Myanmar and India.*

* *Covaxin is also used in India.*

In [None]:
# Sum people_vaccinated over all countries for each date and plot it as a line.
vaccination_rate = vaccine.groupby('date', as_index=False)['people_vaccinated'].sum()
px.line(
    vaccination_rate,
    x='date',
    y='people_vaccinated',
    title="Overall Vaccination Trend Till Date",
    labels={'date': 'Date', 'people_vaccinated': "People Vaccinated"},
)

* *The vaccination rate increased gradually until the end of December, as only the countries that developed the vaccines had started vaccinating (at least 1 dose).*
* *Then, as the availability of vaccines increased in other countries, either through trade in vaccines or through further developments, high vaccination rates have been achieved from January and February to date.*

In [None]:
#function for choropleth analysis
def choropleth(col1,name,title,color,agg='sum'):
    """Show an animated (one frame per date) world choropleth of one `vaccine` column.

    The module-level `vaccine` frame is grouped by (date, iso_code, country) and the
    values of `col1` inside each group are combined with `agg` before plotting.

    Parameters
    ----------
    col1 : str
        Name of the numeric column of `vaccine` to colour the map by.
    name : str
        Human-readable label shown for `col1` in the colour bar and hover box.
    title : str
        Figure title.
    color : sequence
        Continuous colour scale passed to plotly express
        (e.g. ``px.colors.sequential.Viridis``).
    agg : str or callable, default 'sum'
        Aggregation applied when several rows share the same date, iso_code and
        country (this happens when several regions were relabelled to one country).
        The default keeps the original summing behaviour; pass ``'max'`` for
        ratio-style columns such as the per-hundred / per-million ones, where
        adding rows together is not meaningful.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    v=vaccine[['date','country','iso_code',col1]]
    v_c=v.groupby(['date','iso_code','country'])[col1].agg(agg).reset_index()
    fig=px.choropleth(v_c,locations='iso_code',locationmode='ISO-3',color=col1,hover_name='country',animation_frame='date',
                    labels={col1:name,'iso_code':'Iso_code','date':'Date'},color_continuous_scale=color,
                      title=title)
    fig.show()

In [None]:
# Ten countries with the highest reported total_vaccinations.
vaccinated_country = (
    vaccine.groupby('country')['total_vaccinations']
    .max()
    .sort_values(ascending=False)
    .head(10)
)
fig = px.bar(
    vaccinated_country,
    x='total_vaccinations',
    y=vaccinated_country.index,
    color=vaccinated_country.index,
    orientation='h',
    title='Top Ten Countries by Total Vaccinations Conducted',
    labels={'country': 'Country', 'total_vaccinations': 'Total Vaccinations'},
    color_discrete_sequence=px.colors.qualitative.D3,
)
fig.show()

***USA and China*** *have conducted the largest number of vaccinations amongst the top ten countries.*

In [None]:
# Animated map of total_vaccinations per country.
choropleth(
    col1='total_vaccinations',
    name='Total Vaccinations',
    title='Total Vaccinations in each country',
    color=px.colors.sequential.Purpor,
)

In [None]:
# Ten countries with the highest reported total_vaccinations_per_hundred.
vaccinated_country_hundred = (
    vaccine.groupby('country')['total_vaccinations_per_hundred']
    .max()
    .sort_values(ascending=False)
    .head(10)
)
fig = px.bar(
    vaccinated_country_hundred,
    x='total_vaccinations_per_hundred',
    y=vaccinated_country_hundred.index,
    color=vaccinated_country_hundred.index,
    orientation='h',
    title='Top Ten Countries by Percentage of Total Vaccinations Conducted',
    labels={'country': 'Country', 'total_vaccinations_per_hundred': 'Percent(%)'},
    color_discrete_sequence=px.colors.qualitative.Dark2,
)
fig.show()

***Israel (62.87 doses per hundred people) and Gibraltar (54.24 doses per hundred people)*** *have the highest total vaccinations per hundred people in the world.*

In [None]:
# Animated map of total_vaccinations_per_hundred per country.
choropleth(
    col1='total_vaccinations_per_hundred',
    name='Total Vaccinations Per Hundred',
    title='Total Vaccinations per hundred timeline',
    color=px.colors.sequential.Viridis,
)

In [None]:
# Ten countries with the highest reported people_vaccinated.
people_vaccinated_country = (
    vaccine.groupby('country')['people_vaccinated']
    .max()
    .sort_values(ascending=False)
    .head(10)
)
fig = px.bar(
    people_vaccinated_country,
    x='people_vaccinated',
    y=people_vaccinated_country.index,
    color=people_vaccinated_country.index,
    orientation='h',
    title='Top Ten Countries by People Vaccinated',
    labels={'country': 'Country', 'people_vaccinated': 'People Vaccinated'},
    color_discrete_sequence=px.colors.qualitative.G10,
)
fig.show()

***USA and China*** *have the largest number of people vaccinated (at least 1 dose of vaccine) amongst the top ten countries.*

In [None]:
# Animated map of people_vaccinated per country.
choropleth(
    col1='people_vaccinated',
    name='People Vaccinated',
    title='People Vaccinated timeline',
    color=px.colors.sequential.Sunset,
)

In [None]:
# Ten countries with the highest reported people_vaccinated_per_hundred.
people_vaccinated_country_percent = (
    vaccine.groupby('country')['people_vaccinated_per_hundred']
    .max()
    .sort_values(ascending=False)
    .head(10)
)
fig = px.bar(
    people_vaccinated_country_percent,
    x='people_vaccinated_per_hundred',
    y=people_vaccinated_country_percent.index,
    color=people_vaccinated_country_percent.index,
    orientation='h',
    title='Top Ten Countries by Percentage of People Vaccinated',
    labels={'country': 'Country', 'people_vaccinated_per_hundred': 'Percentage(%)'},
    color_discrete_sequence=px.colors.qualitative.Bold,
)
fig.show()

***Gibraltar (39.77%) and UAE (39.95%)*** *have the highest percentage of their population vaccinated amongst the top ten countries.*

In [None]:
# Animated map of people_vaccinated_per_hundred per country.
choropleth(
    col1='people_vaccinated_per_hundred',
    name='People Vaccinated Per Hundred',
    title='People Vaccinated per hundred timeline',
    color=px.colors.sequential.Mint,
)

In [None]:
# Ten countries with the highest reported people_fully_vaccinated.
fully_vaccinated_country = (
    vaccine.groupby('country')['people_fully_vaccinated']
    .max()
    .sort_values(ascending=False)
    .head(10)
)
px.bar(
    fully_vaccinated_country,
    x='people_fully_vaccinated',
    y=fully_vaccinated_country.index,
    color=fully_vaccinated_country.index,
    orientation='h',
    title='Top Ten Countries By Number of Fully Vaccinated People',
    labels={'country': 'Country', 'people_fully_vaccinated': 'People Fully Vaccinated'},
    color_discrete_sequence=px.colors.qualitative.Set1,
)

***USA and Israel*** *have the largest number of fully immunized people amongst the top ten countries.*

In [None]:
# Animated map of people_fully_vaccinated per country.
choropleth(
    col1='people_fully_vaccinated',
    name='People Fully Vaccinated',
    title='People Fully Vaccinated timeline',
    color=px.colors.sequential.Cividis,
)

In [None]:
# Ten countries with the highest reported people_fully_vaccinated_per_hundred.
fully_vaccinated_country_percent = (
    vaccine.groupby('country')['people_fully_vaccinated_per_hundred']
    .max()
    .sort_values(ascending=False)
    .head(10)
)
fig = px.bar(
    fully_vaccinated_country_percent,
    x='people_fully_vaccinated_per_hundred',
    y=fully_vaccinated_country_percent.index,
    color=fully_vaccinated_country_percent.index,
    orientation='h',
    title='Top Ten Countries By Percentage of Fully Vaccinated People',
    labels={'country': 'Country', 'people_fully_vaccinated_per_hundred': 'Percentage(%)'},
    color_discrete_sequence=px.colors.qualitative.Prism,
)
fig.show()

***Israel (23.28%) and Gibraltar (14.47%)*** *have the highest percentage of their population fully immunized amongst the top ten countries.*

In [None]:
# Animated map of people_fully_vaccinated_per_hundred per country.
choropleth(
    col1='people_fully_vaccinated_per_hundred',
    name='People Fully Vaccinated Per Hundred',
    title='People Fully Vaccinated Per Hundred Timeline',
    color=px.colors.sequential.RdBu,
)

In [None]:
# Ten countries with the highest single-day daily_vaccinations value.
vaccinated_country = (
    vaccine.groupby('country')['daily_vaccinations']
    .max()
    .sort_values(ascending=False)
    .head(10)
)
fig = px.bar(
    vaccinated_country,
    x='daily_vaccinations',
    y=vaccinated_country.index,
    color=vaccinated_country.index,
    orientation='h',
    title='Top Ten Countries by Daily Vaccinations Conducted',
    labels={'country': 'Country', 'daily_vaccinations': 'Daily Vaccinations'},
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
fig.show()

***USA and China*** *have the largest number of people immunized daily amongst the top ten countries.*

In [None]:
# Animated map of daily_vaccinations per country.
choropleth(
    col1='daily_vaccinations',
    name='Daily Vaccination Progress',
    title='Daily Vaccination Progress Timeline',
    color=px.colors.sequential.amp,
)

In [None]:
# Ten countries with the highest single-day daily_vaccinations_per_million value.
vaccinated_country = (
    vaccine.groupby('country')['daily_vaccinations_per_million']
    .max()
    .sort_values(ascending=False)
    .head(10)
)
fig = px.bar(
    vaccinated_country,
    x='daily_vaccinations_per_million',
    y=vaccinated_country.index,
    color=vaccinated_country.index,
    orientation='h',
    title='Top Ten Countries by Daily Vaccinations per Million',
    labels={'country': 'Country', 'daily_vaccinations_per_million': 'People Vaccinated Per Million'},
    color_discrete_sequence=px.colors.qualitative.Safe,
)
fig.show()

***Gibraltar and Seychelles*** *have the largest number of people vaccinated daily per million of their population amongst the top ten countries.*

In [None]:
# Animated map of daily_vaccinations_per_million per country.
choropleth(
    col1='daily_vaccinations_per_million',
    name='Daily Vaccination Per Million',
    title='Daily Vaccination per million timeline',
    color=px.colors.sequential.Oranges,
)

*From the above analysis, we can say that* ***China and USA*** *have more individuals who have undergone either partial or complete vaccination, and* *therefore the scale of their vaccination process is huge; but due to their large populations, their rate of fully vaccinated people (having 2 shots of vaccine) is lower than that of Middle Eastern countries, namely* ***Israel and UAE***, *as well as other countries,* namely ***Seychelles and Gibraltar.***

*Next, we will evaluate which of these 6 countries outperforms the others.*

# *WHICH COUNTRY HAS MORE ADVANCED VACCINATION PROGRAMME?*

In [None]:
#finding percent of population fully vaccinated amongst total vaccinated people
# Rows where people_vaccinated is 0 would divide by zero and produce NaN/inf, bringing
# missing values back after the cleaning above. Those rows get a share of 0 instead,
# in line with how the other missing values in this notebook were filled.
vaccine['people_fully_vaccinated_percent'] = (
    vaccine['people_fully_vaccinated']
    .div(vaccine['people_vaccinated'].where(vaccine['people_vaccinated'] > 0))
    .mul(100.0)
    .fillna(0.0)
)

In [None]:
# One sub-frame per country compared in the trend plots below.
vaccine_USA = vaccine.loc[vaccine['country'] == 'United States']
vaccine_gibraltar = vaccine.loc[vaccine['country'] == 'Gibraltar']
vaccine_china = vaccine.loc[vaccine['country'] == 'China']
vaccine_seychelles = vaccine.loc[vaccine['country'] == 'Seychelles']
vaccine_Israel = vaccine.loc[vaccine['country'] == 'Israel']
vaccine_UAE = vaccine.loc[vaccine['country'] == 'United Arab Emirates']

def vaccine_daily(column,ytitle,title):
    """Plot `column` against date for the six compared countries on one figure.

    Reads the module-level per-country frames (vaccine_USA, vaccine_gibraltar,
    vaccine_seychelles, vaccine_china, vaccine_Israel, vaccine_UAE) and draws one
    trace per country.

    Parameters
    ----------
    column : str
        Name of the column to plot on the y axis.
    ytitle : str
        Y-axis title.
    title : str
        Figure title.
    """
    # (legend label, frame) pairs, in the order the traces are drawn.
    labelled_frames = (
        ('USA', vaccine_USA),
        ('Gibraltar', vaccine_gibraltar),
        ('seychelles', vaccine_seychelles),
        ('China', vaccine_china),
        ('Israel', vaccine_Israel),
        ('UAE', vaccine_UAE),
    )
    traces = [
        go.Scatter(name=label, x=frame['date'], y=frame[column], mode='markers + lines')
        for label, frame in labelled_frames
    ]
    fig = go.Figure(traces)
    fig.update_layout(yaxis_title=ytitle, title=title, hovermode="x")
    fig.show()

In [None]:
# Trend of people_fully_vaccinated_per_hundred for the six countries.
vaccine_daily(
    column='people_fully_vaccinated_per_hundred',
    ytitle='People Fully Vaccinated per hundred',
    title='Trend of fully vaccinated people per hundred till date',
)

In [None]:
# Trend of daily_vaccinations_per_million for the six countries.
vaccine_daily(
    column='daily_vaccinations_per_million',
    ytitle='Daily Vaccinations per million(in ppm)',
    title='Trend of population vaccinated daily per million till date',
)

In [None]:
# Trend of people_fully_vaccinated_percent for the six countries.
vaccine_daily(
    column='people_fully_vaccinated_percent',
    ytitle='Fraction of population fully vaccinated(in Percent%)',
    title='Trend of population Fully Vaccinated',
)

In [None]:
# Animated map of people_fully_vaccinated_percent per country.
choropleth(
    col1='people_fully_vaccinated_percent',
    name='Percentage of people fully vaccinated',
    title='Percentage of people fully vaccinated timeline',
    color=px.colors.sequential.speed,
)

*The Middle Eastern countries (Israel and UAE) clearly outperform the world's biggest superpowers in terms of the execution of vaccination drives.*

*Among the Middle Eastern countries,* ***Israel*** *has conducted the more advanced vaccination programme, with* ***nearly 58.8% of its vaccinated people fully vaccinated.***

IF YOU LIKE IT DO UPVOTE IT.

DO SHARE YOUR VALUABLE FEEDBACK.

GOOD LUCK.