# Importing Libraries


In [None]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats


# Loading the data

In [None]:
# Load the country vaccination CSV; the path is relative to the notebook's working directory.
df1 = pd.read_csv('../input/covid-world-vaccination-progress/country_vaccinations.csv')

# Reading the Data

In [None]:
# Preview the first rows of the raw frame.
df1.head()

# Checking whether there are any missing values in our data set.

In [None]:
# Per-column count of null entries in the raw frame.
Missing_values=df1.isnull().sum()

In [None]:
# Show the per-column null counts computed in the previous cell.
Missing_values

# Viewing missing values within the data.

In [None]:
# Boolean mask of the raw frame: True wherever a value is missing.
df1.isnull()

# Finding out the integer and float features from data.

In [None]:
# Column dtypes, used to tell numeric columns apart from object (string) columns.
df1.dtypes

#  Plotting missing values of vaccination.

In [None]:
# Render the null mask of the raw frame as a heatmap (no colour bar).
null_mask = df1.isnull()
sns.heatmap(null_mask, cmap='nipy_spectral_r', cbar=False)
plt.title('Missing values vaccination')

# The data contains many null entries that are not useful for the analysis, so we keep only the rows that matter.

In [None]:
# Keep only rows that report both a fully-vaccinated rate and a raw daily count.
has_full_rate = df1['people_fully_vaccinated_per_hundred'].notna()
df_clean = df1.loc[has_full_rate]
df_cleaned = df_clean.loc[df_clean['daily_vaccinations_raw'].notna()]

In [None]:
# Per-column null counts that remain after the filtering above.
df_cleaned.isnull().sum()

# Displaying the names of all vaccines used.

In [None]:
# One row per distinct value of the 'vaccines' column, sorted, shown with a caption.
vaccine_table = pd.DataFrame([df1['vaccines'].unique()], index=['Vaccine']).T
vaccine_table = vaccine_table.sort_values('Vaccine')
display(vaccine_table.style.set_caption('Vaccine names'))

# Grouping data by vaccines

In [None]:
# Count, for every value of 'vaccines', how many (country, vaccines) pairs exist,
# i.e. how many countries report that vaccine combination.
country_vaccine_pairs = df1.groupby(['country', 'vaccines']).count().reset_index()
country_vaccine_pairs = country_vaccine_pairs[['country', 'vaccines']]
countries_per_vaccine = country_vaccine_pairs.groupby('vaccines').count()['country'].reset_index()
vaccines = countries_per_vaccine.sort_values('country', ascending=False)
vaccines.columns = ['Vaccines', 'Number of countries used']
display(vaccines.style.set_caption('Number of countries used vaccine'))


#  Daily vaccinations by the type of vaccines

In [None]:
# Sum 'total_vaccinations' for every (date, vaccines) pair and flatten the result.
daily_total_vaccinations = (
    df1.groupby(['date', 'vaccines'])['total_vaccinations']
    .sum()
    .reset_index()
)
daily_total_vaccinations


#  Sorting the information given according to the iso code and countries 

In [None]:
# Distinct ISO codes present in the raw frame.
df1["iso_code"].unique()

In [None]:
# Distinct country names present in the raw frame.
df1["country"].unique()

In [None]:
# Per-(country, iso_code, vaccines) maximum of every progress metric.
# The columns are selected with a list: indexing a groupby with a bare tuple of
# names was deprecated in pandas 1.0 and raises in pandas 2.0+.
progress_columns = [
    'total_vaccinations',
    'people_vaccinated',
    'people_fully_vaccinated',
    'daily_vaccinations',
    'total_vaccinations_per_hundred',
    'people_vaccinated_per_hundred',
    'people_fully_vaccinated_per_hundred',
    'daily_vaccinations_per_million',
]
df2 = (
    df_cleaned.groupby(['country', 'iso_code', 'vaccines'])[progress_columns]
    .max()
    .reset_index()
)
df2.head(10)

# Visualisation of Country VS Total Vaccination

In [None]:
# Bar chart of the 50 largest per-country maxima of 'total_vaccinations'.
plt.figure(figsize=(20, 6))
top_totals = df2.groupby('country')['total_vaccinations'].max().sort_values(ascending=False).iloc[:50]
top_totals.plot.bar()
plt.title('Country vs Total Vaccination', fontsize=24, fontweight='bold')
plt.ylabel('Total Vaccinations');

# Visualisation of Country VS People Vaccinated

In [None]:
# Bar chart of the per-country maximum of 'people_vaccinated', largest first.
plt.figure(figsize=(20, 6))
data = df2.groupby('country')['people_vaccinated'].max().sort_values(ascending=False)
plt.bar(data.index, data)
# rotation must be a number (or 'vertical'/'horizontal'); the string '90'
# is rejected by current matplotlib releases.
plt.xticks(rotation=90)
plt.title('Country vs People Vaccinated', fontsize=24, fontweight='bold')
plt.ylabel('people vaccinated');

# Visualisation for daily vaccination per million in countries 

In [None]:
# Bar chart of the 50 largest per-country maxima of 'daily_vaccinations_per_million'.
plt.figure(figsize=(20, 6))
top_daily_rate = df2.groupby('country')['daily_vaccinations_per_million'].max().sort_values(ascending=False).iloc[:50]
top_daily_rate.plot.bar()
plt.ylabel('daily vaccinations per million')
plt.title('Daily Vaccination in Country per million', fontsize=24, fontweight='bold');

# Visualisation for no. of people fully vaccinated in countries 

In [None]:
# Bar chart of the 50 largest per-country maxima of 'people_fully_vaccinated'.
plt.figure(figsize=(20, 6))
top_fully = df2.groupby('country')['people_fully_vaccinated'].max().sort_values(ascending=False).iloc[:50]
top_fully.plot.bar()
plt.ylabel('People fully vaccinated')
plt.title('People fully vaccinated in Country', fontsize=24, fontweight='bold');

The USA has the highest number of fully vaccinated people of any country in the data.

# Visualisation for Top 10 countries vaccinations (sorted by mean values)

In [None]:
# Daily-vaccination rows with parsed dates, plus each country's sum and mean
# of 'daily_vaccinations' joined back onto every row.
daily_columns = ['country', 'date', 'vaccines', 'daily_vaccinations']
df3 = df_cleaned[daily_columns].dropna(subset=['daily_vaccinations'])
df3['date'] = pd.to_datetime(df3['date'], format='%Y-%m-%d')

df3_agg = df3.groupby('country')['daily_vaccinations'].agg(
    sum_vaccination='sum',
    mean_vaccination='mean',
)

final_df1 = df3.merge(df3_agg, on='country')
final_df1

# Average daily vaccinations by country - Top 10

In [None]:
# Horizontal bar chart of the ten countries with the highest mean 'daily_vaccinations'.
mean_daily = df_cleaned.groupby(['country']).agg({'daily_vaccinations': 'mean'})
top10_daily = mean_daily.sort_values(by='daily_vaccinations', ascending=False).iloc[:10]
top10_daily.plot.barh(color="blue", figsize=(15, 5))
plt.title('Average daily vaccinations by country - Top 10')
plt.legend('')
plt.xlabel('average daily vaccinations')
plt.show()

In [None]:
# Rank countries by mean daily vaccinations and plot the daily series of the top ten.
sort = df3_agg.sort_values(by='mean_vaccination', ascending=False).reset_index()
countries = sort['country'].tolist()

graphing_mean = final_df1.sort_values(by='mean_vaccination', ascending=False)
in_top10_mean = graphing_mean['country'].isin(countries[:10])
top10_mean = graphing_mean[in_top10_mean]

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot()
sns.lineplot(data=top10_mean, x='date', y='daily_vaccinations', hue='country')
plt.legend(ncol=3, frameon=False, title='')
plt.xticks(rotation=45)
plt.title('Top 10 countries vaccinations (sorted by mean values)')

# Visualisation for Top 10 countries vaccinations (sorted by total amount of vaccinations)

In [None]:
# Rank countries by summed daily vaccinations, keep the top-10 and top-40
# subsets, and plot the daily series of the top ten.
sort_sum = df3_agg.sort_values(by='sum_vaccination', ascending=False).reset_index()
countries_sum = sort_sum['country'].tolist()

graphing_sum = final_df1.sort_values(by='sum_vaccination', ascending=False)
ranked_country = graphing_sum['country']
top10_sum = graphing_sum[ranked_country.isin(countries_sum[:10])]
top40_sum = graphing_sum[ranked_country.isin(countries_sum[:40])]

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot()
sns.lineplot(data=top10_sum, x='date', y='daily_vaccinations', hue='country')
plt.legend(ncol=3, frameon=False, title='')
plt.xticks(rotation=45)
plt.title('Top 10 countries vaccinations (sorted by total amount of vaccinations)')

# Visualisation of Total vaccinations per country, top 40

In [None]:
# Horizontal bar chart of summed daily vaccinations for the top 40 countries.
# top40_sum holds one row per country and date, but 'sum_vaccination' is the
# same on every row of a country, so one row per country is enough. Plotting
# all rows only makes seaborn aggregate and bootstrap a constant value.
top40_totals = top40_sum.drop_duplicates(subset='country')
plt.figure(figsize=(8, 14))
sns.barplot(x='sum_vaccination', y='country', data=top40_totals, palette="RdBu")
plt.xlabel("Sum of Vaccination")
plt.ylabel("Country")
plt.title("Total vaccinations per country, top 40")
plt.show()

# What vaccines are used and in which countries?

In [None]:
# For every value of 'vaccines', collect the array of distinct countries using it.
countries_by_vaccine = final_df1[['country', 'vaccines']].groupby('vaccines')['country'].unique()
vaccines = countries_by_vaccine.to_frame().reset_index()
vaccines

# Trend of total vaccination according to countries

In [None]:
def plot_trend(df_cleaned,feature,title,country):
    """Draw a grid of line charts, one per country, of ``feature`` over ``date``.

    Args:
        df_cleaned: DataFrame with ``country`` and ``date`` columns plus the
            column named by ``feature``.
        feature: Name of the column plotted on the y-axis of every panel.
        title: Figure-level title drawn above the grid.
        country: Iterable of country names; one panel is drawn per name.
    """
    names = list(country)
    n_cols = 4
    # Keep the original 8x4 layout; add rows only when more than 32 names are given.
    n_rows = max(8, -(-len(names) // n_cols))

    plt.style.use('ggplot')
    plt.figure(figsize=(20,25))

    for position, name in enumerate(names, start=1):
        plt.subplot(n_rows, n_cols, position)
        # Plot only this country's rows. The filtered frame used to be computed
        # and then ignored, so every panel showed the whole dataset.
        data = df_cleaned[df_cleaned['country'] == name]
        sns.lineplot(x=data['date'], y=data[feature], label=feature)
        plt.xlabel('')
        # Hide x ticks and their labels on every panel.
        plt.tick_params(axis='x',which='both',top=False,bottom=False,labelbottom=False)
        plt.title(name)

    plt.suptitle(title,y=1.05)
    plt.tight_layout()
    plt.show()

In [None]:
# Countries to show in the trend grid (32 names, one panel each).
country = [
    'Argentina', 'Austria', 'Belgium', 'Brazil',
    'Canada', 'China', 'Czechia', 'Denmark',
    'England', 'Finland', 'France', 'Germany',
    'India', 'Ireland', 'Israel', 'Italy',
    'Kuwait', 'Mexico', 'Netherlands', 'Norway',
    'Poland', 'Russia', 'Saudi Arabia', 'Scotland',
    'Singapore', 'Spain', 'Sweden', 'Switzerland',
    'Turkey', 'United Arab Emirates', 'United Kingdom', 'United States',
]
plot_trend(
    df_cleaned,
    feature='total_vaccinations',
    title='Trend of total vaccination',
    country=country,
)

# Sorting out India's Data from the complete dataset

In [None]:
# Rows of the raw frame whose country is India.
df_in = df1.loc[df1['country'].eq('India')]

In [None]:
# Preview the first rows of the India subset.
df_in.head()

In [None]:
# Per-column null counts in the India subset.
df_in.isnull().sum()

# Dropping null values from the dataset

In [None]:
# Discard India rows that lack a 'people_fully_vaccinated_per_hundred' value.
df_in = df_in.dropna(subset=['people_fully_vaccinated_per_hundred'])


In [None]:
# Per-column null counts in the India subset after the filter above.
df_in.isnull().sum()

In [None]:
# Preview the first rows of the filtered India subset.
df_in.head()

In [None]:
# Line plot of India's two per-hundred columns against the frame's row index.
rate_columns = ['people_vaccinated_per_hundred', 'people_fully_vaccinated_per_hundred']
df_in[rate_columns].plot(linewidth=3, ylabel='% vaccinated')
plt.grid()

In [None]:
# Line plot of India's two head-count columns against the frame's row index.
# NOTE(review): the y-label says "(in 1000)" but the columns are plotted
# unscaled; confirm the intended units.
count_columns = ['people_vaccinated', 'people_fully_vaccinated']
df_in[count_columns].plot(linewidth=3, ylabel='(in 1000) vaccinated')
plt.grid()

# Names of vaccines used in INDIA

In [None]:
# One-row table, indexed 'India', of the distinct 'vaccines' value(s) in the India subset.
# NOTE(review): columns=['Vaccine'] only fits when unique() yields exactly one value; confirm for this data.
pd.DataFrame([df_in['vaccines'].unique()],columns=['Vaccine'],index=['India'])

# Visualisation of 'total Vaccinations' on a particular 'Date'

In [None]:
# Wide line chart of India's 'total_vaccinations' by date, with the y ticks on the right.
fig, ax = plt.subplots(1, 1, figsize=(40, 6))

g1 = sns.lineplot(data=df_in, x='date', y='total_vaccinations')

# Figure-level captions, positioned in figure coordinates.
fig.text(
    0.3, 0.9, 'Number of total Vaccinations in India w.r.t Date',
    fontsize=25, fontweight='bold', color='black',
)
fig.text(0.265, 0.33, 'India', fontsize=25, fontweight='bold')

ax.yaxis.tick_right()
ax.tick_params(length=1)
plt.xlabel('DATE')
plt.xticks(rotation=45)
plt.ylabel('Total Vaccinations')

# Visualisation of Max percentage of people vaccinated in India

In [None]:
# Single-bar chart of the largest 'people_vaccinated_per_hundred' value recorded for India.
y = [df_in['people_vaccinated_per_hundred'].max()]
x = ['India']
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them positionally.
g = sns.barplot(x=x, y=y)
g.text(-0.5, 3.5, 'Max percentage of people vaccinated in India',
       fontsize=14, fontweight='bold', fontfamily='Arial', color='black')
plt.show()

i.e. approximately 3%

# Visualisation of Max percentage of people fully vaccinated in India

In [None]:
# Single-bar chart of the largest 'people_fully_vaccinated_per_hundred' value recorded for India.
y = [df_in['people_fully_vaccinated_per_hundred'].max()]
x = ['India']
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them positionally.
g = sns.barplot(x=x, y=y)
# The caption now names the fully-vaccinated metric this chart actually shows.
g.text(-0.5, 0.7, 'Max percentage of people fully vaccinated in India',
       fontsize=14, fontweight='bold', fontfamily='Arial', color='black')
plt.show()

i.e. approximately 1%

# Visualisation of daily vaccinations per million in India

In [None]:
# Wide line chart of India's 'daily_vaccinations_per_million' by date, with the y ticks on the right.
fig, ax = plt.subplots(1, 1, figsize=(40, 6))

g1 = sns.lineplot(data=df_in, x='date', y='daily_vaccinations_per_million')

# Figure-level captions, positioned in figure coordinates.
fig.text(
    0.3, 0.9, 'Vaccination per million in India',
    fontsize=25, fontweight='bold', color='black',
)
fig.text(0.265, 0.33, 'India', fontsize=25, fontweight='bold')

plt.xticks()
ax.yaxis.tick_right()
ax.tick_params(length=1)
plt.xlabel('DATE')
plt.ylabel('Daily Vaccinations per Million')

# Analysis of the number of people being vaccinated daily

In [None]:
# Bar chart of India's 'daily_vaccinations_raw' by date (first row skipped),
# with the mean of the full column drawn as a dashed horizontal line.
# NOTE(review): the y-label says "(in 1000)" but the values are plotted
# unscaled; confirm the intended units.
raw_daily = df_in['daily_vaccinations_raw']
plt.bar(df_in['date'].iloc[1:], raw_daily.iloc[1:], label='daily vaccinations')
plt.xlabel('Date')
plt.ylabel('(in 1000)daily vaccination numbers')
plt.axhline(y=np.mean(raw_daily), color='blue', ls='--', linewidth=9, label='population mean')
plt.legend(loc=9)
plt.xticks(rotation=90)
plt.grid()