# COVID-19 WORLD VACCINATION PROGRESS - EDA


![](https://www.emeraldgrouppublishing.com/sites/default/files/image/covid-cells.jpg)


# Coronaviruses are a group of related RNA viruses that cause diseases in mammals and birds. In humans and birds, they cause respiratory tract infections.

# IMPORTING THE NECESSARY LIBRARIES

In [None]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import os
import re
import time
import seaborn as sns
import warnings
# Globally suppress all warnings for the rest of the session. Note that this
# also hides any deprecation notices that later cells might trigger.
warnings.filterwarnings("ignore")

In [None]:
# Load the country-level vaccination records into the notebook-wide frame `df`.
df = pd.read_csv(
    filepath_or_buffer="../input/covid-world-vaccination-progress/country_vaccinations.csv"
)

# INFORMATION ABOUT DATASET

In [None]:
# Preview the first five rows of the dataset.
df.head(n=5)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) as reported by
# DataFrame.describe() with its default settings.
df.describe()

In [None]:
# Print the column names, non-null counts, dtypes and memory usage.
df.info()

In [None]:
# (number of rows, number of columns) of the loaded table.
df.shape

In [None]:
# Data type of every column.
df.dtypes

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Parse the "date" column into datetime values.
df["date"] = pd.to_datetime(df["date"])

In [None]:
# Store each country's latest *reported* cumulative total in a helper column.
# The value is written only on the row it was taken from (every other row stays
# NaN), so a later per-country mean() simply returns that single figure.
# Rows with a missing total are dropped first: a plain tail(1) would pick the
# last row even when its total is NaN, which erases that country from the
# rankings. Sorting by date makes "latest" independent of the file's row order.
df["Total_vaccinations(count)"] = (
    df.sort_values("date", kind="stable")
    .dropna(subset=["total_vaccinations"])
    .groupby("country")["total_vaccinations"]
    .tail(1)
)

In [None]:
# Twenty countries with the highest total vaccination count.
(
    df.groupby("country")["Total_vaccinations(count)"]
    .mean()
    .sort_values(ascending=False)
    .head(20)
)

# VISUALIZATION

In [None]:
# Horizontal bar chart of the 20 countries with the most vaccinations.
x = (
    df.groupby("country")["Total_vaccinations(count)"]
    .mean()
    .sort_values(ascending=False)
    .head(20)
)
sns.set_style("darkgrid")
plt.figure(figsize=(10, 10))
# x/y must be passed by keyword: current seaborn releases reject them as
# positional arguments (older releases only emitted a FutureWarning).
ax = sns.barplot(x=x.values, y=x.index)
ax.set_xlabel("Total vaccinations(count)")
ax.set_ylabel("Country")
plt.show()

In [None]:
# Top countries by number of fully vaccinated people.
# The helper column holds each country's latest *reported* figure, written only
# on the row it came from. Missing values are dropped before taking the last
# row, because a plain tail(1) would return NaN whenever the final row of a
# country has no figure; sorting by date makes "latest" explicit.
df["Full_vaccinations(count)"] = (
    df.sort_values("date", kind="stable")
    .dropna(subset=["people_fully_vaccinated"])
    .groupby("country")["people_fully_vaccinated"]
    .tail(1)
)

df.groupby("country")["Full_vaccinations(count)"].mean().sort_values(ascending=False).head(20)

In [None]:
# Bar chart of the top 20 countries by fully vaccinated people.
# The ranking is computed here explicitly. Previously this cell reused the
# leftover `x` from the total-vaccinations chart, so it displayed total
# vaccinations under a "Fully vaccinated" label.
fully_vaccinated_top = (
    df.groupby("country")["Full_vaccinations(count)"]
    .mean()
    .sort_values(ascending=False)
    .head(20)
)

plt.style.use("ggplot")
plt.figure(figsize=(10, 10))
# x/y are passed by keyword; current seaborn rejects them positionally.
ax = sns.barplot(x=fully_vaccinated_top.values, y=fully_vaccinated_top.index)
ax.set_xlabel("Fully vaccinated(count)")
ax.set_ylabel("Country")
plt.show()

In [None]:
# Print every distinct value of the "vaccines" column, one per line.
x = df["vaccines"].unique()
y = [*x]
for vaccine_name in y:
    print(vaccine_name)

In [None]:
# Number of rows recorded for each value of the "vaccines" column,
# most frequent first.
df["vaccines"].value_counts()

In [None]:
# Horizontal count plot: number of rows per "vaccines" value.
plt.figure(figsize=(16, 16))
ax = sns.countplot(data=df, y="vaccines")
ax.set_ylabel("Vaccines")
ax.set_xlabel("Count")
plt.show()

In [None]:
# Daily vaccinations summed over all rows of the dataset for each date.
x = df.groupby("date")["daily_vaccinations"].sum()
plt.style.use("ggplot")
plt.figure(figsize=(15, 5))
# x/y are passed by keyword; current seaborn rejects them positionally.
sns.lineplot(x=x.index, y=x.values)
plt.xlabel("Date")
plt.show()

In [None]:
# Total vaccinations over time across the whole dataset. Each date has several
# rows, so seaborn aggregates them per date (its default estimator is the mean).
plt.figure(figsize=(15, 5))
ax = sns.lineplot(data=df, x="date", y="total_vaccinations")
ax.set_xlabel("Date")
ax.set_ylabel("Total Vaccinations")
plt.show()

In [None]:
# Twenty countries with the highest mean of daily_vaccinations.
x = (
    df.groupby("country")["daily_vaccinations"]
    .mean()
    .sort_values(ascending=False)
    .head(20)
)
x

In [None]:
# Bar chart of the per-country daily-vaccination averages held in `x`
# (computed in the previous cell).
plt.figure(figsize=(6, 6))
# x/y are passed by keyword; current seaborn rejects them positionally.
ax = sns.barplot(x=x.values, y=x.index)
ax.set_title("Daily Vaccinations Count")
ax.set_xlabel("Daily Vaccinations(avg)")
ax.set_ylabel("Country")
plt.show()

In [None]:
# Latest *reported* vaccinations-per-hundred figure for each country, stored in
# a helper column (capital "T") that is NaN on every other row.
# Missing values are dropped before taking the last row, because a plain
# tail(1) would return NaN whenever a country's final row has no figure;
# sorting by date makes "latest" independent of the file's row order.
df["Total_vaccinations_per_hundred"] = (
    df.sort_values("date", kind="stable")
    .dropna(subset=["total_vaccinations_per_hundred"])
    .groupby("country")["total_vaccinations_per_hundred"]
    .tail(1)
)

In [None]:
# Bar chart of the ten countries with the highest vaccinations per hundred.
x = (
    df.groupby("country")["Total_vaccinations_per_hundred"]
    .mean()
    .sort_values(ascending=False)
    .head(10)
)
plt.figure(figsize=(5, 5))
# x/y are passed by keyword; current seaborn rejects them positionally.
ax = sns.barplot(x=x.values, y=x.index)
ax.set_ylabel("Country")
ax.set_xlabel("Vaccinations per hundred")
plt.show()

In [None]:
# Twenty countries with the highest mean daily vaccinations per million.
(
    df.groupby("country")["daily_vaccinations_per_million"]
    .mean()
    .sort_values(ascending=False)
    .head(20)
)

In [None]:
# Daily vaccinations per million over time, across the whole dataset.
plt.figure(figsize=(15, 5))
plt.style.use("ggplot")
ax = sns.lineplot(data=df, x="date", y="daily_vaccinations_per_million")
ax.set_title("Daily Vaccinations")
ax.set_ylabel("Daily Vaccinations per million")
ax.set_xlabel("Date")
plt.show()

In [None]:
# Number of fully vaccinated people over time, across the whole dataset.
plt.figure(figsize=(15, 5))
ax = sns.lineplot(data=df, x="date", y="people_fully_vaccinated")
ax.set_title("Fully Vaccinated")
ax.set_xlabel("Date")
ax.set_ylabel("People Fully Vaccinated")
plt.show()

In [None]:
# Daily vaccinations over time, restricted to the rows for India.
plt.figure(figsize=(15, 5))
ax = plt.gca()
# Labels are set before plotting, so seaborn keeps them instead of writing
# the raw column names.
ax.set_xlabel("Date")
ax.set_ylabel("Daily Vaccinations")
ax.set_title("Vaccinations in India")
sns.lineplot(
    data=df.loc[df["country"] == "India"],
    x="date",
    y="daily_vaccinations",
    ax=ax,
)
plt.show()

In [None]:
# Cumulative vaccinations over time, restricted to the rows for India.
plt.figure(figsize=(15, 5))
ax = plt.gca()
ax.set_xlabel("Date")
ax.set_ylabel("Total Vaccinations")
ax.set_title("Total vaccinations in India")
sns.lineplot(
    data=df.loc[df.country == "India"],
    x="date",
    y="total_vaccinations",
    ax=ax,
)
plt.show()

In [None]:
# People vaccinated per hundred over time, restricted to the rows for India.
plt.figure(figsize=(15, 5))
sns.lineplot(
    data=df.loc[df.country == "India"],
    x="date",
    y="people_vaccinated_per_hundred",
)
plt.show()

In [None]:
# Values of the "vaccines" column recorded for India, most frequent first.
x = df.loc[df.country == "India"]
z = x["vaccines"].value_counts()
c = z.index.tolist()
c

In [None]:
# Ten countries with the highest total vaccination count.
(
    df.groupby("country")["Total_vaccinations(count)"]
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

In [None]:
# Rows for the five entities compared in the charts that follow.
# "United Kingdom" was previously misspelled as "Unted Kingdom", so that
# country matched no rows and was silently missing from every comparison.
x = df.loc[
    df["country"].isin(
        ["United States", "China", "India", "United Kingdom", "England"]
    )
]

In [None]:
# Total vaccinations over time, one line per country in the subset `x`.
plt.figure(figsize=(15, 5))
ax = sns.lineplot(data=x, x="date", y="total_vaccinations", hue="country")
ax.set_xlabel("Date")
ax.set_title("Total vaccination comparison")
plt.show()

In [None]:
# Daily vaccinations over time, one line per country in the subset `x`.
plt.figure(figsize=(15, 5))
sns.lineplot(
    data=x,
    x="date",
    y="daily_vaccinations",
    hue="country",
)
plt.show()

In [None]:
# Fully vaccinated people over time, one line per country in the subset `x`.
plt.figure(figsize=(15, 5))
sns.lineplot(
    data=x,
    x="date",
    y="people_fully_vaccinated",
    hue="country",
)
plt.show()

In [None]:
# Daily vaccinations per million over time, one line per country in the
# subset `x`.
plt.figure(figsize=(15, 5))
sns.lineplot(
    data=x,
    x="date",
    y="daily_vaccinations_per_million",
    hue="country",
)
plt.show()

# IF YOU HAVE ANY QUESTIONS ABOUT THIS NOTEBOOK, PLEASE LEAVE A COMMENT BELOW

# STAY SAFE 
# THANK YOU!