# This notebook explores the details of COVID-19 vaccination progress across countries

<img src= "https://wwwassets.rand.org/content/rand/blog/2020/08/its-going-to-be-the-vaccination-stupid/jcr:content/par/blogpost.aspectcrop.868x455.lt.jpg/x1598055738172.jpg.pagespeed.ic.WPm0htSOxE.jpg" style='width: 900px;'>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objects as go
#plt.rcParams['figure.figsize']=17,8
import cufflinks as cf
import plotly.offline as pyo
from plotly.offline import init_notebook_mode,plot,iplot
import folium 
from folium import plugins
plt.rcParams['figure.figsize'] = 10, 12
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Initialise plotly's offline notebook mode so that figures can be rendered
# inside the notebook.
pyo.init_notebook_mode(connected=True)
# Put cufflinks into offline mode as well.
cf.go_offline()

# Import the Dataset

In [None]:
# Load the per-country vaccination records from the relative ../input path.
df= pd.read_csv("../input/covid-world-vaccination-progress/country_vaccinations.csv")
# Preview the first rows of the loaded frame.
df.head()

# Data Description
1. country = The name of the country
2. iso_code = ISO code for the country
3. Date = Date for the data entry
4. Total number of vaccinations = This is the absolute number of total vaccinations in the country
5. Total number of people vaccinated = The number of people vaccinated.
6. Total number of people fully vaccinated = The number of people fully vaccinated(may be 2-3 doses).
7. Daily vaccinations (raw) = For a certain data entry, the number of vaccination for that date/country
8. Daily vaccinations = for a certain data entry, the number of vaccination for that date/country;
9. Total vaccinations per hundred = ratio (in percent) between vaccination number and total population up to the date in the country;
10. Total number of people vaccinated per hundred = ratio (in percent) between population immunized and total population up to the date in the country;
11. Total number of people fully vaccinated per hundred = ratio (in percent) between population fully immunized and total population up to the date in the country;
12. Number of vaccinations per day = number of daily vaccination for that day and country
13. Daily vaccinations per million = ratio (in ppm) between vaccination number and total population for the current date in the country
14. Vaccines used in the country = the vaccines (or combination of vaccines) used in the country (up to date);
15. Source name = source of the information (national authority, international organization, local organization etc.);
16. Source website = website of the source of information;

In [None]:
# Summarise the dataset: column names, dtypes and non-null counts.
df.info()

In [None]:
# Count the missing (null) values in each column.
df.isnull().sum()

In [None]:
# Parse the 'date' column into pandas datetime values (in place on df).
df['date'] = pd.to_datetime(df['date'])

# 1. Total Vaccinations among Countries? What country has vaccinated more people?

In [None]:
# Preview the first rows again, now with 'date' parsed as datetime.
df.head()

In [None]:
# Latest *reported* cumulative dose count for each country.
# Rows with a missing total_vaccinations are dropped before taking the last
# row per country, so a country whose final row is blank still contributes
# its last known figure instead of NaN (which would silently remove it from
# the ranking below).
# NOTE(review): tail(1) relies on each country's rows being in chronological
# order -- confirm against the CSV.
# NOTE(review): this presumably overwrites the dataset's own
# `people_vaccinated` column (item 5 of the data description). The name is
# kept because the following cells read df["people_vaccinated"]. The
# assignment aligns on the row index, so every other row becomes NaN.
df["people_vaccinated"] = (
    df.dropna(subset=["total_vaccinations"])
      .groupby("country")["total_vaccinations"]
      .tail(1)
)

# Only one non-null value per country remains, so the mean is that value.
# Keep the ten largest, as a two-column frame (country, people_vaccinated).
vaccination_country = (
    df.groupby("country")["people_vaccinated"]
      .mean()
      .sort_values(ascending=False)
      .head(10)
      .reset_index()
)

In [None]:
# Display the top-10 table computed in the previous cell.
vaccination_country

In [None]:
# Top 10 countries with the highest number of total vaccinations, shown as a
# table (left part of the figure) next to a bar chart (right part).
top_10 = (
    df.groupby("country")["people_vaccinated"]
      .mean()
      .sort_values(ascending=False)
      .head(10)
      .reset_index()
)

# Table trace: occupies x in [0, 0.52] of the figure.
trace = go.Table(
    domain={'x': [0, 0.52], 'y': [0, 1.0]},
    header={
        'values': ["Country", "Total vaccinations"],
        'fill': {'color': '#119DFF'},
        'font': {'color': 'white', 'size': 14},
        'align': ['center'],
        'height': 30,
    },
    cells={
        'values': [top_10['country'], top_10['people_vaccinated']],
        'fill': {'color': ['#25FEFD', 'white']},
        'align': ['center'],
        'height': 20,
    },
)

# Bar trace: bound to the x1/y1 axes that the layout places on the right.
trace1 = go.Bar(
    x=top_10['country'],
    y=top_10['people_vaccinated'],
    xaxis='x1',
    yaxis='y1',
    marker={'color': 'blue'},
    opacity=0.60,
)

layout = {
    'width': 1000,
    'height': 400,
    'autosize': False,
    'title': 'Top-10 Countries with highest number of total vaccination',
    'showlegend': False,
    # The bar chart's axes take x in [0.58, 1], leaving a gap after the table.
    'xaxis1': {'domain': [0.58, 1], 'anchor': 'y1', 'showticklabels': True},
    'yaxis1': {'domain': [0, 1.0], 'anchor': 'x1', 'hoverformat': '.2f'},
}

fig1 = {'data': [trace, trace1], 'layout': layout}
iplot(fig1)

# 2. Which country has the highest number of fully vaccinated people?

In [None]:
# Highest recorded people_fully_vaccinated value per country; keep the ten
# largest as a two-column frame (country, people_fully_vaccinated).
fully_vaccinated = (
    df.groupby("country")["people_fully_vaccinated"]
      .max()
      .sort_values(ascending=False)
      .head(10)
      .reset_index()
)

In [None]:
# Display the top-10 fully-vaccinated table.
fully_vaccinated

In [None]:
# Top 10 countries by fully vaccinated people, shown as a table (left part of
# the figure) next to a bar chart (right part).
top_10 = (
    df.groupby("country")["people_fully_vaccinated"]
      .max()
      .sort_values(ascending=False)
      .head(10)
      .reset_index()
)

# top_10 already holds at most ten rows, so its columns are used directly.
trace = go.Table(
    domain={'x': [0, 0.52], 'y': [0, 1.0]},
    header={
        'values': ["country", "people_fully_vaccinated"],
        'fill': {'color': '#119DFF'},
        'font': {'color': 'white', 'size': 14},
        'align': ['center'],
        'height': 30,
    },
    cells={
        'values': [top_10['country'], top_10['people_fully_vaccinated']],
        'fill': {'color': ['lime', 'white']},
        'align': ['center'],
        'height': 20,
    },
)

# Bar trace: bound to the x1/y1 axes that the layout places on the right.
trace1 = go.Bar(
    x=top_10['country'],
    y=top_10['people_fully_vaccinated'],
    xaxis='x1',
    yaxis='y1',
    marker={'color': 'lime'},
    opacity=0.60,
)

layout = {
    'width': 1000,
    'height': 400,
    'autosize': False,
    'title': 'Top-10 Countries with number of fully vaccinated people',
    'showlegend': False,
    'xaxis1': {'domain': [0.58, 1], 'anchor': 'y1', 'showticklabels': True},
    'yaxis1': {'domain': [0, 1.0], 'anchor': 'x1', 'hoverformat': '.2f'},
}

fig1 = {'data': [trace, trace1], 'layout': layout}
iplot(fig1)

# 3. Ratio between people_vaccinated and fully vaccinated people in different countries
Here we look at a few specific countries: China, India, the US and the UK.

In [None]:
# Restrict both top-10 tables to the four countries of interest. A country
# that is absent from a table simply yields no row there.
people_vaccinated = vaccination_country[
    vaccination_country['country'].isin(["United States", "India", "China", "United Kingdom"])
]
fully_vaccinated_sp = fully_vaccinated[
    fully_vaccinated['country'].isin(["United States", "India", "China", "United Kingdom"])
]

In [None]:
# Display the vaccination figures for the selected countries.
people_vaccinated

In [None]:
# Display the fully-vaccinated figures for the selected countries.
fully_vaccinated_sp

In [None]:
# Ratio of fully vaccinated people to the `people_vaccinated` figure, per
# country.
#
# The two frames are ranked by different metrics, so their rows are not
# guaranteed to list the countries in the same order (or even to contain the
# same countries). The denominator is therefore looked up by country name
# instead of being paired by row position. A country with no match gets NaN.
# Work on an explicit copy so the new column is not written into a slice of
# `fully_vaccinated`.
fully_vaccinated_sp = fully_vaccinated_sp.copy()
fully_vaccinated_sp['ratio'] = (
    fully_vaccinated_sp['people_fully_vaccinated']
    / fully_vaccinated_sp['country'].map(
        people_vaccinated.set_index('country')['people_vaccinated']
    )
)

In [None]:
# Display the table again, now including the 'ratio' column.
fully_vaccinated_sp

In [None]:
# Bar chart of the fully-vaccinated ratio, one bar per selected country.
px.bar(
    data_frame=fully_vaccinated_sp,
    x='country',
    y='ratio',
)

From the above graph we can say that in the US there is a good ratio between the total number of people vaccinated and the number of fully vaccinated people, while in India many people still need to get fully vaccinated.

# 4. Trace the daily vaccinations dynamic?

In [None]:
# Line plot of daily_vaccinations against date over the whole dataset.
plt.figure(figsize=(15, 5))
sns.lineplot(
    data=df,
    x="date",
    y="daily_vaccinations",
).set_title("Exploring trend in the number of daily vaccinations")
plt.show()

From the above figure we can say that vaccinations start to increase in the month of February, which may be caused by the second peak of coronavirus.

# 5. From which month did the vaccination procedure speed up in each country?

In [None]:
# The five countries with the largest recorded total_vaccinations value.
countries = (
    df.groupby('country')['total_vaccinations']
      .max()
      .sort_values(ascending=False)[:5]
      .index
)

# Stack each selected country's rows, in ranking order.
# pd.concat replaces the former DataFrame.append loop: append was removed in
# pandas 2.0, and concatenating the real slices (rather than growing an empty
# object-typed frame) also keeps the original column dtypes.
top_countries = pd.concat(
    [df.loc[df['country'] == country] for country in countries]
)

# Daily vaccinations per million over time, one line per country.
# x/y/hue are passed by keyword because recent seaborn releases no longer
# accept them positionally. ci=None is the documented way to switch the
# confidence band off.
sns.lineplot(
    data=top_countries,
    x='date',
    y='daily_vaccinations_per_million',
    hue='country',
    ci=None,
)

As we can see from the above graph, every country except the US increased its vaccination rate. Some countries, like India, show a decrease in the month of May (possibly because people were unable to get vaccinated during the second wave; a vaccine shortage may also have been a factor), but from June onwards the rate increased again.

In [None]:
# Daily vaccinations over time, restricted to the rows for India.
plt.figure(figsize=(15, 5))
sns.lineplot(
    data=df.loc[df["country"] == "India"],
    x="date",
    y="daily_vaccinations",
).set_title("Daily vaccinations in India")
plt.show()

# 6. What is the country that vaccinated completely most of the population?

In [None]:
# Highest recorded total_vaccinations_per_hundred per country; keep the ten
# largest as a two-column frame.
# NOTE(review): the section heading asks about *completely* vaccinated
# population, while this column counts doses per hundred people -- confirm
# this is the intended metric.
population_country = (
    df.groupby('country')['total_vaccinations_per_hundred']
      .max()
      .sort_values(ascending=False)
      .head(10)
      .reset_index()
)

In [None]:
# Display the top-10 table of total vaccinations per hundred people.
population_country

In [None]:
# Horizontal bar chart of total vaccinations per hundred people, with each
# bar annotated with its value.
plt.figure(figsize=(8, 5))

ax = sns.barplot(
    data=population_country,
    x='total_vaccinations_per_hundred',
    y='country',
)

ax.set_title('Total Vaccinations / Population', size=20)
ax.set_xlabel('Total Vaccinations', size=15)
ax.set_ylabel('Country', size=15)

# Put the label at the far end of each bar: x-position is the bar's end,
# y-position is the bar's y-origin plus its thickness.
for bar in ax.patches:
    bar_length = bar.get_width()
    ax.text(
        bar_length + bar.get_x(),
        bar.get_height() + bar.get_y(),
        f'{bar_length:.1f} %',
    )

# 7. What country has immunized the largest percent from its population?

In [None]:
# Highest recorded people_vaccinated_per_hundred per country; keep the ten
# largest as a two-column frame.
population_people_vaccination = (
    df.groupby('country')['people_vaccinated_per_hundred']
      .max()
      .sort_values(ascending=False)
      .head(10)
      .reset_index()
)

In [None]:
# Display the top-10 table of people vaccinated per hundred.
population_people_vaccination

In [None]:
# Horizontal bar chart of people vaccinated per hundred, with each bar
# annotated with its value.
plt.figure(figsize=(8, 5))

ax = sns.barplot(
    data=population_people_vaccination,
    x='people_vaccinated_per_hundred',
    y='country',
)

ax.set_title('Percentage of the Vaccinated Population', size=20)
ax.set_xlabel('People Vaccinated', size=15)
ax.set_ylabel('Country', size=15)

# Put the label at the far end of each bar: x-position is the bar's end,
# y-position is the bar's y-origin plus its thickness.
for bar in ax.patches:
    bar_length = bar.get_width()
    ax.text(
        bar_length + bar.get_x(),
        bar.get_height() + bar.get_y(),
        f'{bar_length:.1f} %',
    )

# 8. What is the vaccine used in the largest number of countries?

In [None]:
# Number of rows recorded for each distinct value of the 'vaccines' column.
# NOTE(review): df appears to hold one row per country per date, so these
# counts weight a country by how many dates it reports -- confirm before
# reading them as "number of countries".
df['vaccines'].value_counts()

In [None]:
from wordcloud import WordCloud, STOPWORDS

# Word cloud built from the text of the 'vaccines' column.
plt.figure(figsize=(20, 10))

# Join the entries with a space: an empty separator glues the last word of
# one row to the first word of the next and produces bogus tokens.
# Missing entries are skipped because str.join only accepts strings.
all_words = " ".join(df["vaccines"].dropna())

wc = WordCloud(
    stopwords=STOPWORDS,
    background_color="black",
    max_words=2000,
    max_font_size=280,
    random_state=42,  # fixed seed so the layout is reproducible
    width=1000,
    height=800,
).generate(all_words)

plt.imshow(wc, interpolation="bilinear")
plt.axis('off')
plt.show()

In [None]:
# How many countries use each vaccine combination (15 most common).
plt.figure(figsize=[10, 10])

# Keep one row per (country, vaccine combination) so every country is counted
# once. Counting the raw rows would weight a country by the number of rows it
# has in the dataset rather than answering "used in how many countries".
country_vaccines = df.drop_duplicates(subset=['country', 'vaccines'])['vaccines']

sns.countplot(
    y=country_vaccines,
    palette='CMRmap',
    order=country_vaccines.value_counts().head(15).index,
)
plt.title("Popularly used vaccines")
plt.xticks(rotation=90);

From the above graph we can see that the most used vaccines are Oxford/AstraZeneca and Pfizer.

# 9. What vaccination schemes (combination of vaccines) are used and in which countries?

In [None]:
# Row count for every (country, vaccine combination) pair, largest first.
# The index lists which combination each country uses; the value is the
# number of rows recorded for that pair.
df.groupby('country')['vaccines'].value_counts().sort_values(ascending=False)

So here we can see that some countries are using a mix of vaccines; for example, Norway is using Moderna as well as Pfizer.

In [None]:
# Bar chart of the 20 (country, vaccine combination) pairs with the most rows.
(
    df.groupby('country')['vaccines']
      .value_counts()
      .sort_values(ascending=False)
      .head(20)
      .plot.bar()
)

# 10. Which country prefers what vaccine?

In [None]:
# For every vaccine combination, collect the distinct countries that report
# it, as a two-column frame (vaccines, country), then display it.
vaccines = (
    df.groupby('vaccines')['country']
      .unique()
      .reset_index()
)
vaccines