# Track the progress of COVID-19 vaccination around the World

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.express as px

# Locations of the two input CSVs: the COVID vaccination data and the
# country profile data.
covid_file_path = "/kaggle/input/d/gpreda/covid-world-vaccination-progress/country_vaccinations.csv"
cntry_file_path = "/kaggle/input/country-profile-data/country_profile_variables.csv"

# Load each CSV into its own DataFrame.
covid_df = pd.read_csv(filepath_or_buffer=covid_file_path)
cntry_df = pd.read_csv(filepath_or_buffer=cntry_file_path)


# World COVID Vaccine Data Analysis

View the data to see which information we need and which we do not need.

In [None]:
# Preview the first five rows of the vaccination data.
covid_df.head(n=5)

# Summary statistics for the vaccination data.
covid_df.describe()

We do not need the 'source_name' and 'source_website' columns for the vaccination analysis, so remove them.

In [None]:
# Discard the two source-attribution columns; the remaining columns are kept
# unchanged.
covid_df = covid_df.drop(columns=['source_name', 'source_website'])
covid_df.head()

# In which country is the vaccination programme most advanced?

Since each country has vaccine details reported on many dates, let's group the data by country and then take the maximum value of the required columns. Because these columns are cumulative, the maximum value is also the latest value.

Group by country to get the latest value of 'People Fully Vaccinated' for each country.

In [None]:
# One row per country holding the maximum of 'people_fully_vaccinated'
# (countries are kept in order of first appearance because sort=False).
total_people_fully_vaccinated = (
    covid_df
    .groupby(by=['country'], sort=False, as_index=False)['people_fully_vaccinated']
    .max()
)

# Number of missing values in each column of the per-country table.
total_people_fully_vaccinated.isna().sum()

Drop the rows with NaN values, since about 45% of the rows contain NaN.

In [None]:
# Keep only the rows that have no missing value in any column.
total_people_fully_vaccinated = total_people_fully_vaccinated.dropna(axis=0, how='any')

# (rows, columns) remaining after the drop.
total_people_fully_vaccinated.shape

In [None]:
# Select the 20 countries with the largest 'people_fully_vaccinated' values.
top20_people_fully_vaccinated = total_people_fully_vaccinated.nlargest(
    n=20,
    columns=['people_fully_vaccinated'],
)

# Bar chart of those 20 countries, followed by the table behind the chart.
fig = px.bar(
    x=top20_people_fully_vaccinated['country'],
    y=top20_people_fully_vaccinated['people_fully_vaccinated'],
    labels={"x": "Country", "y": "Total People Fully vaccinated"},
)
fig.show()
top20_people_fully_vaccinated

**The United States of America has the highest number of "Fully Vaccinated People".**

Group by country to get the maximum value of total_vaccinations for each country, then take the top 20 from the list and plot them.

In [None]:
# One row per country holding the maximum of 'total_vaccinations'.
total_vaccines = (
    covid_df
    .groupby(by=['country'], sort=False, as_index=False)['total_vaccinations']
    .max()
)

# The 20 countries with the largest totals.
top20_total_vaccines = total_vaccines.nlargest(n=20, columns=['total_vaccinations'])



In [None]:
# Bar chart of total vaccinations for the top 20 countries, followed by the
# table behind the chart.
fig = px.bar(
    x=top20_total_vaccines['country'],
    y=top20_total_vaccines['total_vaccinations'],
    labels={"x": "Country", "y": "Total vaccinations"},
)
fig.show()
top20_total_vaccines

>**The United States of America has the highest total vaccinations recorded, as we can see from the above.**

**We can conclude that the United States of America has the most advanced vaccination program: it has the highest "Fully Vaccinated People" as well as the highest "Total Vaccinations".**

# Which country is using what vaccine?

Since each country has vaccine details reported on many dates, and the latest date has the latest totals, let's first group the data by country and vaccine company and take the maximum 'total vaccinations' for each country. Then group by the vaccine company to get the total vaccinations for each company. Finally, take the 20 highest and plot them.

In [None]:
# Maximum 'total_vaccinations' for every (country, vaccines) pair.
groupby_country_vaccine = (
    covid_df
    .groupby(by=['country', 'vaccines'], sort=False, as_index=False)['total_vaccinations']
    .max()
)

# Add those per-country maxima together for each value of the 'vaccines' column.
top_vaccines = (
    groupby_country_vaccine
    .groupby(by=['vaccines'], sort=False, as_index=False)['total_vaccinations']
    .sum()
)

# Keep the 20 largest sums.
top20_vaccines = top_vaccines.nlargest(n=20, columns=['total_vaccinations'])

# Bar chart with the totals on the x axis and the 'vaccines' values on the y axis.
fig = px.bar(
    x=top20_vaccines['total_vaccinations'],
    y=top20_vaccines['vaccines'],
    labels={"x": "Total vaccinations", "y": "Vaccine Company"},
)
fig.show()

# Display the full (not only top 20) table of sums.
top_vaccines

**From the above, we can conclude that "Johnson & Johnson, Moderna, Pfizer/BioNTech" have the highest contribution to total vaccinations.**

*Vaccines used by countries around the world.* The map below shows which vaccines are used by different countries around the world.


In [None]:
# Per-country maximum of 'total_vaccinations' and 'vaccines', with 'country'
# restored as a regular column.
# The two columns are selected *before* aggregating so that max() is not
# computed over every other column of covid_df only to be thrown away, and so
# the result does not depend on unrelated columns supporting max().
# NOTE(review): max() of the text column 'vaccines' is the lexicographically
# largest value; this presumably assumes the value is constant within a
# country - verify against the data.
cntry_total_vaccinations = (
    covid_df
    .groupby(['country'])[["total_vaccinations", "vaccines"]]
    .max()
    .reset_index()
)
cntry_total_vaccinations

In [None]:
# Build the colour map from the *distinct* values of the 'vaccines' column.
# Zipping the raw per-country column with the palette stops after as many rows
# as the palette has colours, and repeated labels overwrite one another, so
# most labels would never receive an entry in the map.
vaccine_labels = cntry_total_vaccinations['vaccines'].dropna().unique()
viridis_palette = px.colors.sequential.Viridis
vaccine_color_map = {
    label: viridis_palette[position % len(viridis_palette)]
    for position, label in enumerate(vaccine_labels)
}
# NOTE(review): colours repeat once there are more distinct labels than
# palette entries; switch to a larger palette if unique colours are required.

# World map coloured by the 'vaccines' value of each country, matched by
# country name, with the total vaccinations shown on hover.
fig = px.choropleth(
    cntry_total_vaccinations,
    locations='country',
    locationmode='country names',
    color='vaccines',
    title='Vaccines used by Country',
    hover_data=['total_vaccinations'],
    color_discrete_map=vaccine_color_map,
    labels={'vaccines': 'Vaccine Name', 'country': 'Country', 'total_vaccinations': 'Total Vaccination'},
)

fig.show()


**Which country is leading in fully vaccinating the maximum percentage of its total population?**  
**This differs from our earlier result for the country with the highest number of fully vaccinated people, because this calculation takes the population into consideration.**


In [None]:
# One row per country holding the maximum of
# 'people_fully_vaccinated_per_hundred'.
people_fully_vaccinated_percentage = (
    covid_df
    .groupby(by=['country'], sort=False, as_index=False)['people_fully_vaccinated_per_hundred']
    .max()
)

# Number of missing values in each column of the per-country table.
people_fully_vaccinated_percentage.isna().sum()


In [None]:
# Drop the NaN entries. dropna() returns a new DataFrame rather than modifying
# the original, so the result has to be assigned back for the drop to take
# effect.
people_fully_vaccinated_percentage = people_fully_vaccinated_percentage.dropna()

# Get the top 20 maximum values of people_fully_vaccinated_per_hundred.
top20_people_fully_vaccinated_percentage = people_fully_vaccinated_percentage.nlargest(
    20,
    columns=['people_fully_vaccinated_per_hundred'],
)
top20_people_fully_vaccinated_percentage

In [None]:
# Bar chart of the top 20 countries by percentage of fully vaccinated people,
# with the percentage on the x axis and the country on the y axis.
fig = px.bar(
    x=top20_people_fully_vaccinated_percentage['people_fully_vaccinated_per_hundred'],
    y=top20_people_fully_vaccinated_percentage['country'],
    labels={"x": "People Fully Vaccinated in %", "y": "Country"},
)
fig.show()

**Gibraltar is the country/territory with the highest percentage of fully vaccinated people. This is because of its small population compared with the countries that lead in total vaccinations and in total fully vaccinated people.**

# World COVID Vaccine Progress by Time

Vaccines were first administered in early Dec 2020. Let us check how the world has progressed in total vaccinations and in people fully vaccinated over the past few months.


Group by date and take the sum of each feature over all countries. Plot the result to see how 'total vaccinations' and 'people fully vaccinated' have progressed over time.


In [None]:
# Group by date and sum the numeric columns across all countries.
# numeric_only=True keeps the text columns (e.g. 'country', 'vaccines') out of
# the aggregation; without it, pandas >= 2.0 attempts to sum those columns too.
# NOTE(review): 'total_vaccinations' and 'people_fully_vaccinated' are
# cumulative per country, so a date's sum only includes countries that have a
# value on that date - confirm this is the intended world total.
covid_vacc_by_date = covid_df.groupby('date').sum(numeric_only=True)
covid_vacc_by_date

In [None]:
# Bar chart of the summed 'total_vaccinations' per date; the bars are also
# coloured by the same value.
fig = px.bar(
    covid_vacc_by_date,
    x=covid_vacc_by_date.index,
    y='total_vaccinations',
    hover_data=['total_vaccinations'],
    color='total_vaccinations',
    height=400,
    title='Total Vaccinations by Date',
)

fig.show()

In [None]:
# Bar chart of the summed 'people_fully_vaccinated' per date; the bars are
# also coloured by the same value.
fig = px.bar(
    covid_vacc_by_date,
    x=covid_vacc_by_date.index,
    y='people_fully_vaccinated',
    hover_data=['people_fully_vaccinated'],
    color='people_fully_vaccinated',
    height=450,
    title='People Fully Vaccinated by Date',
)
# Centre the chart title horizontally.
fig.update_layout(title_x=0.5)
fig.show()

We can see that there has been good overall progress in vaccine administration around the world since it started in Dec 2020. There might be countries without much progress, but this is the overall picture. A country-wise analysis can be done too.