In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

This dataset includes information about:  

* **Country** - this is the country for which the vaccination information is provided;     
* **Country ISO Code** - ISO code for the country;   
* **Date** - date for the data entry; for some of the dates we have only the daily vaccinations, for others, only the (cumulative) total;   
* **Total number of vaccinations** - this is the absolute number of total immunizations in the country;  
* **Total number of people vaccinated** - a person, depending on the immunization scheme, will receive one or more (typically 2) vaccines; at a certain moment, the number of vaccinations might be larger than the number of people vaccinated;  
* **Total number of people fully vaccinated** - this is the number of people that received the entire set of immunization according to the immunization scheme (typically 2); at a certain moment in time, there might be a certain number of people that received one vaccine and another number (smaller) of people that received all vaccines in the scheme;  
* **Daily vaccinations (raw)** - for a certain data entry, the number of vaccination for that date/country;  
* **Daily vaccinations** - for a certain data entry, the number of vaccination for that date/country;  
* **Total vaccinations per hundred** - ratio (in percent) between vaccination number and total population up to the date in the country;  
* **Total number of people vaccinated per hundred** - ratio (in percent) between population immunized and total population up to the date in the country;  
* **Total number of people fully vaccinated per hundred** - ratio (in percent) between population fully immunized and total population up to the date in the country;   
* **Number of vaccinations per day** - number of daily vaccination for that day and country;   
* **Daily vaccinations per million** -  ratio (in ppm) between vaccination number and total population for the current date in the country;    
* **Vaccines used in the country** - total number of vaccines used in the country (up to date);    
* **Source name** - source of the information (national authority, international organization, local organization etc.);   
* **Source website** - website of the source of information;  

In [None]:
# Importing libraries
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the country-level vaccination records and preview the first five rows.
data_df = pd.read_csv(
    '/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv'
)
data_df.head(n=5)

In [None]:
# Load the per-manufacturer vaccination records and preview the first five rows.
manufacturer_df = pd.read_csv(
    '/kaggle/input/covid-world-vaccination-progress/country_vaccinations_by_manufacturer.csv'
)
manufacturer_df.head(n=5)

In [None]:
# Print a summary of data_df: column names, dtypes and non-null counts.
data_df.info()

In [None]:
# Convert the 'date' column to pandas datetimes so that min/max and grouping by
# date further down operate on real dates.
parsed_dates = pd.to_datetime(data_df['date'])
data_df['date'] = parsed_dates

In [None]:
# Print the coverage of the dataset: date range, number of countries and number
# of distinct vaccine combinations.
# The 'vaccines' column holds the combination of vaccines used by a country, so
# its distinct values are combinations (schemes), not individual vaccines.
# nunique() ignores missing values, so a NaN is not counted as a country/scheme.
first_day = data_df['date'].min()
last_day = data_df['date'].max()
n_countries = data_df['country'].nunique()
n_vaccine_combinations = data_df['vaccines'].nunique()

print(
    'Data is collected from {} to {} and total number of countries in dataset is {} '
    'and total number of vaccine combinations is {}'.format(
        first_day, last_day, n_countries, n_vaccine_combinations
    )
)

In [None]:
# Count how many rows of data_df carry each distinct value of the 'vaccines' column.
# NOTE(review): these are row counts (presumably one row per country and date), not
# the number of countries using each combination — confirm before ranking popularity.
data_df['vaccines'].value_counts()

### Let us study the trend of vaccinations done by date


In [None]:
# Sum the daily vaccinations of all rows sharing a date, expressed in millions.
daily_vaccination = (
    data_df.groupby('date')['daily_vaccinations']
    .sum()
    .div(1000000)
)
daily_vaccination

In [None]:
# Line chart of the per-date vaccination totals. daily_vaccination was divided by
# 1,000,000 when it was built, so the y-axis is labelled in millions.
trend_fig, trend_ax = plt.subplots(figsize=(12, 6))
sns.lineplot(
    x=daily_vaccination.index,
    y=daily_vaccination,
    zorder=2,
    linewidth=0.8,
    ax=trend_ax,
)
trend_ax.set(
    title='Daily vaccinations summed over all countries',
    xlabel='Date',
    ylabel='Daily vaccinations (millions)',
);

As the days pass, the daily vaccination count keeps increasing. This can mainly be attributed to the second wave of the virus, which is more contagious; in response, governments across the world are urging people to get vaccinated.

#### What vaccination schemes (combination of vaccines) are used and in which countries?

In [None]:
# Build the list of distinct (vaccine combination, country) pairs.
# Rows with a missing scheme or country are dropped, matching what a groupby on
# those two keys does by default. Sorting on both keys makes the order
# deterministic: schemes descending, countries alphabetical within a scheme.
vaccine_schemes = (
    data_df[['vaccines', 'country']]
    .dropna()
    .drop_duplicates()
    .sort_values(['vaccines', 'country'], ascending=[False, True])
    .set_index(['vaccines', 'country'])
)
df = vaccine_schemes  # keep the original variable name available

# Temporarily lift the row limit so the complete table is rendered.
with pd.option_context(
    'display.max_rows', None,
    'display.expand_frame_repr', True,
):
    display(vaccine_schemes)

#### Observations:
- Countries are using different combinations of vaccines
- The vaccine which is most popular amongst countries is Oxford/AstraZeneca.

#### Which country has vaccinated the most people?
Let us analyse 'total_vaccinations' column with country

In [None]:
# Highest cumulative 'total_vaccinations' value reported per country, ranked
# descending; keep the 50 largest for the chart and the first five for later cells.
countries = (
    data_df.groupby('country')['total_vaccinations']
    .max()
    .sort_values(ascending=False)
    .head(50)
)
top_5 = countries[:5]

country_fig, country_ax = plt.subplots(figsize=(20, 15))
countries.plot(kind='barh', ax=country_ax)
# barh draws the first row at the bottom; flip the axis so the leader is on top.
country_ax.invert_yaxis()
country_ax.set(
    title='Top 50 countries by total vaccinations',
    xlabel='Total vaccinations (cumulative)',
    ylabel='Country',
);

In [None]:
# Names of the five countries with the highest total vaccinations.
top_country_names = top_5.index.values
print(top_country_names)


In [None]:
# For every vaccine combination take the largest 'total_vaccinations' value found
# in its rows, rank descending, keep the top 50 and express it in units of 10,000.
vaccine_df = (
    data_df.groupby('vaccines')['total_vaccinations']
    .max()
    .sort_values(ascending=False)
    .head(50)
    .div(10000)
)

In [None]:
# Keep the five highest-ranked vaccine combinations as a two-column frame
# ('vaccines', 'total_vaccinations'). The isinstance guard keeps this cell
# idempotent: on a re-run vaccine_df is already that frame, and a second
# reset_index() would add a stray 'index' column.
if isinstance(vaccine_df, pd.Series):
    vaccine_df = vaccine_df[:5].reset_index()

# Bar chart of those five combinations.
scheme_fig, scheme_ax = plt.subplots(figsize=(15, 9))
sns.barplot(data=vaccine_df, x='vaccines', y='total_vaccinations', ax=scheme_ax)
scheme_ax.set(
    title='Top five vaccine combinations by highest cumulative total vaccinations reported',
    xlabel=' ',
    # vaccine_df was divided by 10,000 when it was built, so the axis is not in millions.
    ylabel='Total vaccinations (x 10,000)',
)
plt.xticks(rotation=90);

In [None]:
# Display the current contents of vaccine_df.
vaccine_df

In [None]:
## Top vaccine combinations are:
# vaccine_df went through reset_index() earlier, so its index is just 0..4;
# the names live in the 'vaccines' column.
vaccine_df[:5]['vaccines'].values

In [None]:
## Choropleth map of the highest 'total_vaccinations' value reported per country
vacc_data = (
    data_df.groupby('country')[['total_vaccinations']]
    .max()
    .sort_values(by='total_vaccinations', ascending=False)
)
choropleth_options = dict(
    locations=vacc_data.index,
    locationmode='country names',
    color='total_vaccinations',
    title='Total Vaccinated Population',
    labels={'total_vaccinations': "No of Vaccinated Population"},
    color_continuous_scale='sunset',
)
fig = px.choropleth(vacc_data, **choropleth_options)
fig.show('notebook')

It is interesting to see that the USA has vaccinated more people than India, which has a much larger 
population than the USA.

### Which country has immunized the largest percentage of its population?

In [None]:
# Preview the first rows of data_df again before the per-hundred analysis.
data_df.head()

#### Visualising world trend on map first

In [None]:
## Choropleth map of total vaccinations per hundred people; the vaccines used are shown on hover
# Per country, take the maximum of each selected column and rank by the per-hundred value.
vacc_data_per100 = (
    data_df[['country', 'total_vaccinations_per_hundred', 'vaccines']]
    .groupby('country')
    .max()
    .sort_values('total_vaccinations_per_hundred', ascending=False)
)
fig = px.choropleth(
    data_frame=vacc_data_per100,
    locations=vacc_data_per100.index,
    locationmode='country names',
    color='total_vaccinations_per_hundred',
    hover_name='vaccines',
    title='Total vaccinations per hundred people',
    # The labels key must match the plotted column name; the previous key
    # ('total_vaccinations') matched nothing, so the label was never applied.
    labels={'total_vaccinations_per_hundred': 'Total vaccinations per 100 people'},
    color_continuous_scale='sunset',
)
fig.show('notebook')

In [None]:
# Highest 'total_vaccinations_per_hundred' value per country, ranked descending
# (top 50 kept), then the first five as a two-column frame for plotting.
country_immunised_per100 = (
    data_df.groupby('country')['total_vaccinations_per_hundred']
    .max()
    .sort_values(ascending=False)
    .head(50)
)
country_immunised = country_immunised_per100.reset_index()
country_immunised = country_immunised[0:5]

# Bar chart of those five countries. The metric is a per-hundred rate, so the
# title and axis label describe a rate rather than a count in millions.
per100_fig, per100_ax = plt.subplots(figsize=(15, 9))
sns.barplot(
    data=country_immunised,
    x='country',
    y='total_vaccinations_per_hundred',
    ax=per100_ax,
)
per100_ax.set(
    title='Top five countries by total vaccinations per hundred people',
    xlabel=' ',
    ylabel='Total vaccinations per hundred people',
);

- We see that countries like Canada, the USA, Chile and the UK are among those that have vaccinated the largest share of their population. These countries use vaccines like AstraZeneca, Pfizer and Moderna.
- Other European countries come next in the list.
- African countries have the smallest share of their population vaccinated.

### Let's closely check the percentage of vaccinations per 100

Gibraltar, Seychelles, Falkland Islands, Israel, United Arab Emirates have vaccinated largest 
percent of people. It is more than countries like USA, Canada, UK.


- We see that countries like Canada, the USA, Chile and the UK are among those that have vaccinated the largest share of their population. These countries use vaccines like AstraZeneca, Pfizer and Moderna.
- Other European countries come next in the list.
- African countries have the smallest share of their population vaccinated.