# COVID-19 vaccination data analysis

In [None]:
# Import necessary libraries
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import geopandas as gpd 
import seaborn as sns
# Apply seaborn's 'darkgrid' style and 'notebook' context to the plots drawn below
sns.set_style('darkgrid')
sns.set_context('notebook')

# 1. Read the data set

In [None]:
# Load the vaccination CSV into a DataFrame and preview its first rows
vaccinations_csv = '../input/covid-world-vaccination-progress/country_vaccinations.csv'
country_vaccinations = pd.read_csv(vaccinations_csv)
country_vaccinations.head()

# 2. Examine the data set

In [None]:
# List the column labels available in the data set
column_labels = country_vaccinations.columns
print(column_labels)

In [None]:
# Size of the data set as a (rows, columns) pair
n_rows, n_cols = country_vaccinations.shape
(n_rows, n_cols)

In [None]:
# Count the missing values in each column.
# Observation recorded by the original analysis: every column other than
# country, date and vaccines contains missing values.
missing_per_column = country_vaccinations.isnull().sum()
missing_per_column

# 3. Clean the data set

In [None]:
# source_name and source_website are not needed for this analysis, so remove them
unused_columns = ['source_name', 'source_website']
country_vaccinations = country_vaccinations.drop(columns=unused_columns)
country_vaccinations.head()

In [None]:
# Number of rows recorded for each value of the country column
rows_per_country = country_vaccinations.value_counts(subset='country')
rows_per_country

In [None]:
# Distinct names that appear in the country column
country_vaccinations['country'].unique()

In [None]:
# England, Scotland, Wales and Northern Ireland are parts of the UK;
# drop their rows so that only the UK-level rows are kept
uk_constituents = ['England', 'Scotland', 'Wales', 'Northern Ireland']
is_uk_constituent = country_vaccinations['country'].isin(uk_constituents)
index_names = country_vaccinations.loc[is_uk_constituent].index
country_vaccinations = country_vaccinations.drop(index=index_names)

In [None]:
# Show the current dtype of the date column (object before it is converted to datetime)
print(country_vaccinations['date'].dtype)

In [None]:
# Parse the date column into pandas datetime values, then confirm the new dtype
country_vaccinations['date'] = pd.to_datetime(country_vaccinations['date'])
print(country_vaccinations['date'].dtype)

# 4. Perform data analysis
         

The following analysis will help us answer questions such as:
* Which vaccine(s) is each country using?
* In which countries is the vaccination program most advanced?
* Where are the most people vaccinated per day?

In [None]:
# Distinct entries of the vaccines column reported for each country
vaccines_per_country = country_vaccinations.groupby('country')['vaccines'].unique()
country_by_vaccine = vaccines_per_country.to_frame()
country_by_vaccine

In [None]:
# Highest people_fully_vaccinated value reported for each ISO code
fully_vaccinated = (
    country_vaccinations
    .groupby('iso_code', as_index=False)['people_fully_vaccinated']
    .max()
)

# Keep the five ISO codes with the largest values and draw them as a bar chart
fully_vaccinated_sort = fully_vaccinated.sort_values(
    by='people_fully_vaccinated', ascending=False
).head(5)
chart1 = sns.catplot(
    data=fully_vaccinated_sort,
    x='iso_code',
    y='people_fully_vaccinated',
    kind='bar',
)
plt.show()

In [None]:
# Load the low-resolution Natural Earth world map that ships with GeoPandas.
# NOTE(review): gpd.datasets was deprecated in GeoPandas 0.14 and removed in 1.0;
# on newer versions pass gpd.read_file the path of a downloaded Natural Earth file.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Join the map with the per-ISO-code fully-vaccinated figures. merge() defaults
# to an inner join, so only map rows with a matching iso_code are kept.
merged_df1 = world.merge(fully_vaccinated, left_on='iso_a3', right_on='iso_code')

# Replace missing counts with 0. The result is assigned back to the column
# because calling fillna(..., inplace=True) on a column selection is chained
# assignment: it warns on recent pandas and has no effect under Copy-on-Write.
merged_df1['people_fully_vaccinated'] = merged_df1['people_fully_vaccinated'].fillna(0)
merged_df1.head()

In [None]:
# Draw a choropleth of people_fully_vaccinated on the world map
fig, ax = plt.subplots(1, figsize=(15, 8))
ax.axis('off')  # hide the axis frame and ticks around the map
# Title spelling corrected: the map type is a "choropleth"
ax.set_title('Choropleth map depicting people fully vaccinated in various countries')
merged_df1.plot(
    column='people_fully_vaccinated',
    cmap='viridis',
    edgecolor='black',
    legend=True,
    ax=ax,
)


In [None]:
# Highest total_vaccinations value reported for each country
total_vaccinations = (
    country_vaccinations
    .groupby('country', as_index=False)['total_vaccinations']
    .max()
)

# Five countries with the largest values, shown as a bar chart
total_vaccinations_sort = total_vaccinations.sort_values(
    by='total_vaccinations', ascending=False
).head(5)
chart2 = sns.catplot(
    data=total_vaccinations_sort,
    x='country',
    y='total_vaccinations',
    kind='bar',
)

In [None]:
# Highest daily_vaccinations value reported for each country
vaccinations_per_day = (
    country_vaccinations
    .groupby('country', as_index=False)['daily_vaccinations']
    .max()
)

# Five countries with the largest values, shown as a bar chart
vacc_per_day_sorted = vaccinations_per_day.sort_values(
    by='daily_vaccinations', ascending=False
).head(5)
sns.catplot(
    data=vacc_per_day_sorted,
    x='country',
    y='daily_vaccinations',
    kind='bar',
)

***Analysis of data for India***

In [None]:
# Keep only the rows whose country is India
is_india = country_vaccinations['country'] == 'India'
India_stats = country_vaccinations.loc[is_india]
India_stats.head()

In [None]:
# Helper that draws a single labelled line chart
def plot_line(x, y, color, xlabel, ylabel, title):
    """Plot y against x as a line on a new 10 x 5 inch figure.

    x and y are handed directly to Axes.plot, color sets the line colour,
    and xlabel, ylabel and title label the chart. The x tick labels are
    rotated by 90 degrees. Nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    # Rotate the x tick labels before plotting, as the pyplot xticks call did
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(90)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.plot(x, y, color=color)
In [None]:
# Line chart of total_vaccinations against date for India
plot_line(
    x=India_stats['date'],
    y=India_stats['total_vaccinations'],
    color='blue',
    xlabel='Date',
    ylabel='Total vaccinations',
    title='Total vaccinations date-wise trend in India',
)

In [None]:
# Line chart of people_fully_vaccinated against date for India
plot_line(
    x=India_stats['date'],
    y=India_stats['people_fully_vaccinated'],
    color='green',
    xlabel='Date',
    ylabel='People fully vaccinated',
    title='People fully vaccinated - date wise trend in India',
)

In [None]:
# Line chart of daily_vaccinations against date for India
plot_line(
    x=India_stats['date'],
    y=India_stats['daily_vaccinations'],
    color='brown',
    xlabel='Date',
    ylabel='Daily vaccinations',
    title='Daily vaccinations date wise trend in India',
)