In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and echo the full path of every file found there.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict

In [None]:
# Load country_vaccinations.csv from the Kaggle input mount into `data`,
# the frame that the plotting and slicing helpers below read as a global.
data = pd.read_csv("/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv")

In [None]:
# (rows, columns) of the loaded frame.
data.shape

In [None]:
# Preview the first rows to see which columns are available.
data.head()

In [None]:
# Distinct values of the `country` column, in order of first appearance.
# getDataSclice() iterates over this global.
CountryList = data['country'].unique()

In [None]:
# Show the country names so the ones passed to the plotting helpers can be spelled exactly.
print(CountryList)

In [None]:
def plotData(countries, colName):
    """Plot one column of the global ``data`` frame over time, one line per country.

    Parameters
    ----------
    countries : iterable of str
        Values matched against ``data['country']``; each one becomes a
        labelled line in the figure.
    colName : str
        Column of ``data`` drawn on the y-axis. It is also used verbatim as
        the y-axis label and as the prefix of the title.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(20, 5))
    for country in countries:
        # Build a fresh per-country frame instead of editing a filtered slice
        # of `data` in place.
        dataCountry = data.loc[data['country'] == country, ['date', colName]]
        dataCountry = dataCountry.set_index('date')
        dataCountry.index = pd.to_datetime(dataCountry.index)
        # Carry the last reported value forward over missing rows. `.ffill()`
        # replaces `fillna(method='ffill')`, which pandas has deprecated.
        dataCountry = dataCountry.ffill()
        ax.plot(dataCountry.index, dataCountry[colName], label=country, marker='.')

    # Rotate through tick_params so the setting also applies to tick labels
    # that matplotlib creates at draw time.
    ax.tick_params(axis='x', labelrotation=90)
    ax.grid()
    ax.legend()
    ax.set_xlabel("Date")
    ax.set_ylabel(colName)
    ax.set_title(colName + " of different countries")
    plt.show()
    

# Total Vaccination + People Vaccinated + People Fully Vaccinated

In [None]:
# total_vaccinations over time for the five countries compared throughout this notebook.
plotData(['United States', 'India', 'United Kingdom', 'Germany', 'China'], 'total_vaccinations')

In [None]:
# people_vaccinated over time for the same five countries.
plotData(['United States', 'India', 'United Kingdom', 'Germany', 'China'], 'people_vaccinated')

In [None]:
# people_fully_vaccinated over time for the same five countries.
plotData(['United States', 'India', 'United Kingdom', 'Germany', 'China'], 'people_fully_vaccinated')

In [None]:
# daily_vaccinations over time for the same five countries.
plotData(['United States', 'India', 'United Kingdom', 'Germany', 'China'], 'daily_vaccinations')

In [None]:
# daily_vaccinations_raw over time, for comparison with the daily_vaccinations plot above.
plotData(['United States', 'India', 'United Kingdom', 'Germany', 'China'], 'daily_vaccinations_raw')

We can see that **China** has the highest number of total vaccinations, but we don't have data for people vaccinated in **China**. Second comes the **United States of America**, in both total vaccinations and people vaccinated in the country. Third comes **India**. 

**Germany** and the **United Kingdom** are catching up, but because of their smaller populations they have fewer total vaccinations; relative to their population, these countries are doing well. 

If we look at the daily vaccination drive, we can see that, overall, it is neither fluctuating heavily nor constantly increasing. The **UK** and **Germany** have more or less stable graphs, but the graphs of the **USA**, **India** and **China** fluctuate a bit. 

We can see that the daily vaccination numbers are decreasing in **India** and the **USA**. A possible reason for **India** is the second wave of COVID-19 that hit the country. 

In [None]:
def getDataSclice(source=None, countries=None, suffix=''):
    """Return the most recent reported vaccination figures for each country.

    For every country the last non-missing value of each metric is taken, in
    row order; a metric that was never reported for a country becomes 0.

    Parameters
    ----------
    source : pandas.DataFrame, optional
        Frame holding a ``country`` column plus the three metric columns.
        Defaults to the notebook-level ``data`` frame.
    countries : iterable of str, optional
        Countries to report, in output order. Defaults to every country in
        ``source`` in order of first appearance, which is the order
        ``source['country'].unique()`` yields. A requested country with no
        rows is reported with zeros.
    suffix : str, optional
        Appended to the three metric column names, e.g. ``'_per_hundred'``.
        The default reads the absolute-count columns.

    Returns
    -------
    pandas.DataFrame
        One row per country with columns ``country``, ``Total``, ``People``
        and ``FPeople``.
    """
    frame = data if source is None else source
    column_map = {
        'total_vaccinations' + suffix: 'Total',
        'people_vaccinated' + suffix: 'People',
        'people_fully_vaccinated' + suffix: 'FPeople',
    }

    # GroupBy.last() skips missing values, so it gives the same answer as
    # forward-filling each country and then reading the final row, without
    # the deprecated fillna(method=...) and without scanning `frame` three
    # times per country. sort=False keeps the order of first appearance.
    latest = frame.groupby('country', sort=False)[list(column_map)].last()
    if countries is not None:
        latest = latest.reindex(list(countries))

    return (
        latest
        .fillna(0)  # metric never reported for this country
        .rename(columns=column_map)
        .rename_axis('country')
        .reset_index()
    )

In [None]:
# One row per country with its latest Total / People / FPeople values;
# the bar-chart cells below read from `dataSlice`.
dataSlice = getDataSclice()
dataSlice.head()

In [None]:
# Rank countries by their latest 'Total' value and keep the 20 highest and the 20 lowest.
tempData = dataSlice[['country', 'Total']]
tempDataTop = tempData.sort_values(by='Total', ascending=False).head(20)
tempDataBot = tempData.sort_values(by='Total').head(20)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(22, 10))

# Upper panel: the 20 largest values, descending from left to right.
ax1.bar(tempDataTop['country'], height=tempDataTop['Total'], color="#6ded71")
plt.setp(ax1.get_xticklabels(), rotation=90)
ax1.set_ylabel("Number of Vaccination")
ax1.set_title("Countries having highest number of vaccination")

# Lower panel: the 20 smallest values, reversed so they also descend from left to right.
ax2.bar(tempDataBot['country'].iloc[::-1], height=tempDataBot['Total'].iloc[::-1], color="#f07d73")
plt.setp(ax2.get_xticklabels(), rotation=90)
ax2.set_ylabel("Number of Vaccination")
ax2.set_title("Countries having lowest number of vaccination")
ax2.set_xlabel("Country")

fig.subplots_adjust(hspace=0.6)
plt.show()

In [None]:
# Rank countries by their latest 'People' value and keep the 20 highest and the 20 lowest.
tempData = dataSlice[['country', 'People']]
tempDataTop = tempData.sort_values(by='People', ascending=False).head(20)
tempDataBot = tempData.sort_values(by='People').head(20)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(22, 10))

# Upper panel: the 20 largest values, descending from left to right.
ax1.bar(tempDataTop['country'], height=tempDataTop['People'], color="#6ded71")
plt.setp(ax1.get_xticklabels(), rotation=90)
ax1.set_ylabel("Number of Vaccination")
ax1.set_title("Countries having highest number of people vaccinated")

# Lower panel: the 20 smallest values, reversed so they also descend from left to right.
ax2.bar(tempDataBot['country'].iloc[::-1], height=tempDataBot['People'].iloc[::-1], color="#f07d73")
plt.setp(ax2.get_xticklabels(), rotation=90)
ax2.set_ylabel("Number of Vaccination")
ax2.set_title("Countries having lowest number of people vaccinated")
ax2.set_xlabel("Country")

fig.subplots_adjust(hspace=0.6)
plt.show()

In [None]:
# Rank countries by their latest 'FPeople' value and keep the 20 highest and the 20 lowest.
tempData = dataSlice[['country', 'FPeople']]
tempDataTop = tempData.sort_values(by='FPeople', ascending=False).head(20)
tempDataBot = tempData.sort_values(by='FPeople').head(20)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(22, 10))

# Upper panel: the 20 largest values, descending from left to right.
ax1.bar(tempDataTop['country'], height=tempDataTop['FPeople'], color="#6ded71")
plt.setp(ax1.get_xticklabels(), rotation=90)
ax1.set_ylabel("Number of Vaccination")
ax1.set_title("Countries having highest number of fully vaccinated people")

# Lower panel: the 20 smallest values, reversed so they also descend from left to right.
ax2.bar(tempDataBot['country'].iloc[::-1], height=tempDataBot['FPeople'].iloc[::-1], color="#f07d73")
plt.setp(ax2.get_xticklabels(), rotation=90)
ax2.set_ylabel("Number of Vaccination")
ax2.set_title("Countries having lowest number of fully vaccinated people")
ax2.set_xlabel("Country")

fig.subplots_adjust(hspace=0.6)
plt.show()

In the plot above we have a lot of zeros because we don't have data for these countries, or their values are NaN in the given data.

In [None]:
# Every country's latest 'Total', largest first, indexed by country name.
tempData = (
    dataSlice[['country', 'Total']]
    .sort_values(by='Total', ascending=False)
    .set_index('country', drop=True)
)

countries=['United States', 'India', 'United Kingdom', 'Germany', 'China']

ax_1 = tempData['Total'].plot(kind='bar', title ="Total Vaccination", figsize=(22, 6), fontsize=12)
ax_1.set_xlabel("Country Name", fontsize=12)
for ticks in ax_1.xaxis.get_major_ticks():
    # Map the tick's label text back to the bar drawn for that country.
    tick_country = ticks.label1.get_text()
    country_bar = ax_1.patches[tempData.index.get_indexer([tick_country])[0]]
    if tick_country in countries:
        # Focus countries keep their label and get a red bar.
        country_bar.set_facecolor('r')
    else:
        # Everything else is de-emphasised: hidden label, white bar, grey outline.
        ticks.label1.set_visible(False)
        country_bar.set_facecolor('w')
        country_bar.set_edgecolor('#c7c3c3')
        

In [None]:
# Every country's latest 'People', largest first, indexed by country name.
tempData = (
    dataSlice[['country', 'People']]
    .sort_values(by='People', ascending=False)
    .set_index('country', drop=True)
)

countries=['United States', 'India', 'United Kingdom', 'Germany', 'China']

ax_1 = tempData['People'].plot(kind='bar', title ="People Vaccinated", figsize=(22, 6), fontsize=12)
ax_1.set_xlabel("Country Name", fontsize=12)
for ticks in ax_1.xaxis.get_major_ticks():
    # Map the tick's label text back to the bar drawn for that country.
    tick_country = ticks.label1.get_text()
    country_bar = ax_1.patches[tempData.index.get_indexer([tick_country])[0]]
    if tick_country in countries:
        # Focus countries keep their label and get a red bar.
        country_bar.set_facecolor('r')
    else:
        # Everything else is de-emphasised: hidden label, white bar, grey outline.
        ticks.label1.set_visible(False)
        country_bar.set_facecolor('w')
        country_bar.set_edgecolor('#c7c3c3')
        

In [None]:
# Every country's latest 'FPeople', largest first, indexed by country name.
tempData = (
    dataSlice[['country', 'FPeople']]
    .sort_values(by='FPeople', ascending=False)
    .set_index('country', drop=True)
)

countries=['United States', 'India', 'United Kingdom', 'Germany', 'China']

ax_1 = tempData['FPeople'].plot(kind='bar', title ="People Fully Vaccinated", figsize=(22, 6), fontsize=12)
ax_1.set_xlabel("Country Name", fontsize=12)
for ticks in ax_1.xaxis.get_major_ticks():
    # Map the tick's label text back to the bar drawn for that country.
    tick_country = ticks.label1.get_text()
    country_bar = ax_1.patches[tempData.index.get_indexer([tick_country])[0]]
    if tick_country in countries:
        # Focus countries keep their label and get a red bar.
        country_bar.set_facecolor('r')
    else:
        # Everything else is de-emphasised: hidden label, white bar, grey outline.
        ticks.label1.set_visible(False)
        country_bar.set_facecolor('w')
        country_bar.set_edgecolor('#c7c3c3')
        

These plots convey the same things as the plots in the first section. These graphs also show the position of each country in the world. We can see that there are two countries between the **UK** and **Germany**, and that the **USA** and **India** are way ahead of all other countries in terms of people vaccinated. 

In [None]:
def plotDataComparision(country):
    """Plot total, first-dose and fully-vaccinated counts over time for one country.

    Reads the notebook-level ``data`` frame, keeps the rows whose ``country``
    equals `country`, forward-fills gaps in each of the three series and
    draws them against the parsed ``date`` column on a single axes.

    Parameters
    ----------
    country : str
        Value matched against ``data['country']``; also shown in the title.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    # (column in `data`, legend label, line colour) for each plotted series.
    series_styles = [
        ('total_vaccinations', 'Total Vaccination', '#ff85e4'),
        ('people_vaccinated', 'People Vaccinated', '#5690f5'),
        ('people_fully_vaccinated', 'People Fully Vaccinated', '#c94dff'),
    ]
    metric_columns = [column for column, _, _ in series_styles]

    # Filter once and give all three series the same date index. The earlier
    # version built three frames and assigned the people_vaccinated index to
    # the fully-vaccinated frame; one shared index removes that mix-up.
    history = data.loc[data['country'] == country, ['date'] + metric_columns]
    history = history.set_index('date')
    history.index = pd.to_datetime(history.index)
    # Carry the last reported value forward over missing rows. `.ffill()`
    # replaces `fillna(method='ffill')`, which pandas has deprecated.
    history = history.ffill()

    fig, ax = plt.subplots(figsize=(20, 5))
    for column, label, color in series_styles:
        ax.plot(history.index, history[column], label=label, marker='.', color=color)

    # Rotate through tick_params so the setting also applies to tick labels
    # that matplotlib creates at draw time.
    ax.tick_params(axis='x', labelrotation=90)
    ax.grid()
    ax.legend()
    ax.set_xlabel("Date")
    ax.set_ylabel('Number of Vaccination')
    ax.set_title(f"Total Vaccination Vs People Vaccinated of {country}")
    plt.show()

In [None]:
# plotDataComparision draws the figure and returns nothing, so its result is not bound to a name.
plotDataComparision('India')

In [None]:
# Same three-series comparison for the United States.
plotDataComparision('United States')

In [None]:
# Same three-series comparison for the United Kingdom.
plotDataComparision('United Kingdom')

In [None]:
# Same three-series comparison for Germany.
plotDataComparision('Germany')

# Total Vaccination per hundred + People vaccinated per hundred

In [None]:
# total_vaccinations_per_hundred over time for the five focus countries.
plotData(['United States', 'India', 'United Kingdom', 'Germany', 'China'], 'total_vaccinations_per_hundred')

In [None]:
# people_vaccinated_per_hundred over time for the five focus countries.
plotData(['United States', 'India', 'United Kingdom', 'Germany', 'China'], 'people_vaccinated_per_hundred')

In [None]:
# people_fully_vaccinated_per_hundred over time for the five focus countries.
plotData(['United States', 'India', 'United Kingdom', 'Germany', 'China'],'people_fully_vaccinated_per_hundred')

In [None]:
def getDataSclice(source=None, countries=None, suffix='_per_hundred'):
    """Return the most recent per-hundred vaccination figures for each country.

    NOTE: this rebinds ``getDataSclice``; from this cell onwards the name
    refers to this per-hundred version, not the helper of the same name
    defined earlier in the notebook.

    For every country the last non-missing value of each metric is taken, in
    row order; a metric that was never reported for a country becomes 0.

    Parameters
    ----------
    source : pandas.DataFrame, optional
        Frame holding a ``country`` column plus the three metric columns.
        Defaults to the notebook-level ``data`` frame.
    countries : iterable of str, optional
        Countries to report, in output order. Defaults to every country in
        ``source`` in order of first appearance, which is the order
        ``source['country'].unique()`` yields. A requested country with no
        rows is reported with zeros.
    suffix : str, optional
        Appended to ``total_vaccinations``, ``people_vaccinated`` and
        ``people_fully_vaccinated`` to form the column names that are read.

    Returns
    -------
    pandas.DataFrame
        One row per country with columns ``country``, ``Total``, ``People``
        and ``FPeople``.
    """
    frame = data if source is None else source
    column_map = {
        'total_vaccinations' + suffix: 'Total',
        'people_vaccinated' + suffix: 'People',
        'people_fully_vaccinated' + suffix: 'FPeople',
    }

    # GroupBy.last() skips missing values, so it gives the same answer as
    # forward-filling each country and then reading the final row, without
    # the deprecated fillna(method=...) and without scanning `frame` three
    # times per country. sort=False keeps the order of first appearance.
    latest = frame.groupby('country', sort=False)[list(column_map)].last()
    if countries is not None:
        latest = latest.reindex(list(countries))

    return (
        latest
        .fillna(0)  # metric never reported for this country
        .rename(columns=column_map)
        .rename_axis('country')
        .reset_index()
    )

In [None]:
# Rebuild `dataSlice` with the getDataSclice defined in the cell above, which reads the
# *_per_hundred columns. This overwrites the earlier `dataSlice`, so the charts below
# show per-hundred values.
dataSlice = getDataSclice()
dataSlice.head()

In [None]:
# `dataSlice` holds per-hundred values at this point (it was rebuilt from the
# *_per_hundred columns), so the labels describe rates rather than counts.
tempData = dataSlice[['country', 'Total']]
tempDataTop = tempData.sort_values(by='Total', ascending=False).head(20)
tempDataBot = tempData.sort_values(by='Total').head(20)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(22, 10))

# Upper panel: the 20 largest values, descending from left to right.
ax1.bar(tempDataTop['country'], height=tempDataTop['Total'], color="#6ded71")
plt.setp(ax1.get_xticklabels(), rotation=90)
ax1.set_ylabel("Vaccinations per hundred people")
ax1.set_title("Countries having highest number of vaccinations per hundred people")

# Lower panel: the 20 smallest values, reversed so they also descend from left to right.
ax2.bar(tempDataBot['country'].iloc[::-1], height=tempDataBot['Total'].iloc[::-1], color="#f07d73")
plt.setp(ax2.get_xticklabels(), rotation=90)
ax2.set_ylabel("Vaccinations per hundred people")
ax2.set_title("Countries having lowest number of vaccinations per hundred people")
ax2.set_xlabel("Country")

fig.subplots_adjust(hspace=0.6)
plt.show()

In [None]:
# `dataSlice` holds per-hundred values at this point (it was rebuilt from the
# *_per_hundred columns), so the labels describe rates rather than counts.
tempData = dataSlice[['country', 'People']]
tempDataTop = tempData.sort_values(by='People', ascending=False).head(20)
tempDataBot = tempData.sort_values(by='People').head(20)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(22, 10))

# Upper panel: the 20 largest values, descending from left to right.
ax1.bar(tempDataTop['country'], height=tempDataTop['People'], color="#6ded71")
plt.setp(ax1.get_xticklabels(), rotation=90)
ax1.set_ylabel("People vaccinated per hundred")
ax1.set_title("Countries having highest number of people vaccinated per hundred")

# Lower panel: the 20 smallest values, reversed so they also descend from left to right.
ax2.bar(tempDataBot['country'].iloc[::-1], height=tempDataBot['People'].iloc[::-1], color="#f07d73")
plt.setp(ax2.get_xticklabels(), rotation=90)
ax2.set_ylabel("People vaccinated per hundred")
ax2.set_title("Countries having lowest number of people vaccinated per hundred")
ax2.set_xlabel("Country")

fig.subplots_adjust(hspace=0.6)
plt.show()

In [None]:
# `dataSlice` holds per-hundred values at this point (it was rebuilt from the
# *_per_hundred columns), so the labels describe rates rather than counts.
tempData = dataSlice[['country', 'FPeople']]
tempDataTop = tempData.sort_values(by='FPeople', ascending=False).head(20)
tempDataBot = tempData.sort_values(by='FPeople').head(20)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(22, 10))

# Upper panel: the 20 largest values, descending from left to right.
ax1.bar(tempDataTop['country'], height=tempDataTop['FPeople'], color="#6ded71")
plt.setp(ax1.get_xticklabels(), rotation=90)
ax1.set_ylabel("People fully vaccinated per hundred")
ax1.set_title("Countries having highest number of fully vaccinated people per hundred")

# Lower panel: the 20 smallest values, reversed so they also descend from left to right.
ax2.bar(tempDataBot['country'].iloc[::-1], height=tempDataBot['FPeople'].iloc[::-1], color="#f07d73")
plt.setp(ax2.get_xticklabels(), rotation=90)
ax2.set_ylabel("People fully vaccinated per hundred")
ax2.set_title("Countries having lowest number of fully vaccinated people per hundred")
ax2.set_xlabel("Country")

fig.subplots_adjust(hspace=0.6)
plt.show()

In [None]:
# Every country's latest 'Total', largest first, indexed by country name.
# `dataSlice` holds per-hundred values at this point, so the title says so.
tempData = (
    dataSlice[['country', 'Total']]
    .sort_values(by='Total', ascending=False)
    .set_index('country', drop=True)
)

countries=['United States', 'India', 'United Kingdom', 'Germany', 'China']

ax_1 = tempData['Total'].plot(kind='bar', title="Total Vaccination per Hundred", figsize=(22, 6), fontsize=12)
ax_1.set_xlabel("Country Name", fontsize=12)
for ticks in ax_1.xaxis.get_major_ticks():
    # Resolve the bar once per tick instead of repeating the index lookup.
    tick_country = ticks.label1.get_text()
    country_bar = ax_1.patches[tempData.index.get_indexer([tick_country])[0]]
    if tick_country in countries:
        # Focus countries keep their label and get a red bar.
        country_bar.set_facecolor('r')
    else:
        # Everything else is de-emphasised: hidden label, white bar, grey outline.
        ticks.label1.set_visible(False)
        country_bar.set_facecolor('w')
        country_bar.set_edgecolor('#c7c3c3')
        

In [None]:
# Every country's latest 'People', largest first, indexed by country name.
# `dataSlice` holds per-hundred values at this point, so the title says so.
tempData = (
    dataSlice[['country', 'People']]
    .sort_values(by='People', ascending=False)
    .set_index('country', drop=True)
)

countries=['United States', 'India', 'United Kingdom', 'Germany', 'China']

ax_1 = tempData['People'].plot(kind='bar', title="People Vaccinated per Hundred", figsize=(22, 6), fontsize=12)
ax_1.set_xlabel("Country Name", fontsize=12)
for ticks in ax_1.xaxis.get_major_ticks():
    # Resolve the bar once per tick instead of repeating the index lookup.
    tick_country = ticks.label1.get_text()
    country_bar = ax_1.patches[tempData.index.get_indexer([tick_country])[0]]
    if tick_country in countries:
        # Focus countries keep their label and get a red bar.
        country_bar.set_facecolor('r')
    else:
        # Everything else is de-emphasised: hidden label, white bar, grey outline.
        ticks.label1.set_visible(False)
        country_bar.set_facecolor('w')
        country_bar.set_edgecolor('#c7c3c3')
        

In [None]:
# Every country's latest 'FPeople', largest first, indexed by country name.
# `dataSlice` holds per-hundred values at this point, so the title says so.
tempData = (
    dataSlice[['country', 'FPeople']]
    .sort_values(by='FPeople', ascending=False)
    .set_index('country', drop=True)
)

countries=['United States', 'India', 'United Kingdom', 'Germany', 'China']

ax_1 = tempData['FPeople'].plot(kind='bar', title="People Fully Vaccinated per Hundred", figsize=(22, 6), fontsize=12)
ax_1.set_xlabel("Country Name", fontsize=12)
for ticks in ax_1.xaxis.get_major_ticks():
    # Resolve the bar once per tick instead of repeating the index lookup.
    tick_country = ticks.label1.get_text()
    country_bar = ax_1.patches[tempData.index.get_indexer([tick_country])[0]]
    if tick_country in countries:
        # Focus countries keep their label and get a red bar.
        country_bar.set_facecolor('r')
    else:
        # Everything else is de-emphasised: hidden label, white bar, grey outline.
        ticks.label1.set_visible(False)
        country_bar.set_facecolor('w')
        country_bar.set_edgecolor('#c7c3c3')
        

In [None]:
# Look at the available columns again; the next section plots daily_vaccinations_per_million.
data.head()

# Daily Vaccinations Per Million

In [None]:
# daily_vaccinations_per_million over time for the five focus countries.
plotData(['United States', 'India', 'United Kingdom', 'Germany', 'China'],'daily_vaccinations_per_million')