In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

import warnings 
# Silence every warning for the rest of the session so library warnings do not clutter cell output.
# NOTE(review): this blanket filter also hides deprecation and dtype warnings — consider narrowing it.
warnings.filterwarnings('ignore')

In [None]:
# Source CSVs from the Kaggle dataset attached to this notebook.
VACCINE_CSV = '/kaggle/input/covid-vaccination-india-district-wise-data/vaccine_doses_statewise.csv'
POPULATION_CSV = '/kaggle/input/covid-vaccination-india-district-wise-data/india_state_wise_projected_population_2021.csv'

vaccine_df = pd.read_csv(VACCINE_CSV)
population_df = pd.read_csv(POPULATION_CSV)

# Show the last rows so any trailing non-state rows in the vaccine table are visible.
vaccine_df.tail()

In [None]:
# Remove the rows labelled 36 and 37 before any per-state arithmetic.
# NOTE(review): presumably these are aggregate / non-state rows — confirm against vaccine_df.tail().
vaccine_df = vaccine_df.drop(index=[36, 37])

# Snapshot of the cleaned table taken before the derived columns below are added;
# the percentage and daily-growth cells later start again from this copy.
dataset = vaccine_df.copy()

# NOTE(review): this assignment aligns on the row index, not on the state name, so it
# assumes both CSVs list the states in the same order — confirm.
vaccine_df['Total Population'] = population_df['Total Population(Projected 20201)']

# The '10/07/2021' column is used as the overall total for each state.
# NOTE(review): the source file is named vaccine_doses_statewise.csv, so this may count
# doses administered rather than people vaccinated — confirm before reading it as "% vaccinated".
vaccine_df = vaccine_df.rename(columns={'10/07/2021': 'Total Vaccinated'})
vaccine_df['% Vaccinated'] = 100 * vaccine_df['Total Vaccinated'] / vaccine_df['Total Population']


## VISUALIZATION

In [None]:
# Mark each state/UT whose % of population vaccinated is below the national average
# (True = below average, False otherwise).
# The national average is total vaccinated over total population across all rows,
# i.e. it is population-weighted rather than the mean of the per-state percentages.
average = vaccine_df['Total Vaccinated'].sum() * 100 / vaccine_df['Total Population'].sum()

# Vectorised comparison; rows with a missing percentage compare as False.
below_average = vaccine_df['% Vaccinated'] < average

# Row labels of the below-average states/UTs.
states = vaccine_df.index[below_average.to_numpy()]

# Positional (0..n-1) boolean Series that the bar-chart cell maps to bar colours.
temp = pd.Series(below_average.tolist())

In [None]:
# Bar chart of % vaccinated per state/UT: dark red bars are below the national
# average (dashed green line), grey bars are at or above it.
fig, ax = plt.subplots(figsize=(16, 9))
bar_colors = temp.map({True: '#8C000F', False: '#929591'})
ax.bar(vaccine_df['State'], vaccine_df['% Vaccinated'], color=bar_colors)
ax.axhline(average, color='#008000', lw=5, ls='--', label='National Average')
ax.tick_params(axis='x', labelrotation=90)  # state names are long; show them vertically
ax.set_title('States with Lower Vaccination Rate than National Average')
ax.set_xlabel("")
ax.set_ylabel("% of Population Vaccinated")
ax.legend(loc='best');


**Now let's convert the raw figures into a percentage of each state's population, since only then can we compare all the states irrespective of their population.**

In [None]:
# Start again from the cleaned table, which has no derived columns yet.
df = dataset.copy()

# Every numeric column at this point is treated as a per-date column.
date_cols = df.select_dtypes(np.number).columns.tolist()

# NOTE(review): aligned on the row index, not on state name — assumes both CSVs
# list the states in the same order; confirm.
df['Population'] = population_df['Total Population(Projected 20201)']

# Express every per-date figure as a percentage of that row's population.
df[date_cols] = df[date_cols].mul(100).div(df['Population'], axis=0)
    

In [None]:
# Reshape to one row per date and one column per state/UT, then discard the helper
# 'Population' row. The index is named 'index', matching what reset_index/set_index produced.
df = (
    df.set_index('State')
      .T
      .drop('Population')
      .rename_axis('index')
)

**It would be messy to plot all 36 states and UTs together, so we will plot them in separate groups.**

## TOP STATES

In [None]:
# Select the 5 states/UTs with the highest % of population vaccinated on the snapshot date.
# The ranking is read straight from that date's row, so the `temp` mask used by the
# bar-chart cell is no longer overwritten here.
as_of = '10/07/2021'
states = df.loc[as_of].sort_values(ascending=False).index[:5]

# These two settings are global: they also restyle every figure drawn after this cell.
plt.rcParams['font.size'] = 15
sns.set_style('whitegrid')

ax = df[states].plot(figsize=(15, 8))
ax.set_title(f"Top 5 States/UTs with Maximum % of Population Vaccinated as of {as_of} ")
ax.set_xlabel('')
ax.set_ylabel('% Population Vaccinated');

## SEVEN SISTERS OF NE

In [None]:
# The seven north-eastern states plotted together.
states = ['Tripura', 'Nagaland', 'Mizoram', 'Meghalaya', 'Manipur', 'Assam', 'Arunachal Pradesh']

# One line per state: % of population vaccinated over time.
ax = df[states].plot(figsize=(15, 8))
ax.set_title('% Population Vaccinated in NE States')
ax.set_ylabel('% Population Vaccinated')
ax.set_xlabel('');


## SOUTHERN STATES

In [None]:
# The five southern states plotted together.
states = ['Andhra Pradesh', 'Karnataka', 'Kerala', 'Tamil Nadu', 'Telangana']

# One line per state: % of population vaccinated over time.
ax = df[states].plot(figsize=(15, 8))
ax.set_title('% Population Vaccinated in Southern States')
ax.set_ylabel('% Population Vaccinated')
ax.set_xlabel('');

## ALL UTs

In [None]:
# The eight union territories plotted together.
ut = ["Andaman and Nicobar Islands", 'Dadra and Nagar Haveli and Daman and Diu', 'Delhi',
      'Jammu and Kashmir', 'Ladakh', 'Lakshadweep', 'Chandigarh', 'Puducherry']

# One line per UT: % of population vaccinated over time.
ax = df[ut].plot(figsize=(15, 8))
ax.set_title('% Population Vaccinated in Union Territories')
ax.set_ylabel('% Population Vaccinated')
ax.set_xlabel('');

## NORTHERN STATES

In [None]:
# States grouped as "northern" for this notebook.
states = ['Uttar Pradesh', 'Uttarakhand', 'Himachal Pradesh', 'Rajasthan',
          'Haryana', 'Punjab', 'Bihar', 'Sikkim']

# One line per state: % of population vaccinated over time.
ax = df[states].plot(figsize=(15, 8))
ax.set_title('% Population Vaccinated in Northern States')
ax.set_ylabel('% Population Vaccinated')
ax.set_xlabel('');

## CENTRAL INDIAN STATES

In [None]:
# States grouped as "central" for this notebook.
# NOTE(review): Chhattisgarh and West Bengal are not listed in any of the regional
# groups plotted in this notebook — confirm whether that is intentional.
states = ['Maharashtra', 'Madhya Pradesh', 'Jharkhand', 'Odisha', 'Gujarat', 'Goa']

# One line per state: % of population vaccinated over time.
ax = df[states].plot(figsize=(15, 8))
ax.set_title('% Population Vaccinated in Central Indian States')
ax.set_ylabel('% Population Vaccinated')
ax.set_xlabel('');

# DAILY GROWTH

In [None]:
# Reshape the cleaned table so that each column is a state/UT and each row is one
# of the original per-date columns.
daily_df = dataset.set_index('State').transpose()
daily_df.head()

In [None]:
# National figures: add up every state/UT column exactly once.
# Seeding the total with one state's column and then adding all state columns would
# count that state twice, so the sum is taken over the state columns directly.
state_cols = daily_df.columns.drop('Total Daily', errors='ignore')
# skipna=False keeps the NaN-propagating behaviour of plain `+` addition.
daily_df['Total Daily'] = daily_df[state_cols].sum(axis=1, skipna=False)

# Convert cumulative sums into per-day figures; the first row keeps its cumulative value
# because the shifted frame is filled with 0 there.
# NOTE: this rebinds daily_df, so re-running this cell without first re-running the cell
# that builds daily_df would difference the data a second time.
daily_df = daily_df - daily_df.shift(1).fillna(0)

# Replace the string date labels with a DatetimeIndex named 'Date' (labels are day-first).
daily_df.index = pd.to_datetime(daily_df.index, dayfirst=True).rename('Date')

In [None]:
# Grey background lines: raw per-day figures for every state/UT column.
# NOTE(review): the grey set still includes Uttar Pradesh's own raw series, and it is
# unsmoothed while the highlighted line is a 10-day rolling mean — confirm this is intended.
ax = daily_df.drop('Total Daily', axis=1).plot(figsize=(20, 12), color='#d9d2d0', legend=False, lw=5)

# Highlighted line: Uttar Pradesh, smoothed with a 10-day rolling mean, drawn on the same axes.
daily_df['Uttar Pradesh'].rolling(10).mean().plot(ax=ax, color='#0b032b', legend=False, lw=5)

ax.annotate("Uttar Pradesh's Vaccination Drive", xytext=('2021-03-01', 750000), xy=('2021-03-01', 7500),
            bbox=dict(boxstyle='round', alpha=0.5, color='maroon'),
            arrowprops=dict(arrowstyle='fancy', color='black', alpha=0.5))
ax.set_title("Vaccination Drive of Uttar Pradesh Vs Vaccination Drive of Other States")
ax.set_xlabel("")
ax.set_ylabel('Vaccinations per Day');

In [None]:
# 10-day rolling mean of the national per-day figures. Only the 'Total Daily' column is
# smoothed, and the result gets its own name so the `temp` colour mask used by the
# bar-chart cell is not overwritten.
national_trend = daily_df['Total Daily'].rolling(10).mean()

ax = national_trend.plot(ls='--', lw=3, figsize=(16, 9), label='Vaccination Drive')
# Shade the period the notebook labels as the downfall of the drive.
ax.axvspan('2021-04-01', '2021-06-15', color='maroon', alpha=0.2, label='DownFall')
ax.legend(loc='best')
ax.set_title('Vaccination Drive of India')
ax.set_xlabel('')
ax.set_ylabel("Vaccination per Day");