In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, files in os.walk('/kaggle/input'):
    for file_name in files:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime

In [3]:
# Read the COVID-19 India case CSV into a DataFrame.
covid_csv_path = '../input/covid19-in-india/covid_19_india.csv'
covid_df = pd.read_csv(covid_csv_path)

In [4]:
# Preview the first 10 rows of the case data.
covid_df.head(10)

In [5]:
# Print column names, dtypes and non-null counts for the case data.
covid_df.info()

In [6]:
# Summary statistics for the case data.
covid_df.describe()

In [7]:
# Read the state-wise vaccination CSV into a DataFrame.
vaccine_csv_path = '../input/covid19-in-india/covid_vaccine_statewise.csv'
vaccine_df = pd.read_csv(vaccine_csv_path)

In [8]:
# Summary statistics for the vaccination data.
vaccine_df.describe()

In [9]:
# Drop the columns that the rest of the analysis never reads.
# errors='ignore' keeps this cell re-runnable: once the columns are gone, a
# second execution is a no-op instead of raising KeyError.
covid_df = covid_df.drop(
    columns=['Sno', 'Time', 'ConfirmedIndianNational', 'ConfirmedForeignNational'],
    errors='ignore',
)

In [10]:
# Preview the first rows after the column drop.
covid_df.head()

Convert the Date column from object (string) to datetime

In [11]:
# Parse the 'Date' column (expected as YYYY-MM-DD) into datetime values.
parsed_dates = pd.to_datetime(covid_df['Date'], format='%Y-%m-%d')
covid_df['Date'] = parsed_dates

In [12]:
# Re-check column dtypes after the Date conversion.
covid_df.info()

In [13]:
# Active cases = confirmed cases minus the resolved ones (cured plus deaths).
covid_df['Active_Cases'] = covid_df['Confirmed'].sub(covid_df['Cured'] + covid_df['Deaths'])


In [14]:
# Preview the last rows, now including the Active_Cases column.
covid_df.tail()

In [15]:
# Number of distinct State/UnionTerritory labels present in the data.
len(covid_df['State/UnionTerritory'].unique())


Pivot Table

In [16]:
# Per-state maximum of each count column.
# The aggregation is named with the string 'max' rather than the builtin
# `max`: newer pandas emits a FutureWarning for the builtin callable and
# recommends the string to keep the current behaviour.
statewise = pd.pivot_table(
    covid_df,
    values=['Cured', 'Deaths', 'Confirmed'],
    index='State/UnionTerritory',
    aggfunc='max',
)

In [17]:
# Cured and death counts expressed as a percentage of confirmed cases.
statewise['Recovery_Rate'] = statewise['Cured'].mul(100).div(statewise['Confirmed'])
statewise['Mortality_Rate'] = statewise['Deaths'].mul(100).div(statewise['Confirmed'])

In [18]:
# Order the states from most to fewest confirmed cases.
statewise = statewise.sort_values(
    'Confirmed',
    ascending=False,
)

In [19]:
# Render the table with a background colour gradient using the 'cubehelix' colormap.
statewise.style.background_gradient(cmap = 'cubehelix')

In [20]:
# Per-state maximum of Active_Cases and of Date.
# The two columns are selected before aggregating so the remaining columns
# are not reduced only to be thrown away.
# Each column is maximised independently, so 'Date' is the latest date
# recorded for the state, not the date on which Active_Cases peaked.
top_10_active_cases = (
    covid_df
    .groupby(by='State/UnionTerritory')[['Active_Cases', 'Date']]
    .max()
)

In [21]:
# Rank the states from highest to lowest peak active-case count.
top_10_active_cases = top_10_active_cases.sort_values(
    'Active_Cases',
    ascending=False,
)


In [22]:
# Display the ranked table (every state, not only the first ten).
top_10_active_cases

In [23]:
# The ten states at the top of the active-case ranking.
top_10_active_cases.head(10)

In [24]:
# Column dtypes and non-null counts of the ranking table.
top_10_active_cases.info()

**top_10_active_cases by Month**

In [25]:
# Per-state maximum of Active_Cases and of Date; the two columns are selected
# before aggregating so the other columns are not reduced needlessly.
# NOTE(review): despite the name, nothing here groups by month; the result
# matches the per-state maxima computed for top_10_active_cases before sorting.
# Confirm whether a month-level grouping was intended.
top_10_active_cases_month = (
    covid_df
    .groupby(by=['State/UnionTerritory'])[['Active_Cases', 'Date']]
    .max()
)

**Top 10 states by deaths**

In [26]:
# Per-state maximum of the Deaths column.
# 'Deaths' is selected before aggregating so only that column is reduced.
top_10_deaths = covid_df.groupby(by=['State/UnionTerritory'])['Deaths'].max()

In [27]:
# Keep the ten largest death counts, then move the state index back into a column.
top_10_deaths = top_10_deaths.sort_values(ascending=False)
top_10_deaths = top_10_deaths.head(10).reset_index()

In [28]:
# Display the ten states with the highest death counts.
top_10_deaths

**Plotting Data**

In [29]:
# Index labels (state names) of the first ten rows of the ranking.
top_10_active_cases.iloc[:10].index

In [30]:
# Bar chart of Active_Cases for the first ten rows of the ranking,
# with the state names (the frame's index) on the x axis.
plt.figure(figsize=(20, 9))
plt.title('Top 10 active cases states', size=25)
sns.barplot(
    x=top_10_active_cases.index[:10],
    y='Active_Cases',
    data=top_10_active_cases.head(10),
)

In [31]:
# Bar chart of the ten highest per-state death counts.
fig = plt.figure(figsize=(20, 9))
plt.title('Top 10 deaths', size=25)  # fixed typo: was 'desths'
# x is given as a column name so it is taken from the same sliced frame as y,
# rather than from a separate, unsliced Series.
ax = sns.barplot(
    data=top_10_deaths.iloc[:10],
    x='State/UnionTerritory',
    y='Deaths',
    palette=sns.color_palette("pastel"),
)

In [32]:
# Rows for Maharashtra and Karnataka.
# 'Kamataka' was a misspelling, so the second state never matched any row.
covid_df[covid_df['State/UnionTerritory'].isin(['Maharashtra', 'Karnataka'])]


**Top 10 affected states in India**

In [33]:
# Line chart of Active_Cases over time, one line per state, restricted to the
# first ten states of the active-case ranking.
x = top_10_active_cases.index[:10]
fig = plt.figure(figsize=(20, 9))
plt.title('Top 10 Affected States in India', size=25)
sns.lineplot(
    x='Date',
    y='Active_Cases',
    hue='State/UnionTerritory',
    data=covid_df.loc[covid_df['State/UnionTerritory'].isin(x)],
)

In [34]:
# Pie chart of Active_Cases for the first ten states of the ranking.
# The previous title ('Individuals Vaccination') was copied from the
# vaccination chart and did not describe this figure.
# NOTE(review): the frame passed in holds one row per state per date, and the
# pie adds up all rows that share a state name, so each slice is the sum of
# Active_Cases over every recorded date. Confirm that is the intended measure.
x = top_10_active_cases.iloc[:10].index
fig = px.pie(
    covid_df[covid_df['State/UnionTerritory'].isin(x)],
    names='State/UnionTerritory',
    values='Active_Cases',
    title='Active Cases Share of the Top 10 States',
)
fig.show()

> **Vaccine Dataframe**

In [35]:
# Preview the first rows of the vaccination data.
vaccine_df.head()

In [36]:
# Give the 'Updated On' column the clearer name 'VaccineDate'.
vaccine_df = vaccine_df.rename(columns={'Updated On': 'VaccineDate'})

In [37]:
# Display the vaccination frame after the rename.
vaccine_df

In [38]:
# Count the missing values in each column of the vaccination data.
vaccine_df.isna().sum()

In [39]:
# Column dtypes and non-null counts of the vaccination data.
vaccine_df.info()

In [40]:
# Working copy of the vaccination data without the columns that are not used below.
vaccination = vaccine_df.drop(
    [
        'Sputnik V (Doses Administered)',
        'AEFI',
        '18-44 Years (Doses Administered)',
        '45-60 Years (Doses Administered)',
        '60+ Years (Doses Administered)',
    ],
    axis='columns',
)

In [41]:
# Preview the trimmed vaccination frame.
vaccination.head()

In [42]:
# Column totals of vaccinated individuals by gender, summed over every row.
# NOTE(review): rows with State == 'India' are only filtered out in a later
# cell, so these sums include them; confirm that is intended.
male = vaccination.loc[:, 'Male(Individuals Vaccinated)'].sum()
female = vaccination.loc[:, 'Female(Individuals Vaccinated)'].sum()


In [43]:
# Pie chart comparing the male and female vaccination totals.
fig = px.pie(
    values=[male, female],
    names=['Male', 'Females'],
    title='Individuals Vaccination',
)
fig.show()

In [44]:
# Shorten 'Total Individuals Vaccinated' to 'Total'.
vaccination = vaccination.rename(columns={'Total Individuals Vaccinated': 'Total'})

In [45]:
# Remove the rows labelled 'India', keeping every other State value.
vaccination = vaccination.loc[vaccination['State'] != 'India']

In [46]:
# Display the vaccination frame after the 'India' rows were removed.
vaccination

In [47]:
# Most vaccinated states: the five largest per-state sums of 'Total'.
# 'Total' is selected before summing so the aggregation never touches the
# other columns (including non-numeric ones such as the date column).
# NOTE(review): this adds 'Total' across every row of a state; if 'Total' is
# a running cumulative figure per date, max() may be the intended aggregate.
# Confirm against the dataset.
most_vaccinated = (
    vaccination
    .groupby('State')['Total']
    .sum()
    .to_frame('Total')
    .sort_values(by='Total', ascending=False)
    .head(5)
)
most_vaccinated

In [48]:
# Bar chart of the 'Total' column of most_vaccinated, one bar per state.
plt.figure(figsize=(20, 9))
plt.title('Most vaccinated States In India')
sns.barplot(
    x=most_vaccinated.index,
    y='Total',
    data=most_vaccinated,
    palette=sns.color_palette('pastel'),
    edgecolor='mediumblue',
)

**Least vaccinated states**

In [49]:
# Least vaccinated states: the five smallest per-state sums of 'Total'.
# The previous version sorted `most_vaccinated` (the top five) instead of the
# full per-state totals, so it returned the top five in ascending order.
# 'Total' is selected before summing so only that column is aggregated.
least_vaccinated = (
    vaccination
    .groupby('State')['Total']
    .sum()
    .to_frame('Total')
    .sort_values(by='Total', ascending=True)
    .head(5)
)
least_vaccinated