**COVID-19 ANALYSIS**

**Analysed by:** Sumit Singh Bisht (sumitbisht867@gmail.com)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mtd
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objects as go
%matplotlib inline

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides any pandas deprecation or chained-assignment
# warnings that later cells may raise — consider narrowing the filter.
warnings.filterwarnings('ignore')

**READING DATA**

In [None]:
# Read the three input CSV files (cases, state-wise testing details and
# state-wise vaccine data) in one pass; the order of the paths matches the
# order of the names on the left-hand side.
df, state_testing, df_vac = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/covid19-in-india/covid_19_india.csv",
        "../input/covid19-in-india/StatewiseTestingDetails.csv",
        "../input/covid19-in-india/covid_vaccine_statewise.csv",
    )
)

In [None]:
# Preview the first ten rows of the cases DataFrame
df.head(n=10)

In [None]:
# Print a summary of the cases DataFrame (columns, dtypes, non-null counts)
df.info()

In [None]:
# Checking for missing values: count of nulls in every column
df.isna().sum()

In [None]:
# Drop the columns that are not needed for the analysis.
# `df` is re-bound (instead of using inplace=True) and errors="ignore" is
# passed so the cell is idempotent: re-running it after the columns are
# already gone no longer raises a KeyError.
df = df.drop(
    columns=["Sno", "Time", "ConfirmedIndianNational", "ConfirmedForeignNational"],
    errors="ignore",
)

In [None]:
# Convert the Date column to datetime, parsing it as year-month-day
df['Date'] = df['Date'].pipe(pd.to_datetime, format='%Y-%m-%d')

In [None]:
# Active cases = confirmed cases minus the ones already closed (cured or dead)
df['Active_Cases'] = df['Confirmed'].sub(df['Cured'].add(df['Deaths']))
df.tail(n=5)

**STATE-WISE ANALYSIS**

In [None]:
# Per-state summary table: the maximum of Confirmed, Cured and Deaths over all
# rows of each state, plus recovery and mortality rates as percentages of the
# confirmed count, ordered by confirmed cases (highest first).
# The aggregation is given by name ("max") rather than as the builtin `max`
# callable, which recent pandas versions flag with a FutureWarning.
statewise = pd.pivot_table(
    df,
    values=["Confirmed", "Deaths", "Cured"],
    index="State/UnionTerritory",
    aggfunc="max",
)
statewise["Recovery Rate"] = statewise["Cured"] * 100 / statewise["Confirmed"]
statewise["Mortality Rate"] = statewise["Deaths"] * 100 / statewise["Confirmed"]
statewise = statewise.sort_values(by="Confirmed", ascending=False)
# Last expression: styled table with a yellow-to-red heat map per column
statewise.style.background_gradient(cmap="YlOrRd")

In [None]:
# Highest active-case count reached by each state, sorted from largest to
# smallest. The frame holds every state; only its first ten rows are plotted.
top_10_active_states = (
    df.groupby(by='State/UnionTerritory')
    .max()[['Active_Cases', 'Date']]
    .sort_values(by=['Active_Cases'], ascending=False)
    .reset_index()
)

fig, ax = plt.subplots(figsize=(15, 8))
ax.set_title("Top 10 States with highest number of Active Cases in India", size=20)
sns.barplot(
    data=top_10_active_states.head(10),
    x="State/UnionTerritory",
    y="Active_Cases",
    linewidth=2,
    edgecolor='black',
    ax=ax,
)
ax.set_xlabel("STATES")
ax.set_ylabel("NO. OF ACTIVE CASES")
plt.show()

In [None]:
# Highest active-case count reached by each state, sorted from smallest to
# largest. The frame holds every state; only its first ten rows are plotted.
last_10_active_states = (
    df.groupby(by='State/UnionTerritory')
    .max()[['Active_Cases', 'Date']]
    .sort_values(by=['Active_Cases'])
    .reset_index()
)

fig, ax = plt.subplots(figsize=(20, 8))
ax.set_title("Top 10 States with lowest number of Active Cases in India", size=20)
sns.barplot(
    data=last_10_active_states.head(10),
    x="State/UnionTerritory",
    y="Active_Cases",
    linewidth=2,
    edgecolor='black',
    ax=ax,
)
ax.set_xlabel("STATES")
ax.set_ylabel("NO. OF ACTIVE CASES")
plt.show()

In [None]:
# Active cases over time for five hard-coded states, one line per state
fig, ax = plt.subplots(figsize=(15, 8))
sns.lineplot(
    data=df.loc[
        df['State/UnionTerritory'].isin(
            ['Maharashtra', 'Karnataka', 'Kerala', 'Tamil Nadu', 'Uttar Pradesh']
        )
    ],
    x='Date',
    y='Active_Cases',
    hue='State/UnionTerritory',
    ax=ax,
)
ax.set_title(" Rate of Active cases in Top 5 Affected States in India", size=20)

In [None]:
# Rows for the most recent date in the data, with missing values replaced by 0,
# indexed by state name; the Date column is dropped because it is constant here.
india_cases = (
    df[df['Date'] == df['Date'].max()]
    .fillna(0)
    .set_index("State/UnionTerritory")
    .drop(columns=['Date'])
)

In [None]:
# Preview the first ten states of the latest-date snapshot
india_cases.head(n=10)

In [None]:
# Daily totals over all states: sum of each count column per date.
# The columns are selected with a list; selecting several columns from a
# groupby with a bare tuple (['a', 'b', ...] without the inner brackets) is
# rejected by pandas 2.x.
Trend = (
    df.groupby('Date')[['Confirmed', 'Deaths', 'Cured', 'Active_Cases']]
    .sum()
    .reset_index()
)

In [None]:
# Preview the first five rows of the daily totals
Trend.head(5)

In [None]:
# Daily totals as a stacked bar chart with the confirmed count as a line.
# Only Cured, Deaths and Active_Cases are stacked: Active_Cases is defined as
# Confirmed - (Cured + Deaths), so the three components already add up to
# Confirmed. Stacking Confirmed on top of them (as before) doubled the bar
# height; it is drawn as a separate line instead.
fig = go.Figure()
for column, label in (
    ("Cured", "Recovered"),
    ("Deaths", "Deaths"),
    ("Active_Cases", "Active Cases"),
):
    fig.add_trace(go.Bar(x=Trend["Date"], y=Trend[column], name=label))
fig.add_trace(
    go.Scatter(x=Trend["Date"], y=Trend["Confirmed"], name="Confirmed", mode="lines")
)

# Horizontal legend above the plot area, white backgrounds
fig.update_layout(
    barmode='stack',
    legend=dict(orientation="h", x=0.3, y=1.1),
    paper_bgcolor='white',
    plot_bgcolor="white",
)
fig.show()

In [None]:
# Ten states with the most deaths in the latest-date snapshot (india_cases)
top_10_death_states = india_cases.sort_values('Deaths', ascending=False).head(10)

fig, ax = plt.subplots(figsize=(15, 8))
ax.set_title("Top 10 States with highest number of death", size=20)
sns.barplot(
    x=top_10_death_states.index,
    y=top_10_death_states['Deaths'],
    linewidth=2,
    edgecolor='black',
    ax=ax,
)
ax.set_xlabel("STATES")
ax.set_ylabel("NO. OF DEATH CASES")
plt.show()

In [None]:
# Ten states with the most cured cases in the latest-date snapshot (india_cases)
top_10_cured_cases = india_cases.sort_values('Cured', ascending=False).head(10)

fig, ax = plt.subplots(figsize=(15, 8))
ax.set_title("Top 10 States with highest number of cured cases", size=20)
sns.barplot(
    x=top_10_cured_cases.index,
    y=top_10_cured_cases['Cured'],
    linewidth=2,
    edgecolor='black',
    ax=ax,
)
ax.set_xlabel("STATES")
ax.set_ylabel("NO. OF CURED CASES")
plt.show()

**VACCINATION ANALYSIS**

In [None]:
# Give the 'Updated On' column the shorter name 'Date', then preview ten rows
df_vac = df_vac.rename(columns={'Updated On': 'Date'})
df_vac.head(n=10)

In [None]:
# Print a summary of the vaccination DataFrame (columns, dtypes, non-null counts)
df_vac.info()

In [None]:
# Count of missing values in every vaccination column
df_vac.isna().sum()

In [None]:
# Remove the columns with the most null values.
# errors="ignore" keeps the cell idempotent: re-running it after the columns
# are already gone (or against a file that lacks one of them) no longer
# raises a KeyError.
df_vac = df_vac.drop(
    columns=[
        'Total Sputnik V Administered',
        'AEFI',
        '18-45 years (Age)',
        '45-60 years (Age)',
        '60+ years (Age)',
    ],
    errors="ignore",
)

In [None]:
# Keep every row whose State is not 'India' (presumably the nation-wide
# aggregate rows — confirm) and shorten the total column's name to "Total".
# The rename is chained so it returns a new frame; renaming the filtered slice
# in place is what triggers pandas' chained-assignment warning.
vac = (
    df_vac[df_vac.State != 'India']
    .rename(columns={"Total Individuals Vaccinated": "Total"})
)

In [None]:
# Preview the first five rows of the per-state vaccination data
vac.head(5)

In [None]:
# States with the most vaccinated individuals (top five).
# NOTE(review): "Total" (originally "Total Individuals Vaccinated") appears to
# be a running total reported per state per date — confirm against the data.
# Under that assumption the per-state maximum is the number of people
# vaccinated; summing the column over all dates would count the same people
# once for every day they appear in the running total.
max_vac = (
    vac.groupby('State')['Total']
    .max()
    .to_frame('Total')
    .sort_values('Total', ascending=False)
    .head(5)
)
max_vac


In [None]:
# Bar chart of the states held in max_vac (one bar per state)
fig, ax = plt.subplots(figsize=(10, 8))
ax.set_title("Top 5 Vaccinated States in India", size=20)
x = sns.barplot(
    x=max_vac.index,
    y=max_vac['Total'],
    linewidth=2,
    edgecolor='black',
    ax=ax,
)
ax.set_xlabel("STATES")
ax.set_ylabel("VACCINATION NUMBERS")
plt.show()

In [None]:
# States with the fewest vaccinated individuals (bottom five).
# NOTE(review): "Total" (originally "Total Individuals Vaccinated") appears to
# be a running total reported per state per date — confirm against the data.
# Under that assumption the per-state maximum is the number of people
# vaccinated; summing the column over all dates would count the same people
# once for every day they appear in the running total.
min_vac = (
    vac.groupby('State')['Total']
    .max()
    .to_frame('Total')
    .sort_values('Total')
    .head(5)
)
min_vac


In [None]:
# Bar chart of the states held in min_vac (one bar per state)
fig, ax = plt.subplots(figsize=(12, 8))
ax.set_title("Bottom 5 Vaccinated States in India", size=20)
x = sns.barplot(
    x=min_vac.index,
    y=min_vac['Total'],
    linewidth=2,
    edgecolor='black',
    ax=ax,
)
ax.set_xlabel("STATES")
ax.set_ylabel("VACCINATION NUMBERS")
plt.show()

In [None]:
# Share of vaccinated individuals by gender, shown as a pie chart.
# NOTE(review): the "(Individuals Vaccinated)" columns appear to be running
# totals per state per date — confirm against the data. Under that assumption
# each state's maximum is taken first and the maxima are then added up;
# summing every date row would count the same people repeatedly and weight
# the shares by how long each state has been reporting.
gender_columns = [
    "Male(Individuals Vaccinated)",
    "Female(Individuals Vaccinated)",
    "Transgender(Individuals Vaccinated)",
]
# .max() skips missing values, so dates without a gender breakdown are ignored
male, female, trans = vac.groupby("State")[gender_columns].max().sum()
px.pie(names=["Male Vaccinated","Female Vaccinated","Trans Gender"],values=[male,female,trans],title="Male and Female Vaccinated ratio for Covid19")


**THANK YOU**