# Import libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
from scipy.stats import boxcox, norm
import plotly.express as px

# Ignore every warning from here on (no category or module filter is given).
# NOTE(review): this also hides deprecation warnings raised by the plotting
# libraries used below — confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')

# Load dataset

In [None]:
# Read the state-wise COVID-19 status CSV into the `data` frame used by every
# later cell.
csv_path = '../input/latest-covid19-india-statewise-data/Latest Covid-19 India Status.csv'
data = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows (five is the default for head()).
data.head(n=5)

In [None]:
# Report the (rows, columns) dimensions of the loaded frame.
n_rows, n_cols = data.shape
print(f"Shape of dataset: {(n_rows, n_cols)}")

In [None]:
# Show the column labels of the loaded frame.
print(f"Column of dataset: {data.columns!s}")

In [None]:
print("Info. of dataset\n\n")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() appended a stray "None" line after the report.
data.info()

In [None]:
# Summary statistics from describe(), transposed so each described column
# becomes one row of the displayed table.
print("Central tendency of dataset\n\n")
data.describe().transpose()

In [None]:
# Histogram of the 'Total Cases' column, split into 20 bins.
fig, axes = plt.subplots(figsize=(15, 5))
axes.hist(data['Total Cases'], bins=20)
axes.set_xlabel('Total Cases')
axes.set_title("Frequency plot of Total Cases")
plt.show()

In [None]:
# Density histogram of 'Total Cases' with a KDE curve and a fitted normal pdf.
# sns.distplot is deprecated, so the same three layers are drawn with
# sns.histplot plus an explicit scipy normal fit.
fig, axes = plt.subplots(figsize=(15, 5))
values = data['Total Cases'].dropna()
sns.histplot(values, kde=True, stat='density', bins=20, ax=axes)
# Overlay the fitted normal across the x-range the histogram/KDE produced,
# then restore the limits so the extra line does not widen the axis.
x_lo, x_hi = axes.get_xlim()
grid = np.linspace(x_lo, x_hi, 200)
axes.plot(grid, norm.pdf(grid, *norm.fit(values)), color='#282828')
axes.set_xlim(x_lo, x_hi)
axes.set_title('Distplot of Total Cases')
plt.show()

In [None]:
# Histogram of the 'Active' column, split into 100 bins.
fig, axes = plt.subplots(figsize=(15, 5))
axes.hist(data['Active'], bins=100)
axes.set_xlabel('Active')
axes.set_title('Frequency plot of Active cases')
plt.show()

In [None]:
# Density histogram of 'Active' with a KDE curve and a fitted normal pdf.
# sns.distplot is deprecated, so the same three layers are drawn with
# sns.histplot plus an explicit scipy normal fit.
fig, axes = plt.subplots(figsize=(15, 5))
values = data['Active'].dropna()
sns.histplot(values, kde=True, stat='density', bins=100, ax=axes)
# Overlay the fitted normal across the x-range the histogram/KDE produced,
# then restore the limits so the extra line does not widen the axis.
x_lo, x_hi = axes.get_xlim()
grid = np.linspace(x_lo, x_hi, 200)
axes.plot(grid, norm.pdf(grid, *norm.fit(values)), color='#282828')
axes.set_xlim(x_lo, x_hi)
axes.set_title('Distplot of Active Cases')
plt.show()

In [None]:
# Histogram of the 'Discharged' column, split into 20 bins.
fig, axes = plt.subplots(figsize=(15, 5))
axes.hist(data['Discharged'], bins=20)
axes.set_xlabel('Discharged')
axes.set_title("Frequency plot of Discharged Cases")
plt.show()

In [None]:
# Density histogram of 'Discharged' with a KDE curve and a fitted normal pdf.
# sns.distplot is deprecated, so the same three layers are drawn with
# sns.histplot plus an explicit scipy normal fit.
fig, axes = plt.subplots(figsize=(15, 5))
values = data['Discharged'].dropna()
sns.histplot(values, kde=True, stat='density', bins=20, ax=axes)
# Overlay the fitted normal across the x-range the histogram/KDE produced,
# then restore the limits so the extra line does not widen the axis.
x_lo, x_hi = axes.get_xlim()
grid = np.linspace(x_lo, x_hi, 200)
axes.plot(grid, norm.pdf(grid, *norm.fit(values)), color='#282828')
axes.set_xlim(x_lo, x_hi)
axes.set_title('Distplot of Discharged Cases')
plt.show()

In [None]:
# Histogram of the 'Deaths' column, split into 20 bins.
fig, axes = plt.subplots(figsize=(15, 5))
axes.hist(data['Deaths'], bins=20)
axes.set_xlabel('Deaths')
axes.set_title("Frequency plot of Death Cases")
plt.show()

In [None]:
# Density histogram of 'Deaths' with a KDE curve and a fitted normal pdf.
# sns.distplot is deprecated, so the same three layers are drawn with
# sns.histplot plus an explicit scipy normal fit.
fig, axes = plt.subplots(figsize=(15, 5))
values = data['Deaths'].dropna()
sns.histplot(values, kde=True, stat='density', bins=20, ax=axes)
# Overlay the fitted normal across the x-range the histogram/KDE produced,
# then restore the limits so the extra line does not widen the axis.
x_lo, x_hi = axes.get_xlim()
grid = np.linspace(x_lo, x_hi, 200)
axes.plot(grid, norm.pdf(grid, *norm.fit(values)), color='#282828')
axes.set_xlim(x_lo, x_hi)
axes.set_title('Distplot of Death Cases')
plt.show()

In [None]:
# Histogram of the 'Active Ratio (%)' column, split into 20 bins.
fig, axes = plt.subplots(figsize=(15, 5))
axes.hist(data['Active Ratio (%)'], bins=20)
axes.set_xlabel('Active Ratio (%)')
axes.set_title("Frequency plot of Active Ratio (%)")
plt.show()

In [None]:
# Density histogram of 'Active Ratio (%)' with a KDE curve and a fitted
# normal pdf. sns.distplot is deprecated, so the same three layers are drawn
# with sns.histplot plus an explicit scipy normal fit.
fig, axes = plt.subplots(figsize=(15, 5))
values = data['Active Ratio (%)'].dropna()
sns.histplot(values, kde=True, stat='density', bins=20, ax=axes)
# Overlay the fitted normal across the x-range the histogram/KDE produced,
# then restore the limits so the extra line does not widen the axis.
x_lo, x_hi = axes.get_xlim()
grid = np.linspace(x_lo, x_hi, 200)
axes.plot(grid, norm.pdf(grid, *norm.fit(values)), color='#282828')
axes.set_xlim(x_lo, x_hi)
axes.set_title('Distplot of Active Ratio (%)')
plt.show()

In [None]:
# Histogram of the 'Discharge Ratio (%)' column, split into 20 bins.
fig, axes = plt.subplots(figsize=(15, 5))
axes.hist(data['Discharge Ratio (%)'], bins=20)
axes.set_xlabel('Discharge Ratio (%)')
axes.set_title("Frequency plot of Discharge Ratio (%)")
plt.show()

In [None]:
# Density histogram of 'Discharge Ratio (%)' with a KDE curve and a fitted
# normal pdf. sns.distplot is deprecated, so the same three layers are drawn
# with sns.histplot plus an explicit scipy normal fit.
fig, axes = plt.subplots(figsize=(15, 5))
values = data['Discharge Ratio (%)'].dropna()
sns.histplot(values, kde=True, stat='density', bins=20, ax=axes)
# Overlay the fitted normal across the x-range the histogram/KDE produced,
# then restore the limits so the extra line does not widen the axis.
x_lo, x_hi = axes.get_xlim()
grid = np.linspace(x_lo, x_hi, 200)
axes.plot(grid, norm.pdf(grid, *norm.fit(values)), color='#282828')
axes.set_xlim(x_lo, x_hi)
axes.set_title('Distplot of Discharge Ratio (%)')
plt.show()

In [None]:
# Histogram of the 'Death Ratio (%)' column, split into 20 bins.
fig, axes = plt.subplots(figsize=(15, 5))
axes.hist(data['Death Ratio (%)'], bins=20)
axes.set_xlabel('Death Ratio (%)')
axes.set_title("Frequency plot of Death Ratio (%)")
plt.show()

In [None]:
# Density histogram of 'Death Ratio (%)' with a KDE curve and a fitted
# normal pdf. sns.distplot is deprecated, so the same three layers are drawn
# with sns.histplot plus an explicit scipy normal fit.
fig, axes = plt.subplots(figsize=(15, 5))
values = data['Death Ratio (%)'].dropna()
sns.histplot(values, kde=True, stat='density', bins=20, ax=axes)
# Overlay the fitted normal across the x-range the histogram/KDE produced,
# then restore the limits so the extra line does not widen the axis.
x_lo, x_hi = axes.get_xlim()
grid = np.linspace(x_lo, x_hi, 200)
axes.plot(grid, norm.pdf(grid, *norm.fit(values)), color='#282828')
axes.set_xlim(x_lo, x_hi)
axes.set_title('Distplot of Death Ratio (%)')
plt.show()

**Insights**
* The majority of states have fewer than 1e6 total cases.
* The majority of states have fewer than 5e4 active cases.
* The majority of states have fewer than 1e6 discharged cases.
* The majority of states have fewer than 2e4 deaths.

In [None]:
# Country-wide totals: sum each count column over every row of `data`.
total_columns = (
    ("Total cases", 'Total Cases'),
    ("Total active cases", 'Active'),
    ("Total discharged cases", 'Discharged'),
    ("Total death cases", 'Deaths'),
)
total_lines = [
    f"{label} over India: {data[column].sum()} cases"
    for label, column in total_columns
]
# The trailing blank lines separate the totals from the per-column extremes.
print("\n".join(total_lines) + "\n\n")

# Per-column extremes: (wording used in the message, column name, unit suffix).
extreme_columns = (
    ("cases", 'Total Cases', "cases"),
    ("active cases", 'Active', "cases"),
    ("discharged cases", 'Discharged', "cases"),
    ("deaths cases", 'Deaths', "cases"),
    ("active ratio (%) cases", 'Active Ratio (%)', "%"),
    ("discharge ratio (%) cases", 'Discharge Ratio (%)', "%"),
    ("death ratio (%) cases", 'Death Ratio (%)', "%"),
)
for label, column, unit in extreme_columns:
    series = data[column]
    highest, lowest = series.max(), series.min()
    # When several rows tie for the extreme value, the first one is reported.
    top_state = data.loc[series == highest, 'State/UTs'].values[0]
    bottom_state = data.loc[series == lowest, 'State/UTs'].values[0]
    print(f"Most {label} over India: {highest} {unit}, State: {top_state}")
    print(f"Least {label} over India: {lowest} {unit}, State: {bottom_state}\n\n")

In [None]:
# Lower-triangle heat map of the pairwise correlations between numeric columns.
fig, axes = plt.subplots(figsize=(15, 5))
# Restrict to numeric columns explicitly: pandas >= 2.0 raises on non-numeric
# columns (e.g. the state names) instead of silently dropping them.
corr = data.select_dtypes(include='number').corr()
# True hides a cell; the upper triangle (diagonal included) is masked because
# it only repeats the lower triangle.
mask = np.triu(np.ones_like(corr, dtype=bool))
# Reuse the matrix computed above rather than calling corr() a second time.
plot = sns.heatmap(corr, annot=True, mask=mask, ax=axes)
fig.suptitle('Correlation heat map')
plt.show()

In [None]:
# Bar plot with 'State/UTs' on the y-axis and 'Total Cases' on the x-axis.
fig, axes = plt.subplots(figsize=(15, 10))
plot = sns.barplot(
    data=data,
    x='Total Cases',
    y='State/UTs',
    ax=axes,
)
fig.suptitle('Bar plot of Total Cases v/s State/UTs')
plt.show()

In [None]:
# Bar plot with 'State/UTs' on the y-axis and 'Active' on the x-axis.
fig, axes = plt.subplots(figsize=(15, 10))
plot = sns.barplot(
    data=data,
    x='Active',
    y='State/UTs',
    ax=axes,
)
fig.suptitle('Bar plot of Active Cases v/s State/UTs')
plt.show()

In [None]:
# Bar plot with 'State/UTs' on the y-axis and 'Discharged' on the x-axis.
fig, axes = plt.subplots(figsize=(15, 10))
plot = sns.barplot(
    data=data,
    x='Discharged',
    y='State/UTs',
    ax=axes,
)
fig.suptitle('Bar plot of Discharged Cases v/s State/UTs')
plt.show()

In [None]:
# Bar plot with 'State/UTs' on the y-axis and 'Deaths' on the x-axis.
fig, axes = plt.subplots(figsize=(15, 10))
plot = sns.barplot(
    data=data,
    x='Deaths',
    y='State/UTs',
    ax=axes,
)
fig.suptitle('Bar plot of Deaths v/s State/UTs')
plt.show()

In [None]:
# Plotly bar chart of 'Total Cases' per 'State/UTs', coloured by
# 'Death Ratio (%)'.
px.bar(
    data_frame=data,
    x="State/UTs",
    y="Total Cases",
    color="Death Ratio (%)",
    title="Total Cases per State : ",
)

In [None]:
# Plotly bar chart of 'Total Cases' per 'State/UTs', coloured by
# 'Active Ratio (%)'.
px.bar(
    data_frame=data,
    x="State/UTs",
    y="Total Cases",
    color="Active Ratio (%)",
    title="Total Cases per State : ",
)

In [None]:
# Plotly bar chart of 'Total Cases' per 'State/UTs', coloured by
# 'Discharge Ratio (%)'.
px.bar(
    data_frame=data,
    x="State/UTs",
    y="Total Cases",
    color="Discharge Ratio (%)",
    title="Total Cases per State : ",
)

**2D Histogram**

In [None]:
# 2D histogram of 'Deaths' (x) against 'Total Cases' (y), 20 bins per axis.
px.density_heatmap(
    data_frame=data,
    x="Deaths",
    y="Total Cases",
    nbinsx=20,
    nbinsy=20,
)

In [None]:
# 2D histogram of 'Active' (x) against 'Total Cases' (y), 20 bins per axis.
px.density_heatmap(
    data_frame=data,
    x="Active",
    y="Total Cases",
    nbinsx=20,
    nbinsy=20,
)

In [None]:
# 2D histogram of 'Discharged' (x) against 'Total Cases' (y), 20 bins per axis.
px.density_heatmap(
    data_frame=data,
    x="Discharged",
    y="Total Cases",
    nbinsx=20,
    nbinsy=20,
)

In [None]:
# Scatter of 'Active' (x) against 'Total Cases' (y), coloured by 'State/UTs'.
px.scatter(
    data_frame=data,
    x="Active",
    y="Total Cases",
    color=data['State/UTs'],
    title="Total cases v/s Active",
)

In [None]:
# Scatter of 'Deaths' (x) against 'Total Cases' (y), coloured by 'State/UTs'.
px.scatter(
    data_frame=data,
    x="Deaths",
    y="Total Cases",
    color=data['State/UTs'],
    title="Total cases v/s Deaths",
)

In [None]:
# Scatter of 'Discharged' (x) against 'Total Cases' (y), coloured by
# 'State/UTs'.
px.scatter(
    data_frame=data,
    x="Discharged",
    y="Total Cases",
    color=data['State/UTs'],
    title="Total cases v/s Discharged",
)

In [None]:
# Scatter of 'Death Ratio (%)' (x) against 'Total Cases' (y), coloured by
# 'State/UTs'.
px.scatter(
    data_frame=data,
    x="Death Ratio (%)",
    y="Total Cases",
    color=data['State/UTs'],
    title="Total cases v/s death ratio(%)",
)

In [None]:
# Scatter of 'Active Ratio (%)' (x) against 'Total Cases' (y), coloured by
# 'State/UTs'.
px.scatter(
    data_frame=data,
    x="Active Ratio (%)",
    y="Total Cases",
    color=data['State/UTs'],
    title="Total cases v/s Active ratio(%)",
)

In [None]:
# Scatter of 'Discharge Ratio (%)' (x) against 'Total Cases' (y), coloured by
# 'State/UTs'.
px.scatter(
    data_frame=data,
    x="Discharge Ratio (%)",
    y="Total Cases",
    color=data['State/UTs'],
    title="Total cases v/s Discharge ratio(%)",
)

In [None]:
# Bubble chart on log-log axes: 'Active' (x) against 'Total Cases' (y),
# coloured by 'State/UTs', marker size driven by 'Deaths' (capped at 50).
# NOTE(review): rows with zero 'Active' cannot be placed on a log axis —
# confirm how they should be shown.
px.scatter(
    data_frame=data,
    x="Active",
    y="Total Cases",
    color=data['State/UTs'],
    size="Deaths",
    size_max=50,
    log_x=True,
    log_y=True,
    title="Total cases v/s Active",
)

In [None]:
# Bubble chart: 'Discharged' (x) against 'Total Cases' (y), coloured by
# 'State/UTs', marker size driven by 'Deaths' (capped at 50).
px.scatter(
    data_frame=data,
    x="Discharged",
    y="Total Cases",
    color=data['State/UTs'],
    size="Deaths",
    size_max=50,
    title="Total cases v/s Discharged",
)

In [None]:
# Choropleth of 'Total Cases': each 'State/UTs' value is matched against the
# GeoJSON features' properties.ST_NM field.
geojson_url = "https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson"
fig = px.choropleth(
    data,
    geojson=geojson_url,
    featureidkey='properties.ST_NM',
    locations='State/UTs',
    color='Total Cases',
    color_continuous_scale="Blues",
    title="Total cases density plot",
)
# Fit the view to the plotted locations and hide the base map.
fig.update_geos(fitbounds="locations", visible=False)
fig.show()

In [None]:
# Choropleth of 'Active': each 'State/UTs' value is matched against the
# GeoJSON features' properties.ST_NM field.
geojson_url = "https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson"
fig = px.choropleth(
    data,
    geojson=geojson_url,
    featureidkey='properties.ST_NM',
    locations='State/UTs',
    color='Active',
    color_continuous_scale="pinkyl",
    title="Active cases density plot",
)
# Fit the view to the plotted locations and hide the base map.
fig.update_geos(fitbounds="locations", visible=False)
fig.show()

In [None]:
# Choropleth of 'Discharged': each 'State/UTs' value is matched against the
# GeoJSON features' properties.ST_NM field.
geojson_url = "https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson"
fig = px.choropleth(
    data,
    geojson=geojson_url,
    featureidkey='properties.ST_NM',
    locations='State/UTs',
    color='Discharged',
    color_continuous_scale="Greens",
    title="Discharged cases density plot",
)
# Fit the view to the plotted locations and hide the base map.
fig.update_geos(fitbounds="locations", visible=False)
fig.show()

In [None]:
# Choropleth of 'Deaths': each 'State/UTs' value is matched against the
# GeoJSON features' properties.ST_NM field.
geojson_url = "https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson"
fig = px.choropleth(
    data,
    geojson=geojson_url,
    featureidkey='properties.ST_NM',
    locations='State/UTs',
    color='Deaths',
    color_continuous_scale="Reds",
    title="Death cases density plot",
)
# Fit the view to the plotted locations and hide the base map.
fig.update_geos(fitbounds="locations", visible=False)
fig.show()

In [None]:
# Pie chart of each state's share of 'Total Cases'.
px.pie(
    data_frame=data,
    values='Total Cases',
    names=data['State/UTs'],
    title='Covid cases (%) in states of India',
)

In [None]:
# Pie chart of each state's share of 'Active', drawn at 1400x800.
px.pie(
    data_frame=data,
    values='Active',
    names=data['State/UTs'],
    width=1400,
    height=800,
    title='Active cases (%) in states of India',
)

In [None]:
# Pie chart of each state's share of 'Discharged'.
px.pie(
    data_frame=data,
    values='Discharged',
    names=data['State/UTs'],
    title='Discharged cases (%) in states of India',
)

In [None]:
# Pie chart of each state's share of 'Deaths', drawn at 1400x600.
px.pie(
    data_frame=data,
    values='Deaths',
    names=data['State/UTs'],
    width=1400,
    height=600,
    title='Death cases (%) in states of India',
)