# Table of Contents

1. [Load and Check Data](#1)
1. [Caution!](#2)
1. [Correlation Matrix](#3)
1. [Pair Plot](#4)
1. [Distribution Plot](#5)
1. [Bar Plot](#6)
    * [Total Cases - State/UTs](#7)
    * [Active Cases - State/UTs](#8)
1. [Point Plot](#9)
    * [Discharge Ratio - Death Ratio - Active Ratio - State/UTs](#10)
    * [Active - Total Cases - State/UTs](#11)
1. [Violin Plot](#12)
    * [Total Cases - Discharged](#13)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import missingno

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<a id='1'></a>
# Load and Check Data

In [None]:
# Read the CSV from the Kaggle input dataset into the frame used by every later cell.
csv_path = '/kaggle/input/latest-covid19-india-statewise-data/Latest Covid-19 India Status.csv'
data = pd.read_csv(csv_path)

In [None]:
# Preview the first rows of the loaded table.
data.head()

In [None]:
# Summary statistics (count, mean, spread, quartiles) for the columns pandas describes by default.
data.describe()

In [None]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
data.info()

<a id='2'></a>
# Caution!

* **Active Ratio (%):** Active/Total Cases
* **Discharge Ratio (%):** Discharged/Total Cases
* **Death Ratio (%):** Deaths/Total Cases

In [None]:
# Bar chart of how complete each column is; the trailing ';' hides the returned Axes repr.
missingno.bar(
    data,
    figsize=(10, 5),
    fontsize=12,
);

In [None]:
# Show the column labels; the feature lists in the next cell are spelled against these names.
data.columns

In [None]:
# Column groups used by the plotting cells.
# categorical_list: label column(s); numerical_list: the measured columns.
categorical_list = ['State/UTs']
numerical_list = [
    'Total Cases',
    'Active',
    'Discharged',
    'Deaths',
    'Active Ratio (%)',
    'Discharge Ratio (%)',
    'Death Ratio (%)',
]
# feature_list is every column: the categorical one(s) followed by the numeric ones.
feature_list = categorical_list + numerical_list

<a id='3'></a>
# Correlation Matrix

In [None]:
# Correlate only the numeric columns. The frame still contains the categorical
# 'State/UTs' column, and DataFrame.corr() raises on non-numeric data in
# pandas >= 2.0 (older versions silently dropped such columns, so the result
# there is unchanged).
numeric_data = data.select_dtypes(include='number')

# Annotated heatmap of the pairwise correlation coefficients, two decimals per cell.
sns.heatmap(numeric_data.corr(), annot=True, linewidths=.5, fmt='.2f')
plt.show()

<a id='4'></a>
## Pair Plot

In [None]:
# Grid of pairwise plots across the dataset's variables.
sns.pairplot(data)
# Render explicitly, as the other plotting cells do, so the cell does not
# echo the PairGrid object's repr as its output.
plt.show()

<a id='5'></a>
## Distribution Plot

In [None]:
def draw_dist(df, feature):
    """
    Draw a histogram of a single column and display it.

    df: DataFrame holding the column.
    feature: label of the column to plot.
    """
    column = df[feature]
    sns.displot(column, kind='hist', height=4)
    plt.show()

In [None]:
# One histogram per numeric column.
for column_name in numerical_list:
    draw_dist(data, column_name)

<a id='6'></a>
## Bar Plot

<a id='7'></a>
### Total Cases - State/UTs

In [None]:
# Order the rows by total cases, largest first. The sorted frame is stored
# back in `data`, so later cells see this ordering too.
data = data.sort_values(by='Total Cases', ascending=False)

# One bar per state/UT, in the sorted order.
plt.figure(figsize=(5, 10))
sns.barplot(data=data, x='Total Cases', y='State/UTs')
plt.xlabel('Total Cases')
plt.ylabel('State/UTs')
plt.show()

<a id='8'></a>
### Active Cases - State/UTs

In [None]:
# Order the rows by active cases, largest first. The sorted frame is stored
# back in `data`, so the point plots further down inherit this ordering.
data = data.sort_values(by='Active', ascending=False)

# One bar per state/UT, in the sorted order; x tick labels are rotated vertically.
plt.figure(figsize=(5, 10))
sns.barplot(data=data, x='Active', y='State/UTs')
plt.xticks(rotation=90)
plt.xlabel('Active')
plt.ylabel('State/UTs')
plt.show()

<a id='9'></a>
## Point Plot

<a id='10'></a>
### Discharge Ratio - Death Ratio - Active Ratio - State/UTs

In [None]:
# Each entry is (column, legend text, colour, y position of the legend text).
# Keeping the colour in one place guarantees that every legend label is drawn
# in the same colour as its line; previously the 'Death Ratio' and
# 'Active Ratio' labels had their colours swapped relative to the lines.
ratio_series = [
    ('Discharge Ratio (%)', '% Discharge Ratio', 'darkblue', 54),
    ('Death Ratio (%)', '% Death Ratio', 'darkgreen', 48),
    ('Active Ratio (%)', '% Active Ratio', 'darkred', 42),
]

plt.figure(figsize=(20,5))
plt.xticks(rotation=90)
for ratio_column, legend_text, colour, legend_y in ratio_series:
    # One point per state/UT for this ratio, all on the shared axes.
    sns.pointplot(x='State/UTs',y=ratio_column,data=data,color=colour)
    # Hand-placed legend entry at x position 31 (data coordinates).
    plt.text(31,legend_y,legend_text,color=colour,fontsize=12)
plt.xlabel('State',fontsize=14)
plt.ylabel('Value',fontsize=14)
plt.title('Discharge Ratio - Death Ratio - Active Ratio',fontsize=14)
plt.grid()

<a id='11'></a>
### Active - Total Cases - State/UTs

In [None]:
# Line colour per plotted column, and the y position of its hand-placed legend text.
series_colours = {'Active': 'darkblue', 'Total Cases': 'darkgreen'}
legend_heights = {'Active': 5200000, 'Total Cases': 4600000}

plt.figure(figsize=(20, 5))
plt.xticks(rotation=90)

# Draw both series on the same axes, one point per state/UT.
for column, colour in series_colours.items():
    sns.pointplot(x='State/UTs', y=column, data=data, color=colour)

# Legend entries: the column name written in the colour of its line, at x position 32.
for column, colour in series_colours.items():
    plt.text(32, legend_heights[column], column, color=colour, fontsize=12)

plt.xlabel('State', fontsize=14)
plt.ylabel('Value', fontsize=14)
plt.title('Active - Total Cases - State/UTs', fontsize=14)
plt.grid()

<a id='12'></a>
## Violin Plot

<a id='13'></a>
### Total Cases - Discharged

In [None]:
# Re-display the column labels before choosing which columns to drop for the violin plot.
data.columns

In [None]:
# Build the violin-plot input: `data` with the listed columns removed.
dropped_columns = [
    'State/UTs',
    'Active',
    'Deaths',
    'Active Ratio (%)',
    'Discharge Ratio (%)',
    'Death Ratio (%)',
]
df_1 = data.drop(columns=dropped_columns)

In [None]:
# One violin per column of df_1, with the individual observations drawn inside each violin.
fig, ax = plt.subplots(figsize=(10, 5))
sns.violinplot(data=df_1, palette='muted', inner='points', ax=ax)
plt.show()