# COVID 19 state wise analytics
## Finding Indian states which are at high risk of encountering COVID 3rd Wave

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas_profiling import ProfileReport
import plotly.express as px
import plotly.graph_objects as go

### Description of Dataset
<b>About</b>

This dataset contains the latest state-wise Covid-19 data for India as of September 01, 2021. It can be used to analyze the Covid-19 situation in India.
This dataset is well suited to exploratory data analysis.

<b>Attribute Information</b>

State/UTs - Names of Indian States and Union Territories.<br>
Total Cases - Total number of confirmed cases<br>
Active - Total number of active cases<br>
Discharged - Total number of discharged cases<br>
Deaths - Total number of deaths<br>
Active Ratio (%) - Ratio of number of active cases to total cases<br>
Discharge Ratio (%) - Ratio of number of discharged cases to total cases<br>
Death Ratio (%) - Ratio of number of deaths to total cases<br>

<b>Source</b><br>
Link : https://www.mygov.in/covid-19

## Read dataset

In [None]:
# Load the state-wise COVID-19 snapshot from the relative ../input directory.
covid_data = pd.read_csv(
    "../input/latest-covid19-india-statewise-data/Latest Covid-19 India Status.csv"
)
# Bare trailing expression so the notebook renders the loaded frame.
covid_data

#### Check whether the dataset contains missing values

In [None]:
# Per-column count of missing entries (isna is the method isnull aliases).
covid_data.isna().sum()

#### Dataset does not contain missing values
#### Pandas Profiling

In [None]:
# Build the profiling report over the whole frame, then render it inline
# through the report's notebook-iframe output.
profile = ProfileReport(
    covid_data,
    title="Pandas Profiling Report",
)
profile.to_notebook_iframe()

## Data Analysis

### Find top 5 state/UTs having highest total number of cases

In [None]:
# Sort into a local frame rather than reordering the shared `covid_data`
# in place, so this cell has no hidden side effect on other cells and can
# be re-run in any order.
total_cases_sorted = covid_data.sort_values("Total Cases")

# One bar per State/UT, ascending by total confirmed cases.
fig = px.bar(
    total_cases_sorted,
    x='State/UTs',
    y='Total Cases',
    text='Total Cases',
    title="Total Covid cases in State/UTs",
)
# Bar labels use the '.2s' format and are drawn outside (above) each bar.
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', width=1300, height=600)
fig.show()

#### From above barplot, we can conclude:
1. Top 5 States containing highest number of covid cases
    - Maharashtra: 6464876
    - Kerala: 4057233
    - Karnataka: 2949445
    - Tamil Nadu: 2614872
    - Andhra Pradesh: 2014872
    
    
2. States containing lowest number of covid cases
    - Andaman and Nicobar: 7566
    - Lakshadweep: 10347
    - Dadra and Nagar Haveli and Daman and Diu: 10663
    - Ladakh: 20560
    - Sikkim: 29878

In [None]:
# `go` (plotly.graph_objects) is already imported in the notebook's import
# cell, so the duplicate in-cell import has been dropped.

# Five State/UTs with the most confirmed cases.
dataset = covid_data.sort_values('Total Cases', ascending=False).head(5)
# Four custom colours for five slices.
# NOTE(review): the fifth slice's colour is presumably left to Plotly's
# defaults — confirm this is intended.
colors = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']

fig = go.Figure(data=[go.Pie(labels=dataset['State/UTs'],
                             values=dataset['Total Cases'])])
# Show label and value on each slice; hover shows label and percentage.
fig.update_traces(
    hoverinfo='label+percent',
    textinfo='label +value',
    textfont_size=15,
    marker=dict(colors=colors, line=dict(color='#000000', width=2)),
)
fig.show()

### Find top 5 state/UTs having highest active cases

In [None]:
# Sort into a local frame rather than reordering the shared `covid_data`
# in place, so this cell has no hidden side effect on other cells.
active_sorted = covid_data.sort_values("Active")

# One bar per State/UT, ascending by number of active cases.
fig = px.bar(
    active_sorted,
    x='State/UTs',
    y='Active',
    text='Active',
    title="Active Cases in State/UTs",
)
# Bar labels use the '.2s' format and are drawn outside (above) each bar.
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', width=1300, height=600)
fig.show()

#### From above barplot, we can conclude:
1. Top 5 States containing highest number of active covid cases
    - Kerala: 219441
    - Maharashtra: 54763
    - Tamil Nadu: 18412
    - Andhra Pradesh: 16850
    - Mizoram: 14693
    
    
2. States containing lowest number of active covid cases
    - Dadra and Nagar Haveli and Daman and Diu: 4
    - Andaman and Nicobar: 6
    - Lakshadweep: 31
    - Chandigarh: 43
    - Ladakh: 69

In [None]:
# Five State/UTs with the most active cases.
dataset = covid_data.sort_values(by='Active', ascending=False).head(5)
# Four custom colours are supplied for the five slices.
colors = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']

# Configure the trace up front: label and value on each slice, label and
# percentage on hover, black outlines between slices.
top_active_pie = go.Pie(
    labels=dataset['State/UTs'],
    values=dataset['Active'],
    hoverinfo='label+percent',
    textinfo='label +value',
    textfont_size=15,
    marker=dict(colors=colors, line=dict(color='#000000', width=2)),
)
fig = go.Figure(data=[top_active_pie])
fig.show()

### Find top 5 state/UTs having highest discharged number of cases

In [None]:
# Sort into a local frame rather than reordering the shared `covid_data`
# in place, so this cell has no hidden side effect on other cells.
discharged_sorted = covid_data.sort_values("Discharged")

# One bar per State/UT, ascending by number of discharged cases.
fig = px.bar(
    discharged_sorted,
    x='State/UTs',
    y='Discharged',
    text='Discharged',
    title="Discharged in State/UTs",
)
# Bar labels use the '.2s' format and are drawn outside (above) each bar.
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', width=1300, height=600)
fig.show()

#### From above barplot, we can conclude:
1. Top 5 States containing highest number of Discharged covid cases
    - Maharashtra: 6272800
    - Kerala: 3817004
    - Karnataka: 2893715
    - Tamil Nadu: 2563101
    - Andhra Pradesh: 1985566	
    
    
2. States containing lowest number of Discharged covid cases
    - Andaman and Nicobar: 7431
    - Lakshadweep: 10265
    - Dadra and Nagar Haveli and Daman and Diu: 10655
    - Ladakh: 20284
    - Sikkim: 28414

In [None]:
# Five State/UTs with the most discharged cases.
dataset = covid_data.sort_values(by='Discharged', ascending=False).head(5)
# Four custom colours are supplied for the five slices.
colors = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']

# Configure the trace up front: label and value on each slice, label and
# percentage on hover, black outlines between slices.
top_discharged_pie = go.Pie(
    labels=dataset['State/UTs'],
    values=dataset['Discharged'],
    hoverinfo='label+percent',
    textinfo='label +value',
    textfont_size=15,
    marker=dict(colors=colors, line=dict(color='#000000', width=2)),
)
fig = go.Figure(data=[top_discharged_pie])
fig.show()

### Find top 5 state/UTs having highest number of deaths

In [None]:
# Sort into a local frame rather than reordering the shared `covid_data`
# in place, so this cell has no hidden side effect on other cells.
deaths_sorted = covid_data.sort_values("Deaths")

# One bar per State/UT, ascending by number of deaths.
fig = px.bar(
    deaths_sorted,
    x='State/UTs',
    y='Deaths',
    text='Deaths',
    title="Deaths in State/UTs",
)
# Bar labels use the '.2s' format and are drawn outside (above) each bar.
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', width=1300, height=600)
fig.show()

#### From above barplot, we can conclude:
1. Top 5 States containing highest number of covid deaths
    - Maharashtra: 137313
    - Karnataka: 37318
    - Tamil Nadu: 34921
    - Delhi: 25082
    - Uttar Pradesh: 22823
    
    
2. States containing lowest number of covid deaths
    - Dadra and Nagar Haveli and Daman and Diu: 4
    - Lakshadweep: 51
    - Andaman and Nicobar: 129 
    - Ladakh: 207
    - Mizoram: 217

In [None]:
# Five State/UTs with the most deaths.
dataset = covid_data.sort_values(by='Deaths', ascending=False).head(5)
# Four custom colours are supplied for the five slices.
colors = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']

# Configure the trace up front: label and value on each slice, label and
# percentage on hover, black outlines between slices.
top_deaths_pie = go.Pie(
    labels=dataset['State/UTs'],
    values=dataset['Deaths'],
    hoverinfo='label+percent',
    textinfo='label +value',
    textfont_size=15,
    marker=dict(colors=colors, line=dict(color='#000000', width=2)),
)
fig = go.Figure(data=[top_deaths_pie])
fig.show()

### Find top 5 state/UTs having highest active ratio

In [None]:
# Sort into a local frame rather than reordering the shared `covid_data`
# in place, so this cell has no hidden side effect on other cells.
active_ratio_sorted = covid_data.sort_values("Active Ratio (%)")

# Line through the State/UTs in ascending order of active ratio, with each
# point labelled by its value.
fig = px.line(
    active_ratio_sorted,
    x='State/UTs',
    y='Active Ratio (%)',
    text='Active Ratio (%)',
    title="Active Ratio (%) of State/UTs",
)
fig.update_traces(textposition='top center')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', width=2000, height=600)
fig.show()

#### From above line plot, we can conclude:
1. Top 5 States containing highest Active Ratio
    - Mizoram: 15.03
    - Kerala: 5.41
    - Sikkim: 3.66
    - Meghalaya: 3.07
    - Manipur: 2.98
    
    
2. States containing lowest Active Ratio
    - Uttar Pradesh: 0.01%
    - Madhya Pradesh: 0.01%
    - Bihar: 0.01%
    - Rajasthan: 0.01%
    - Delhi: 0.01%

In [None]:
# Five State/UTs with the highest active ratio.
dataset = covid_data.sort_values(by='Active Ratio (%)', ascending=False).head(5)
# Four custom colours are supplied for the five slices.
colors = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']

# Configure the trace up front: label and value on each slice, label and
# percentage on hover, black outlines between slices.
top_active_ratio_pie = go.Pie(
    labels=dataset['State/UTs'],
    values=dataset['Active Ratio (%)'],
    hoverinfo='label+percent',
    textinfo='label +value',
    textfont_size=15,
    marker=dict(colors=colors, line=dict(color='#000000', width=2)),
)
fig = go.Figure(data=[top_active_ratio_pie])
fig.show()

### First 5 state/UTs having Lowest discharge ratio

In [None]:
# Sort into a local frame rather than reordering the shared `covid_data`
# in place, so this cell has no hidden side effect on other cells.
discharge_ratio_sorted = covid_data.sort_values("Discharge Ratio (%)")

# Line through the State/UTs in ascending order of discharge ratio, with
# each point labelled by its value.
fig = px.line(
    discharge_ratio_sorted,
    x='State/UTs',
    y='Discharge Ratio (%)',
    text='Discharge Ratio (%)',
    title="Discharge Ratio (%) of State/UTs",
)
fig.update_traces(textposition='top center')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', width=2000, height=600)
fig.show()

#### From above line plot, we can conclude:
1. Top 5 States containing highest Discharge Ratio
    - Dadra and Nagar Haveli and Daman and Diu: 99.92%
    - Lakshadweep: 99.21%
    - Rajasthan: 99.05%
    - Gujarat: 98.76%
    - Chandigarh: 98.69%
    
    
2. States containing lowest Discharge Ratio
    - Mizoram: 84.60%
    - Kerala: 94.08%
    - Sikkim: 95.10%
    - Meghalaya: 95.20%
    - Nagaland: 95.23%

In [None]:
# This section is about the State/UTs with the LOWEST discharge ratio, so
# take the five smallest values (ascending sort). The previous descending
# sort selected the five highest instead.
dataset = covid_data.sort_values('Discharge Ratio (%)', ascending=True).head(5)
# Four custom colours for five slices.
# NOTE(review): the fifth slice's colour is presumably left to Plotly's
# defaults — confirm this is intended.
colors = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']

fig = go.Figure(data=[go.Pie(labels=dataset['State/UTs'],
                             values=dataset['Discharge Ratio (%)'])])
# Show label and value on each slice; hover shows label and percentage.
fig.update_traces(
    hoverinfo='label+percent',
    textinfo='label +value',
    textfont_size=15,
    marker=dict(colors=colors, line=dict(color='#000000', width=2)),
)
fig.show()

### Find top 5 state/UTs having highest Death ratio

In [None]:
# Reorder the shared frame by death ratio (ascending). The sort is kept
# in place: it leaves `covid_data` in this order for any later cell that
# reads the frame as-is.
covid_data.sort_values(by="Death Ratio (%)", inplace=True)

# Line through the State/UTs in that order, each point labelled by its value.
fig = px.line(
    covid_data,
    x='State/UTs',
    y='Death Ratio (%)',
    text='Death Ratio (%)',
    title="Death Ratio (%) of State/UTs",
)
fig.update_traces(textposition='top center')
fig.update_layout(
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    width=2000,
    height=600,
)
fig.show()

#### From above line plot, we can conclude:
1. Top 5 States containing highest Death Ratio
    - Punjab: 2.74%
    - Uttarakhand: 2.15% 
    - Maharashtra: 2.12%
    - Nagaland: 2.06%
    - Goa: 1.84%
    
    
2. States containing lowest Death Ratio
    - Dadra and Nagar Haveli and Daman and Diu: 0.04%
    - Mizoram: 0.37%
    - Lakshadweep: 0.49%
    - Arunachal Pradesh: 0.49% 
    - Kerala: 0.51%
    

In [None]:
# Five State/UTs with the highest death ratio.
dataset = covid_data.sort_values(by='Death Ratio (%)', ascending=False).head(5)
# Four custom colours are supplied for the five slices.
colors = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']

# Configure the trace up front: label and value on each slice, label and
# percentage on hover, black outlines between slices.
top_death_ratio_pie = go.Pie(
    labels=dataset['State/UTs'],
    values=dataset['Death Ratio (%)'],
    hoverinfo='label+percent',
    textinfo='label +value',
    textfont_size=15,
    marker=dict(colors=colors, line=dict(color='#000000', width=2)),
)
fig = go.Figure(data=[top_death_ratio_pie])
fig.show()

### Combined ratio plot and choropleth maps to analyze features

In [None]:
# `go` and `px` are already imported in the notebook's import cell, so the
# duplicate in-cell imports have been dropped.

# Fix the x-axis order explicitly (ascending death ratio) instead of
# relying on whatever order an earlier in-place sort left `covid_data` in.
ratios_sorted = covid_data.sort_values("Death Ratio (%)")

# Overlay the three ratio columns on a shared State/UT axis.
fig = go.Figure()
fig.add_trace(go.Scatter(x=ratios_sorted['State/UTs'], y=ratios_sorted['Active Ratio (%)'],
                    mode='lines',
                    name='Active Ratio (%)'))
fig.add_trace(go.Scatter(x=ratios_sorted['State/UTs'], y=ratios_sorted['Discharge Ratio (%)'],
                    mode='lines+markers',
                    name='Discharge Ratio (%)'))
fig.add_trace(go.Scatter(x=ratios_sorted['State/UTs'], y=ratios_sorted['Death Ratio (%)'],
                    mode='lines', name='Death Ratio (%)'))
fig.update_layout(title='Combined analysis of Active Ratio, Discharge Ratio and Death Ratio (%)',
                   xaxis_title='State/UTs',
                   yaxis_title='Ratio (%)')
fig.show()

# Map of total cases per State/UT. Rows are joined to the GeoJSON features
# by matching the 'State/UTs' column against each feature's properties.ST_NM.
# NOTE(review): a State/UT whose spelling differs from the GeoJSON's ST_NM
# would presumably be left uncoloured — verify the names line up.
fig = px.choropleth(
    covid_data,
    geojson="https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson",
    featureidkey='properties.ST_NM',
    locations='State/UTs',
    color='Total Cases',
    color_continuous_scale='Blues',
    title = 'Total Cases '
)

# Fit the view to the plotted locations and hide the base map.
fig.update_geos(fitbounds="locations", visible=False)

fig.show()

In [None]:
# `px` is already imported in the notebook's import cell and `pd` is not
# used here, so the duplicate in-cell imports have been dropped.

# Map of active cases per State/UT. Rows are joined to the GeoJSON features
# by matching the 'State/UTs' column against each feature's properties.ST_NM.
# NOTE(review): a State/UT whose spelling differs from the GeoJSON's ST_NM
# would presumably be left uncoloured — verify the names line up.
fig = px.choropleth(
    covid_data,
    geojson="https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson",
    featureidkey='properties.ST_NM',
    locations='State/UTs',
    color='Active',
    color_continuous_scale='Blues',
    title = 'Active Cases '
)

# Fit the view to the plotted locations and hide the base map.
fig.update_geos(fitbounds="locations", visible=False)

fig.show()

In [None]:
# `px` is already imported in the notebook's import cell and `pd` is not
# used here, so the duplicate in-cell imports have been dropped.

# Map of the active ratio per State/UT. Rows are joined to the GeoJSON
# features by matching the 'State/UTs' column against each feature's
# properties.ST_NM.
# NOTE(review): a State/UT whose spelling differs from the GeoJSON's ST_NM
# would presumably be left uncoloured — verify the names line up.
fig = px.choropleth(
    covid_data,
    geojson="https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson",
    featureidkey='properties.ST_NM',
    locations='State/UTs',
    color='Active Ratio (%)',
    color_continuous_scale='Blues',
    title = 'Active Ratio (%)'
)

# Fit the view to the plotted locations and hide the base map.
fig.update_geos(fitbounds="locations", visible=False)

fig.show()

In [None]:
# `px` is already imported in the notebook's import cell and `pd` is not
# used here, so the duplicate in-cell imports have been dropped.

# Map of the discharge ratio per State/UT. Rows are joined to the GeoJSON
# features by matching the 'State/UTs' column against each feature's
# properties.ST_NM.
# NOTE(review): a State/UT whose spelling differs from the GeoJSON's ST_NM
# would presumably be left uncoloured — verify the names line up.
fig = px.choropleth(
    covid_data,
    geojson="https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson",
    featureidkey='properties.ST_NM',
    locations='State/UTs',
    color='Discharge Ratio (%)',
    color_continuous_scale='Blues',
    title = 'Discharge Ratio (%)'
)

# Fit the view to the plotted locations and hide the base map.
fig.update_geos(fitbounds="locations", visible=False)

fig.show()

In [None]:
# `px` is already imported in the notebook's import cell and `pd` is not
# used here, so the duplicate in-cell imports have been dropped.

# Map of the death ratio per State/UT. Rows are joined to the GeoJSON
# features by matching the 'State/UTs' column against each feature's
# properties.ST_NM.
# NOTE(review): a State/UT whose spelling differs from the GeoJSON's ST_NM
# would presumably be left uncoloured — verify the names line up.
fig = px.choropleth(
    covid_data,
    geojson="https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson",
    featureidkey='properties.ST_NM',
    locations='State/UTs',
    color='Death Ratio (%)',
    color_continuous_scale='Blues',
    title = 'Death Ratio (%)'
)

# Fit the view to the plotted locations and hide the base map.
fig.update_geos(fitbounds="locations", visible=False)

fig.show()

# Conclusion

<table>
<tr><th>Top 5 states</th><th>Total Cases</th><th>Active Cases</th><th>Discharged Cases</th><th>Death</th><th>Active Ratio</th><th>Discharge Ratio<br>(Lowest 5 States)</th><th>Death Ratio</th>
</tr>
<tr><th>1</th><td>Maharashtra</td><td>Kerala</td><td>Maharashtra</td><td>Maharashtra</td><td>Mizoram</td><td>Mizoram</td><td>Punjab</td>
</tr>
<tr><th>2</th><td>Kerala</td><td>Maharashtra</td><td>Kerala</td><td>Karnataka</td><td>Kerala</td><td>Kerala</td><td>Uttarakhand</td>
</tr>
<tr><th>3</th><td>Karnataka</td><td>Tamil Nadu</td><td>Karnataka</td><td>Tamil Nadu</td><td>Sikkim</td><td>Sikkim</td><td>Maharashtra</td>
</tr>
<tr><th>4</th><td>Tamil Nadu</td><td>Andhra Pradesh</td><td>Tamil Nadu</td><td>Delhi</td><td>Meghalaya</td><td>Meghalaya</td><td>Nagaland</td>
</tr>
<tr><th>5</th><td>Andhra Pradesh</td><td>Mizoram</td><td>Andhra Pradesh</td><td>Uttar Pradesh</td><td>Manipur</td><td>Nagaland</td><td>Goa</td>
</tr>
</table>

### From our visualizations we can infer that Maharashtra, Kerala, Karnataka, Tamil Nadu and Andhra Pradesh are at high risk of a COVID third wave. Therefore, it is vital to concentrate on these states in order to mitigate that risk.