In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime

## Exploratory Data Analysis of Covid_19_India Dataset

In [None]:
# Load the day-by-day case records and convert the Date column to datetime64
# so that it can later be grouped by calendar month.
covid_data = (
    pd.read_csv("../input/covid19-in-india/covid_19_india.csv")
    .astype({'Date': 'datetime64[ns]'})
)
covid_data.head()

In [None]:
# Size of the case dataset as a (rows, columns) tuple.
covid_data.shape

In [None]:
# Column names, dtypes and non-null counts - a quick check for missing values.
covid_data.info()

In [None]:
# Summary statistics for the columns pandas selects by default in describe().
covid_data.describe()

## Statewise Analysis

In [None]:
# Collapse the per-day records into one row per state / union territory.
# The columns are selected with a list: selecting several columns with a bare
# tuple (groupby(...)['a', 'b']) is deprecated and raises in pandas >= 2.0.
# NOTE(review): if Confirmed/Cured/Deaths are running (cumulative) totals in the
# CSV, summing them across dates overstates the counts and .max() would be the
# appropriate aggregate - TODO confirm against the data.
state_wise = (
    covid_data
    .groupby('State/UnionTerritory')[['Confirmed', 'Cured', 'Deaths']]
    .sum()
    .reset_index()
)
# Deaths expressed as a percentage of confirmed cases for each state.
state_wise["Death_percentage"] = ((state_wise["Deaths"] / state_wise["Confirmed"]) * 100)
state_wise.style.background_gradient(cmap='magma')

In [None]:
# Rank the states by Confirmed once and reuse the result for both axes
# (the ranking was previously computed twice).
top_confirmed = state_wise.nlargest(10, "Confirmed")
# Passing the frame plus column names (as the month-wise charts do) labels the
# axes and hover text with the column names rather than the generic "x"/"y".
px.bar(top_confirmed,
       x="State/UnionTerritory",
       y="Confirmed",
       color_discrete_sequence=px.colors.diverging.Picnic,
       title="Top 10 states with highest number of Confirmed cases")

In [None]:
# Rank the states by Cured once and reuse the result for both axes
# (the ranking was previously computed twice).
top_cured = state_wise.nlargest(10, "Cured")
# Passing the frame plus column names (as the month-wise charts do) labels the
# axes and hover text with the column names rather than the generic "x"/"y".
px.bar(top_cured,
       x="State/UnionTerritory",
       y="Cured",
       color_discrete_sequence=px.colors.sequential.Sunset,
       title="Top 10 states with highest number of Cured cases")

In [None]:
# Rank the states by Deaths once and reuse the result for both axes
# (the ranking was previously computed twice).
top_deaths = state_wise.nlargest(10, "Deaths")
# Passing the frame plus column names (as the month-wise charts do) labels the
# axes and hover text with the column names rather than the generic "x"/"y".
px.bar(top_deaths,
       x="State/UnionTerritory",
       y="Deaths",
       color_discrete_sequence=px.colors.diverging.curl,
       title="Top 10 states with highest number of Deaths")

In [None]:
# Rank the states by Death_percentage once and reuse the result for both axes
# (the ranking was previously computed twice).
top_death_pct = state_wise.nlargest(10, "Death_percentage")
# Passing the frame plus column names (as the month-wise charts do) labels the
# axes and hover text with readable names rather than the generic "x"/"y".
px.bar(top_death_pct,
       x="State/UnionTerritory",
       y="Death_percentage",
       labels={'Death_percentage': 'Death percentage'},
       color_discrete_sequence=px.colors.diverging.Portland,
       title="Top 10 states with highest of Death percentage")

## Monthwise Analysis

In [None]:
# Aggregate the records into one row per calendar month (labelled by month end).
# numeric_only=True restricts the sum to numeric columns: since pandas 2.0 the
# default also tries to sum the text columns, which fails or concatenates
# strings. Older pandas dropped those columns silently, so results there are
# unchanged.
# NOTE(review): if Confirmed/Cured/Deaths are running (cumulative) totals in the
# CSV, summing them over every day of a month inflates the figures - TODO
# confirm against the data.
month_wise = (
    covid_data
    .groupby(pd.Grouper(key='Date', freq='M'))
    .sum(numeric_only=True)
    .drop(columns=['Sno'])   # the row serial number is meaningless once summed
    .reset_index()           # Date becomes the first column, index is 0..n-1
    .rename_axis('index')    # keep the index label shown in the styled table
)

# Put Confirmed directly after Date so the key figure leads the table.
month_wise.insert(1, 'Confirmed', month_wise.pop('Confirmed'))

# Deaths expressed as a percentage of confirmed cases for each month.
month_wise["Death_percentage"] = ((month_wise["Deaths"] / month_wise["Confirmed"]) * 100)
month_wise.style.background_gradient(cmap='twilight_shifted')

In [None]:
# Monthly Confirmed totals as bars, one colour per month; Cured and Deaths are
# available in the hover tooltip.
fig = px.bar(
    data_frame=month_wise,
    x='Date',
    y='Confirmed',
    color='Date',
    hover_data=['Cured', 'Deaths'],
    title="Monthwise Increase in Confirmed cases",
    labels={'Date':'Date(monthwise)'},
)
fig.show()

In [None]:
# Monthly Cured totals as bars, one colour per month; Confirmed and Deaths are
# available in the hover tooltip.
fig = px.bar(
    data_frame=month_wise,
    x='Date',
    y='Cured',
    color='Date',
    hover_data=['Confirmed','Deaths'],
    title="Monthwise Increase in Cured cases",
    labels={'Date':'Date(monthwise)'},
)
fig.show()

In [None]:
# Monthly Deaths totals as bars, one colour per month; Confirmed and Cured are
# available in the hover tooltip.
fig = px.bar(
    data_frame=month_wise,
    x='Date',
    y='Deaths',
    color='Date',
    hover_data=['Confirmed','Cured'],
    title="Monthwise Increase in Deaths cases",
    labels={'Date':'Date(monthwise)'},
)
fig.show()

In [None]:
# Death percentage per month, one colour per month; Confirmed and Deaths are
# available in the hover tooltip.
# The title previously read "Top 10 states with highest of Death percentage",
# copied from the state-wise chart; this figure is month-wise, so it is titled
# accordingly. The Date label matches the other month-wise charts.
fig = px.bar(month_wise,
             x='Date',
             y='Death_percentage',
             hover_data=['Confirmed','Deaths'], color='Date',
             labels={'Death_percentage':'Death percentage',
                     'Date':'Date(monthwise)'},
             title="Monthwise Death percentage")
fig.show()

## Exploratory Data Analysis of StatewiseTestingDetails Dataset

In [None]:
# Load the state-wise testing records and convert the Date column to
# datetime64 in the same step.
covid_testing = (
    pd.read_csv("../input/covid19-in-india/StatewiseTestingDetails.csv")
    .astype({'Date': 'datetime64[ns]'})
)
covid_testing.head()

In [None]:
# Size of the testing dataset as a (rows, columns) tuple.
covid_testing.shape

In [None]:
# Column names, dtypes and non-null counts - a quick check for missing values.
covid_testing.info()

In [None]:
# Derive Negative as TotalSamples minus Positive, then discard every row that
# still contains a missing value in any column.
covid_testing = (
    covid_testing
    .assign(Negative=lambda frame: frame['TotalSamples'] - frame['Positive'])
    .dropna()
)
covid_testing.info()

## Statewise Analysis

In [None]:
# One row per state holding the largest TotalSamples / Negative / Positive value
# recorded for it.
# The columns are selected with a list: selecting several columns with a bare
# tuple (groupby(...)['a', 'b']) is deprecated and raises in pandas >= 2.0.
covid_testing_state = (
    covid_testing
    .groupby('State')[['TotalSamples', 'Negative', 'Positive']]
    .max()
    .reset_index()
)
# Positive share of all samples, computed from the per-state aggregates.
# This previously divided columns of the row-level covid_testing frame, so the
# values were aligned by index and taken from unrelated daily rows rather than
# from each state's own totals.
covid_testing_state["Positive_percentage"] = (
    (covid_testing_state["Positive"] / covid_testing_state["TotalSamples"]) * 100
)
covid_testing_state.style.background_gradient(cmap='gist_earth_r')

In [None]:
# Ten states with the largest TotalSamples value, ranked once and plotted.
top_samples = covid_testing_state.nlargest(10, "TotalSamples")
px.bar(
    x=top_samples["State"],
    y=top_samples["TotalSamples"],
    title="Top 10 states with highest number of Total Samples",
    labels={'y':'Total Samples','x':'State'},
    color_discrete_sequence=px.colors.sequential.haline,
)

In [None]:
# Ten states with the largest Negative value, ranked once and plotted.
top_negative = covid_testing_state.nlargest(10, "Negative")
px.bar(
    x=top_negative["State"],
    y=top_negative["Negative"],
    title="Top 10 states with highest number of Negative cases",
    labels={'y':'Total Negative cases','x':'State'},
    color_discrete_sequence=px.colors.sequential.turbid,
)

In [None]:
# Ten states with the largest Positive value, ranked once and plotted.
top_positive = covid_testing_state.nlargest(10, "Positive")
px.bar(
    x=top_positive["State"],
    y=top_positive["Positive"],
    title="Top 10 states with highest number of Positive cases",
    labels={'y':'Total Positive Cases','x':'State'},
    color_discrete_sequence=px.colors.sequential.solar,
)

In [None]:
# Ten states with the largest Positive_percentage value, ranked once and plotted.
top_positive_pct = covid_testing_state.nlargest(10, "Positive_percentage")
px.bar(
    x=top_positive_pct["State"],
    y=top_positive_pct["Positive_percentage"],
    title="Top 10 states with highest Positive percentage",
    labels={'y':'Positive Percentage','x':'State'},
    color_discrete_sequence=px.colors.sequential.Aggrnyl,
    height=420,
)

### There is another dataset named "covid_vaccine_statewise" attached to this same project. I will upload another Notebook analysing that data and will attach a link to that Notebook as a reference in this Project.