In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import seaborn as sn
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import iplot
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Never truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None

# Load each CSV of the "covid19-in-india" Kaggle dataset into its own frame.
df_india = pd.read_csv('/kaggle/input/covid19-in-india/covid_19_india.csv')
df_individual = pd.read_csv('/kaggle/input/covid19-in-india/IndividualDetails.csv')
df_pop_ind = pd.read_csv('/kaggle/input/covid19-in-india/population_india_census2011.csv')
df_daily_test = pd.read_csv('/kaggle/input/covid19-in-india/ICMRTestingLabs.csv')
df_hospitals = pd.read_csv('/kaggle/input/covid19-in-india/HospitalBedsIndia.csv')

# Preview the first rows of the main case table.
df_india.head()

In [None]:
# Parse 'Date' once, then derive the numeric month and day-of-month columns.
# NOTE(review): no format/dayfirst is passed, so pandas infers the layout;
# confirm the inferred month/day order matches how the CSV stores dates.
parsed_dates = pd.to_datetime(df_india['Date'])
df_india['Month'] = parsed_dates.dt.month
df_india['Day'] = parsed_dates.dt.day
df_india.head()

In [None]:
# Month number -> label. The 'June'/'July' spellings are kept because later
# cells filter on exactly these strings. Months 9-12 are included so rows
# dated after August get a label instead of silently becoming NaN.
month_map = {1:'Jan',2:'Feb',3:'Mar',4:'Apr',5:'May',6:'June',7:'July',8:'Aug',
             9:'Sep',10:'Oct',11:'Nov',12:'Dec'}

# Derive the label from 'Date' instead of remapping 'Month' in place:
# re-running an in-place remap would feed the labels back through the
# integer-keyed map and turn the whole column into NaN.
df_india['Month'] = pd.to_datetime(df_india['Date']).dt.month.map(month_map)
df_india.head()

In [None]:
# Display the August rows styled with a 'plasma' background gradient.
df_india.loc[df_india['Month'].eq('Aug')].style.background_gradient(cmap='plasma')

# ARRANGING THE DATAFRAMES MONTHWISE

In [None]:
# Slice the case table into one frame per month label and unpack the slices
# into the individual names used by the rest of the notebook.
(df_jan, df_feb, df_mar, df_apr,
 df_may, df_june, df_july, df_aug) = [
    df_india[df_india['Month'] == label]
    for label in ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'June', 'July', 'Aug')
]

In [None]:
def confirmsort(df,i):
    """Return ``df`` ordered by column ``i``, largest values first.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to sort; it is not modified.
    i : str
        Name of the column to sort on.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted in descending order of ``i`` (original index kept).
    """
    return df.sort_values(by=i, ascending=False)

In [None]:
# Re-order every monthly frame by 'Confirmed', highest first.
(df_jan, df_feb, df_mar, df_apr,
 df_may, df_june, df_july, df_aug) = [
    confirmsort(frame, 'Confirmed')
    for frame in (df_jan, df_feb, df_mar, df_apr,
                  df_may, df_june, df_july, df_aug)
]

In [None]:
# Print the (rows, columns) shape of every monthly frame.
# df_may was previously left out of this list, so May was never reported.
month_list = [df_jan,df_feb,df_mar,df_apr,df_may,df_june,df_july,df_aug]
for i in month_list:
    print(f' shape of the dataframe is <{i.shape}>')
    print("="*75)

# COUNTPLOT OF THE NUMBER OF RECORDS PER DAY OF THE MONTH

In [None]:
def countplots(df):
    """Draw a seaborn count plot of the 'Day' column of ``df``.

    Each bar shows how many rows of ``df`` share a given 'Day' value
    (a count plot tallies rows; it does not sum any case column).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Day' column.
    """
    # Global seaborn look-and-feel for this and subsequent figures.
    sn.set_style('darkgrid')
    sn.set_palette('plasma')

    plt.figure(figsize=(25, 12.5))
    sn.countplot(x='Day', data=df)
    plt.title('Cases on  Date')
    plt.show()


In [None]:
# Month labels and their frames, kept in the same order so zip() pairs each
# frame with its own label. df_may was previously missing from month_list,
# which shifted every label after 'Apr' onto the wrong frame (June data was
# announced as 'May', and so on) and dropped the last month.
months = ['Jan','Feb','Mar','Apr','May','June','July','Aug']
month_list = [df_jan,df_feb,df_mar,df_apr,df_may,df_june,df_july,df_aug]
for i,j in zip(month_list,months):
    print(f' stats per day for the <{j}> month is shown below ↓')
    countplots(i)
    print("="*75)

# AREA PLOT FOR THE NUMBER OF CASES MONTH WISE

In [None]:
def melt(df):
    """Aggregate case counts per date and reshape them to long format.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'Date', 'Cured', 'Deaths' and 'Confirmed' columns.

    Returns
    -------
    pandas.DataFrame
        Columns 'Date', 'Cases' (one of 'Cured', 'Deaths', 'Confirmed') and
        'Count' (the sum of that column over all rows sharing the date).
    """
    case_cols = ['Cured','Deaths','Confirmed']
    # Select the columns with a list: selecting several groupby columns with
    # a bare tuple (['a','b','c'] without the inner brackets) raises on
    # pandas >= 2.0.
    temp = df.groupby('Date')[case_cols].sum().reset_index()
    temp = temp.melt(id_vars = 'Date',value_vars = case_cols,
                   var_name = 'Cases',value_name = 'Count')
    return temp

In [None]:
def areaplot(df):
    """Show a Plotly area chart of cured / death / confirmed counts by date.

    Parameters
    ----------
    df : pandas.DataFrame
        Case frame accepted by ``melt`` (needs 'Date', 'Cured', 'Deaths',
        'Confirmed').
    """
    long_cases = melt(df)
    case_colours = {'Cured':'green','Deaths':'Red','Confirmed':'purple'}

    fig = px.area(
        data_frame=long_cases,
        x='Date',
        y='Count',
        color='Cases',
        height=600,
        title='Cases over time',
        color_discrete_map=case_colours,
    )
    # Add the draggable range slider under the x axis.
    fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()

In [None]:
# One area chart per (frame, label) pair, separated by a ruler line.
for frame, label in zip(month_list, months):
    print(f' cases over time for the <{label}> month is shown below ↓')
    areaplot(frame)
    print("=" * 75)

# TREE MAP ANALYSIS FOR CONFIRMED, CURED AND DEATH CASES STATEWISE

In [None]:
def treemap(df,i,j):
    """Show a state-wise Plotly treemap of column ``i`` for the last date in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Case frame with 'Date' and 'State/UnionTerritory' columns.
    i : str
        Column whose values size the tiles.
    j : str
        Figure title.
    """
    # NOTE(review): max() compares 'Date' as stored; if the column holds
    # strings this is a lexicographic maximum — confirm that matches the
    # latest calendar date for the frames passed in.
    newest_date = max(df['Date'])
    snapshot = df[df['Date'] == newest_date]
    ranked = snapshot.sort_values(i, ascending=False).reset_index(drop=True)

    fig = px.treemap(
        ranked,
        path=['State/UnionTerritory'],
        values=i,
        height=700,
        title=j,
        color_discrete_sequence=px.colors.qualitative.Dark24_r,
    )
    fig.data[0].textinfo = 'label+text+value'
    fig.show()


In [None]:
def showmaps(i,t):
    """Draw one state-wise treemap per month from March onwards.

    Parameters
    ----------
    i : str
        Case column passed to ``treemap`` as the tile size.
    t : str
        Title passed to ``treemap`` for every figure.
    """
    # Skip the first two labels ('Jan', 'Feb'); the frames below must stay in
    # the same order as these labels because they are paired with zip().
    months_edit = months[2:]
    # df_may was previously missing here, which printed the wrong month name
    # above every figure after April and never reached the last month.
    month_list = [df_mar,df_apr,df_may,df_june,df_july,df_aug]
    for df,j in zip(month_list,months_edit):
        print(f' stats for the <{j}> month is shown below ↓')
        treemap(df,i,t)
        print("="*100)

In [None]:
# Monthly treemaps sized by the 'Confirmed' column.
showmaps(i='Confirmed', t='No.of Confirmed Cases')

# NUMBER OF CURED CASES STATEWISE FROM THE PANDEMIC

In [None]:
# Monthly treemaps sized by the 'Cured' column.
showmaps(i='Cured', t='No.of Cured Cases')

# TREE MAP ANALYSIS FOR NUMBER OF DEATHS STATEWISE

In [None]:
# Monthly treemaps sized by the 'Deaths' column.
showmaps(i='Deaths', t='No.of Deaths StateWise')

In [None]:
# Preview the first three rows of the per-patient table.
df_individual.head(n=3)

In [None]:
def individual_count(df,i,t):
    """Draw a seaborn count plot of column ``i`` of ``df`` with title ``t``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to tally.
    i : str
        Column whose distinct values become the bars.
    t : str
        Plot title.
    """
    # Global seaborn look-and-feel for this and subsequent figures.
    sn.set_style('darkgrid')
    sn.set_palette('plasma')

    plt.figure(figsize=(25, 12.5))
    sn.countplot(x=i, data=df, palette='plasma')
    plt.title(t)
    plt.show()

In [None]:
# Tally of the 'gender' column in the per-patient table.
individual_count(df=df_individual, i='gender', t='Gender Count')

In [None]:
# Tally of the 'age' column in the per-patient table.
individual_count(df=df_individual, i='age', t='Age Count')

# STATS FOR THE NUMBER OF TESTING CENTERS IN INDIA

In [None]:
# Integer code for each testing-centre category; later cells filter on
# these codes (1 = government, 3 = private).
test_map = {
    'Government Laboratory': 1,
    'Collection Site': 2,
    'Private Laboratory': 3,
}
# Categories missing from test_map become NaN in 'type_map'.
df_daily_test['type_map'] = df_daily_test['type'].map(test_map)
df_daily_test.head()

In [None]:
# Tally of the 'type' column in the testing-centre table.
individual_count(df=df_daily_test, i='type', t='Count of Types of Testing Center in India')

## TREE MAP ANALYSIS FOR OVERALL TESTING CENTERS STATE/UT WISE

In [None]:
# State-wise treemap of how many testing centres each state has.
# px.treemap adds up `values` for all rows sharing a path, so sizing by
# 'type_map' summed the category codes (1/2/3) rather than counting rows.
# A constant weight of 1 per row makes the tile value a true count.
# NOTE(review): presumably each row of df_daily_test is one testing centre — confirm.
fig = px.treemap(df_daily_test.assign(center_count=1),path = ['state'],values = 'center_count',height=700,
                title = 'Count of Testing Centers State wise',color_discrete_sequence=px.colors.qualitative.Dark24)
fig.data[0].textinfo = 'label+text+value'
fig.show()

## TREE MAP ANALYSIS FOR GOVERNMENT AND PRIVATE TESTING CENTERS

In [None]:
# Keep only rows coded 1 (government laboratories) and draw their
# state-wise treemap. Every kept row carries the value 1, so the summed
# 'type_map' per state equals the number of rows for that state.
df_government = df_daily_test.loc[df_daily_test['type_map'].eq(1)]
fig = px.treemap(
    df_government,
    path=['state'],
    values='type_map',
    height=700,
    title='Count of Government Testing Centers State wise',
    color_discrete_sequence=px.colors.qualitative.Dark24,
)
fig.data[0].textinfo = 'label+text+value'
fig.show()

In [None]:
# Keep only rows coded 3 (private laboratories) and draw their state-wise
# treemap. px.treemap adds up `values` per state, so sizing by 'type_map'
# showed three times the number of rows (every kept row carries the code 3).
# A constant weight of 1 per row makes the tile value a true count.
# NOTE(review): presumably each row of df_daily_test is one testing centre — confirm.
df_private = df_daily_test[df_daily_test['type_map'] == 3]
fig = px.treemap(df_private.assign(center_count=1),path = ['state'],values = 'center_count',height=700,
                title = 'Count of Private Testing Centers State wise',color_discrete_sequence=px.colors.qualitative.Dark24)
fig.data[0].textinfo = 'label+text+value'
fig.show()

## Population Census 

In [None]:
# Preview the first two rows of the census table.
df_pop_ind.head(n=2)

In [None]:
# Density heatmap of 'Population' against state / union territory, coloured
# on a magenta-to-aqua scale.
heat_scale = ['magenta','aqua']
fig = px.density_heatmap(
    data_frame=df_pop_ind,
    x='State / Union Territory',
    y='Population',
    color_continuous_scale=heat_scale,
)
fig.show()

## Data Preprocessing

In [None]:
# Look at the raw census columns before cleaning them.
df_pop_ind.head(n=2)

In [None]:
# Take the text before 'km2' in each 'Area' entry, drop the thousands
# separators and store the result as an integer in a new 'New_Area' column.
df_pop_ind['New_Area'] = df_pop_ind['Area'].map(
    lambda area_text: int(area_text.split('km2')[0].replace(',', ''))
)
df_pop_ind.head(3)

In [None]:
# Keep the part of each 'Density' entry before the first '/', drop the
# thousands separators and convert it to a number. The column used to be
# left as text even though a later cell uses it as the numeric `values` of
# a treemap. Casting to str first keeps the cell re-runnable once the
# column is already numeric.
df_pop_ind['Density'] = (
    df_pop_ind['Density']
    .astype(str)
    .str.split('/').str[0]
    .str.replace(',', '', regex=False)
    .str.strip()
    .astype(float)
)
df_pop_ind.head(2)

In [None]:
# Replace 'Area' with the integer that precedes 'km2' (thousands separators
# removed) and drop the temporary 'New_Area' column.
# Casting to str first and ignoring a missing 'New_Area' keep the cell
# re-runnable: previously a second run failed because 'Area' already held
# integers (no .split) and 'New_Area' had already been dropped.
df_pop_ind['Area'] = (
    df_pop_ind['Area']
    .astype(str)
    .str.split('km2').str[0]
    .str.replace(',', '', regex=False)
    .str.strip()
    .astype(int)
)
df_pop_ind = df_pop_ind.drop(columns=['New_Area'], errors='ignore')
df_pop_ind.head(2)

In [None]:
# Every census column except the first two; these are the columns that get
# a treemap each further down.
param = df_pop_ind.columns[2:]
param

# TREE MAP ANALYSIS FOR POPULATION CENSUS

In [None]:
def populationtreemap(df,i,j):
    """Show a Plotly treemap of column ``i`` per state / union territory.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'State / Union Territory' column and column ``i``.
    i : str
        Column whose values size the tiles.
    j : str
        Figure title.
    """
    # Plot the frame that was passed in; the function used to ignore `df`
    # and always read the global df_pop_ind.
    fig = px.treemap(data_frame=df,path = ['State / Union Territory'],values=i,
                    color_discrete_sequence=px.colors.qualitative.Dark2_r,title=j,height=700)
    fig.data[0].textinfo = 'label+text+value'
    fig.show()

In [None]:
# One title per column in `param`, in the same order (paired with zip()).
# The area title now says sq.km: 'Area' is parsed from the number that
# precedes 'km2' in the raw text, so the old "(sq.mi)" label was wrong.
title_list = ['Population State Wise','Rural Population State Wise','Urban Population Statewise',
             'Area of the State in (sq.km)','Density of the State','Gender Ratio StateWise']
for i,j in zip(param,title_list):
    print(f' stats for <{j}> is shown below ↓')
    populationtreemap(df_pop_ind,i,j)
    print("="*100)

In [None]:
import geopandas as gdp
import geoplot as gplt
import geoplot.crs as gcrs

In [None]:
# Choropleth of 'Area' on an Albers equal-area projection.
# The string literals used typographic quotes (U+2019), which Python rejects
# with a SyntaxError; they are plain ASCII quotes now.
# NOTE(review): df_pop_ind comes from pd.read_csv and no geometry column is
# attached in the visible cells; geoplot's choropleth expects a GeoDataFrame,
# so this probably still needs a join with state boundary shapes — confirm.
gplt.choropleth(
    df_pop_ind, hue='Area', projection=gcrs.AlbersEqualArea(),
    edgecolor='black', linewidth=1, cmap='YlOrRd', legend=True)

## Hospital Beds

In [None]:
# Drop the 'All India' row so only individual states / UTs remain,
# then list the remaining names.
df_hospitals = df_hospitals[df_hospitals['State/UT'].ne('All India')]
df_hospitals['State/UT'].unique()

In [None]:
def hospitalbarplots(df,i):
    """Show a Plotly bar chart of column ``i`` for every state / UT in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'State/UT' column and column ``i``.
    i : str
        Column plotted on the y axis.
    """
    axis_labels = {'x':'State/UT','y':i}
    fig = px.bar(
        data_frame=df,
        x='State/UT',
        y=i,
        color='State/UT',
        labels=axis_labels,
        color_discrete_sequence=px.colors.qualitative.Dark24,
    )
    fig.show()
    

In [None]:
# Hospital-table columns that each get their own chart.
show_list = [
    'NumPrimaryHealthCenters_HMIS',
    'NumCommunityHealthCenters_HMIS',
    'NumSubDistrictHospitals_HMIS',
    'NumDistrictHospitals_HMIS',
    'TotalPublicHealthFacilities_HMIS',
    'NumPublicBeds_HMIS',
    'NumRuralHospitals_NHP18',
    'NumRuralBeds_NHP18',
    'NumUrbanHospitals_NHP18',
    'NumUrbanBeds_NHP18',
]
# One bar chart per column, separated by a ruler line.
for column in show_list:
    print(f' bar plot for the <{column}> center is shown below ↓')
    hospitalbarplots(df_hospitals, column)
    print("=" * 75)

## TREE MAP ANALYSIS FOR NUMBER OF HEALTH CENTERS

In [None]:
def hospitalstreemap(df,i,j):
    """Show a Plotly treemap of column ``i`` per state / UT.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'State/UT' column and column ``i``.
    i : str
        Column whose values size the tiles.
    j : str
        Figure title.
    """
    fig = px.treemap(
        data_frame=df,
        path=['State/UT'],
        values=i,
        color_discrete_sequence=px.colors.qualitative.Dark2,
        title=j,
        height=800,
    )
    fig.data[0].textinfo = 'label+text+value'
    fig.show()

In [None]:
# One treemap per hospital column; the column name doubles as the title.
for column in show_list:
    print(f' stats of the <{column}> is shown below ↓')
    hospitalstreemap(df_hospitals, column, column)
    print("=" * 100)