# Exploratory Data Analysis of Covid_19_India Dataset

In [None]:
#Importing the important libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load each CSV file of the Kaggle COVID-19 India dataset into its own DataFrame.

# Latest figures per district.
district_data = pd.read_csv(
    "/kaggle/input/covid19-corona-virus-india-dataset/district_level_latest.csv"
)
# Latest figures per state.
state_data = pd.read_csv(
    "/kaggle/input/covid19-corona-virus-india-dataset/state_level_latest.csv"
)
# Daily figures for the whole nation.
country_data = pd.read_csv(
    "/kaggle/input/covid19-corona-virus-india-dataset/nation_level_daily.csv"
)
# Patient-level records.
patient_data = pd.read_csv(
    "/kaggle/input/covid19-corona-virus-india-dataset/patients_data.csv"
)
# Testing figures per state.
tests_state_data = pd.read_csv(
    "/kaggle/input/covid19-corona-virus-india-dataset/tests_state_wise.csv"
)

In [None]:
# Remove the 'District_Notes' and 'Last_Updated' columns, then print the frame
# summary (dtypes and non-null counts) to check for null values.

district_data = district_data.drop(columns=['District_Notes', 'Last_Updated'])
district_data.info()

In [None]:
# Line plot of the headline case counts for every state, saved as 'PIC_1'.

# Remove the row labelled 0 (described in the original note as the 'Totals'
# entry) so that only individual states are plotted.
state_data = state_data.drop(index=0)

# Draw Confirmed, Recovered, Active and Deaths on one shared set of axes.
plt.figure(figsize=(12, 10))
x = state_data['State_code']
for series_name in ("Confirmed", "Recovered", "Active", "Deaths"):
    y = state_data[series_name]
    plt.plot(x, y, marker='o', label=series_name)
plt.legend()
plt.savefig('PIC_1')

In [None]:
# Keep the 20 rows of state_data with the largest 'Confirmed' values.

state_top20 = state_data.nlargest(n=20, columns='Confirmed')
state_top20

In [None]:
# Grouped bar chart of the top states: one group per state, one bar per metric.
# The figure is saved as 'PIC_2'.

# One group position per row actually present, so the chart still works when
# state_top20 holds fewer than 20 rows.
ind = np.arange(len(state_top20))

# Width of a single bar. Four bars of this width occupy 0.8 of each unit-wide
# group slot, so bars neither cover each other nor run into the next group.
width = 0.2

plt.figure(figsize=(15,12))
x = state_top20['State_code']
for offset, metric in enumerate(("Confirmed", "Recovered", "Active", "Deaths")):
    y = state_top20[metric]
    # align='edge' anchors the bar's left edge at the given position, so stepping
    # by exactly one bar width per metric places the bars side by side.
    plt.bar(ind + offset*width, y, align='edge', width=width, label=metric)

# Centre each state label under the middle of its four-bar group.
plt.xticks(ind + 2*width, x)
plt.legend()
plt.savefig('PIC_2')

In [None]:
# Keep the 10 rows of district_data with the largest 'Confirmed' values.

district_top10 = district_data.nlargest(n=10, columns='Confirmed')
district_top10

In [None]:
# Grouped bar chart of the top districts: one group per district, one bar per
# metric. The figure is saved as 'PIC_3'.

# One group position per row actually present, so the chart still works when
# district_top10 holds fewer than 10 rows.
ind = np.arange(len(district_top10))

# Width of a single bar. Four bars of this width occupy 0.8 of each unit-wide
# group slot, so bars neither cover each other nor run into the next group.
width = 0.2

plt.figure(figsize=(15,12))
x = district_top10['District']
for offset, metric in enumerate(("Confirmed", "Recovered", "Active", "Deceased")):
    y = district_top10[metric]
    # align='edge' anchors the bar's left edge at the given position, so stepping
    # by exactly one bar width per metric places the bars side by side.
    plt.bar(ind + offset*width, y, align='edge', width=width, label=metric)

# Centre each district label under the middle of its four-bar group.
plt.xticks(ind + 2*width, x)
plt.legend()
plt.savefig('PIC_3')

In [None]:
# Build the per-state testing snapshot: rows whose 'Updated On' value is exactly
# the string '06/08/2020', reduced to the three columns used for plotting, with
# incomplete rows removed.
# NOTE(review): the comparison is on the raw string; whether it means 6 August or
# 8 June depends on the file's date convention - confirm against the CSV.
tests = (
    tests_state_data
    .loc[tests_state_data['Updated On'] == '06/08/2020',
         ['State', 'Total Tested', 'Positive']]
    .dropna()
)
tests.info()

In [None]:
# Interactive scatter plot relating the number of tests per state to the number
# of positive (confirmed) cases. Both axes are logarithmic.
import plotly.express as px

fig = px.scatter(tests, x="Total Tested", y="Positive", text="State", log_x=True, 
                 log_y=True, size_max=100, color="Positive")
# Put each state's name just above its marker.
fig.update_traces(textposition='top center')
# Centred title naming the two quantities that are actually plotted.
fig.update_layout(title_text='Total Tested vs Positive Cases by State', title_x=0.5)
fig.show()

# Plotting Covid-19 data on India Map using geopandas:

In [None]:
# Read the India polygon shapefile and rename its 'st_nm' column to 'State', the
# key that the later merge with state_data joins on.
import geopandas as gpd

fp = r'/kaggle/input/indiageofiles/india-polygon.shp'
map_df = gpd.read_file(fp).rename(columns={'st_nm': 'State'})
map_df.head()

In [None]:
# Attach the state-level figures to the map geometries. The left join keeps
# every row of map_df even when state_data has no row with the same 'State'.

data_merge = map_df.merge(state_data, how='left', on='State')
data_merge.head()

In [None]:
# Map of India with each state shaded by its 'Confirmed' count.
fig, ax = plt.subplots(1, figsize=(8, 8))
ax.axis('off')  # hide the axis frame and ticks; only the map is shown
ax.set_title('Statewise Confirmed Cases', fontdict={'fontsize': '25', 'fontweight' : '10'})

# plot the figure
# edgecolor='0' is matplotlib's grayscale notation for black state borders.
# No markersize is passed: geopandas documents it as applying to point
# geometries only, and this frame is read from a polygon shapefile.
# The trailing semicolon keeps the notebook from echoing the returned Axes.
data_merge.plot(column='Confirmed', cmap='YlOrRd', linewidth=0.8, ax=ax,
                edgecolor='0', legend=True);

In [None]:
# Map of India with each state shaded by its 'Recovered' count.
fig, ax = plt.subplots(1, figsize=(8, 8))
ax.axis('off')  # hide the axis frame and ticks; only the map is shown
ax.set_title('Statewise Recovered Cases', fontdict={'fontsize': '25', 'fontweight' : '10'})
# edgecolor='0' is matplotlib's grayscale notation for black state borders.
# No markersize is passed: geopandas documents it as applying to point
# geometries only, and this frame is read from a polygon shapefile.
# The trailing semicolon keeps the notebook from echoing the returned Axes.
data_merge.plot(column='Recovered', cmap='Greens', linewidth=0.8, ax=ax,
                edgecolor='0', legend=True);

In [None]:
# Map of India with each state shaded by its 'Deaths' count.
fig, ax = plt.subplots(1, figsize=(8, 8))
ax.axis('off')  # hide the axis frame and ticks; only the map is shown
ax.set_title('Statewise Deaths', fontdict={'fontsize': '25', 'fontweight' : '10'})
# edgecolor='0' is matplotlib's grayscale notation for black state borders.
# No markersize is passed: geopandas documents it as applying to point
# geometries only, and this frame is read from a polygon shapefile.
# The trailing semicolon keeps the notebook from echoing the returned Axes.
data_merge.plot(column='Deaths', cmap='Blues', linewidth=0.8, ax=ax,
                edgecolor='0', legend=True);