In [None]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
from statsmodels.stats.proportion import proportions_ztest
import geopandas as gpd
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter

In [None]:
# Load the two raw CSVs used by the first half of the notebook.
# NOTE(review): the relative '../input/...' paths look like a Kaggle layout — confirm before running elsewhere.
state_df = pd.read_csv('../input/covid19-in-india/StatewiseTestingDetails.csv')
country_df = pd.read_csv('../input/covid19-in-india/covid_19_india.csv')

In [None]:
# Preview the first rows of the state-wise testing file.
state_df.head()

In [None]:
# Preview the case-count file as well; the previous cell already shows state_df,
# so repeating it here added nothing while country_df was never inspected.
country_df.head()

In [None]:
# First rows of state_df whose 'State' is Tamil Nadu.
state_df[state_df['State']=='Tamil Nadu'].head()

In [None]:
# Spot-check: the Tamil Nadu rows dated 2020-03-30 (the date is still a plain string here).
on_target_date = country_df['Date'] == '2020-03-30'
in_tamil_nadu = country_df['State/UnionTerritory'] == 'Tamil Nadu'
country_df[on_target_date & in_tamil_nadu]

In [None]:

# Work on a copy: the cleaning cells below modify df in place, and country_df should stay as loaded.
df=country_df.copy()

In [None]:

def drop_star(df):
    """Remove, in place, every row whose state name ends in ``***``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'State/UnionTerritory' column. It is modified in place.

    Returns
    -------
    None
    """
    # Vectorised suffix test replaces Series.iteritems(), which pandas 2.0 removed.
    # na=False keeps rows with a missing state name instead of failing on them.
    starred = df['State/UnionTerritory'].str.endswith('***', na=False)
    # Drop by index label, as the original did, so the remaining labels are unchanged.
    df.drop(index=df.index[starred.to_numpy(dtype=bool)], inplace=True)
        
# Drop the '***'-suffixed rows in place, then list the remaining state names to check the result.
drop_star(df)
df['State/UnionTerritory'].unique()

In [None]:

# Parse the 'YYYY-MM-DD' date strings into datetime64 values.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

# 'Time' is not used anywhere below. errors='ignore' keeps the cell re-runnable:
# without it a second execution raises KeyError because the column is already gone.
df.drop(columns=['Time'], inplace=True, errors='ignore')

# Shorter column name used by every later cell.
df.rename(columns={'State/UnionTerritory': 'States'}, inplace=True)

In [None]:
# Derived per-row metrics.
# Active = confirmed cases that are neither cured nor dead.
resolved = df['Cured'] + df['Deaths']
df['Active_cases'] = df['Confirmed'] - resolved
# Both rates are percentages of confirmed cases, rounded to 4 decimals.
df['Discharge_Rate'] = ((df['Cured'] / df['Confirmed']) * 100).round(4)
df['Death_Rate'] = ((df['Deaths'] / df['Confirmed']) * 100).round(4)
df.head()

In [None]:
# Last rows for Tamil Nadu, including the derived columns.
df[df['States']=='Tamil Nadu'].tail()

In [None]:
# Last rows for Maharashtra, including the derived columns.
df[df['States']=='Maharashtra'].tail()

In [None]:
# Last rows for West Bengal, including the derived columns.
df[df['States']=='West Bengal'].tail()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mtd
import seaborn as sns
from matplotlib.ticker import ScalarFormatter
# Five-colour palette; later line plots pick individual entries (colors[1]..colors[3]).
colors=['#0C68C7','#3A6794','#00FAF3','#FA643C','#C71D12']
# Make it the seaborn default palette and use the plain white style.
sns.set(palette=colors, style='white')

# Show the palette as swatches.
sns.palplot(colors)

In [None]:

# Snapshot of every row dated 2021-07-06; the ranking cells below are built from it.
is_snapshot_day = df['Date'] == '2021-07-06'
current = df[is_snapshot_day]

In [None]:

# Order the snapshot from most to fewest confirmed cases.
max_confirmed_cases = current.sort_values('Confirmed', ascending=False)
max_confirmed_cases.head()

In [None]:

# Keep the ten rows with the most confirmed cases.
top_cases = max_confirmed_cases.iloc[:10]
top_cases.head()

In [None]:
# One bar chart per metric for the ten states with the most confirmed cases.
bar_metrics = ['Confirmed', 'Cured', 'Deaths', 'Active_cases', 'Discharge_Rate', 'Death_Rate']
# Read the date from the plotted rows so the title cannot drift from the snapshot
# (the old hard-coded "7th June" did not match the 2021-07-06 filter).
snapshot_label = top_cases['Date'].max().strftime('%d %B %Y')

for feature in bar_metrics:
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.barplot(data=top_cases, x='States', y=feature, linewidth=0, edgecolor='black', ax=ax)
    ax.set_title("Top 10 highly impacted states as on {}".format(snapshot_label), size=10)
    ax.set_xlabel('States', size=15)
    ax.set_ylabel(feature, size=15)
    # Write each bar's value at the top-left corner of the bar.
    for bar in ax.patches:
        ax.text(x=bar.get_x(), y=bar.get_height(), s=bar.get_height())
    # Render each figure as soon as it is complete.
    plt.show()

In [None]:
# States compared in the line-plot grids below. 'Karnataka' replaces a duplicated
# 'Tamil Nadu' so the list matches the six frames built here.
states = ['Kerala', 'Tamil Nadu', 'Maharashtra', 'Karnataka', 'Andhra Pradesh', 'Uttar Pradesh']

# One per-state slice of the cleaned data, keyed by the usual two-letter abbreviations.
mh = df[df['States'] == 'Maharashtra']
kl = df[df['States'] == 'Kerala']
ka = df[df['States'] == 'Karnataka']
tn = df[df['States'] == 'Tamil Nadu']
ap = df[df['States'] == 'Andhra Pradesh']
up = df[df['States'] == 'Uttar Pradesh']

In [None]:
# 3x3 grid: one column per state, rows are Active / Cured / Deaths over time.
# Each title is paired with its own frame here; previously the "Maharashtra" column
# drew Tamil Nadu's data and the "Karnataka" column drew Maharashtra's.
grid_columns = [
    ("Maharashtra", mh, colors[1]),
    ("Kerala", kl, colors[2]),
    ("Karnataka", ka, colors[3]),
]
grid_rows = ['Active_cases', 'Cured', 'Deaths']

fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(23, 10), squeeze=False,
                       sharex=True, sharey=False, constrained_layout=True)
plt.suptitle("Comparison of Active, Cured & Deaths for top States", size=25)
for col, (state_title, state_frame, line_colour) in enumerate(grid_columns):
    ax[0, col].set_title(state_title, size=20)
    for row, metric in enumerate(grid_rows):
        sns.lineplot(data=state_frame, x='Date', y=metric, ax=ax[row, col], color=line_colour)
plt.show()

In [None]:
# 3x3 grid for the next three states: one column per state, rows are Active / Cured / Deaths.
# Each title is paired with its own frame here; previously the "Andhra Pradesh" and
# "Uttar Pradesh" columns drew Kerala's and Maharashtra's data.
grid_columns = [
    ("Tamil Nadu", tn, colors[1]),
    ("Andhra Pradesh", ap, colors[2]),
    ("Uttar Pradesh", up, colors[3]),
]
grid_rows = ['Active_cases', 'Cured', 'Deaths']

fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(23, 10), squeeze=False,
                       sharex=True, sharey=False, constrained_layout=True)
for col, (state_title, state_frame, line_colour) in enumerate(grid_columns):
    ax[0, col].set_title(state_title, size=20)
    for row, metric in enumerate(grid_rows):
        sns.lineplot(data=state_frame, x='Date', y=metric, ax=ax[row, col], color=line_colour)
plt.show()

In [None]:
# Split the cleaned data by calendar year and aggregate it per month.
df['Date'] = pd.to_datetime(df['Date'])
# .copy() gives independent frames, so adding 'Month' below does not write into a
# slice of df (which triggers SettingWithCopyWarning).
data_20 = df[df['Date'].dt.year == 2020].copy()
data_21 = df[df['Date'].dt.year == 2021].copy()

data_20['Month'] = data_20['Date'].dt.month
data_21['Month'] = data_21['Date'].dt.month

# NOTE(review): Confirmed/Cured/Deaths look like running totals per state, so these
# monthly sums add the same cases repeatedly — confirm that the ratio of sums is the
# intended monthly rate.

# Year 2020: per-month sums
data_confirm_20 = data_20['Confirmed'].groupby(data_20['Month']).sum()
data_dis_20 = data_20['Cured'].groupby(data_20['Month']).sum()
data_death_20 = data_20['Deaths'].groupby(data_20['Month']).sum()

# Year 2021: per-month sums
data_confirm_21 = data_21['Confirmed'].groupby(data_21['Month']).sum()
data_dis_21 = data_21['Cured'].groupby(data_21['Month']).sum()
data_death_21 = data_21['Deaths'].groupby(data_21['Month']).sum()

# From here on data_20 / data_21 hold the monthly aggregates (indexed by Month),
# no longer the row-level slices.
cols_20 = [data_confirm_20, data_dis_20, data_death_20]
data_20 = pd.concat(cols_20, axis=1)

cols_21 = [data_confirm_21, data_dis_21, data_death_21]
data_21 = pd.concat(cols_21, axis=1)

# Year 2020: rates as a percentage of confirmed, rounded to 4 decimals
data_20['discharge_rate_20'] = np.round((data_20['Cured'] / data_20['Confirmed']) * 100, decimals=4)
data_20['death_rate_20'] = np.round((data_20['Deaths'] / data_20['Confirmed']) * 100, decimals=4)

# Year 2021: rates as a percentage of confirmed, rounded to 4 decimals
data_21['discharge_rate_21'] = np.round((data_21['Cured'] / data_21['Confirmed']) * 100, decimals=4)
data_21['death_rate_21'] = np.round((data_21['Deaths'] / data_21['Confirmed']) * 100, decimals=4)

In [None]:

# Turn the 'Month' index into a regular column for plotting. The guard keeps the cell
# re-runnable: resetting a second time would add a spurious 'index' column.
if 'Month' not in data_20.columns:
    data_20.reset_index(inplace=True)
data_20.head()

In [None]:

# Turn the 'Month' index into a regular column for plotting. The guard keeps the cell
# re-runnable: resetting a second time would add a spurious 'index' column.
if 'Month' not in data_21.columns:
    data_21.reset_index(inplace=True)
data_21.head()

In [None]:
# Monthly discharge rate for 2020: green line with round markers.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=data_20, x="Month", y="discharge_rate_20",
             color="g", lw=3, marker='o', markersize=10, ax=ax)
ax.set_title('DISCHARGE RATE PER MONTH IN 2020')
plt.show()

In [None]:
# Monthly discharge rate for 2021: green line with round markers.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=data_21, x="Month", y="discharge_rate_21",
             color="g", lw=3, marker='o', markersize=10, ax=ax)
ax.set_title('DISCHARGE RATE PER MONTH IN 2021')
plt.show()

In [None]:
# Monthly death rate for 2020: red line with round markers.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=data_20, x="Month", y="death_rate_20",
             color="r", lw=3, marker='o', markersize=10, ax=ax)
ax.set_title('DEATH RATE PER MONTH IN 2020')
plt.show()

In [None]:
# Monthly death rate for 2021: red line with round markers.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=data_21, x="Month", y="death_rate_21",
             color="r", lw=3, marker='o', markersize=10, ax=ax)
ax.set_title('DEATH RATE PER MONTH IN 2021')
plt.show()

In [None]:
# 'Cured' column for three states, as Series.
# NOTE(review): this rebinds tn, mh and kl, which earlier held whole per-state DataFrames
# used by the 3x3 line-plot grids; re-running those plot cells after this one will fail.
# Distinct names (e.g. tn_cured) would avoid that — confirm nothing later relies on these.
tn=df[df['States']=='Tamil Nadu']['Cured']
mh=df[df['States']=='Maharashtra']['Cured']
kl=df[df['States']=='Kerala']['Cured']

In [None]:
# Quick look at the cleaned frame before the forecasting section.
df.head()

In [None]:
# Total active cases on the snapshot date, then a per-state table shaded by value.
total_active = max_confirmed_cases['Active_cases'].sum()
print('Total number of Active Covid-19 cases across India : {}'.format(total_active))

active_by_state = max_confirmed_cases.groupby('States')['Active_cases'].max()
top_cases = active_by_state.sort_values(ascending=False).to_frame()
top_cases.style.background_gradient(cmap='flare')

In [None]:
# The package was renamed from fbprophet to prophet in v1.0; try the current name
# first and fall back so older environments keep working.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

# Sum only the three count columns per date. Summing the whole frame first also
# aggregates the non-numeric columns, which is wasteful and can fail on newer pandas.
daily_totals = df.groupby('Date')[['Confirmed', 'Cured', 'Deaths']].sum()
confirmed = daily_totals['Confirmed'].reset_index()
recovered = daily_totals['Cured'].reset_index()
deaths = daily_totals['Deaths'].reset_index()
confirmed.head()

In [None]:
# Rename to the 'ds' (date) / 'y' (value) column names that Prophet's fit() requires.
confirmed.columns = ['ds', 'y']
confirmed['ds'] = pd.to_datetime(confirmed['ds'])

In [None]:
# Fit Prophet on the daily confirmed totals; interval_width=0.95 sets the width of the
# uncertainty interval reported as yhat_lower / yhat_upper.
m = Prophet(interval_width=0.95)
m.fit(confirmed)
# History plus 7 additional periods to forecast.
future = m.make_future_dataframe(periods=7)       

In [None]:
# Predict over the extended date range and show the last rows with their interval bounds.
forecast = m.predict(future)
forecast[['ds','yhat','yhat_lower','yhat_upper']].tail()

In [None]:
# Forecast plot for the confirmed-cases model.
confirmed_forecast_plot = m.plot(forecast)

In [None]:
# Component plot for the same model.
# NOTE(review): reuses the name confirmed_forecast_plot, replacing the figure from the previous cell.
confirmed_forecast_plot = m.plot_components(forecast)

In [None]:
# Mean discharge rate on the snapshot date, then a per-state table shaded by value.
mean_discharge = max_confirmed_cases['Discharge_Rate'].mean()
print('Average recovery rate Covid-19 cases across India : {}'.format(mean_discharge))

discharge_by_state = max_confirmed_cases.groupby('States')['Discharge_Rate'].max()
top_cases = discharge_by_state.sort_values(ascending=False).to_frame()
top_cases.style.background_gradient(cmap='flare')

In [None]:
# Date + discharge rate for the Prophet model. .copy() makes this an independent frame,
# so the column rename and 'ds' assignment in the next cell do not write into a slice
# of df (SettingWithCopyWarning).
discharge_rate = df[['Date', 'Discharge_Rate']].copy()
discharge_rate.head()

In [None]:
# Rename to the 'ds' (date) / 'y' (value) column names that Prophet's fit() requires.
discharge_rate.columns = ['ds', 'y']
discharge_rate['ds'] = pd.to_datetime(discharge_rate['ds'])

In [None]:
# Fit a new model on the discharge rate (rebinds m from the confirmed-cases model).
# NOTE(review): discharge_rate is taken row-by-row from df, which holds one row per state
# per date, so the model sees several y values for each ds — confirm this is intended
# rather than a national daily rate. The interval width here is 0.50, not 0.95 as above.
m = Prophet(interval_width=0.50)
m.fit(discharge_rate)
# History plus 7 additional periods to forecast.
future = m.make_future_dataframe(periods=7)      

In [None]:
# Predict with the discharge-rate model and show the last rows with their interval bounds.
forecast = m.predict(future)
forecast[['ds','yhat','yhat_lower','yhat_upper']].tail()

In [None]:
# Forecast plot for the discharge-rate model.
# NOTE(review): the variable is still called confirmed_forecast_plot although m is now the discharge-rate model.
confirmed_forecast_plot = m.plot(forecast)

In [None]:
# Component plot for the discharge-rate model (same variable name reused again).
confirmed_forecast_plot = m.plot_components(forecast)

In [None]:
# Mean death rate on the snapshot date, then a per-state table shaded by value.
# The printed line used to be a copy of the recovery-rate message and reported
# Discharge_Rate, while the table below it is about Death_Rate.
print('Average death rate of Covid-19 cases across India : {}'.format(max_confirmed_cases['Death_Rate'].mean()))
top_cases = max_confirmed_cases.groupby('States')['Death_Rate'].max().sort_values(ascending=False).to_frame()
top_cases.style.background_gradient(cmap='flare')

In [None]:
# Date + death rate for the Prophet model. .copy() makes this an independent frame,
# so the column rename and 'ds' assignment in the next cell do not write into a slice
# of df (SettingWithCopyWarning).
# NOTE(review): the name discharge_rate is kept because the following cells read it,
# but from here on it holds Death_Rate values.
discharge_rate = df[['Date', 'Death_Rate']].copy()
discharge_rate.head()

In [None]:
# Rename to the 'ds' (date) / 'y' (value) column names that Prophet's fit() requires.
# At this point discharge_rate holds the Death_Rate column.
discharge_rate.columns = ['ds', 'y']
discharge_rate['ds'] = pd.to_datetime(discharge_rate['ds'])

In [None]:
# Fit a new model on the death rate (discharge_rate now holds Death_Rate; m is rebound again).
m = Prophet(interval_width=0.50)
m.fit(discharge_rate)
# History plus 7 additional periods; show the tail of the extended date frame.
future = m.make_future_dataframe(periods=7)       
future.tail(10)

In [None]:
# Predict with the death-rate model and show the last rows with their interval bounds.
forecast = m.predict(future)
forecast[['ds','yhat','yhat_lower','yhat_upper']].tail()

In [None]:
# Quick look at the cleaned frame before building the state-wise summary table.
df.head()

In [None]:

# Snapshot table without the bookkeeping columns.
state_cases = max_confirmed_cases.drop(
    columns=['Sno', 'Date', 'ConfirmedIndianNational', 'ConfirmedForeignNational']
)

# Colour map applied to each metric column, in the order the gradients are added.
gradient_by_column = {
    "Confirmed": 'Blues',
    "Deaths": 'Reds',
    "Cured": 'Greens',
    "Active_cases": 'Oranges',
    "Death_Rate": 'RdYlBu',
    "Discharge_Rate": 'Accent',
}

# Sort by confirmed cases, fill gaps with 0, then shade one column at a time.
state_wise_cases = state_cases.sort_values('Confirmed', ascending=False).fillna(0).style
for column_name, cmap_name in gradient_by_column.items():
    state_wise_cases = state_wise_cases.background_gradient(cmap=cmap_name, subset=[column_name])

state_wise_cases

In [None]:
# Load the state-wise vaccination data.
# NOTE(review): same relative '../input/...' layout as the earlier loads — confirm the path when running elsewhere.
vaccine_df = pd.read_csv('../input/covid19-in-india/covid_vaccine_statewise.csv')

In [None]:
# Keep an untouched copy of the raw vaccination frame.
vacc_df = vaccine_df.copy()

# Columns to keep (in this order), mapped to their shorter underscore-style names.
vaccine_column_names = {
    'Updated On': 'Date',
    'State': 'States',
    'Total Doses Administered': 'Total_Doses_Administered',
    'Total Sessions Conducted': 'Total_Sessions_Conducted',
    'First Dose Administered': 'First_Dose_Administered',
    'Second Dose Administered': 'Second_Dose_Administered',
    'Male(Individuals Vaccinated)': 'Male_Vaccinated',
    'Female(Individuals Vaccinated)': 'Female_Vaccinated',
    'Transgender(Individuals Vaccinated)': 'Transgender_Vaccinated',
    'Total Covaxin Administered': 'Total_Covaxin_Administered',
    'Total CoviShield Administered': 'Total_CoviShield_Administered',
    'Total Sputnik V Administered': 'Total_SputnikV_Administered',
    '18-45 years (Age)': '18-45_Years',
    '45-60 years (Age)': '45-60_Years',
    '60+ years (Age)': '60+_Years',
    'Total Individuals Vaccinated': 'Total_Individuals_Vaccinated',
}
vaccine_df = vaccine_df[list(vaccine_column_names)].rename(columns=vaccine_column_names)
vaccine_df.head()

In [None]:

# Count missing values per column.
vaccine_df.isnull().sum()

In [None]:
# Keep only rows that report a total of individuals vaccinated. .copy() makes the result
# an independent frame, so the 'Date' assignment in the next cell does not write into a
# filtered slice (SettingWithCopyWarning).
vaccine_df = vaccine_df[vaccine_df['Total_Individuals_Vaccinated'].notna()].copy()

In [None]:
# Parse the day/month/year date strings. assign() returns a new frame, which avoids
# assigning a column on the filtered slice produced by the previous cell.
vaccine_df = vaccine_df.assign(Date=pd.to_datetime(vaccine_df['Date'], format='%d/%m/%Y'))
# Series.max() is vectorised and skips missing dates, unlike the builtin max().
latest_date = vaccine_df['Date'].max()
print("Current Date : ", latest_date)

In [None]:
# Show every row for the merged union territory of Dadra and Nagar Haveli and Daman and Diu.
vaccine_df[vaccine_df['States'] == "Dadra and Nagar Haveli and Daman and Diu"]

In [None]:
# Individuals vaccinated over time for the rows whose state is 'India'.
# The values are scaled to millions in the data instead of overwriting the tick labels:
# a fixed label list is detached from the real tick positions, so the axis showed
# numbers unrelated to the plotted values.
india_vacc = vaccine_df.loc[vaccine_df['States'] == 'India'].assign(
    Vaccinated_millions=lambda frame: frame['Total_Individuals_Vaccinated'] / 1e6
)

fig, ax = plt.subplots(ncols=1, nrows=1, dpi=100, figsize=(6, 4))

sns.lineplot(data=india_vacc, x='Date', y='Vaccinated_millions', ax=ax)
# One tick per month, labelled like 'Jan21'.
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
ax.xaxis.set_major_formatter(DateFormatter('%b%y'))
ax.set_ylabel('Total Indians Vaccinated so far in millions', fontsize=10)

# Not used in this cell; kept in case later cells read the x-limits.
Xstart, Xend = ax.get_xlim()
ax.set_title('India\'s Vaccination Performance')
plt.show()