In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import folium
import requests
from datetime import timedelta
from plotly.subplots import make_subplots
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.svm import SVR
from statsmodels.tsa.api import Holt,SimpleExpSmoothing,ExponentialSmoothing
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.preprocessing import PolynomialFeatures
from pmdarima import auto_arima
from statsmodels.tsa.arima_model import ARIMA
from IPython.core.display import HTML
import requests
from fbprophet import Prophet

In [None]:
# Country-level case table read straight from the CSSEGISandData/COVID-19 GitHub repository
# (web-data branch); the file is downloaded over HTTP every time this cell runs.
df_covid19 = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv")

In [None]:
# Remove a fixed set of columns, then preview the first two rows.
dropped_columns = ["People_Tested","People_Hospitalized","UID","ISO3","Mortality_Rate"]
df_covid19 = df_covid19.drop(columns=dropped_columns)
df_covid19.head(2)

In [None]:
# Fetch the state-wise India snapshot. The timeout stops the cell from hanging on an
# unresponsive endpoint, and raise_for_status() reports HTTP failures directly instead of
# letting them surface later as a JSON-decoding error or KeyError.
india_response = requests.get('https://api.rootnet.in/covid19-in/unofficial/covid19india.org/statewise', timeout=30)
india_response.raise_for_status()
india_data_json = india_response.json()

# pandas >= 1.0 exposes json_normalize at the top level and deprecates the pd.io.json alias;
# use the top-level function when it exists and fall back to the alias on older pandas.
_json_normalize = getattr(pd, "json_normalize", None) or pd.io.json.json_normalize
df_india = _json_normalize(india_data_json['data']['statewise'])

# Rows are addressed by state name from here on.
df_india = df_india.set_index("state")

In [None]:
# Column-wise sums over every state, displayed as a single row labelled "Total".
total = df_india.sum().rename("Total")
df_t = pd.DataFrame(total, dtype=float).T
# Deaths per 100 confirmed cases, rounded to two decimals.
df_t["Mortality Rate (per 100)"] = np.round(100 * df_t["deaths"] / df_t["confirmed"], 2)
df_t.style.background_gradient(cmap='Oranges', axis=1)

In [None]:
# Deaths per 100 confirmed cases for every state; nan_to_num turns the NaN of a 0/0 row into 0.
raw_mortality = 100 * df_india["deaths"] / df_india["confirmed"]
df_india["Mortality Rate (per 100)"] = np.round(np.nan_to_num(raw_mortality), 2)

# Each numeric column gets its own colour map in the rendered table.
column_gradients = [
    ('Blues', "confirmed"),
    ('Reds', "deaths"),
    ('Greens', "recovered"),
    ('Purples', "active"),
    ('YlOrBr', "Mortality Rate (per 100)"),
]
styled_states = df_india.style
for cmap_name, column in column_gradients:
    styled_states = styled_states.background_gradient(cmap=cmap_name, subset=[column])
styled_states.format("{:.2f}", subset=["Mortality Rate (per 100)"])

##### States with Reported Deaths

In [None]:
# Same colour-coded table as above, restricted to states that report at least one death.
states_with_deaths = df_india[df_india['deaths'] > 0]
column_gradients = [
    ('Blues', "confirmed"),
    ('Reds', "deaths"),
    ('Greens', "recovered"),
    ('Purples', "active"),
    ('YlOrBr', "Mortality Rate (per 100)"),
]
styled_deaths = states_with_deaths.style
for cmap_name, column in column_gradients:
    styled_deaths = styled_deaths.background_gradient(cmap=cmap_name, subset=[column])
styled_deaths.format("{:.2f}", subset=["Mortality Rate (per 100)"])

###### 10 Most Affected States

In [None]:
# Horizontal bar chart of the ten states with the most confirmed cases.
# The frame is sorted once, and one explicit Axes is used instead of add_subplot(111)
# followed by plt.axes(...), which could leave a second Axes stacked on the same figure.
top_confirmed = df_india.sort_values('confirmed')["confirmed"].tail(10)

f, ax = plt.subplots(figsize=(10,5))
ax.set_axisbelow(True)  # draw the grid behind the bars
ax.barh(top_confirmed.index, top_confirmed.values, color="darkcyan")
ax.tick_params(size=5, labelsize=13)
ax.set_xlabel("Confirmed Cases", fontsize=18)
ax.set_title("Top 10 States: India (Confirmed Cases)", fontsize=20)
ax.grid(alpha=0.3)
# plt.savefig(out+'Top 10 States_India (Confirmed Cases).png')

In [None]:
# Horizontal bar chart of the ten states with the most reported deaths.
# The frame is sorted once, and one explicit Axes is used instead of add_subplot(111)
# followed by plt.axes(...), which could leave a second Axes stacked on the same figure.
top_deaths = df_india.sort_values('deaths')["deaths"].tail(10)

f, ax = plt.subplots(figsize=(10,5))
ax.set_axisbelow(True)  # draw the grid behind the bars
ax.barh(top_deaths.index, top_deaths.values, color="crimson")
ax.tick_params(size=5, labelsize=13)
ax.set_xlabel("Deaths", fontsize=18)
ax.set_title("Top 10 States: India (Deaths Cases)", fontsize=20)
ax.grid(alpha=0.3)
# plt.savefig(out+'Top 10 States_India (Deaths Cases).png')

In [None]:
# Adding Location data (Latitude,Longitude)
# Lookup table: state name -> [latitude, longitude].
locations = {
    "Kerala": [10.8505, 76.2711],
    "Maharashtra": [19.7515, 75.7139],
    "Karnataka": [15.3173, 75.7139],
    "Telangana": [18.1124, 79.0193],
    "Uttar Pradesh": [26.8467, 80.9462],
    "Rajasthan": [27.0238, 74.2179],
    "Gujarat": [22.2587, 71.1924],
    "Delhi": [28.7041, 77.1025],
    "Punjab": [31.1471, 75.3412],
    "Tamil Nadu": [11.1271, 78.6569],
    "Haryana": [29.0588, 76.0856],
    "Madhya Pradesh": [22.9734, 78.6569],
    "Jammu and Kashmir": [33.7782, 76.5762],
    "Ladakh": [34.1526, 77.5770],
    "Andhra Pradesh": [15.9129, 79.7400],
    "West Bengal": [22.9868, 87.8550],
    "Bihar": [25.0961, 85.3131],
    "Chhattisgarh": [21.2787, 81.8661],
    "Chandigarh": [30.7333, 76.7794],
    "Uttarakhand": [30.0668, 79.0193],
    "Himachal Pradesh": [31.1048, 77.1734],
    "Goa": [15.2993, 74.1240],
    "Odisha": [20.9517, 85.0985],
    "Andaman and Nicobar Islands": [11.7401, 92.6586],
    "Puducherry": [11.9416, 79.8083],
    "Manipur": [24.6637, 93.9063],
    "Mizoram": [23.1645, 92.9376],
    "Assam": [26.2006, 92.9376],
    "Meghalaya": [25.4670, 91.3662],
    "Tripura": [23.9408, 91.9882],
    "Arunachal Pradesh": [28.2180, 94.7278],
    "Jharkhand": [23.6102, 85.2799],
    "Nagaland": [26.1584, 94.5624],
    "Sikkim": [27.5330, 88.5122],
    "Dadra and Nagar Haveli": [20.1809, 73.0169],
    "Lakshadweep": [10.5667, 72.6417],
    "Daman and Diu": [20.4283, 72.8397],
}

# Rows whose state name is missing from the lookup are removed from df_india (in place).
unmapped_states = [state for state in df_india.index if state not in locations]
df_india.drop(unmapped_states, inplace=True)

# Attach the coordinates. The columns stay object-typed so that row-wise access
# (df_india.iloc[i]) keeps returning the integer counts as integers rather than
# upcasting the whole row to float.
df_india["Lat"] = np.array([locations[state][0] for state in df_india.index], dtype=object)
df_india["Long"] = np.array([locations[state][1] for state in df_india.index], dtype=object)

In [None]:
india = folium.Map(location=[23,80], zoom_start=4,max_zoom=6,min_zoom=4,height=500,width="80%")

# Only states with at least one confirmed case get a marker. The loop walks the filtered
# frame itself: previously the row count came from the filtered frame while the rows were
# read from the unfiltered one by position, so a zero-case row anywhere but at the end
# would be drawn and a later affected state would be skipped.
affected_states = df_india[df_india['confirmed'] > 0]
marker_fields = zip(
    affected_states.index,
    affected_states['Lat'],
    affected_states['Long'],
    affected_states['confirmed'],
    affected_states['active'],
    affected_states['recovered'],
    affected_states['deaths'],
    affected_states['Mortality Rate (per 100)'],
)
for state, lat, lon, confirmed, active, recovered, deaths, mortality in marker_fields:
    # HTML tooltip: state name as heading, then one list item per statistic.
    tooltip_html = (
        "<h5 style='text-align:center;font-weight: bold'>"+state+"</h5>"+
        "<hr style='margin:10px;'>"+
        "<ul style='color: #444;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>"+
        "<li>Confirmed: "+str(confirmed)+"</li>"+
        "<li>Active:   "+str(active)+"</li>"+
        "<li>Recovered:   "+str(recovered)+"</li>"+
        "<li>Deaths:   "+str(deaths)+"</li>"+
        "<li>Mortality Rate:   "+str(np.round(mortality,2))+"</li>"+
        "</ul>"
    )
    folium.Circle(
        location=[lat, lon],
        tooltip=tooltip_html,
        # Radius grows with the (truncated) base-2 log of the case count.
        radius=(int(np.log2(confirmed+1)))*15000,
        color='#ff6600',
        fill_color='#ff8533',
        fill=True).add_to(india)

india

In [None]:
df_india.head()

In [None]:
df_india.columns

In [None]:
# Stacked bar chart of cured / deaths / active counts per date.
# NOTE(review): `datewise` and its "Active Cases" column are only created in later cells of
# this notebook, so on a fresh kernel this cell fails unless those cells have been run first.

# colour palette
dth = 'red'
rec = 'green'
act = 'orange'

datewise1 = datewise.reset_index()

# keep the plotted columns, shortening "Active Cases" to "Active"
temp = datewise1[['Date','Active Cases', 'Deaths', 'Cured']].rename(columns={'Active Cases': 'Active'})

# long format: one row per (Date, Case) pair
temp = pd.melt(temp, id_vars="Date", value_vars=['Cured', 'Deaths', 'Active'],
               var_name='Case', value_name='Count')

# plot
fig_2 = px.bar(temp, x='Date', y="Count", color='Case', height=540,
               color_discrete_sequence=[rec, dth, act])
fig_2.update_layout(plot_bgcolor='rgba(0,0,0,0)')
fig_2.show()

In [None]:
df_india.index.unique()

In [None]:
# Six "top 10 states" panels: confirmed, active, deaths, mortality rate, recovered, recovery rate.
temp = df_india.reset_index()
# .copy() makes the filtered frame independent, so the rate columns added below are not
# written to a slice of the reset frame (avoids SettingWithCopyWarning).
temp = temp.loc[temp.state != 'State Unassigned'].copy()
temp['Mortality Rate (per 100)'] = (100*(temp['deaths'])/temp['confirmed']).round(3)
temp['Recovery Rate'] = (100*(temp['recovered'])/temp['confirmed']).round(3)


def _top10_bar(frame, column, colour):
    """Return a horizontal bar figure of the 10 rows of `frame` with the largest `column`."""
    return px.bar(frame.sort_values(column).tail(10), x=column, y="state",
                  text=column, orientation='h', color_discrete_sequence=[colour])


# The two rate rankings only consider states with more than 100 confirmed cases.
rate_pool = temp[temp['confirmed']>100]

fig_c = _top10_bar(temp, 'confirmed', 'blue')
fig_a = _top10_bar(temp, 'active', '#323232')
fig_d = _top10_bar(temp, 'deaths', '#ff677d')
fig_mr = _top10_bar(rate_pool, 'Mortality Rate (per 100)', '#cd6684')
fig_r = _top10_bar(temp, 'recovered', '#649d66')
fig_rr = _top10_bar(rate_pool, 'Recovery Rate', '#16817a')

fig = make_subplots(rows=3, cols=2, shared_xaxes=False, horizontal_spacing=0.14, vertical_spacing=0.08,
                    subplot_titles=('Confirmed cases', 'Active cases',
                                    'Deaths reported', 'Mortality Rate (per 100)',
                                    'Cured', 'Recovery rate (per 100)'))

# Copy the single bar trace of each figure into its grid position (row, col).
panel_grid = [
    (fig_c, 1, 1), (fig_a, 1, 2),
    (fig_d, 2, 1), (fig_mr, 2, 2),
    (fig_r, 3, 1), (fig_rr, 3, 2),
]
for panel, grid_row, grid_col in panel_grid:
    fig.add_trace(panel['data'][0], row=grid_row, col=grid_col)

# fig.update_layout(height=1200, title_text="Leading State/UT")
fig.update_layout(height=1200, plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black')
fig.update_yaxes(showline=True, linewidth=1, linecolor='black')

In [None]:
f, ax = plt.subplots(figsize=(12, 8))

# Two bar layers drawn on the same axes, one bar per state:
# confirmed cases first, recovered cases on top of them.
bar_layers = [
    ("pastel", "confirmed", "Total", "r"),
    ("muted", "recovered", "Recovered", "g"),
]
for palette, column, legend_label, colour in bar_layers:
    sns.set_color_codes(palette)
    sns.barplot(x=column, y=df_india.index, data=df_india,
                label=legend_label, color=colour)

# Legend and axis labels; the x-axis ends at the largest confirmed count.
ax.legend(ncol=2, loc="lower right", frameon=True)
ax.set(xlim=(0, df_india.confirmed.max()), ylabel="", xlabel="Cases")
sns.despine(left=True, bottom=True)

In [None]:
f, ax = plt.subplots(figsize=(12, 8))

# Same overlay as the previous chart, but with the first row of df_india left out.
states_after_first = df_india.iloc[1:,:]
bar_layers = [
    ("pastel", "confirmed", "Total", "r"),
    ("muted", "recovered", "Recovered", "g"),
]
for palette, column, legend_label, colour in bar_layers:
    sns.set_color_codes(palette)
    sns.barplot(x=column, y=states_after_first.index, data=states_after_first,
                label=legend_label, color=colour)

# Legend and axis labels; the x-axis ends at the largest confirmed count that is shown.
ax.legend(ncol=2, loc="lower right", frameon=True)
ax.set(xlim=(0, states_after_first.confirmed.max()), ylabel="", xlabel="Cases")
sns.despine(left=True, bottom=True)

In [None]:
f, ax = plt.subplots(figsize=(12, 8))

# Confirmed cases with deaths drawn on top, only for states reporting at least one death.
states_with_deaths = df_india[df_india['deaths']>0]
bar_layers = [
    ("pastel", "confirmed", "Total", "orange"),
    ("muted", "deaths", "Deaths", "red"),
]
for palette, column, legend_label, colour in bar_layers:
    sns.set_color_codes(palette)
    sns.barplot(x=column, y=states_with_deaths.index, data=states_with_deaths,
                label=legend_label, color=colour)

# Legend and axis labels; the x-axis ends at the largest confirmed count that is shown.
ax.legend(ncol=2, loc="lower right", frameon=True)
ax.set(xlim=(0, states_with_deaths.confirmed.max()), ylabel="", xlabel="Cases")
sns.despine(left=True, bottom=True)

###### State-wise time-series data

In [None]:
#https://www.kaggle.com/sudalairajkumar/covid19-in-india?select=covid_19_india.csv
# The CSV location can be overridden through the COVID19_INDIA_CSV environment variable so
# the notebook is not tied to one machine; the original absolute path stays the default.
states_csv_path = os.environ.get(
    'COVID19_INDIA_CSV',
    '/Users/apple/Desktop/COVID/datasets_557629_1234650_covid_19_india.csv',
)
states_df = pd.read_csv(states_csv_path)

In [None]:
states_df.shape

In [None]:
states_df.columns

In [None]:
# The 'Sno' column is not used below; remove it.
states_df = states_df.drop('Sno', axis=1)

In [None]:
# Convert the Date column to datetimes.
# NOTE(review): no explicit format or dayfirst flag is passed, so pandas infers the layout —
# confirm the CSV's date strings are unambiguous (e.g. not dd/mm/yy).
states_df['Date'] = pd.to_datetime(states_df['Date'])

In [None]:
states_df.head()

In [None]:
# Distinct state / union-territory labels present in the data; print how many there are
# and display the labels themselves.
states = states_df['State/UnionTerritory'].unique()
print(len(states))
states

In [None]:
# Keep only rows whose state label is not one of the excluded values.
# (The earlier chained comparison tested 'Jharkhand#' twice; listing it once is equivalent.)
excluded_labels = ['Jharkhand#', 'Nagaland#', 'Cases being reassigned to states', 'Unassigned']
states_df = states_df.loc[~states_df['State/UnionTerritory'].isin(excluded_labels)]

In [None]:
# Recompute the label list now that the excluded rows have been filtered out.
states = states_df['State/UnionTerritory'].unique()

In [None]:
len(states)

In [None]:
# Use the dates as the row index; the Date column itself is no longer kept.
states_df = states_df.set_index('Date')

In [None]:
# One figure per state: confirmed cases against the date index.
for state_name in states:
    state_confirmed = states_df.loc[states_df['State/UnionTerritory'] == state_name, 'Confirmed']
    plt.figure()
    state_confirmed.plot(title=state_name, figsize=(15, 5))

In [None]:
states_df[states_df['State/UnionTerritory']=='Kerala']['Confirmed']

In [None]:
states_df.info()

In [None]:
# Replace the literal '0#' with 0, then cast the Deaths column to int64.
# NOTE(review): any other entry that cannot be read as an integer would make astype raise.
states_df['Deaths'] = states_df['Deaths'].replace('0#',0).astype('int64')

In [None]:
# Active = confirmed cases that are neither cured nor dead, stored as int64.
open_cases = states_df['Confirmed'] - states_df['Cured'] - states_df['Deaths']
states_df['Active'] = open_cases.astype('int64')

In [None]:
states

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*" and later removed
# the old names, so use whichever name this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

temp = states_df.reset_index()
temp = temp.rename(columns = {'State/UnionTerritory' : 'State/UT'})

# One small line plot of confirmed cases per state, five per row, each with its own y-scale.
g = sns.FacetGrid(temp.sort_values(['State/UT','Date']),
                  col="State/UT", hue="State/UT",
                  sharey=False, col_wrap=5)
g = g.map(plt.plot, 'Date', "Confirmed")
g.set_xticklabels(rotation=90)
g.fig.subplots_adjust(top=0.9)
# g.fig.suptitle('Confirmed case in each state over time',
#                fontsize=20)
plt.show()

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*" and later removed
# the old names, so use whichever name this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

temp = states_df.reset_index()
temp = temp.rename(columns = {'State/UnionTerritory' : 'State/UT'})

# States to plot. Left out: 'Sikkim', 'Mizoram', 'Dadar Nagar Haveli', 'Nagaland', 'Arunachal Pradesh'.
# NOTE(review): labels must match the CSV spelling exactly (e.g. 'Telengana') — verify against `states`.
list1 = ['Kerala', 'Telengana', 'Delhi', 'Rajasthan', 'Uttar Pradesh',
       'Haryana', 'Ladakh', 'Tamil Nadu', 'Karnataka', 'Maharashtra',
       'Punjab', 'Jammu and Kashmir', 'Andhra Pradesh', 'Uttarakhand',
       'Odisha', 'Puducherry', 'West Bengal', 'Chhattisgarh',
       'Chandigarh', 'Gujarat', 'Himachal Pradesh', 'Madhya Pradesh',
       'Bihar', 'Manipur', 'Andaman and Nicobar Islands',
       'Goa', 'Assam', 'Jharkhand', 'Tripura', 'Meghalaya']
temp = temp[temp['State/UT'].isin(list1)]

# One small line plot of confirmed cases per listed state, five per row, each with its own y-scale.
g = sns.FacetGrid(temp.sort_values(['State/UT','Date']),
                  col="State/UT", hue="State/UT",
                  sharey=False, col_wrap=5)
g = g.map(plt.plot, 'Date', "Confirmed")
g.set_xticklabels(rotation=90)
g.fig.subplots_adjust(top=0.9)
# g.fig.suptitle('Confirmed case in each state over time',
#                fontsize=20)
plt.show()

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*" and later removed
# the old names, so use whichever name this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
# Same facet plot as the previous cell, but on a white background with grid lines.
sns.set_style("whitegrid", {'axes.grid' : True})

temp = states_df.reset_index()
temp = temp.rename(columns = {'State/UnionTerritory' : 'State/UT'})

# States to plot. Left out: 'Sikkim', 'Mizoram', 'Dadar Nagar Haveli', 'Nagaland', 'Arunachal Pradesh'.
# NOTE(review): labels must match the CSV spelling exactly (e.g. 'Telengana') — verify against `states`.
list1 = ['Kerala', 'Telengana', 'Delhi', 'Rajasthan', 'Uttar Pradesh',
       'Haryana', 'Ladakh', 'Tamil Nadu', 'Karnataka', 'Maharashtra',
       'Punjab', 'Jammu and Kashmir', 'Andhra Pradesh', 'Uttarakhand',
       'Odisha', 'Puducherry', 'West Bengal', 'Chhattisgarh',
       'Chandigarh', 'Gujarat', 'Himachal Pradesh', 'Madhya Pradesh',
       'Bihar', 'Manipur', 'Andaman and Nicobar Islands',
       'Goa', 'Assam', 'Jharkhand', 'Tripura', 'Meghalaya']
temp = temp[temp['State/UT'].isin(list1)]

g = sns.FacetGrid(temp.sort_values(['State/UT','Date']),
                  col="State/UT", hue="State/UT",
                  sharey=False, col_wrap=5)
g = g.map(plt.plot, 'Date', "Confirmed")
g.set_xticklabels(rotation=90)
g.fig.subplots_adjust(top=0.9)
# g.fig.suptitle('Confirmed case in each state over time',
#                fontsize=20)
plt.show()

In [None]:
states_df.info()

In [None]:
# Total deaths per date. Passing a {'name': func} dict to SeriesGroupBy.agg (a "nested
# renamer") raises SpecificationError on pandas >= 1.0, so aggregate through the DataFrame
# groupby instead; the result is still a one-column ('Deaths') frame indexed by date.
states_df.groupby(states_df.index).agg({'Deaths' : 'sum'})

In [None]:
# Collapse the per-state rows into one row per date by summing each count column.
aggregate = dict.fromkeys(['Confirmed', 'Cured', 'Deaths', 'Active'], 'sum')
datewise = states_df.groupby(states_df.index).agg(aggregate)
datewise.index = pd.to_datetime(datewise.index)

# Elapsed time since the earliest date in the table (a timedelta at this point).
first_day = datewise.index.min()
datewise["Days Since"] = datewise.index - first_day

In [None]:
# Convert the "Days Since" timedeltas to whole-day integers.
# NOTE: this cell cannot be re-run on its own — `.dt` is only available while the column
# still holds timedeltas.
datewise["Days Since"] = datewise["Days Since"].dt.days

In [None]:
datewise

In [None]:
# Week-of-year number for every date. DatetimeIndex.weekofyear was deprecated in pandas 1.1
# and removed in 2.0; isocalendar().week is its replacement, and the old attribute is kept
# as a fallback for pandas versions that predate isocalendar().
if hasattr(datewise.index, "isocalendar"):
    datewise["WeekOfYear"] = datewise.index.isocalendar().week.astype("int64").to_numpy()
else:
    datewise["WeekOfYear"] = datewise.index.weekofyear

# For each week (in order of first appearance) take the last row recorded in that week.
# Only the four count columns are selected so the values keep their integer dtype.
count_columns = ["Confirmed", "Cured", "Deaths", "Active"]
weekly_last_rows = [
    datewise.loc[datewise["WeekOfYear"] == week, count_columns].iloc[-1]
    for week in datewise["WeekOfYear"].unique()
]

# Weeks are numbered 1..N in plotting order rather than by calendar week.
week_num = list(range(1, len(weekly_last_rows) + 1))
weekwise_confirmed = [row["Confirmed"] for row in weekly_last_rows]
weekwise_recovered = [row["Cured"] for row in weekly_last_rows]
weekwise_deaths = [row["Deaths"] for row in weekly_last_rows]
weekwise_active = [row["Active"] for row in weekly_last_rows]

fig=go.Figure()
weekly_traces = [
    (weekwise_confirmed, 'Weekly Growth of Confirmed Cases'),
    (weekwise_deaths, 'Weekly Growth of Death Cases'),
    (weekwise_recovered, 'Weekly Growth of Recovered Cases'),
    (weekwise_active, 'Weekly Growth of Active Cases'),
]
for weekly_values, trace_name in weekly_traces:
    fig.add_trace(go.Scatter(x=week_num, y=weekly_values,
                             mode='lines+markers',
                             name=trace_name))
# fig.update_layout(title="Weekly Growth of different types of Cases in India",
#                  xaxis_title="Week Number",yaxis_title="Number of Cases",legend=dict(x=0,y=1,traceorder="normal"), plot_bgcolor='rgba(0,0,0,0)')
fig.update_layout(xaxis_title="Week Number",yaxis_title="Number of Cases",legend=dict(x=0,y=1,traceorder="normal"), plot_bgcolor='rgba(0,0,0,0)')
fig.show()

In [None]:
# Active cases per date: confirmed minus cured minus deaths.
active_by_date = datewise["Confirmed"] - datewise["Cured"] - datewise["Deaths"]
fig = px.bar(x=datewise.index, y=active_by_date)
fig.update_layout(xaxis_title="Date", yaxis_title="Number of Cases",
                  plot_bgcolor='rgba(0,0,0,0)')
fig.show()

In [None]:
# Closed cases per date: cured plus deaths.
closed_by_date = datewise["Cured"] + datewise["Deaths"]
fig = px.bar(x=datewise.index, y=closed_by_date)
fig.update_layout(xaxis_title="Date", yaxis_title="Number of Cases",
                  plot_bgcolor='rgba(0,0,0,0)')
fig.show()

In [None]:
# Week-over-week increase (difference of consecutive weekly values, first week set to 0)
# for confirmed cases on the left and deaths on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15,5))
weekly_panels = [
    (ax1, weekwise_confirmed, "Number of Confirmed Cases", "Confirmed Cases (Weekly)"),
    (ax2, weekwise_deaths, "Number of Death Cases", "Death Cases (Weekly)"),
]
for axis, weekly_values, y_label, panel_title in weekly_panels:
    sns.barplot(x=week_num, y=pd.Series(weekly_values).diff().fillna(0), ax=axis)
    axis.set_xlabel("Week Number")
    axis.set_ylabel(y_label)
    axis.set_title(panel_title)


In [None]:
#Calculating the Mortality Rate and Recovery Rate
# Both rates are percentages of confirmed cases; active/closed split the confirmed cases
# into those still open and those that ended in recovery or death.
datewise["Mortality Rate"]=(datewise["Deaths"]/datewise["Confirmed"])*100
datewise["Recovery Rate"]=(datewise["Cured"]/datewise["Confirmed"])*100
datewise["Active Cases"]=datewise["Confirmed"]-datewise["Cured"]-datewise["Deaths"]
datewise["Closed Cases"]=datewise["Cured"]+datewise["Deaths"]

for rate_name in ("Mortality Rate", "Recovery Rate"):
    print("Average "+rate_name,datewise[rate_name].mean())
    print("Median "+rate_name,datewise[rate_name].median())

#Plotting Mortality and Recovery Rate
# The columns computed above are plotted directly instead of recomputing the ratios, and
# the right-hand subplot title is spelled correctly ("Mortality", previously "Mortatlity").
fig = make_subplots(rows=1, cols=2,
                   subplot_titles=("Recovery Rate", "Mortality Rate"))
fig.add_trace(
    go.Scatter(x=datewise.index, y=datewise["Recovery Rate"],name="Recovery Rate"),
    row=1, col=1
)
fig.add_trace(
    go.Scatter(x=datewise.index, y=datewise["Mortality Rate"],name="Mortality Rate"),
    row=1, col=2
)
fig.update_layout(height=400,showlegend=False, plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(title_text="Date", row=1, col=1)
fig.update_yaxes(title_text="Recovery Rate", row=1, col=1)
fig.update_xaxes(title_text="Date", row=1, col=2)
fig.update_yaxes(title_text="Mortality Rate", row=1, col=2)
fig.update_xaxes(showline=True, linewidth=1, linecolor='black')
fig.update_yaxes(showline=True, linewidth=1, linecolor='black')
fig.show()

In [None]:
# Day-over-day increases up to and including 2020-06-01 (the first day's increase is set to 0).
temp = datewise[datewise.index <= '2020-06-01']
daily_new = temp[["Confirmed", "Cured", "Deaths"]].diff().fillna(0)

print("Average increase in number of Confirmed Cases every day: ",np.round(daily_new["Confirmed"].mean()))
print("Average increase in number of Recovered Cases every day: ",np.round(daily_new["Cured"].mean()))
print("Average increase in number of Deaths Cases every day: ",np.round(daily_new["Deaths"].mean()))

fig = go.Figure()
daily_traces = [
    ("Confirmed", 'Confirmed Cases'),
    ("Deaths", 'Death Cases'),
    ("Cured", 'Recovered Cases'),
]
for column, trace_name in daily_traces:
    fig.add_trace(go.Scatter(x=temp.index, y=daily_new[column],
                             mode='lines+markers', name=trace_name))
fig.update_layout(xaxis_title="Date", yaxis_title="Number of Cases",
                  legend=dict(x=0,y=1,traceorder="normal"), plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black')
fig.update_yaxes(showline=True, linewidth=1, linecolor='black')
fig.show()

In [None]:
# Daily change of the 7-day rolling mean of confirmed cases, for the ten states with the
# most deaths in df_india.
# NOTE(review): state names come from df_india but are matched against states_df labels;
# a spelling difference between the two sources yields an empty trace — verify.
fig = go.Figure()
top_death_states = df_india.sort_values('deaths')["deaths"].index[-10:]
for state in top_death_states:
    state_confirmed = states_df.loc[states_df['State/UnionTerritory'] == state, "Confirmed"]
    smoothed_change = state_confirmed.rolling(window=7).mean().diff()
    fig.add_trace(go.Scatter(x=state_confirmed.index, y=smoothed_change,
                             mode='lines', name=state))
fig.update_layout(height=800,
                  xaxis_title="Date", yaxis_title="Confirmed Cases",
                  legend=dict(x=0,y=1,traceorder="normal"), plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black')
fig.update_yaxes(showline=True, linewidth=1, linecolor='black')
fig.show()

In [None]:
# Daily change of the 14-day rolling mean of confirmed cases, for the five states with the
# most deaths in df_india.
fig = go.Figure()
top_death_states = df_india.sort_values('deaths')["deaths"].index[-5:]
for state in top_death_states:
    state_confirmed = states_df.loc[states_df['State/UnionTerritory'] == state, "Confirmed"]
    smoothed_change = state_confirmed.rolling(window=14).mean().diff()
    fig.add_trace(go.Scatter(x=state_confirmed.index, y=smoothed_change,
                             mode='lines', name=state))
fig.update_layout(height=800, title="14 Days Rolling Average of Daily increase of Confirmed Cases",
                  xaxis_title="Date", yaxis_title="Confirmed Cases",
                  legend=dict(x=0,y=1,traceorder="normal"), plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black')
fig.update_yaxes(showline=True, linewidth=1, linecolor='black')
fig.show()

In [None]:
# Growth factor = today's count divided by yesterday's; the first day is fixed at 1.
growth_by_column = {}
for column in ("Confirmed", "Cured", "Deaths"):
    counts = datewise[column]
    day_ratios = [counts.iloc[k + 1] / counts.iloc[k] for k in range(datewise.shape[0] - 1)]
    growth_by_column[column] = [1] + day_ratios

india_increase_confirm = growth_by_column["Confirmed"]
india_increase_recover = growth_by_column["Cured"]
india_increase_deaths = growth_by_column["Deaths"]

fig = go.Figure()
growth_traces = [
    (india_increase_confirm, 'Growth Factor of Confirmed Cases'),
    (india_increase_deaths, 'Growth Factor of Death Cases'),
    (india_increase_recover, 'Growth Factor of Recovered Cases'),
]
for growth_values, trace_name in growth_traces:
    fig.add_trace(go.Scatter(x=datewise.index, y=growth_values,
                             mode='lines',
                             name=trace_name))
fig.update_layout(title="Datewise Growth Factor of Active and Closed cases in India",
                  xaxis_title="Date", yaxis_title="Growth Factor",
                  legend=dict(x=0,y=1,traceorder="normal"), plot_bgcolor='rgba(0,0,0,0)')
fig.show()

### Prediction using Machine Learning Models

###### Linear Regression Model for Confirmed Cases Prediction

In [None]:
# Whole days elapsed since the first date in the index; this is the model feature.
elapsed = datewise.index - datewise.index[0]
datewise["Days Since"] = elapsed.days

In [None]:
# Chronological split: the first 95% of the rows train the models, the rest validate them.
split_at = int(datewise.shape[0] * 0.95)
train_ml = datewise.iloc[:split_at]
valid_ml = datewise.iloc[split_at:]
# Validation RMSE of each model is collected here, in the order the models are fitted.
model_scores = []

In [None]:
datewise

In [None]:
# The `normalize` argument was deprecated in scikit-learn 1.0 and removed in 1.2, where
# passing it raises TypeError. For ordinary least squares on a single feature the rescaling
# does not change the fitted predictions, so the argument is simply dropped.
lin_reg=LinearRegression()

In [None]:
# Fit on the training rows: day number as an (n, 1) feature, confirmed count as an (n, 1) target.
days_train = train_ml["Days Since"].to_numpy().reshape(-1, 1)
confirmed_train = train_ml["Confirmed"].to_numpy().reshape(-1, 1)
lin_reg.fit(days_train, confirmed_train)

In [None]:
# Predictions for the validation days.
days_valid = valid_ml["Days Since"].to_numpy().reshape(-1, 1)
prediction_valid_linreg = lin_reg.predict(days_valid)

In [None]:
# Validation RMSE of the linear model: computed once, recorded, then reported.
rmse_linreg = np.sqrt(mean_squared_error(valid_ml["Confirmed"], prediction_valid_linreg))
model_scores.append(rmse_linreg)
print("Root Mean Square Error for Linear Regression: ", rmse_linreg)

In [None]:
# Linear-regression line over the whole date range, drawn on top of the observed counts.
# The chart is a Plotly figure, so no matplotlib figure is opened here (the earlier
# plt.figure call only produced an empty extra figure).
prediction_linreg=lin_reg.predict(np.array(datewise["Days Since"]).reshape(-1,1))
# The model was fitted on an (n, 1) target, so predictions come back as a column; flatten it.
linreg_output=prediction_linreg.ravel().tolist()

fig=go.Figure()
fig.add_trace(go.Scatter(x=datewise.index, y=datewise["Confirmed"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=datewise.index, y=linreg_output,
                    mode='lines',name="Linear Regression Best Fit Line",
                    line=dict(color='black', dash='dot')))
fig.update_layout(title="Confirmed Cases Linear Regression Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",legend=dict(x=0,y=1,traceorder="normal"), plot_bgcolor='rgba(0,0,0,0)')
fig.show()

#### Polynomial Regression for Prediction of Confirmed Cases

In [None]:
# Same chronological 95% / 5% split as before, recomputed for the polynomial model.
split_at = int(datewise.shape[0] * 0.95)
train_ml = datewise.iloc[:split_at]
valid_ml = datewise.iloc[split_at:]

In [None]:
# Feature expansion that turns the single day-count feature into polynomial terms up to degree 10.
poly = PolynomialFeatures(degree = 10) 

In [None]:
# Fit the polynomial expansion on the training days only, then apply the already-fitted
# transformer to the validation days (transform, not fit_transform, for held-out data).
train_poly=poly.fit_transform(np.array(train_ml["Days Since"]).reshape(-1,1))
valid_poly=poly.transform(np.array(valid_ml["Days Since"]).reshape(-1,1))
# Regression target: confirmed counts of the training rows (1-D).
y=train_ml["Confirmed"]

In [None]:
# Ordinary least squares on the degree-10 polynomial features.
# NOTE(review): `normalize=True` was deprecated in scikit-learn 1.0 and removed in 1.2, so on
# newer versions this line raises TypeError. The polynomial columns span many orders of
# magnitude, so a scaling step would presumably be needed in its place — confirm before changing.
linreg=LinearRegression(normalize=True)
linreg.fit(train_poly,y)

In [None]:
# Validation RMSE of the polynomial model: reported, then recorded with the other scores.
prediction_poly = linreg.predict(valid_poly)
mse_poly = mean_squared_error(valid_ml["Confirmed"], prediction_poly)
rmse_poly = np.sqrt(mse_poly)
print("Root Mean Squared Error for Polynomial Regression: ", rmse_poly)
model_scores.append(rmse_poly)

In [None]:
# Polynomial fit over the whole date range, drawn on top of the observed counts.
# The already-fitted transformer is applied with transform(); it is not re-fitted here.
# The chart is a Plotly figure, so no matplotlib figure is opened (the earlier plt.figure
# call only produced an empty extra figure).
comp_data=poly.transform(np.array(datewise["Days Since"]).reshape(-1,1))
predictions_poly=linreg.predict(comp_data)

fig=go.Figure()
fig.add_trace(go.Scatter(x=datewise.index, y=datewise["Confirmed"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=datewise.index, y=predictions_poly,
                    mode='lines',name="Polynomial Regression Best Fit",
                    line=dict(color='black', dash='dot')))
fig.update_layout(title="Confirmed Cases Polynomial Regression Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",
                 legend=dict(x=0,y=1,traceorder="normal"), plot_bgcolor='rgba(0,0,0,0)')
fig.show()

In [None]:
# Polynomial-model forecast for the 29 days after the last observed day. All future days are
# transformed and predicted in one call, using the already-fitted transformer instead of
# re-fitting it for every single day.
future_days = datewise["Days Since"].max() + np.arange(1, 30)
future_poly = poly.transform(future_days.reshape(-1, 1))
new_prediction_poly = list(linreg.predict(future_poly))

#### Support Vector Machine Regressor for Prediction of Confirmed Cases

In [None]:
# Same chronological 95% / 5% split as before, recomputed for the SVM model.
split_at = int(datewise.shape[0] * 0.95)
train_ml = datewise.iloc[:split_at]
valid_ml = datewise.iloc[split_at:]

In [None]:
# Initializing the SVR model: polynomial kernel of degree 5, with C=1 and epsilon=0.01.
svm=SVR(C=1,degree=5,kernel='poly',epsilon=0.01)

In [None]:
#Fitting model on the training data
svm.fit(np.array(train_ml["Days Since"]).reshape(-1,1),np.array(train_ml["Confirmed"]).reshape(-1,1))

In [None]:
prediction_valid_svm=svm.predict(np.array(valid_ml["Days Since"]).reshape(-1,1))

In [None]:
model_scores.append(np.sqrt(mean_squared_error(valid_ml["Confirmed"],prediction_valid_svm)))
print("Root Mean Square Error for Support Vectore Machine: ",np.sqrt(mean_squared_error(valid_ml["Confirmed"],prediction_valid_svm)))

In [None]:
# Predict over every observed day to draw the SVR curve against the actual counts.
# (No matplotlib figure is opened here: the chart below is drawn with plotly.)
prediction_svm=svm.predict(np.array(datewise["Days Since"]).reshape(-1,1))
fig=go.Figure()
fig.add_trace(go.Scatter(x=datewise.index, y=datewise["Confirmed"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=datewise.index, y=prediction_svm,
                    mode='lines',name="Support Vector Machine Best fit Kernel",
                    line=dict(color='black', dash='dot')))
fig.update_layout(title="Confirmed Cases Support Vectore Machine Regressor Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",legend=dict(x=0,y=1,traceorder="normal"), plot_bgcolor='rgba(0,0,0,0)')
fig.show()

In [None]:
# Build the forecast dates and the linear-regression / SVM predictions for the
# 29 days that follow the last observed day.
forecast_steps=range(1,30)
last_observed_day=datewise["Days Since"].max()
new_date=[datewise.index[-1]+timedelta(days=step) for step in forecast_steps]
new_prediction_lr=[
    lin_reg.predict(np.array(last_observed_day+step).reshape(-1,1))[0][0]
    for step in forecast_steps
]
new_prediction_svm=[
    svm.predict(np.array(last_observed_day+step).reshape(-1,1))[0]
    for step in forecast_steps
]

In [None]:
# Render floats with six fixed decimals in every DataFrame shown from here on.
pd.set_option('display.float_format', lambda value: '%.6f' % value)

# One row per forecast date, one column per model.
prediction_columns=["Dates","Linear Regression Prediction","Polynonmial Regression Prediction","SVM Prediction"]
prediction_rows=zip(new_date,new_prediction_lr,new_prediction_poly,new_prediction_svm)
model_predictions=pd.DataFrame(prediction_rows,columns=prediction_columns)
model_predictions.head()

#### Time Series Forecasting

###### Holt's Linear Model

#### India Overall

In [None]:
model_train=datewise.iloc[:int(datewise.shape[0]*0.95)]
valid=datewise.iloc[int(datewise.shape[0]*0.95):]

In [None]:
holt=Holt(np.asarray(model_train["Confirmed"])).fit(smoothing_level=0.1, smoothing_slope=1.0,optimized=False)
y_pred=valid.copy()

In [None]:
y_pred["Holt"]=holt.forecast(len(valid))
model_scores.append(np.sqrt(mean_squared_error(y_pred["Confirmed"],y_pred["Holt"])))
print("Root Mean Square Error Holt's Linear Model: ",np.sqrt(mean_squared_error(y_pred["Confirmed"],y_pred["Holt"])))

In [None]:
# Training data, validation data and the Holt forecast on one chart.
holt_traces=[
    (model_train.index, model_train["Confirmed"], "Train Data for Confirmed Cases"),
    (valid.index, valid["Confirmed"], "Validation Data for Confirmed Cases"),
    (valid.index, y_pred["Holt"], "Prediction of Confirmed Cases"),
]
fig=go.Figure()
for trace_x, trace_y, trace_name in holt_traces:
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers', name=trace_name))
fig.update_layout(
    title="Confirmed Cases Holt's Linear Model Prediction",
    xaxis_title="Date",
    yaxis_title="Confirmed Cases",
    legend=dict(x=0,y=1,traceorder="normal"),
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.show()

In [None]:
# The model was fitted on the training rows only, so a forecast for the days
# after the last observation has to skip the len(valid) validation steps first.
# Forecast once to the furthest horizon and slice, instead of re-forecasting
# from scratch for every future day.
holt_forecast_days=29  # same horizon as range(1,30) used for the other models
holt_new_date=[datewise.index[-1]+timedelta(days=i) for i in range(1,holt_forecast_days+1)]
holt_full_forecast=np.asarray(holt.forecast(len(valid)+holt_forecast_days))
holt_new_prediction=list(holt_full_forecast[len(valid):])

# NOTE: the Holt forecast is currently not added to model_predictions.
# model_predictions["Holt's Linear Model Prediction"]=holt_new_prediction
# model_predictions.head()

#### Holt's Winter Model for Daily Time Series

In [None]:
model_train=datewise.iloc[:int(datewise.shape[0]*0.95)]
valid=datewise.iloc[int(datewise.shape[0]*0.95):]
y_pred=valid.copy()

In [None]:
es=ExponentialSmoothing(np.asarray(model_train['Confirmed']),seasonal_periods=4,trend='mul', seasonal='mul').fit()

In [None]:
y_pred["Holt's Winter Model"]=es.forecast(len(valid))

In [None]:
model_scores.append(np.sqrt(mean_squared_error(y_pred["Confirmed"],y_pred["Holt's Winter Model"])))
print("Root Mean Square Error for Holt's Winter Model: ",np.sqrt(mean_squared_error(y_pred["Confirmed"],y_pred["Holt's Winter Model"])))

In [None]:
# Training data, validation data and the Holt-Winters forecast on one chart.
hw_traces=[
    (model_train.index, model_train["Confirmed"], "Train Data for Confirmed Cases"),
    (valid.index, valid["Confirmed"], "Validation Data for Confirmed Cases"),
    (valid.index, y_pred["Holt's Winter Model"], "Prediction of Confirmed Cases"),
]
fig=go.Figure()
for trace_x, trace_y, trace_name in hw_traces:
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers', name=trace_name))
fig.update_layout(
    title="Confirmed Cases Holt's Winter Model Prediction",
    xaxis_title="Date",
    yaxis_title="Confirmed Cases",
    legend=dict(x=0,y=1,traceorder="normal"),
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.show()

In [None]:
# The model was fitted on the training rows only, so the forecast for the days
# after the last observation starts len(valid) steps into the horizon.
# Forecast once to the furthest day and slice, rather than re-forecasting 29 times.
hw_forecast_days=29  # same horizon as range(1,30) used for the other models
hw_full_forecast=np.asarray(es.forecast(len(valid)+hw_forecast_days))
holt_winter_new_prediction=list(hw_full_forecast[len(valid):])
model_predictions["Holt's Winter Model Prediction"]=holt_winter_new_prediction
model_predictions.head()

###### AR Model (using AUTO ARIMA)

In [None]:
model_train=datewise.iloc[:int(datewise.shape[0]*0.95)]
valid=datewise.iloc[int(datewise.shape[0]*0.95):]
y_pred=valid.copy()

In [None]:
model_ar= auto_arima(model_train["Confirmed"],trace=True, error_action='ignore', start_p=0,start_q=0,max_p=5,max_q=0,
                   suppress_warnings=True,stepwise=False,seasonal=False)
model_ar.fit(model_train["Confirmed"])

In [None]:
prediction_ar=model_ar.predict(len(valid))
y_pred["AR Model Prediction"]=prediction_ar

In [None]:
model_scores.append(np.sqrt(mean_squared_error(y_pred["Confirmed"],y_pred["AR Model Prediction"])))
print("Root Mean Square Error for AR Model: ",np.sqrt(mean_squared_error(y_pred["Confirmed"],y_pred["AR Model Prediction"])))

In [None]:
# Training data, validation data and the AR forecast on one chart.
ar_traces=[
    (model_train.index, model_train["Confirmed"], "Train Data for Confirmed Cases"),
    (valid.index, valid["Confirmed"], "Validation Data for Confirmed Cases"),
    (valid.index, y_pred["AR Model Prediction"], "Prediction of Confirmed Cases"),
]
fig=go.Figure()
for trace_x, trace_y, trace_name in ar_traces:
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers', name=trace_name))
fig.update_layout(
    title="Confirmed Cases AR Model Prediction",
    xaxis_title="Date",
    yaxis_title="Confirmed Cases",
    legend=dict(x=0,y=1,traceorder="normal"),
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.show()

In [None]:
# The model was fitted on the training rows only, so the forecast for the days
# after the last observation starts len(valid) steps into the horizon.
# Predict once to the furthest day and slice, rather than re-predicting 29 times.
ar_forecast_days=29  # same horizon as range(1,30) used for the other models
ar_full_forecast=np.asarray(model_ar.predict(len(valid)+ar_forecast_days))
AR_model_new_prediction=list(ar_full_forecast[len(valid):])
model_predictions["AR Model Prediction"]=AR_model_new_prediction
model_predictions.head()

###### MA Model (using AUTO ARIMA)

In [None]:
model_train=datewise.iloc[:int(datewise.shape[0]*0.95)]
valid=datewise.iloc[int(datewise.shape[0]*0.95):]
y_pred=valid.copy()

In [None]:
model_ma= auto_arima(model_train["Confirmed"],trace=True, error_action='ignore', start_p=0,start_q=0,max_p=0,max_q=5,
                   suppress_warnings=True,stepwise=False,seasonal=False)
model_ma.fit(model_train["Confirmed"])

In [None]:
prediction_ma=model_ma.predict(len(valid))
y_pred["MA Model Prediction"]=prediction_ma

In [None]:
model_scores.append(np.sqrt(mean_squared_error(valid["Confirmed"],prediction_ma)))
print("Root Mean Square Error for MA Model: ",np.sqrt(mean_squared_error(valid["Confirmed"],prediction_ma)))

In [None]:
# Training data, validation data and the MA forecast on one chart.
ma_traces=[
    (model_train.index, model_train["Confirmed"], "Train Data for Confirmed Cases"),
    (valid.index, valid["Confirmed"], "Validation Data for Confirmed Cases"),
    (valid.index, y_pred["MA Model Prediction"], "Prediction for Confirmed Cases"),
]
fig=go.Figure()
for trace_x, trace_y, trace_name in ma_traces:
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers', name=trace_name))
fig.update_layout(
    title="Confirmed Cases MA Model Prediction",
    xaxis_title="Date",
    yaxis_title="Confirmed Cases",
    legend=dict(x=0,y=1,traceorder="normal"),
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.show()

In [None]:
# The model was fitted on the training rows only, so the forecast for the days
# after the last observation starts len(valid) steps into the horizon.
# Predict once to the furthest day and slice, rather than re-predicting 29 times.
ma_forecast_days=29  # same horizon as range(1,30) used for the other models
ma_full_forecast=np.asarray(model_ma.predict(len(valid)+ma_forecast_days))
MA_model_new_prediction=list(ma_full_forecast[len(valid):])
model_predictions["MA Model Prediction"]=MA_model_new_prediction
model_predictions.head()

###### ARIMA Model (using AUTO ARIMA)

In [None]:
model_train=datewise.iloc[:int(datewise.shape[0]*0.95)]
valid=datewise.iloc[int(datewise.shape[0]*0.95):]
y_pred=valid.copy()

In [None]:
model_arima= auto_arima(model_train["Confirmed"],trace=True, error_action='ignore', start_p=1,start_q=1,max_p=3,max_q=3,
                   suppress_warnings=True,stepwise=False,seasonal=False)
model_arima.fit(model_train["Confirmed"])

In [None]:
prediction_arima=model_arima.predict(len(valid))
y_pred["ARIMA Model Prediction"]=prediction_arima

In [None]:
model_scores.append(np.sqrt(mean_squared_error(valid["Confirmed"],prediction_arima)))
print("Root Mean Square Error for ARIMA Model: ",np.sqrt(mean_squared_error(valid["Confirmed"],prediction_arima)))

In [None]:
# Training data, validation data and the ARIMA forecast on one chart.
arima_traces=[
    (model_train.index, model_train["Confirmed"], "Train Data for Confirmed Cases"),
    (valid.index, valid["Confirmed"], "Validation Data for Confirmed Cases"),
    (valid.index, y_pred["ARIMA Model Prediction"], "Prediction for Confirmed Cases"),
]
fig=go.Figure()
for trace_x, trace_y, trace_name in arima_traces:
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers', name=trace_name))
fig.update_layout(
    title="Confirmed Cases ARIMA Model Prediction",
    xaxis_title="Date",
    yaxis_title="Confirmed Cases",
    legend=dict(x=0,y=1,traceorder="normal"),
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.show()

In [None]:
# The model was fitted on the training rows only, so the forecast for the days
# after the last observation starts len(valid) steps into the horizon.
# Predict once to the furthest day and slice, rather than re-predicting 29 times.
arima_forecast_days=29  # same horizon as range(1,30) used for the other models
arima_full_forecast=np.asarray(model_arima.predict(len(valid)+arima_forecast_days))
ARIMA_model_new_prediction=list(arima_full_forecast[len(valid):])
model_predictions["ARIMA Model Prediction"]=ARIMA_model_new_prediction
model_predictions.head()

###### SARIMA Model (using AUTO ARIMA)

In [None]:
model_sarima= auto_arima(model_train["Confirmed"],trace=True, error_action='ignore', 
                         start_p=0,start_q=0,max_p=3,max_q=3,m=7,
                   suppress_warnings=True,stepwise=True,seasonal=True)
model_sarima.fit(model_train["Confirmed"])

In [None]:
prediction_sarima=model_sarima.predict(len(valid))
y_pred["SARIMA Model Prediction"]=prediction_sarima

In [None]:
model_scores.append(np.sqrt(mean_squared_error(y_pred["Confirmed"],y_pred["SARIMA Model Prediction"])))
print("Root Mean Square Error for SARIMA Model: ",np.sqrt(mean_squared_error(y_pred["Confirmed"],y_pred["SARIMA Model Prediction"])))

In [None]:
# Training data, validation data and the SARIMA forecast on one chart.
sarima_traces=[
    (model_train.index, model_train["Confirmed"], "Train Data for Confirmed Cases"),
    (valid.index, valid["Confirmed"], "Validation Data for Confirmed Cases"),
    (valid.index, y_pred["SARIMA Model Prediction"], "Prediction for Confirmed Cases"),
]
fig=go.Figure()
for trace_x, trace_y, trace_name in sarima_traces:
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers', name=trace_name))
fig.update_layout(
    title="Confirmed Cases SARIMA Model Prediction",
    xaxis_title="Date",
    yaxis_title="Confirmed Cases",
    legend=dict(x=0,y=1,traceorder="normal"),
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.show()

In [None]:
# The model was fitted on the training rows only, so the forecast for the days
# after the last observation starts len(valid) steps into the horizon.
# Predict once to the furthest day and slice, rather than re-predicting 29 times.
sarima_forecast_days=29  # same horizon as range(1,30) used for the other models
sarima_full_forecast=np.asarray(model_sarima.predict(len(valid)+sarima_forecast_days))
SARIMA_model_new_prediction=list(sarima_full_forecast[len(valid):])
model_predictions["SARIMA Model Prediction"]=SARIMA_model_new_prediction
model_predictions.head()

###### Facebook's Prophet Model for forecasting

In [None]:
prophet_c=Prophet(interval_width=0.95,weekly_seasonality=True,)
prophet_confirmed=pd.DataFrame(zip(list(datewise.index),list(datewise["Confirmed"])),columns=['ds','y'])

In [None]:
prophet_c.fit(prophet_confirmed)

In [None]:
forecast_c=prophet_c.make_future_dataframe(periods=17)
forecast_confirmed=forecast_c.copy()

In [None]:
confirmed_forecast=prophet_c.predict(forecast_c)
#print(confirmed_forecast[['ds','yhat', 'yhat_lower', 'yhat_upper']])

In [None]:
model_scores.append(np.sqrt(mean_squared_error(datewise["Confirmed"],confirmed_forecast['yhat'].head(datewise.shape[0]))))
print("Root Mean Squared Error for Prophet Model: ",np.sqrt(mean_squared_error(datewise["Confirmed"],confirmed_forecast['yhat'].head(datewise.shape[0]))))

In [None]:
# Plot the Prophet forecast; print() writes the text form of the object plot() returns.
print(prophet_c.plot(confirmed_forecast))

In [None]:
# Plot the components of the Prophet forecast; print() writes the text form of the returned object.
print(prophet_c.plot_components(confirmed_forecast))

###### Forecast for the next 29 days

In [None]:
# Display the complete forecast table (every row, not just the head).
model_predictions

###### Summarization of Forecasts using different Models

In [None]:
# Names are paired with model_scores by position; zip() stops at the shorter
# sequence, so any score beyond the ninth is left out of the table.
model_names=[
    "Linear Regression",
    "Polynomial Regression",
    "Support Vector Machine Regressor",
    "Holt's Linear",
    "Holt's Winter Model",
    "Auto Regressive Model (AR)",
    "Moving Average Model (MA)",
    "ARIMA Model",
    "SARIMA Model",
]#,"Facebook's Prophet Model"]
score_table=pd.DataFrame(zip(model_names,model_scores),columns=["Model Name","Root Mean Squared Error"])
# Lowest RMSE first.
model_summary=score_table.sort_values(["Root Mean Squared Error"])
model_summary

###### Time Series Forecasting for Death Cases

In [None]:
# Death counts over the rows currently held in model_train.
death_trace=go.Scatter(
    x=model_train.index,
    y=model_train["Deaths"],
    mode='lines+markers',
    name="Death Cases",
)
fig=go.Figure()
fig.add_trace(death_trace)
fig.update_layout(
    title="Death Cases",
    xaxis_title="Date",
    yaxis_title="Number of Death Cases",
    legend=dict(x=0,y=1,traceorder="normal"),
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.show()

In [None]:
model_train=datewise.iloc[:int(datewise.shape[0]*0.95)]
valid=datewise.iloc[int(datewise.shape[0]*0.95):]
y_pred=valid.copy()

In [None]:
model_arima_deaths=auto_arima(model_train["Deaths"],trace=True, error_action='ignore', start_p=0,start_q=0,
                              max_p=2,max_q=2,suppress_warnings=True,stepwise=False,seasonal=False)     
model_arima_deaths.fit(model_train["Deaths"])

In [None]:
predictions_deaths=model_arima_deaths.predict(len(valid))
y_pred["ARIMA Death Prediction"]=predictions_deaths

In [None]:
print("Root Mean Square Error: ",np.sqrt(mean_squared_error(valid["Deaths"],predictions_deaths)))

In [None]:
# Training data, validation data and the ARIMA forecast of deaths on one chart.
death_traces=[
    (model_train.index, model_train["Deaths"], "Train Data for Death Cases"),
    (valid.index, valid["Deaths"], "Validation Data for Death Cases"),
    (valid.index, y_pred["ARIMA Death Prediction"], "Prediction for Death Cases"),
]
fig=go.Figure()
for trace_x, trace_y, trace_name in death_traces:
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers', name=trace_name))
fig.update_layout(
    title="Death Cases ARIMA Model Prediction",
    xaxis_title="Date",
    yaxis_title="Death Cases",
    legend=dict(x=0,y=1,traceorder="normal"),
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.show()

In [None]:
# The model was fitted on the training rows only, so the forecast for the days
# after the last observation starts len(valid) steps into the horizon.
# Predict once to the furthest day and slice, rather than re-predicting 29 times.
deaths_forecast_days=29  # same horizon as range(1,30) used for the confirmed-case models
deaths_full_forecast=np.asarray(model_arima_deaths.predict(len(valid)+deaths_forecast_days))
ARIMA_model_death_forecast=list(deaths_full_forecast[len(valid):])

###### Prediction for top 5 affected states

In [None]:
# Regression feature: whole days elapsed since the first row of states_df.
states_df["Days Since"]=states_df.index-states_df.index[0]
states_df["Days Since"]=states_df["Days Since"].dt.days

# Names are paired with model_scores by position; zip() stops at the shorter
# sequence, so the trailing Prophet score is left out of each summary.
model_names=["Linear Regression","Polynomial Regression","Support Vector Machine Regressor","Holt's Linear","Holt's Winter Model",
        "Auto Regressive Model (AR)","Moving Average Model (MA)","ARIMA Model","SARIMA Model"]#,"Facebook's Prophet Model"]

# Per-state frames are collected in lists and concatenated once after the loop
# instead of growing DataFrames row by row.
state_summaries=[]   # full RMSE ranking for each state
state_best_rows=[]   # lowest-RMSE row for each state

# Iterate over the five index labels of df_india with the largest 'confirmed' values.
for state_name in df_india.sort_values('confirmed')["confirmed"].index[-5:]:
    model_scores=[]
    # Filter this state's rows once and reuse the result for every model.
    state_data=states_df[states_df['State/UnionTerritory'] == state_name]

    #train-test split (chronological: first 95% train, last 5% validation)
    split_idx=int(state_data.shape[0]*0.95)
    train=state_data.iloc[:split_idx]
    valid=state_data.iloc[split_idx:]
    train_days=np.array(train["Days Since"]).reshape(-1,1)
    valid_days=np.array(valid["Days Since"]).reshape(-1,1)

    #Linear Regression model
    lin_reg=LinearRegression(normalize=True)
    lin_reg.fit(train_days,np.array(train["Confirmed"]).reshape(-1,1))
    prediction_valid_linreg=lin_reg.predict(valid_days)
    model_scores.append(np.sqrt(mean_squared_error(valid["Confirmed"],prediction_valid_linreg)))

    #Polynomial Regression
    poly = PolynomialFeatures(degree = 10)
    train_poly=poly.fit_transform(train_days)
    # Validation data goes through the transformer fitted on the training data.
    valid_poly=poly.transform(valid_days)
    y=train["Confirmed"]
    linreg=LinearRegression(normalize=True)
    linreg.fit(train_poly,y)
    prediction_poly=linreg.predict(valid_poly)
    rmse_poly=np.sqrt(mean_squared_error(valid["Confirmed"],prediction_poly))
    model_scores.append(rmse_poly)

    #Support Vector Machine Model Regressor
    svm=SVR(C=1,degree=5,kernel='poly',epsilon=0.01)
    # 1-D target: a column vector triggers scikit-learn's DataConversionWarning.
    svm.fit(train_days,np.array(train["Confirmed"]))
    prediction_valid_svm=svm.predict(valid_days)
    model_scores.append(np.sqrt(mean_squared_error(valid["Confirmed"],prediction_valid_svm)))

    #Time Series Forecasting

    y_pred=valid.copy()
    #Holt's Linear Model
    holt=Holt(np.asarray(train["Confirmed"])).fit(smoothing_level=0.1, smoothing_slope=1.0,optimized=False)
    y_pred["Holt"]=holt.forecast(len(valid))
    model_scores.append(np.sqrt(mean_squared_error(y_pred["Confirmed"],y_pred["Holt"])))

    #Holt's Winter Model
    y_pred=valid.copy()
    es=ExponentialSmoothing(np.asarray(train['Confirmed']),seasonal_periods=4,trend='mul', seasonal='mul').fit()
    y_pred["Holt's Winter Model"]=es.forecast(len(valid))
    model_scores.append(np.sqrt(mean_squared_error(y_pred["Confirmed"],y_pred["Holt's Winter Model"])))

    #AR Model (using AUTO ARIMA)
    model_ar= auto_arima(train["Confirmed"],trace=True, error_action='ignore', start_p=0,start_q=0,max_p=5,max_q=0,
                   suppress_warnings=True,stepwise=False,seasonal=False)
    model_ar.fit(train["Confirmed"])
    prediction_ar=model_ar.predict(len(valid))
    y_pred["AR Model Prediction"]=prediction_ar
    model_scores.append(np.sqrt(mean_squared_error(y_pred["Confirmed"],y_pred["AR Model Prediction"])))

    #MA Model (using AUTO ARIMA)
    model_ma= auto_arima(train["Confirmed"],trace=True, error_action='ignore', start_p=0,start_q=0,max_p=0,max_q=5,
                   suppress_warnings=True,stepwise=False,seasonal=False)
    model_ma.fit(train["Confirmed"])
    prediction_ma=model_ma.predict(len(valid))
    y_pred["MA Model Prediction"]=prediction_ma
    model_scores.append(np.sqrt(mean_squared_error(valid["Confirmed"],prediction_ma)))

    #ARIMA Model (using AUTOARIMA)
    model_arima= auto_arima(train["Confirmed"],trace=True, error_action='ignore', start_p=1,start_q=1,max_p=3,max_q=3,
                   suppress_warnings=True,stepwise=False,seasonal=False)
    model_arima.fit(train["Confirmed"])
    prediction_arima=model_arima.predict(len(valid))
    y_pred["ARIMA Model Prediction"]=prediction_arima
    model_scores.append(np.sqrt(mean_squared_error(valid["Confirmed"],prediction_arima)))

    #SARIMA Model (using AUTO ARIMA)
    model_sarima= auto_arima(train["Confirmed"],trace=True, error_action='ignore',
                         start_p=0,start_q=0,max_p=3,max_q=3,m=7,
                   suppress_warnings=True,stepwise=True,seasonal=True)
    model_sarima.fit(train["Confirmed"])
    prediction_sarima=model_sarima.predict(len(valid))
    y_pred["SARIMA Model Prediction"]=prediction_sarima
    model_scores.append(np.sqrt(mean_squared_error(y_pred["Confirmed"],y_pred["SARIMA Model Prediction"])))

    #Prophet Model
    # Prophet is fitted on all of the state's rows, so this RMSE is in-sample.
    prophet_c=Prophet(interval_width=0.95,weekly_seasonality=True,)
    prophet_confirmed=pd.DataFrame(zip(list(state_data.index),list(state_data["Confirmed"])),columns=['ds','y'])
    prophet_c.fit(prophet_confirmed)
    forecast_c=prophet_c.make_future_dataframe(periods=17)
    forecast_confirmed=forecast_c.copy()
    confirmed_forecast=prophet_c.predict(forecast_c)
    model_scores.append(np.sqrt(mean_squared_error(state_data["Confirmed"],confirmed_forecast['yhat'].head(state_data.shape[0]))))
    #     print(prophet_c.plot(confirmed_forecast))
    #     print(prophet_c.plot_components(confirmed_forecast))

    # Rank this state's models by validation RMSE, lowest first.
    model_summary=pd.DataFrame(zip(model_names,model_scores),columns=["Model Name","Root Mean Squared Error"]).sort_values(["Root Mean Squared Error"])
    model_summary['State/UT'] = state_name
    state_summaries.append(model_summary)
    state_best_rows.append(model_summary.iloc[[0]])  # one-row frame keeps column dtypes

# pd.concat raises on an empty list, so fall back to an empty frame in that case.
results = pd.concat(state_summaries, ignore_index = True) if state_summaries else pd.DataFrame()
selected = pd.concat(state_best_rows, ignore_index = True) if state_best_rows else pd.DataFrame()

In [None]:
# Display the RMSE ranking of every model for each selected state.
results

In [None]:
# Display the first-ranked (lowest-RMSE) model row for each selected state.
selected