# Corona Virus Active Rate Prediction

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go 
import seaborn as sns
import plotly
import plotly.express as px
from fbprophet.plot import plot_plotly
from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot, plot_mpl
import plotly.offline as py
init_notebook_mode(connected=True)
import warnings
warnings.filterwarnings('ignore')
import os

### Loading Data from the Kaggle Novel Corona Virus Dataset

In [2]:
# Load the observation-level case data; 'ObservationDate' is parsed into datetimes on read.
df1=pd.read_csv('covid_19_data.csv',parse_dates=['ObservationDate'])

In [3]:
# Show the column labels of the loaded frame as a plain Python list.
df1.columns.tolist()

['SNo',
 'ObservationDate',
 'Province/State',
 'Country/Region',
 'Last Update',
 'Confirmed',
 'Deaths',
 'Recovered']

In [4]:
# Active cases are the confirmed cases that have neither died nor recovered.
df1['Active'] = df1['Confirmed'] - df1['Deaths'] - df1['Recovered']

# Use a single label for China and replace missing province names with ''.
df1['Country/Region'] = df1['Country/Region'].replace('Mainland China', 'China')
df1['Province/State'] = df1['Province/State'].fillna('')

# Store every count column as an integer.
for count_col in ('Confirmed', 'Deaths', 'Recovered', 'Active'):
    df1[count_col] = df1[count_col].astype(int)

### Gradient Color Mapping

In [5]:
# Preview the first 10 rows with a 'PRGn' background gradient applied by the Styler.
df1.head(10).style.background_gradient(cmap='PRGn')

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Active
0,1,2020-01-22 00:00:00,Anhui,China,1/22/2020 17:00,1,0,0,1
1,2,2020-01-22 00:00:00,Beijing,China,1/22/2020 17:00,14,0,0,14
2,3,2020-01-22 00:00:00,Chongqing,China,1/22/2020 17:00,6,0,0,6
3,4,2020-01-22 00:00:00,Fujian,China,1/22/2020 17:00,1,0,0,1
4,5,2020-01-22 00:00:00,Gansu,China,1/22/2020 17:00,0,0,0,0
5,6,2020-01-22 00:00:00,Guangdong,China,1/22/2020 17:00,26,0,0,26
6,7,2020-01-22 00:00:00,Guangxi,China,1/22/2020 17:00,2,0,0,2
7,8,2020-01-22 00:00:00,Guizhou,China,1/22/2020 17:00,1,0,0,1
8,9,2020-01-22 00:00:00,Hainan,China,1/22/2020 17:00,4,0,0,4
9,10,2020-01-22 00:00:00,Hebei,China,1/22/2020 17:00,1,0,0,1


In [6]:
# Remove the columns that are not analysed and shorten the remaining names,
# then report the number of null values per column.
df1 = (
    df1
    .drop(columns=['SNo', 'Last Update'])
    .rename(columns={
        'ObservationDate': 'Date',
        'Province/State': 'State',
        'Country/Region': 'Country',
    })
)
df1.isnull().sum()


Date         0
State        0
Country      0
Confirmed    0
Deaths       0
Recovered    0
Active       0
dtype: int64

In [7]:
# Temporary frame holding, for each date, the summed counts of every case category.
# The columns are selected with a list: selecting several groupby columns with a
# tuple is deprecated and is rejected by newer pandas releases.
temp = df1.groupby('Date')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()

In [8]:
# Newest dates first, with the three reported count columns cast to int,
# then a styled preview of the top rows.
temp = temp.sort_values('Date', ascending=False)
temp = temp.astype({'Confirmed': int, 'Deaths': int, 'Recovered': int})
temp.head().style.background_gradient(cmap='PRGn')


Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active
48,2020-03-10 00:00:00,118582,4262,64404,49916
47,2020-03-09 00:00:00,113582,3996,62512,47074
46,2020-03-08 00:00:00,109835,3803,60695,45337
45,2020-03-07 00:00:00,105836,3558,58359,43919
44,2020-03-06 00:00:00,101800,3460,55866,42474


In [9]:
# Daily totals across all rows: one line per case category on a dark template.
fig = go.Figure()
fig.update_layout(template='plotly_dark')

# (column in `temp`, line colour); the column name doubles as the legend label.
for series_name, line_colour in (
    ('Confirmed', 'Yellow'),
    ('Deaths', 'Red'),
    ('Recovered', 'Green'),
    ('Active', 'Blue'),
):
    fig.add_trace(
        go.Scatter(
            x=temp['Date'],
            y=temp[series_name],
            mode='lines+markers',
            name=series_name,
            line=dict(color=line_colour, width=2),
        )
    )

fig.show()


In [10]:
# Per-country daily totals of confirmed and active cases.
# Despite its name, `china_vs_rest` holds only the rows whose Country is not 'China'.
china_vs_rest = df1[df1['Country'] != 'China']

# Select the two columns with a list (tuple-style selection on a groupby is
# deprecated and rejected by newer pandas) and sum them per (Date, Country).
china_vs_rest = china_vs_rest.groupby(['Date', 'Country'])[['Confirmed', 'Active']].sum()
china_vs_rest.columns = ['Confirmed All', 'Active All']
china_vs_rest = china_vs_rest.reset_index()

# Confirmed cases: one line per country.
fig1 = px.line(china_vs_rest, x="Date", y="Confirmed All", color="Country",
               line_group="Country", hover_name="Country")
fig1.update_layout(template='plotly_dark')

# Active cases: one line per country. This is the last expression of the cell,
# so the returned figure is what the cell displays.
fig2 = px.line(china_vs_rest, x="Date", y="Active All", color="Country",
               line_group="Country", hover_name="Country")
fig2.update_layout(template='plotly_dark')
 

In [11]:
# Confirmed cases: display the per-country line chart stored in `fig1`.
fig1

In [12]:
# Daily totals over all rows, indexed by Date, plus derived percentage columns.
grouped_multiple = df1.groupby('Date').agg({'Deaths': 'sum', 'Recovered': 'sum', 'Active': 'sum', 'Confirmed': 'sum'})
grouped_multiple.columns = ['Death All', 'Recovered All', 'Active All', 'Confirmed All']

# Next-day increase in confirmed cases: diff() is the change from the previous row
# and shift(-1) moves it up one row, so the last date has no value (NaN).
grouped_multiple['Difference_world'] = grouped_multiple['Confirmed All'].diff().shift(-1)

# Each category as a percentage of the confirmed total for the same date,
# computed column-wise instead of with a row-by-row apply.
grouped_multiple['Death_All%'] = grouped_multiple['Death All'] / grouped_multiple['Confirmed All'] * 100
grouped_multiple['Active_All%'] = grouped_multiple['Active All'] / grouped_multiple['Confirmed All'] * 100
grouped_multiple['Recovered_All%'] = grouped_multiple['Recovered All'] / grouped_multiple['Confirmed All'] * 100

# Next-day increase relative to the current confirmed total, in percent.
grouped_multiple['World_Growth_Rate'] = grouped_multiple['Difference_world'] / grouped_multiple['Confirmed All'] * 100

In [13]:
# Move the 'Date' index back into a regular column; the plotting cell reads grouped_multiple['Date'].
# NOTE(review): this cell is not idempotent — re-running it on the already-reset frame
# would add an extra 'index' column. Run it once per kernel session.
grouped_multiple=grouped_multiple.reset_index()

In [14]:
# Daily rate curves taken from `grouped_multiple`, one line per rate.
fig = go.Figure()

# (column, legend label, line colour)
for rate_col, legend_label, line_colour in (
    ('Death_All%', 'Death Rate', 'red'),
    ('Recovered_All%', 'Recovered Rate', 'Green'),
    ('World_Growth_Rate', 'Growth Rate Confirmed', 'Yellow'),
    ('Active_All%', 'Active Rate', 'Blue'),
):
    fig.add_trace(
        go.Scatter(
            x=grouped_multiple['Date'],
            y=grouped_multiple[rate_col],
            mode='lines+markers',
            name=legend_label,
            line=dict(color=line_colour, width=2),
        )
    )

fig.update_layout(template='plotly_dark')
fig.show()

# Latest rows of the table behind the chart.
grouped_multiple.tail()

Unnamed: 0,Date,Death All,Recovered All,Active All,Confirmed All,Difference_world,Death_All%,Active_All%,Recovered_All%,World_Growth_Rate
44,2020-03-06,3460,55866,42474,101800,4036.0,3.398821,41.722986,54.878193,3.964637
45,2020-03-07,3558,58359,43919,105836,3999.0,3.361805,41.497222,55.140973,3.778487
46,2020-03-08,3803,60695,45337,109835,3747.0,3.462466,41.277371,55.260163,3.411481
47,2020-03-09,3996,62512,47074,113582,5000.0,3.518163,41.444947,55.03689,4.402106
48,2020-03-10,4262,64404,49916,118582,,3.594137,42.094078,54.311784,


In [15]:
# Per-country daily mortality rate, plotted over time to see whether it is rising or falling.
mortality = df1.groupby(['Date', 'Country'])[['Confirmed', 'Recovered', 'Deaths', 'Active']].sum()
mortality.columns = ['Confirmed All', 'Recovered All', 'Death All', 'Active All']
mortality = mortality.reset_index()

# Keep only (Date, Country) rows whose death, confirmed and active totals are all nonzero.
mortality = mortality[
    (mortality['Death All'] != 0)
    & (mortality['Confirmed All'] != 0)
    & (mortality['Active All'] != 0)
]

# Mortality rate in percent, computed column-wise.
# NOTE(review): the +1 in numerator and denominator is kept from the original formula;
# zero rows are already filtered out above — confirm the offset is intended.
mortality['mortality rate'] = (mortality['Death All'] + 1) / (mortality['Confirmed All'] + 1) * 100

# Keep rows whose mortality rate is below 10.
d = mortality[mortality['mortality rate'] < 10]

# Keep rows with more than 100 confirmed cases.
dd = d[d['Confirmed All'] > 100]

fig = px.line(dd, x='Date', y='mortality rate', color='Country', line_group='Country', hover_name='Country')
fig.update_layout(template='plotly_dark')
# show() takes an optional renderer name; the stray positional 0 was dropped so the default renderer is used explicitly.
fig.show()

In [16]:

# Per-country daily active rate, plotted over time to see whether it is rising or falling.
act_1 = df1.groupby(['Date', 'Country'])[['Confirmed', 'Recovered', 'Deaths', 'Active']].sum()
act_1.columns = ['Confirmed All', 'Recovered All', 'Death All', 'Active All']
act_1 = act_1.reset_index()

# Keep only (Date, Country) rows whose death, confirmed and active totals are all nonzero.
act_1 = act_1[
    (act_1['Death All'] != 0)
    & (act_1['Confirmed All'] != 0)
    & (act_1['Active All'] != 0)
]

# Active rate in percent, computed column-wise.
# NOTE(review): the +1 in numerator and denominator is kept from the original formula;
# zero rows are already filtered out above — confirm the offset is intended.
act_1['active rate'] = (act_1['Active All'] + 1) / (act_1['Confirmed All'] + 1) * 100

# Keep rows whose active rate is above 10.
a = act_1[act_1['active rate'] > 10]

# Keep rows with more than 100 confirmed cases.
aa = a[a['Confirmed All'] > 100]

fig = px.line(aa, x='Date', y='active rate', color='Country', line_group='Country', hover_name='Country')
fig.update_layout(template='plotly_dark')
# show() takes an optional renderer name; the stray positional 0 was dropped so the default renderer is used explicitly.
fig.show()

In [17]:

# Per-country daily recovered rate, plotted over time to see whether it is rising or falling.
recovered_1 = df1.groupby(['Date', 'Country'])[['Confirmed', 'Recovered', 'Deaths', 'Active']].sum()
recovered_1.columns = ['Confirmed All', 'Recovered All', 'Death All', 'Active All']
recovered_1 = recovered_1.reset_index()

# Keep only (Date, Country) rows whose recovered, confirmed, active and death totals are all nonzero.
recovered_1 = recovered_1[
    (recovered_1['Recovered All'] != 0)
    & (recovered_1['Confirmed All'] != 0)
    & (recovered_1['Active All'] != 0)
    & (recovered_1['Death All'] != 0)
]

# Recovered rate in percent, computed column-wise.
# NOTE(review): the +1 in numerator and denominator is kept from the original formula;
# zero rows are already filtered out above — confirm the offset is intended.
recovered_1['recovered rate'] = (recovered_1['Recovered All'] + 1) / (recovered_1['Confirmed All'] + 1) * 100

# Keep rows whose recovered rate is above 10.
a2 = recovered_1[recovered_1['recovered rate'] > 10]

# Keep rows with more than 100 confirmed cases.
aa1 = a2[a2['Confirmed All'] > 100]

fig = px.line(aa1, x='Date', y='recovered rate', color='Country', line_group='Country', hover_name='Country')
fig.update_layout(template='plotly_dark')
# show() takes an optional renderer name; the stray positional 0 was dropped so the default renderer is used explicitly.
fig.show()

In [18]:
from fbprophet.plot import plot_plotly
from fbprophet.plot import add_changepoints_to_plot

In [19]:
# Constants used to derive the 'floor' and 'cap' columns of the active-case frame.
floorVar = 0.8
worldPop = 25000

# Active cases summed per date, in the ds/y column layout.
active_training_dataset = (
    df1.groupby('Date')['Active']
    .sum()
    .reset_index()
    .rename(columns={'Date': 'ds', 'Active': 'y'})
)
active_training_dataset['floor'] = active_training_dataset['y'] * floorVar
active_training_dataset['cap'] = active_training_dataset['y'] + worldPop

# Mean of the per-country mortality rate for each date.
mortality_training_dataset = (
    mortality.groupby('Date')['mortality rate']
    .mean()
    .reset_index()
    .rename(columns={'Date': 'ds', 'mortality rate': 'y'})
)

# Deaths summed per date.
death_training_dataset = (
    df1.groupby('Date')['Deaths']
    .sum()
    .reset_index()
    .rename(columns={'Date': 'ds', 'Deaths': 'y'})
)

In [20]:
# Fit three Prophet models (active cases, mortality rate, deaths) and forecast each.

# Active cases: 90% interval width, yearly/weekly seasonality disabled,
# daily seasonality enabled, additive seasonality mode.
m=Prophet(interval_width=0.90,
    changepoint_prior_scale=0.05,
    changepoint_range=0.9,
    yearly_seasonality=False,
    weekly_seasonality=False,
    daily_seasonality=True,
    seasonality_mode='additive'
    )

m.fit(active_training_dataset)
# Forecast frame extended by 61 periods.
future=m.make_future_dataframe(periods=61)
# NOTE(review): these assignments align on the index of active_training_dataset.
# `future` is presumably longer than the training frame (history plus 61 periods),
# in which case 'cap' and 'floor' would be NaN on the added rows — TODO confirm.
# NOTE(review): no `growth` argument is passed to Prophet above; verify whether
# cap/floor are actually meant to constrain this model.
future['cap']=active_training_dataset.y+worldPop
future['floor']=active_training_dataset.y*floorVar
active_forcast=m.predict(future)

# Mortality rate: default Prophet settings, forecast frame extended by 31 periods.
m_mortality= Prophet()
m_mortality.fit(mortality_training_dataset)
mortality_future=m_mortality.make_future_dataframe(periods=31)
mortality_forecast=m_mortality.predict(mortality_future)

# Deaths (fitted on death_training_dataset): 90% interval width,
# forecast frame extended by 7 periods.
m2=Prophet(interval_width=0.9)
m2.fit(death_training_dataset)
future2=m2.make_future_dataframe(periods=7)
death_forecast=m2.predict(future2)

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [21]:
# Interactive plot of the active-case forecast, titled with an annotation
# anchored just above the top-left corner of the plotting area.
fig = plot_plotly(m, active_forcast)
annotations = [
    dict(
        xref='paper', yref='paper',
        x=0.0, y=1.05,
        xanchor='left', yanchor='bottom',
        text='Predictions for Total Active cases',
        font=dict(family='Arial', size=30, color='rgb(37,37,37)'),
        showarrow=False,
    )
]
fig.update_layout(annotations=annotations)
fig

In [22]:
# Interactive plot of the mortality-rate forecast, titled with an annotation
# anchored just above the top-left corner of the plotting area.
fig = plot_plotly(m_mortality, mortality_forecast)
annotations = [
    dict(
        xref='paper', yref='paper',
        x=0.0, y=1.05,
        xanchor='left', yanchor='bottom',
        text='Predictions for mortality rate',
        font=dict(family='Arial', size=30, color='rgb(37,37,37)'),
        showarrow=False,
    )
]
fig.update_layout(annotations=annotations)
fig

In [23]:
# Interactive plot of the deaths forecast, titled with an annotation
# anchored just above the top-left corner of the plotting area.
fig_death = plot_plotly(m2, death_forecast)
annotations = [
    dict(
        xref='paper', yref='paper',
        x=0.0, y=1.05,
        xanchor='left', yanchor='bottom',
        text='Predictions for Deaths',
        font=dict(family='Arial', size=30, color='rgb(37,37,37)'),
        showarrow=False,
    )
]
fig_death.update_layout(annotations=annotations)
fig_death