-
Notifications
You must be signed in to change notification settings - Fork 0
/
NepalCoronaINFPrediction.Rmd
342 lines (293 loc) · 10.2 KB
/
NepalCoronaINFPrediction.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
---
title: "Prediction for Possible Daily New Cases of Corona Infection"
auther: "R.K.Mallik PhD Condidate Civil Engineering,IOE Pulchowk Campus "
output:
html_document: default
pdf_document: default
always_allow_html: true
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
R.K.Mallik PhD Condidate Civil Engineering,IOE Pulchowk Campus
### Import the Important Library
```{r import libraries}
library(plotly)
library(TTR)
library(tidyr)
library(dplyr)
library(ggplot2)
library(forecast)
```
### Reading Data
```{r read csv file}
coronaCases<-read.csv(file = "coronaNepal1.csv")
data1<-data.frame(coronaCases)
str(data1)
y<-data1$NewCases
length(y)
total_cases<-cumsum(y)
length(total_cases)
```
### Creating Time Series File for the Observations
```{r}
corona_ts<-ts(y,frequency=1)
total_cases_ts<-ts(total_cases,frequency=1)
corona_ts
time(corona_ts)
plot(corona_ts)
acf(corona_ts)
plot(diff(corona_ts))
acf(diff(corona_ts))
```
### Creating List of date corresponding to observations
```{r}
today <- Sys.Date()
tm <- seq(0,length(y)-1, by = +1)
x <- as.Date("2020-04-1") + tm
x
```
### Ploting observations vs date in plotly package
```{r using plotly for ploting observed data}
plot_ly(x = ~x, y = ~y,
mode = 'line',
xlab = 'Date',
ylab = 'No of New Cases',
main = 'Corona Daily Infection Observations',
text = paste(today-x, "days from today"))
plot_ly(x = ~x, y = ~cumsum(y),
mode = 'line',
xlab = 'Date',
ylab = 'Cumulative Cases',
main = 'Corona Cumulative Cases Of Infection',
text = paste(today-x, "days from today"))
```
### Isolating Oscillating part from the observed data
```{r}
corona_ts_noise = diff(corona_ts)
z_n<-corona_ts_noise
z_n
today <- Sys.Date()
tm <- seq(1,length(y)-1, by = +1)
tm
x <- as.Date("2020-04-1") + tm
plot_ly(x = ~x, y = z_n,
mode = 'line',
xlab = 'Date',
ylab = 'No of New Cases',
main = 'noise from the daily Observations data',
text = paste(today-x, "days from today"))
plot_ly(x = ~x, y = cumsum(z_n),
mode = 'line',
xlab = 'Date',
ylab = 'Cumulative Noise Cases',
main = 'noise from Cumulative cases of data',
text = paste(today-x, "days from today"))
```
### Isolating Trend part from the observation
```{r}
corona_ts_trend<-corona_ts-corona_ts_noise
z_t<-corona_ts_trend
plot_ly(x = ~x, y = z_t,
mode = 'line',
xlab = 'Date',
ylab = 'No of New Cases(Trend)',
main = 'Trend part of the Daily Corona Infections',
text = paste(today-x, "days from today"))
plot_ly(x = ~x, y = cumsum(z_t),
mode = 'line',
xlab = 'Date',
ylab = 'Cumulative No of Cases(Trend)',
main = 'Trend part of the Cumulative Corona Infections',
text = paste(today-x, "days from today"))
```
### DECOMPOSE TIME SERIES INTO TREND AND NOISE
```{r}
#corona_ts_decompose<-decompose(corona_ts)
```
### HOLTWINTER FORECASTING
```{r}
plot(corona_ts,main="Daily New Cases of Corona Infection")
acf(corona_ts,main="ACF of Daily New Cases")
acf(corona_ts,type="partial",main="PACF of Daily New Cases of Infection")
corona_ts_forecasts_hw <- HoltWinters(corona_ts,gamma=FALSE)
# beta= trend, gamma=Seasionality
corona_ts_forecasts_hw
corona_ts_forecasts_hw$fitted
plot(corona_ts_forecasts_hw,main='Nepal,Daily New Cases Observation vs Fitted Model',
xlab = 'Date Count starts from April,2,2020',
ylab ='Daily New Cases of Corona Infection')
corona_ts_forecasts_hw$SSE
# Total Cases
corona_ts_forecasts_total_cases_hw <- HoltWinters(total_cases_ts,gamma=FALSE)
corona_ts_forecasts_total_cases_hw
corona_ts_forecasts_total_cases_hw$fitted
plot(corona_ts_forecasts_total_cases_hw,main='Nepal,Cumulative Cases Observation vs Fitted Model',
xlab = 'Date Count starts from April,2,2020',
ylab ='Cumulative Cases of Corona Infection')
corona_ts_forecasts_total_cases_hw$SSE
```
### ONE WEEAK FORCASTED DATA
```{r}
library("forecast")
corona_ts_forecasts_hw2 <- forecast(corona_ts_forecasts_hw,h=8)
corona_ts_forecasts_hw2
plot(corona_ts_forecasts_hw2,main='Arima Model Forecast for one Week(6 June to 13 June)',
xlab = 'Date Count starts from April,2,2020',
ylab ='Daily New Cases of Corona Infection')
acf(corona_ts_forecasts_hw2)
# Total Cases
corona_ts_forecasts_total_cases_hw2 <- forecast(corona_ts_forecasts_total_cases_hw,h=30)
corona_ts_forecasts_total_cases_hw2
plot(corona_ts_forecasts_total_cases_hw2,main='HotWinter Model Forecast for one Week(6 June to 13 June)',
xlab = 'Date Count starts from April,2,2020',
ylab ='Cumulative Cases of Corona Infection')
```
### Plotting Residulas and lag
```{r}
acf(corona_ts_forecasts_hw2,lag.max=20)
plot.ts(corona_ts_forecasts_hw2$residuals)
```
### HOLT'S EXPONENTIAL SMOOTHING
```{r}
corona_ts_forecasts_exp <- HoltWinters(corona_ts,gamma=FALSE)
corona_ts_forecasts_exp
corona_ts_forecasts_exp$fitted
plot(corona_ts_forecasts_exp,main='Holts Exponential Model',
xlab = 'Date Count starts from April1,2020',
ylab ='Daily New Cases of Corona Infection')
corona_ts_forecasts_exp$SSE
# Total Cases..
corona_ts_forecasts_total_cases_exp <- HoltWinters(total_cases_ts,gamma=FALSE)
corona_ts_forecasts_total_cases_exp
corona_ts_forecasts_total_cases_exp$fitted
plot(corona_ts_forecasts_total_cases_exp,main='Holts Exponential Model',
xlab = 'Date Count starts from April1,2020',
ylab ='Cumulative Cases of Corona Infection')
```
### FORCASTED DATA CORRESPONDING TO EXPONENTIAL MODEL
```{r}
corona_ts_forecasts_exp2 <- forecast(corona_ts_forecasts_exp,h=8)
corona_ts_forecasts_exp2
plot(corona_ts_forecasts_exp2,main='Exponential Model Forecast for one Week(9June to 16 June)',
xlab = 'Date Count starts from April1,2020',
ylab ='Daily New Cases of Corona Infection')
acf(corona_ts_forecasts_exp2,lag.max=20)
Box.test(corona_ts_forecasts_exp2$residuals,lag=20,type='Ljung-Box')
plot.ts(corona_ts_forecasts_exp2$residuals)
# Total Cases Fore cast for one week
corona_ts_forecasts_total_cases_exp2 <- forecast(corona_ts_forecasts_total_cases_exp,h=8)
corona_ts_forecasts_total_cases_exp2
plot(corona_ts_forecasts_total_cases_exp2,main='Exponential Model Forecast for one Week(9June to 16 June)',
xlab = 'Date Count starts from April1,2020',
ylab ='Cumulative Cases of Corona Infection')
# Total Cases Fore cast for one month
corona_ts_forecasts_total_cases_exp2_month <- forecast(corona_ts_forecasts_total_cases_exp,h=30)
corona_ts_forecasts_total_cases_exp2_month
plot(corona_ts_forecasts_total_cases_exp2_month,main='Exponential Model Forecast for one Month(6June to 6 July)',
xlab = 'Date Count starts from April1,2020',
ylab ='Cumulative Cases of Corona Infection')
# Total Cases Fore cast for two month
corona_ts_forecasts_total_cases_exp2_month <- forecast(corona_ts_forecasts_total_cases_exp,h=90)
corona_ts_forecasts_total_cases_exp2_month
plot(corona_ts_forecasts_total_cases_exp2_month,main='Exponential Model Forecast for one Week(6June to 6 September)',
xlab = 'Date Count starts from April1,2020',
ylab ='Cumulative Cases of Corona Infection')
```
### EXTENDING EXPONENTIAL MODEL TO PREDICT SATURATION LEVEL
```{r}
today <- Sys.Date()
tm <- seq(0,length(y)-1, by = +1)
x <- as.Date("2020-04-1") + tm
x
length(total_cases)
data2<-data.frame(x,total_cases)
ggplot(data2,aes(x=x,y=total_cases/3000))+geom_point()+
stat_smooth(method="glm",method.args=list(family="binomial"),se=FALSE)
#par(mar=c(4,4,1,1))
#plot(data2$x,data2$total_cases)
```
#```{r}
# Assume effective time between generations
#t = 3
# Cumulative Data Ponts after gap of t days
#total_cases
#for (i in 1:length(total_cases)){
# total_slope[i] <- total_cases[i+1]-total_cases[i]
#}
#total_slope
#for (i in seq(length(total_cases),2,by=-t)){
# theta[i]<-(1/t)*log(total_cases[i]/total_cases[i-t])
#
#}
#theta
#theta1<-na.omit(theta)
#theta1
#theta2<-mean(theta1)
#theta2
# Saturation Value
#t = 1:length(total_cases)
#M<-total_cases/(1-total_cases*exp(theta1*t))
#M = 10000
#no = 1
#Nt<-M/(1+((M-no)/no)*exp(-theta2*t))
#Nt
#plot(t,Nt)
#```
## ARIMA MODEL
```{r}
diff_corona_ts<-diff(corona_ts)
plot(diff_corona_ts)
acf(diff_corona_ts)
pacf(diff_corona_ts)
# automatic selection of p,d,andq
auto.arima(diff_corona_ts)
corona_ts_arima<-arima(diff_corona_ts,order=c(2,0,2))
library(forecast)
# Weekly Forecast
corona_ts_arima<-arima(corona_ts,order=c(2,1,2))
corona_ts_arima_forecast<-forecast(corona_ts_arima)
corona_ts_arima_forecast
plot(corona_ts_arima_forecast,
xlab='Date Counts from April,01,2020',
ylab='Expected Daily New Cases',
main='ARIMA Model for the prediction from 9 June to 16 June')
# Monthly Forecast
# Total Cases...
auto.arima(total_cases_ts)
corona_ts_arima_total<-arima(total_cases_ts,order=c(3,2,1))
corona_ts_arima_total
corona_ts_arima_forecast_total<-forecast(corona_ts_arima_total,h=30)
corona_ts_arima_forecast_total
plot(corona_ts_arima_forecast_total,
xlab='Date Counts from April,01,2020',
ylab='Cumulative Cases',
main='ARIMA Model for the prediction from 6 June to 6 July')
# Three Month Forcast
corona_ts_arima_forecast_total<-forecast(corona_ts_arima_total,h=90)
corona_ts_arima_forecast_total
plot(corona_ts_arima_forecast_total,
xlab='Date Counts from April,01,2020',
ylab='Cumulative Cases',
main='ARIMA Model for the prediction from 6 June to 6 August')
```
### CONCLUSION FROM EXPONENTIAL MODEL
The prediction most likely to follow in between Lo95% to Hi95% Output
Date count starts from 2 June.....
```{r}
## One Week Daily New Cases
corona_ts_forecasts_exp2
plot(corona_ts_forecasts_exp2,main='Exponential Model Forecast for one Week(6June to 13 June)',
xlab = 'Date Count starts from April1,2020',
ylab ='Daily New Cases of Corona Infection')
## One Week Cumulative Cases
corona_ts_forecasts_total_cases_exp2
plot(corona_ts_forecasts_total_cases_exp2,main='Exponential Model Forecast for one Week(9June to 16 June)',
xlab = 'Date Count starts from April1,2020',
ylab ='Cumulative Cases of Corona Infection')
#Total Cases for the month
corona_ts_forecasts_total_cases_exp2_month
plot(corona_ts_forecasts_total_cases_exp2_month,main='Exponential Model Forecast for one Week(6June to 6 August)',
xlab = 'Date Count starts from April1,2020',
ylab ='Cumulative Cases of Corona Infection')
```