|
| 1 | +## ----InitialBlock, echo = FALSE------------------------------------------ |
| 2 | +library(knitr) |
| 3 | +opts_knit$set(concordance=TRUE) |
| 4 | + |
| 5 | +## ----FluDataLoadData, echo = TRUE---------------------------------------- |
| 6 | +library(cdcfluview) |
| 7 | +library(dplyr) |
| 8 | +library(lubridate) |
| 9 | +library(ggplot2) |
| 10 | +library(grid) |
| 11 | +library(kcde) |
| 12 | + |
| 13 | +usflu<-get_flu_data("national", "ilinet", years=1997:2015) |
| 14 | +ili_national <- transmute(usflu, |
| 15 | + region.type = REGION.TYPE, |
| 16 | + region = REGION, |
| 17 | + year = YEAR, |
| 18 | + week = WEEK, |
| 19 | + total_cases = as.numeric(X..WEIGHTED.ILI)) |
| 20 | +ili_national$time <- ymd(paste(ili_national$year, "01", "01", sep = "-")) |
| 21 | +week(ili_national$time) <- ili_national$week |
| 22 | +ili_national$time_index <- seq_len(nrow(ili_national)) |
| 23 | + |
| 24 | +str(ili_national) |
| 25 | + |
| 26 | +## ----FluDataInitialPlotTotalCases, echo = TRUE--------------------------- |
| 27 | +ggplot() + |
| 28 | + geom_line(aes(x = as.Date(time), y = total_cases), data = |
| 29 | +ili_national) + |
| 30 | + geom_vline(aes(xintercept = as.numeric(as.Date(time))), |
| 31 | + colour = "grey", |
| 32 | + data = ili_national[is.na(ili_national$total_cases), ]) + |
| 33 | + scale_x_date() + |
| 34 | + xlab("Time") + |
| 35 | + ylab("Total Cases") + |
| 36 | + theme_bw() |
| 37 | + |
| 38 | +## ----FluDataHistogramPlotTotalCases, echo = TRUE------------------------- |
| 39 | +hist_df <- rbind( |
| 40 | + data.frame(value = ili_national$total_cases, |
| 41 | + variable = "Total Cases"), |
| 42 | + data.frame(value = log(ili_national$total_cases), |
| 43 | + variable = "log(Total Cases)") |
| 44 | +) |
| 45 | + |
| 46 | +ggplot(aes(x = value), data = hist_df) + |
| 47 | + geom_histogram() + |
| 48 | + facet_wrap( ~ variable, ncol = 2) + |
| 49 | + xlab("Total Cases") + |
| 50 | + theme_bw() |
| 51 | + |
| 52 | +## ----FluDataACFPlotTotalCases, echo = TRUE------------------------------- |
| 53 | +last_na_ind <- max(which(is.na(ili_national$total_cases))) |
| 54 | +non_na_inds <- seq(from = last_na_ind + 1, to=nrow(ili_national)) |
| 55 | +acf(ili_national$total_cases[non_na_inds], |
| 56 | + lag.max = 52 * 4) |
| 57 | + |
| 58 | +## ----FluDataKernelComponentsSetup, echo = TRUE--------------------------- |
| 59 | +## Definitions of kernel components. A couple of notes: |
| 60 | +## 1) In the current implementation, it is required that separate kernel |
| 61 | +## components be used for lagged (predictive) variables and for leading |
| 62 | +## (prediction target) variables. |
| 63 | +## 2) The current syntax is verbose; in a future version of the package, |
| 64 | +## convenience functions may be provided. |
| 65 | + |
| 66 | +## Define kernel components -- 3 pieces: |
| 67 | +## 1) Periodic kernel acting on time index |
| 68 | +## 2) pdtmvn kernel acting on lagged total cases (predictive) -- all continuous |
| 69 | +## 3) pdtmvn kernel acting on lead total cases (prediction target) -- all continuous |
| 70 | +kernel_components <- list( |
| 71 | + list( |
| 72 | + vars_and_offsets = data.frame(var_name = "time_index", |
| 73 | + offset_value = 0L, |
| 74 | + offset_type = "lag", |
| 75 | + combined_name = "time_index_lag0", |
| 76 | + stringsAsFactors = FALSE), |
| 77 | + kernel_fn = periodic_kernel, |
| 78 | + theta_fixed = list(period=pi / 52.2), |
| 79 | + theta_est = list("bw"), |
| 80 | + initialize_kernel_params_fn = initialize_params_periodic_kernel, |
| 81 | + initialize_kernel_params_args = NULL, |
| 82 | + vectorize_kernel_params_fn = vectorize_params_periodic_kernel, |
| 83 | + vectorize_kernel_params_args = NULL, |
| 84 | + update_theta_from_vectorized_theta_est_fn = update_theta_from_vectorized_theta_est_periodic_kernel, |
| 85 | + update_theta_from_vectorized_theta_est_args = NULL |
| 86 | + ), |
| 87 | + list( |
| 88 | + vars_and_offsets = data.frame(var_name = "total_cases", |
| 89 | + offset_value = 1L, |
| 90 | + offset_type = "horizon", |
| 91 | + combined_name = "total_cases_horizon1", |
| 92 | + stringsAsFactors = FALSE), |
| 93 | + kernel_fn = pdtmvn_kernel, |
| 94 | + rkernel_fn = rpdtmvn_kernel, |
| 95 | + theta_fixed = list( |
| 96 | + parameterization = "bw-diagonalized-est-eigenvalues", |
| 97 | + continuous_vars = "total_cases_horizon1", |
| 98 | + discrete_vars = NULL, |
| 99 | + discrete_var_range_fns = NULL, |
| 100 | + lower = -Inf, |
| 101 | + upper = Inf |
| 102 | + ), |
| 103 | + theta_est = list("bw"), |
| 104 | + initialize_kernel_params_fn = initialize_params_pdtmvn_kernel, |
| 105 | + initialize_kernel_params_args = NULL, |
| 106 | + vectorize_kernel_params_fn = vectorize_params_pdtmvn_kernel, |
| 107 | + vectorize_kernel_params_args = NULL, |
| 108 | + update_theta_from_vectorized_theta_est_fn = update_theta_from_vectorized_theta_est_pdtmvn_kernel, |
| 109 | + update_theta_from_vectorized_theta_est_args = NULL |
| 110 | + )) |
| 111 | +#, |
| 112 | +# list( |
| 113 | +# vars_and_lags = vars_and_lags[3:5, ], |
| 114 | +# kernel_fn = pdtmvn_kernel, |
| 115 | +# rkernel_fn = rpdtmvn_kernel, |
| 116 | +# theta_fixed = NULL, |
| 117 | +# theta_est = list("bw"), |
| 118 | +# initialize_kernel_params_fn = initialize_params_pdtmvn_kernel, |
| 119 | +# initialize_kernel_params_args = list( |
| 120 | +# continuous_vars = vars_and_lags$combined_name[3:4], |
| 121 | +# discrete_vars = vars_and_lags$combined_name[5], |
| 122 | +# discrete_var_range_fns = list( |
| 123 | +# c_lag2 = list(a = pdtmvn::floor_x_minus_1, b = floor, in_range = pdtmvn::equals_integer, discretizer = round_up_.5)) |
| 124 | +# ), |
| 125 | +# vectorize_theta_est_fn = vectorize_params_pdtmvn_kernel, |
| 126 | +# vectorize_theta_est_args = NULL, |
| 127 | +# update_theta_from_vectorized_theta_est_fn = update_theta_from_vectorized_theta_est_pdtmvn_kernel, |
| 128 | +# update_theta_from_vectorized_theta_est_args = list( |
| 129 | +# parameterization = "bw-diagonalized-est-eigenvalues" |
| 130 | +# ) |
| 131 | +# )) |
| 132 | + |
| 133 | +## ----FluDataKCDESetup, echo=TRUE----------------------------------------- |
| 134 | +kcde_control <- create_kcde_control(X_names = "time_index", |
| 135 | + y_names = "total_cases", |
| 136 | + time_name = "time", |
| 137 | + prediction_horizons = 1L, |
| 138 | + kernel_components = kernel_components, |
| 139 | + crossval_buffer = ymd("2010-01-01") - ymd("2009-01-01"), |
| 140 | + loss_fn = neg_log_score_loss, |
| 141 | + loss_fn_prediction_args = list( |
| 142 | + prediction_type = "distribution", |
| 143 | + log = TRUE), |
| 144 | + filter_control <- NULL, |
| 145 | + loss_args = NULL, |
| 146 | + prediction_inds_not_included = c()) |
| 147 | + |
| 148 | +## ----FluDataKCDEEstimation, echo=TRUE------------------------------------ |
| 149 | +# flu_kcde_fit_orig_scale <- kcde(data = ili_national[ili_national$year <= 2014, ], |
| 150 | +# kcde_control = kcde_control) |
| 151 | + |
| 152 | +## ----FluDataKCDEPredictiveSampleAndPlot, echo=TRUE----------------------- |
| 153 | +# predictive_sample <- kcde_predict(kcde_fit = flu_kcde_fit_orig_scale, |
| 154 | +# prediction_data = ili_national[ili_national$year == 2014 & ili_national$week == 53, , drop = FALSE], |
| 155 | +# leading_rows_to_drop = 0, |
| 156 | +# trailing_rows_to_drop = 1L, |
| 157 | +# additional_training_rows_to_drop = NULL, |
| 158 | +# prediction_type = "sample", |
| 159 | +# n = 10000L) |
| 160 | + |
| 161 | +# ggplot() + |
| 162 | +# geom_density(aes(x = predictive_sample)) + |
| 163 | +# geom_vline(aes(xintercept = ili_national$total_cases[ili_national$year == 2015 & ili_national$week == 1]), |
| 164 | +# colour = "red") + |
| 165 | +# xlab("Total Cases") + |
| 166 | +# ylab("Predictive Density") + |
| 167 | +# ggtitle("Realized total cases vs. one week ahead predictive density\nWeek 1 of 2015") + |
| 168 | +# theme_bw() |
| 169 | + |
0 commit comments