Skip to content
This repository was archived by the owner on Aug 29, 2024. It is now read-only.

Commit 6f8c8e5

Browse files
committed
Merge branch 'master' of https://github.com/reichlab/kcde
2 parents cbc9e5b + a0cb272 commit 6f8c8e5

13 files changed

+571
-57
lines changed

.Rbuildignore

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ method-description
22
^.*\.Rproj$
33
^\.Rproj\.user$
44
.gitignore
5-
*.org
6-
^\.travis\.yml$
5+
^\.travis\.yml$
6+
^codecov\.yml$

.gitignore

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,11 @@
22
.Rhistory
33
.RData
44

5-
*.Rproj
6-
75
data
86
data-raw
9-
107
*.bbl
118
*.log
129
*.synctex.gz
1310
*concordance.tex
14-
1511
*.so
1612
*.o
17-
18-
*.org

.travis.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,6 @@
33
language: R
44
sudo: false
55
cache: packages
6+
7+
after_success:
8+
- Rscript -e 'covr::codecov()'

DESCRIPTION

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: kcde
22
Title: Kernel conditional density estimation with flexible kernel
3-
specifications.
3+
specifications
44
Version: 0.0.0.9000
55
Authors@R: person("Evan", "Ray", , "elray@umass.edu", c("aut", "cre"))
66
Description: A flexible implementation of kernel conditional density estimation.
@@ -19,11 +19,20 @@ Imports:
1919
robust,
2020
pdtmvn,
2121
plyr
22-
License: What license is it under?
22+
License: GPL-3
2323
LazyData: true
2424
Suggests:
2525
knitr,
26-
testthat
26+
testthat,
27+
covr,
28+
reshape,
29+
cdcfluview,
30+
dplyr,
31+
lubridate,
32+
ggplot2,
33+
grid
34+
URL: https://github.com/reichlab/kcde
35+
BugReports: https://github.com/reichlab/kcde/issues
2736
VignetteBuilder: knitr
2837
Remotes: reichlab/pdtmvn
29-
RoxygenNote: 5.0.1
38+
RoxygenNote: 5.0.1

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
[![Travis-CI Build Status](https://travis-ci.org/reichlab/kcde.svg?branch=master)](https://travis-ci.org/reichlab/kcde)
2-
31
# kcde
42

3+
[![Travis-CI Build Status](https://travis-ci.org/reichlab/kcde.svg?branch=master)](https://travis-ci.org/reichlab/kcde) [![Coverage Status](https://img.shields.io/codecov/c/github/reichlab/kcde/master.svg)](https://codecov.io/github/reichlab/kcde?branch=master)
4+
55
Kernel conditional density estimation with flexible kernel specifications.

codecov.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
comment: false

inst/doc/kcde.R

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
## ----InitialBlock, echo = FALSE------------------------------------------
2+
library(knitr)
3+
opts_knit$set(concordance=TRUE)
4+
5+
## ----FluDataLoadData, echo = TRUE----------------------------------------
6+
library(cdcfluview)
7+
library(dplyr)
8+
library(lubridate)
9+
library(ggplot2)
10+
library(grid)
11+
library(kcde)
12+
13+
usflu<-get_flu_data("national", "ilinet", years=1997:2015)
14+
ili_national <- transmute(usflu,
15+
region.type = REGION.TYPE,
16+
region = REGION,
17+
year = YEAR,
18+
week = WEEK,
19+
total_cases = as.numeric(X..WEIGHTED.ILI))
20+
ili_national$time <- ymd(paste(ili_national$year, "01", "01", sep = "-"))
21+
week(ili_national$time) <- ili_national$week
22+
ili_national$time_index <- seq_len(nrow(ili_national))
23+
24+
str(ili_national)
25+
26+
## ----FluDataInitialPlotTotalCases, echo = TRUE---------------------------
27+
ggplot() +
28+
geom_line(aes(x = as.Date(time), y = total_cases), data =
29+
ili_national) +
30+
geom_vline(aes(xintercept = as.numeric(as.Date(time))),
31+
colour = "grey",
32+
data = ili_national[is.na(ili_national$total_cases), ]) +
33+
scale_x_date() +
34+
xlab("Time") +
35+
ylab("Total Cases") +
36+
theme_bw()
37+
38+
## ----FluDataHistogramPlotTotalCases, echo = TRUE-------------------------
39+
hist_df <- rbind(
40+
data.frame(value = ili_national$total_cases,
41+
variable = "Total Cases"),
42+
data.frame(value = log(ili_national$total_cases),
43+
variable = "log(Total Cases)")
44+
)
45+
46+
ggplot(aes(x = value), data = hist_df) +
47+
geom_histogram() +
48+
facet_wrap( ~ variable, ncol = 2) +
49+
xlab("Total Cases") +
50+
theme_bw()
51+
52+
## ----FluDataACFPlotTotalCases, echo = TRUE-------------------------------
53+
last_na_ind <- max(which(is.na(ili_national$total_cases)))
54+
non_na_inds <- seq(from = last_na_ind + 1, to=nrow(ili_national))
55+
acf(ili_national$total_cases[non_na_inds],
56+
lag.max = 52 * 4)
57+
58+
## ----FluDataKernelComponentsSetup, echo = TRUE---------------------------
59+
## Definitions of kernel components. A couple of notes:
60+
## 1) In the current implementation, it is required that separate kernel
61+
## components be used for lagged (predictive) variables and for leading
62+
## (prediction target) variables.
63+
## 2) The current syntax is verbose; in a future version of the package,
64+
## convenience functions may be provided.
65+
66+
## Define kernel components -- 3 pieces:
67+
## 1) Periodic kernel acting on time index
68+
## 2) pdtmvn kernel acting on lagged total cases (predictive) -- all continuous
69+
## 3) pdtmvn kernel acting on lead total cases (prediction target) -- all continuous
70+
kernel_components <- list(
71+
list(
72+
vars_and_offsets = data.frame(var_name = "time_index",
73+
offset_value = 0L,
74+
offset_type = "lag",
75+
combined_name = "time_index_lag0",
76+
stringsAsFactors = FALSE),
77+
kernel_fn = periodic_kernel,
78+
theta_fixed = list(period=pi / 52.2),
79+
theta_est = list("bw"),
80+
initialize_kernel_params_fn = initialize_params_periodic_kernel,
81+
initialize_kernel_params_args = NULL,
82+
vectorize_kernel_params_fn = vectorize_params_periodic_kernel,
83+
vectorize_kernel_params_args = NULL,
84+
update_theta_from_vectorized_theta_est_fn = update_theta_from_vectorized_theta_est_periodic_kernel,
85+
update_theta_from_vectorized_theta_est_args = NULL
86+
),
87+
list(
88+
vars_and_offsets = data.frame(var_name = "total_cases",
89+
offset_value = 1L,
90+
offset_type = "horizon",
91+
combined_name = "total_cases_horizon1",
92+
stringsAsFactors = FALSE),
93+
kernel_fn = pdtmvn_kernel,
94+
rkernel_fn = rpdtmvn_kernel,
95+
theta_fixed = list(
96+
parameterization = "bw-diagonalized-est-eigenvalues",
97+
continuous_vars = "total_cases_horizon1",
98+
discrete_vars = NULL,
99+
discrete_var_range_fns = NULL,
100+
lower = -Inf,
101+
upper = Inf
102+
),
103+
theta_est = list("bw"),
104+
initialize_kernel_params_fn = initialize_params_pdtmvn_kernel,
105+
initialize_kernel_params_args = NULL,
106+
vectorize_kernel_params_fn = vectorize_params_pdtmvn_kernel,
107+
vectorize_kernel_params_args = NULL,
108+
update_theta_from_vectorized_theta_est_fn = update_theta_from_vectorized_theta_est_pdtmvn_kernel,
109+
update_theta_from_vectorized_theta_est_args = NULL
110+
))
111+
#,
112+
# list(
113+
# vars_and_lags = vars_and_lags[3:5, ],
114+
# kernel_fn = pdtmvn_kernel,
115+
# rkernel_fn = rpdtmvn_kernel,
116+
# theta_fixed = NULL,
117+
# theta_est = list("bw"),
118+
# initialize_kernel_params_fn = initialize_params_pdtmvn_kernel,
119+
# initialize_kernel_params_args = list(
120+
# continuous_vars = vars_and_lags$combined_name[3:4],
121+
# discrete_vars = vars_and_lags$combined_name[5],
122+
# discrete_var_range_fns = list(
123+
# c_lag2 = list(a = pdtmvn::floor_x_minus_1, b = floor, in_range = pdtmvn::equals_integer, discretizer = round_up_.5))
124+
# ),
125+
# vectorize_theta_est_fn = vectorize_params_pdtmvn_kernel,
126+
# vectorize_theta_est_args = NULL,
127+
# update_theta_from_vectorized_theta_est_fn = update_theta_from_vectorized_theta_est_pdtmvn_kernel,
128+
# update_theta_from_vectorized_theta_est_args = list(
129+
# parameterization = "bw-diagonalized-est-eigenvalues"
130+
# )
131+
# ))
132+
133+
## ----FluDataKCDESetup, echo=TRUE-----------------------------------------
134+
kcde_control <- create_kcde_control(X_names = "time_index",
135+
y_names = "total_cases",
136+
time_name = "time",
137+
prediction_horizons = 1L,
138+
kernel_components = kernel_components,
139+
crossval_buffer = ymd("2010-01-01") - ymd("2009-01-01"),
140+
loss_fn = neg_log_score_loss,
141+
loss_fn_prediction_args = list(
142+
prediction_type = "distribution",
143+
log = TRUE),
144+
filter_control <- NULL,
145+
loss_args = NULL,
146+
prediction_inds_not_included = c())
147+
148+
## ----FluDataKCDEEstimation, echo=TRUE------------------------------------
149+
# flu_kcde_fit_orig_scale <- kcde(data = ili_national[ili_national$year <= 2014, ],
150+
# kcde_control = kcde_control)
151+
152+
## ----FluDataKCDEPredictiveSampleAndPlot, echo=TRUE-----------------------
153+
# predictive_sample <- kcde_predict(kcde_fit = flu_kcde_fit_orig_scale,
154+
# prediction_data = ili_national[ili_national$year == 2014 & ili_national$week == 53, , drop = FALSE],
155+
# leading_rows_to_drop = 0,
156+
# trailing_rows_to_drop = 1L,
157+
# additional_training_rows_to_drop = NULL,
158+
# prediction_type = "sample",
159+
# n = 10000L)
160+
161+
# ggplot() +
162+
# geom_density(aes(x = predictive_sample)) +
163+
# geom_vline(aes(xintercept = ili_national$total_cases[ili_national$year == 2015 & ili_national$week == 1]),
164+
# colour = "red") +
165+
# xlab("Total Cases") +
166+
# ylab("Predictive Density") +
167+
# ggtitle("Realized total cases vs. one week ahead predictive density\nWeek 1 of 2015") +
168+
# theme_bw()
169+

0 commit comments

Comments
 (0)