generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dummy_data_tests.R
80 lines (54 loc) · 2.3 KB
/
dummy_data_tests.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
######################################
# This script:
# - creates dummy data for study_definition_tests.py
######################################
library(tidyverse)
library(lubridate)
library(glue)
# read study parameters
study_parameters <- readr::read_rds(
here::here("output", "lib", "study_parameters.rds"))
source(here::here("analysis", "lib", "dummy_data_functions.R"))
# individuals eligible based on box d criteria
data_eligible_e <- readr::read_csv(
here::here("output", "data", "data_eligible_e.csv"))
set.seed(5476)
dummy_data <- arrow::read_feather(
file = here::here("analysis", "dummy_data.feather")) %>%
select(patient_id, elig_date) %>%
right_join(data_eligible_e %>% select(patient_id, start_1_date),
by = "patient_id") %>%
mutate(across(patient_id, as.integer)) %>%
mutate(across(ends_with("_date"), as.Date))
nrows <- nrow(dummy_data)
dates_seq <- seq(as.Date("2021-01-01"), as.Date("2021-11-30"), 1)
K <- study_parameters$max_comparisons
# function for result_test_k_date
test_k_date <- function(k, test_result = "any") {
name <- glue("{test_result}_test_{k}_date")
date <- sample(dates_seq, size = nrows, replace = TRUE)
dummy_data %>%
mutate(!! sym(name) := date) %>%
mutate(across(!! sym(name),
~ case_when(
start_1_date + days((k-1)*28) < .x & .x <= start_1_date + days(k*28) ~ .x,
TRUE ~ as.Date(NA_character_)
))) %>%
select(!! sym(name))
}
# function for result_test_k_date so that any_test_k_n >= positive_test_k_n
test_k_n <- function(k) {
name_any <- glue("any_test_{k}_n")
name_positive <- glue("positive_test_{k}_n")
dummy_data %>%
mutate(!! sym(name_positive) := rpois(n = nrow(.), lambda = 0.25)) %>%
mutate(!! sym(name_any) := !! sym(name_positive) + rpois(n = nrow(.), lambda = 1)) %>%
select(!! sym(name_any), !! sym(name_positive))
}
dummy_data_tests <- dummy_data %>%
bind_cols(lapply(1:(K+1), test_k_date)) %>%
bind_cols(lapply(1:(K+1), test_k_n)) %>%
mutate(covid_test_pre_elig_n = rpois(n = nrow(.), lambda = 3),
covid_test_post_elig_n = rpois(n = nrow(.), lambda = 3)) %>%
mutate(across(ends_with("date"), as.POSIXct))
arrow::write_feather(dummy_data_tests, here::here("analysis", "dummy_data_tests.feather"))