Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase k #27

Draft
wants to merge 14 commits into
base: include-prev-infection
Choose a base branch
from
19 changes: 14 additions & 5 deletions analysis/comparisons/check_fu.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ fs::dir_create(here::here("output", "tte", "images"))
################################################################################
study_parameters <- readr::read_rds(
here::here("analysis", "lib", "study_parameters.rds"))
K <- study_parameters$K

################################################################################
# redaction functions
Expand All @@ -29,7 +30,7 @@ data_tte <- readr::read_rds(
pivot_longer(
cols = c(starts_with(c("start","end"))),
names_to = c(".value", "k"),
names_pattern = "(.*)_(.)_date"
names_pattern = "(.*)_(.*)_date"
) %>%
mutate(across(k, as.integer)) %>%
filter(
Expand All @@ -45,7 +46,8 @@ data_tte <- readr::read_rds(
filter(start <= as.Date(study_parameters$end_date)) %>%
mutate(across(ends_with("date"), ~if_else(start <= .x & .x <= end, .x, as.Date(NA_character_)))) %>%
mutate(across(end, ~pmin(end, death_date, dereg_date, subsequent_vax_date, as.Date(study_parameters$end_date), na.rm = TRUE))) %>%
select(patient_id, subgroup, arm, k, start, end)
select(patient_id, subgroup, arm, k, start, end) %>%
mutate(across(k, factor, levels = 1:K))

min_max <- data_tte %>%
group_by(subgroup) %>%
Expand Down Expand Up @@ -131,7 +133,7 @@ for (s in levels(min_max$subgroup)) {
ann_text <- tibble(
date = xintercepts,
n = n_mult*max(data_tte_long$n),
k = factor(k_print, levels = 1:6),
k = factor(k_print, levels = 1:K),
lab = names(xintercepts),
lab_col = col_palette[1:length(xintercepts)]
)
Expand All @@ -140,7 +142,14 @@ for (s in levels(min_max$subgroup)) {
ggplot(aes(x = date, y = n)) +
geom_bar(stat = "identity", alpha = 0.5, width=1) +
geom_vline(
data = bind_rows(lapply(1:max(data_tte_long$k), function(x) ann_text %>% mutate(k=x))) %>% mutate(across(k, factor)),
data = bind_rows(
lapply(
1:max(as.integer(as.character(data_tte_long$k))),
function(x)
ann_text %>% mutate(k=x)
)
) %>%
mutate(across(k, factor)),
aes(xintercept = date, colour = lab_col),
linetype = "dashed") +
labs(
Expand Down Expand Up @@ -174,7 +183,7 @@ for (s in levels(min_max$subgroup)) {

ggsave(p,
filename = here::here("output", "tte", "images", glue("check_fu_{s}.png")),
width=15, height=14, units="cm")
width=15, height=20, units="cm")

}

Expand Down
2 changes: 1 addition & 1 deletion analysis/comparisons/combine_estimates.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ readr::write_csv(
################################################################################
# model estimates
all_files <- list.files(path = here::here("output", "models_cox", "data"),
pattern = "modelcox_tidy_\\w+_.+_\\w+_\\d.rds",
pattern = "modelcox_tidy_\\w+_.+_\\w+_\\d+.rds",
all.files = FALSE,
full.names = FALSE, recursive = FALSE,
ignore.case = FALSE, include.dirs = FALSE)
Expand Down
4 changes: 2 additions & 2 deletions analysis/comparisons/data_tte_process.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ data <- data_all %>%
# filter subgroups
filter(subgroup %in% select_subgroups) %>%
pivot_longer(
cols = matches("\\w+_\\d_date"),
cols = matches("\\w+_\\d+_date"),
names_to = c(".value", "k"),
names_pattern = "(.*)_(.)_date"
names_pattern = "(.*)_(.*)_date"
) %>%
rename_with(~str_c(.x, "_date"), .cols = all_of(c("start", "end", "anytest"))) %>%
mutate(across(k, as.integer)) %>%
Expand Down
4 changes: 2 additions & 2 deletions analysis/dummy_data_vax.R
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ dummy_data_covs <- dummy_data_vax %>%
.data = .,
name = !! a,
incidence = b,
earliest="2020-11-01",
latest="2021-12-31",
earliest=study_parameters$pandemic_start,
latest=study_parameters$end_date,
keep_vars = FALSE
))) %>%
# add death_date if coviddeath_date
Expand Down
Binary file modified analysis/dummy_data_vax.feather
Binary file not shown.
5 changes: 3 additions & 2 deletions analysis/lib/study_parameters.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
{
"seed": 123456,
"n": 100000,
"K": 6,
"K": 12,
"ref_age_1": "2021-03-31",
"ref_age_2": "2021-07-01",
"ref_cev": "2021-01-18",
"ref_ar": "2021-02-15",
"pandemic_start": "2020-01-01",
"start_date": "2020-12-08",
"end_date": "2021-12-15"
"end_date": "2022-05-30",
"end_date_model": "2022-03-01"
}
Binary file modified analysis/lib/study_parameters.rds
Binary file not shown.
7 changes: 5 additions & 2 deletions analysis/preprocess/data_covariates_process.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ data_all <- data_arm %>%
left_join(
data_covariates %>%
select(patient_id,
matches(c("start_\\d_date", "end_\\d_date")),
matches(c("start_\\d+_date", "end_\\d+_date")),
starts_with("anytest"), asplenia,
any_of(unname(unlist(model_varlist)))) %>%
mutate(across(contains("_date"),
Expand Down Expand Up @@ -126,11 +126,14 @@ fs::dir_create(here::here("output", "lib"))
# redaction functions
source(here::here("analysis", "functions", "redaction_functions.R"))

end_K_date <- glue("end_{K}_date")

data_min_max_fu <- data_all %>%
rename("end_K_date" = end_K_date) %>%
group_by(subgroup) %>%
summarise(
min_fu_date = min(start_1_date),
max_fu_date = max(end_6_date),
max_fu_date = max(end_K_date),
# round total to nearest 7 for disclosure control
n = ceiling_any(n(), to=7),
.groups = "keep"
Expand Down
4 changes: 3 additions & 1 deletion analysis/preprocess/data_eligible_cde.R
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,6 @@ data_eligible_e <- bind_rows(
end_1_date = start_1_date + days(56),
arm = "unvax")
) %>%
mutate(across(ends_with("_date"), as.POSIXct)) %>%
left_join(data_processed %>% select(patient_id, subgroup),
by = "patient_id") %>%
group_by(subgroup) %>%
Expand All @@ -238,6 +237,9 @@ for (k in 2:study_parameters$K) {

}

data_eligible_e <- data_eligible_e %>%
mutate(across(ends_with("_date"), as.POSIXct))

readr::write_csv(
data_eligible_e,
here::here("output", "data", "data_eligible_e.csv")
Expand Down
11 changes: 2 additions & 9 deletions analysis/second_vax_period/data_2nd_vax_dates.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,14 +144,7 @@ second_vax_period_dates <- data_vax_plot %>%
summarise(across(c(cumulative_sum, end_of_period, start_of_period),
min, na.rm = TRUE),
.groups = "keep") %>%
ungroup() %>%
mutate(
# time between start of first comparison and last date of available data
days_of_data = as.integer(as.Date(study_parameters$end_date) - start_of_period) + 14,
# set n_comparisons based on days of available data
n_comparisons = pmin(ceiling(days_of_data/28), study_parameters$K)
) %>%
select(-days_of_data)
ungroup()

brand_counts <- second_vax_period_dates %>%
left_join(data_vax_plot,
Expand All @@ -171,7 +164,7 @@ second_vax_period_dates <- second_vax_period_dates %>%
left_join(brand_counts,
by = c("jcvi_group", "elig_date", "region")) %>%
select(jcvi_group, elig_date, region, n_ChAdOx1, n_BNT162b2, cumulative_sum,
start_of_period, end_of_period, n_comparisons)
start_of_period, end_of_period)

# save for plotting
readr::write_rds(
Expand Down
6 changes: 3 additions & 3 deletions analysis/study_definition_vax.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@
## Oxford AZ - first record of an Oxford AZ vaccine
# NB *** may be patient's first COVID vaccine dose or their second if mixed types are given ***
covid_vax_az_1_date=patients.with_tpp_vaccination_record(
product_name_matches="COVID-19 Vac AstraZeneca (ChAdOx1 S recomb) 5x10000000000 viral particles/0.5ml dose sol for inj MDV",
product_name_matches="COVID-19 AZD2816 AstraZeneca (ChAdOx1 nCOV-19) 3.5x10*9 viral particles/0.5ml dose sol for inj MDV",
on_or_after=start_date,
find_first_match_in_period=True,
returning="date",
Expand All @@ -130,7 +130,7 @@
},
),
covid_vax_az_2_date=patients.with_tpp_vaccination_record(
product_name_matches="COVID-19 Vac AstraZeneca (ChAdOx1 S recomb) 5x10000000000 viral particles/0.5ml dose sol for inj MDV",
product_name_matches="COVID-19 AZD2816 AstraZeneca (ChAdOx1 nCOV-19) 3.5x10*9 viral particles/0.5ml dose sol for inj MDV",
on_or_after="covid_vax_az_1_date + 1 day",
find_first_match_in_period=True,
returning="date",
Expand All @@ -144,7 +144,7 @@
},
),
covid_vax_az_3_date=patients.with_tpp_vaccination_record(
product_name_matches="COVID-19 Vac AstraZeneca (ChAdOx1 S recomb) 5x10000000000 viral particles/0.5ml dose sol for inj MDV",
product_name_matches="COVID-19 AZD2816 AstraZeneca (ChAdOx1 nCOV-19) 3.5x10*9 viral particles/0.5ml dose sol for inj MDV",
on_or_after="covid_vax_az_2_date + 1 day",
find_first_match_in_period=True,
returning="date",
Expand Down
15 changes: 8 additions & 7 deletions analysis/subsequent_vax/plot_cumulative_incidence.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ library(lubridate)
## import study_parameters
study_parameters <- readr::read_rds(
here::here("analysis", "lib", "study_parameters.rds"))
K <- study_parameters$K

# read subgroups
subgroups <- readr::read_rds(
Expand Down Expand Up @@ -54,8 +55,9 @@ if(Sys.getenv("OPENSAFELY_BACKEND") %in% "") {
# read data
data_all <- readr::read_rds(
here::here("output", "data", "data_all.rds")) %>%
select(patient_id, subgroup, arm, start_1_date, end_6_date, subsequent_vax_date, dereg_date, death_date)

rename("end_K_date" = glue("end_{K}_date")) %>%
select(patient_id, subgroup, arm, start_1_date, end_K_date,
subsequent_vax_date, dereg_date, death_date)

image_path <- here::here("output", "subsequent_vax", "images")

Expand All @@ -73,9 +75,9 @@ if(Sys.getenv("OPENSAFELY_BACKEND") %in% "") {
# start date of comparison 1
start_fu_date = start_1_date,
# end date of final comparison or end of data availability
end_fu_date = pmin(end_6_date, study_parameters$end_date)
end_fu_date = pmin(end_K_date, study_parameters$end_date)
) %>%
select(-start_1_date, -end_6_date) %>%
select(-start_1_date, -end_K_date) %>%
# remove if subsequent vaccine, death or dereg on or before start_of_period
filter_at(
all_of(c("subsequent_vax_date", "death_date", "dereg_date")),
Expand Down Expand Up @@ -128,7 +130,6 @@ if(Sys.getenv("OPENSAFELY_BACKEND") %in% "") {

################################################################################
# scale for x-axis
K <- study_parameters$K
x_breaks <- seq(0,K*4,4)
x_labels <- x_breaks + 2

Expand All @@ -141,8 +142,8 @@ plot_out <- survtable_redacted %>%
name = "Subgroup"
) +
scale_x_continuous(
breaks = seq(0,24,4), # scale is time since start of period 1
labels = seq(2,26,4) # label scale as time since second vax
breaks = x_breaks, # scale is time since start of period 1
labels = x_labels # label scale as time since second vax
) +
labs(
x = "Weeks since second dose",
Expand Down
5 changes: 3 additions & 2 deletions create_project.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ library(glue)
fs::dir_create(here::here("analysis", "lib"))

################################################################################
K <- 6L # the number of comparison periods
K <- 12L # the number of comparison periods

study_parameters <-
list(
Expand All @@ -23,7 +23,8 @@ study_parameters <-
# start_date_pfizer = "2020-12-08",
# start_date_az = "2021-01-04",
# start_date_moderna = "2021-03-04",
end_date = "2021-12-15" # last date of available data
end_date = lubridate::today(), # for study definition
end_date_model = "2022-03-01" # TBC based on availability of hospital data
)

readr::write_rds(study_parameters, here::here("analysis", "lib", "study_parameters.rds"))
Expand Down
6 changes: 2 additions & 4 deletions project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6954,10 +6954,8 @@ actions:
- apply_model_cox_both_1_75_noncoviddeath
outputs:
moderately_sensitive:
event_counts_all: output/release_objects/event_counts_all.csv
event_counts_6575: output/release_objects/event_counts_6575.csv
estimates_all: output/release_objects/estimates_all.csv
estimates_6575: output/release_objects/estimates_6575.csv
event_counts_all: output/release_objects/event_counts*.csv
estimates_all: output/release_objects/estimates*.csv

## ####################################
## plot to check estimates
Expand Down