generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 1
/
12_event_rates_incidence.R
141 lines (103 loc) · 3.65 KB
/
12_event_rates_incidence.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
library(data.table)
library(purrr)
library(tidyverse)
options(datatable.fread.datatable=FALSE)
# setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# setwd('../')
dat <- fread('output/incidence_t.csv')
### add a constant to obtain results not broken down (i.e. all participants)
dat$all <- "all"
### change age into age groups so that there's fewer categories(domains) to loop through
#dat <- dat %>%
# mutate(
# #create categories
# age_groups = case_when(
# age >= 16 & age <= 24 ~ "16 to 24",
# age >= 25 & age <= 34 ~ "25 to 34",
# age >= 35 & age <= 49 ~ "35 to 49",
# age >= 50 & age <= 69 ~ "50 to 69",
# age >= 70 ~ "70 and over"))
### list domains to loop over
domains <- c("all",
"age_groups",
"alcohol",
"obese_binary_flag",
"cancer",
"digestive_disorder",
"hiv_aids",
"mental_behavioural_disorder",
"kidney_disorder",
"respiratory_disorder",
"metabolic_disorder",
"sex",
"ethnicity",
"region",
"hhsize",
"work_status_new",
"CVD",
"musculoskeletal",
"neurological",
"imd",
"rural_urban")
### list exposures to loop over
exposures <- c("all", "exposed") #our exposure of interest is all and our infected vs control
### list outcomes to loop over
outcomes <- c("mh_outcome") #check if we have anyother outcomes that i should list
### set up empty lists for outputs
out1 <- as.list(NULL)
out2 <- as.list(NULL)
out3 <- as.list(NULL)
for(k in 1:length(outcomes)){
outcome <- outcomes[k]
for(j in 1:length(exposures)){
exposure <- exposures[j]
for(i in 1:length(domains)){
domain <- domains[i]
### calculate exposure time on 'per 1,000 person-years' basis
dat$ptime <- dat$t/(365.25*1000)
### sum number of events by domain and exposure
events <- aggregate(x=list(events=dat[[outcome]]==1),
by=list(level=dat[[domain]], exposure=dat[[exposure]]),
FUN=sum)
### sum person-time by domain and exposure
ptime <- aggregate(x=list(person_time=dat[["ptime"]]),
by=list(level=dat[[domain]], exposure=dat[[exposure]]),
FUN=sum)
### combine events and person-time
comb <- merge(x=events, y=ptime, all=TRUE)
comb$outcome <- outcome
comb$domain <- domain
comb <- comb[c(5:6,1:4)]
out1[[i]] <- comb
#print(i)
}
df1 <- out1[[1]]
if(length(domains)>1){
for(i in 2:length(domains)) {df1 <- rbind(df1, out1[[i]])}
}
out2[[j]] <- df1
}
df2 <- out2[[1]]
if(length(exposures)>1){
for(j in 2:length(exposures)) {df2 <- rbind(df2, out2[[j]])}
}
out3[[k]] <- df2
}
df3 <- out3[[1]]
if(length(outcomes)>1){
for(k in 2:length(outcomes)) {df3 <- rbind(df3, out3[[k]])}
}
### calculate event rates
df3$rate <- unlist(map2(df3$events,
df3$person_time,
~poisson.test(.x,.y)$estimate))
### calculate Poisson CI - lower limit
df3$lcl <- unlist(map2(df3$events,
df3$person_time,
~poisson.test(.x,.y)$conf.int[1]))
### calculate Poisson CI - upper limit
df3$ucl <- unlist(map2(df3$events,
df3$person_time,
~poisson.test(.x,.y)$conf.int[2]))
### save result to working directory
write_csv(df3, 'output/event_counts_and_rates_incidence.csv')