-
Notifications
You must be signed in to change notification settings - Fork 60
/
ad_advs.R
307 lines (274 loc) · 8.92 KB
/
ad_advs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# Name: ADVS
#
# Label: Vital Signs Analysis Dataset
#
# Input: adsl, vs
library(admiral)
library(pharmaversesdtm) # Contains example datasets from the CDISC pilot project
library(dplyr)
library(lubridate)
library(stringr)
# Load source datasets ----
# Use e.g. `haven::read_sas()` to read in .sas7bdat, or other suitable functions
# as needed and assign to the variables below.
# For illustration purposes read in admiral test data
data("vs")
data("admiral_adsl")
adsl <- admiral_adsl
# When SAS datasets are imported into R using haven::read_sas(), missing
# character values from SAS appear as "" characters in R, instead of appearing
# as NA values. Further details can be obtained via the following link:
# https://pharmaverse.github.io/admiral/articles/admiral.html#handling-of-missing-values # nolint
vs <- convert_blanks_to_na(vs)
# Lookup tables ----
# Assign PARAMCD, PARAM, and PARAMN
param_lookup <- tibble::tribble(
~VSTESTCD, ~PARAMCD, ~PARAM, ~PARAMN,
"SYSBP", "SYSBP", "Systolic Blood Pressure (mmHg)", 1,
"DIABP", "DIABP", "Diastolic Blood Pressure (mmHg)", 2,
"PULSE", "PULSE", "Pulse Rate (beats/min)", 3,
"WEIGHT", "WEIGHT", "Weight (kg)", 4,
"HEIGHT", "HEIGHT", "Height (cm)", 5,
"TEMP", "TEMP", "Temperature (C)", 6,
"MAP", "MAP", "Mean Arterial Pressure (mmHg)", 7,
"BMI", "BMI", "Body Mass Index(kg/m^2)", 8,
"BSA", "BSA", "Body Surface Area(m^2)", 9
)
attr(param_lookup$VSTESTCD, "label") <- "Vital Signs Test Short Name"
# Assign ANRLO/HI, A1LO/HI
range_lookup <- tibble::tribble(
~PARAMCD, ~ANRLO, ~ANRHI, ~A1LO, ~A1HI,
"SYSBP", 90, 130, 70, 140,
"DIABP", 60, 80, 40, 90,
"PULSE", 60, 100, 40, 110,
"TEMP", 36.5, 37.5, 35, 38
)
# ASSIGN AVALCAT1
avalcat_lookup <- tibble::tribble(
~PARAMCD, ~AVALCA1N, ~AVALCAT1,
"HEIGHT", 1, ">100 cm",
"HEIGHT", 2, "<= 100 cm"
)
# User defined functions ----
# Here are some examples of how you can create your own functions that
# operates on vectors, which can be used in `mutate()`.
format_avalcat1n <- function(param, aval) {
case_when(
param == "HEIGHT" & aval > 140 ~ 1,
param == "HEIGHT" & aval <= 140 ~ 2
)
}
# Derivations ----
# Get list of ADSL vars required for derivations
adsl_vars <- exprs(TRTSDT, TRTEDT, TRT01A, TRT01P)
advs <- vs %>%
# Join ADSL with VS (need TRTSDT for ADY derivation)
derive_vars_merged(
dataset_add = adsl,
new_vars = adsl_vars,
by_vars = exprs(STUDYID, USUBJID)
) %>%
## Calculate ADT, ADY ----
derive_vars_dt(
new_vars_prefix = "A",
dtc = VSDTC
) %>%
derive_vars_dy(reference_date = TRTSDT, source_vars = exprs(ADT))
advs <- advs %>%
## Add PARAMCD only - add PARAM etc later ----
derive_vars_merged_lookup(
dataset_add = param_lookup,
new_vars = exprs(PARAMCD),
by_vars = exprs(VSTESTCD)
) %>%
## Calculate AVAL and AVALC ----
# AVALC should only be mapped if it contains non-redundant information.
mutate(
# AVALC = VSSTRESC,
AVAL = VSSTRESN
) %>%
## Derive new parameters based on existing records ----
# Note that, for the following three `derive_param_*()` functions, only the
# variables specified in `by_vars` will be populated in the newly created
# records.
# Derive Mean Arterial Pressure
derive_param_map(
by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, VISIT, VISITNUM, ADT, ADY, VSTPT, VSTPTNUM),
set_values_to = exprs(PARAMCD = "MAP"),
get_unit_expr = VSSTRESU,
filter = VSSTAT != "NOT DONE" | is.na(VSSTAT)
) %>%
# Derive Body Surface Area
derive_param_bsa(
by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, VISIT, VISITNUM, ADT, ADY, VSTPT, VSTPTNUM),
method = "Mosteller",
set_values_to = exprs(PARAMCD = "BSA"),
get_unit_expr = VSSTRESU,
filter = VSSTAT != "NOT DONE" | is.na(VSSTAT),
constant_by_vars = exprs(USUBJID)
) %>%
# Derive Body Mass Index
derive_param_bmi(
by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, VISIT, VISITNUM, ADT, ADY, VSTPT, VSTPTNUM),
set_values_to = exprs(PARAMCD = "BMI"),
get_unit_expr = VSSTRESU,
filter = VSSTAT != "NOT DONE" | is.na(VSSTAT),
constant_by_vars = exprs(USUBJID)
)
## Get visit info ----
# See also the "Visit and Period Variables" vignette
# (https://pharmaverse.github.io/admiral/articles/visits_periods.html#visits)
advs <- advs %>%
# Derive Timing
mutate(
ATPTN = VSTPTNUM,
ATPT = VSTPT,
AVISIT = case_when(
str_detect(VISIT, "SCREEN|UNSCHED|RETRIEVAL|AMBUL") ~ NA_character_,
!is.na(VISIT) ~ str_to_title(VISIT),
TRUE ~ NA_character_
),
AVISITN = as.numeric(case_when(
VISIT == "BASELINE" ~ "0",
str_detect(VISIT, "WEEK") ~ str_trim(str_replace(VISIT, "WEEK", "")),
TRUE ~ NA_character_
))
)
## Derive a new record as a summary record (e.g. mean of the triplicates at each time point) ----
advs <- advs %>%
derive_summary_records(
dataset_add = advs,
by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, PARAMCD, AVISITN, AVISIT, ADT, ADY),
filter_add = !is.na(AVAL),
set_values_to = exprs(
AVAL = mean(AVAL),
DTYPE = "AVERAGE"
)
)
advs <- advs %>%
## Calculate ONTRTFL ----
derive_var_ontrtfl(
start_date = ADT,
ref_start_date = TRTSDT,
ref_end_date = TRTEDT,
filter_pre_timepoint = AVISIT == "Baseline"
)
## Calculate ANRIND : requires the reference ranges ANRLO, ANRHI ----
# Also accommodates the ranges A1LO, A1HI
advs <- advs %>%
derive_vars_merged(dataset_add = range_lookup, by_vars = exprs(PARAMCD)) %>%
# Calculate ANRIND
derive_var_anrind()
## Derive baseline flags ----
advs <- advs %>%
# Calculate BASETYPE
derive_basetype_records(
basetypes = exprs(
"LAST: AFTER LYING DOWN FOR 5 MINUTES" = ATPTN == 815,
"LAST: AFTER STANDING FOR 1 MINUTE" = ATPTN == 816,
"LAST: AFTER STANDING FOR 3 MINUTES" = ATPTN == 817,
"LAST" = is.na(ATPTN)
)
) %>%
# Calculate ABLFL
restrict_derivation(
derivation = derive_var_extreme_flag,
args = params(
by_vars = exprs(STUDYID, USUBJID, BASETYPE, PARAMCD),
order = exprs(ADT, VISITNUM, VSSEQ),
new_var = ABLFL,
mode = "last"
),
filter = (!is.na(AVAL) &
ADT <= TRTSDT & !is.na(BASETYPE) & is.na(DTYPE))
)
## Derive baseline information ----
advs <- advs %>%
# Calculate BASE
derive_var_base(
by_vars = exprs(STUDYID, USUBJID, PARAMCD, BASETYPE),
source_var = AVAL,
new_var = BASE
) %>%
# Calculate BASEC
# only if AVALC is mapped
# derive_var_base(
# by_vars = exprs(STUDYID, USUBJID, PARAMCD, BASETYPE),
# source_var = AVALC,
# new_var = BASEC
# ) %>%
# Calculate BNRIND
derive_var_base(
by_vars = exprs(STUDYID, USUBJID, PARAMCD, BASETYPE),
source_var = ANRIND,
new_var = BNRIND
) %>%
# Calculate CHG
derive_var_chg() %>%
# Calculate PCHG
derive_var_pchg()
## ANL01FL: Flag last result within an AVISIT and ATPT for post-baseline records ----
advs <- advs %>%
restrict_derivation(
derivation = derive_var_extreme_flag,
args = params(
new_var = ANL01FL,
by_vars = exprs(USUBJID, PARAMCD, AVISIT, ATPT, DTYPE),
order = exprs(ADT, AVAL),
mode = "last"
),
filter = !is.na(AVISITN) & ONTRTFL == "Y"
)
## Get treatment information ----
# See also the "Visit and Period Variables" vignette
# (https://pharmaverse.github.io/admiral/articles/visits_periods.html#treatment_bds)
advs <- advs %>%
# Assign TRTA, TRTP
# Create End of Treatment Record
derive_extreme_records(
dataset_add = advs,
by_vars = exprs(STUDYID, USUBJID, PARAMCD, ATPTN),
order = exprs(ADT, AVISITN, AVAL),
mode = "last",
filter_add = (4 < AVISITN & AVISITN <= 13 & ANL01FL == "Y" & is.na(DTYPE)),
set_values_to = exprs(
AVISIT = "End of Treatment",
AVISITN = 99,
DTYPE = "LOV"
)
) %>%
mutate(
TRTP = TRT01P,
TRTA = TRT01A
)
## Get ASEQ and AVALCATx and add PARAM/PARAMN ----
advs <- advs %>%
# Calculate ASEQ
derive_var_obs_number(
new_var = ASEQ,
by_vars = exprs(STUDYID, USUBJID),
order = exprs(PARAMCD, ADT, AVISITN, VISITNUM, ATPTN, DTYPE),
check_type = "error"
) %>%
# Derive AVALCA1N and AVALCAT1
mutate(AVALCA1N = format_avalcat1n(param = PARAMCD, aval = AVAL)) %>%
derive_vars_merged(dataset_add = avalcat_lookup, by_vars = exprs(PARAMCD, AVALCA1N)) %>%
# Derive PARAM and PARAMN
derive_vars_merged(dataset_add = select(param_lookup, -VSTESTCD), by_vars = exprs(PARAMCD))
# Add all ADSL variables
advs <- advs %>%
derive_vars_merged(
dataset_add = select(adsl, !!!negate_vars(adsl_vars)),
by_vars = exprs(STUDYID, USUBJID)
)
# Final Steps, Select final variables and Add labels
# This process will be based on your metadata, no example given for this reason
# ...
# Save output ----
# Change to whichever directory you want to save the dataset in
dir <- tools::R_user_dir("admiral_templates_data", which = "cache")
if (!file.exists(dir)) {
# Create the folder
dir.create(dir, recursive = TRUE, showWarnings = FALSE)
}
save(advs, file = file.path(dir, "advs.rda"), compress = "bzip2")