-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmockConditionOccurrence.R
123 lines (111 loc) · 4.01 KB
/
mockConditionOccurrence.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#' Generates a mock condition occurrence table and integrates it into an existing CDM object.
#'
#' This function simulates condition occurrences for individuals within a
#' specified cohort. It helps create a realistic dataset by generating
#' condition records for each person, based on the number of records specified
#' per person. The generated data are aligned with the existing observation
#' periods to ensure that all conditions are recorded within valid observation
#' windows.
#'
#' @param cdm A `cdm_reference` object that should already include 'person',
#' 'observation_period', and 'concept' tables. This object is the base
#' CDM structure where the condition occurrence data will be added.
#' It is essential that these tables are not empty as they provide
#' the necessary context for generating condition data.
#'
#' @param recordPerson An integer specifying the expected number of condition
#' records to generate per person. This parameter allows
#' the simulation of varying frequencies of condition
#' occurrences among individuals in the cohort,
#' reflecting the variability seen in real-world medical
#' data.
#'
#' @param seed An optional integer used to set the seed for random number
#' generation, ensuring reproducibility of the generated data. If
#' provided, it allows the function to produce the same results
#' each time it is run with the same inputs. If 'NULL', the seed is
#' not set, resulting in different outputs on each run.
#'
#' @return Returns the modified `cdm` object with the new
#' 'condition_occurrence' table added. This table includes the
#' simulated condition data for each person, ensuring that each
#' record is within the valid observation periods and linked to the
#' correct individuals in the 'person' table.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(omock)
#'
#' # Create a mock CDM reference and add condition occurrences
#' cdm <- mockCdmReference() |>
#' mockPerson() |>
#' mockObservationPeriod() |>
#' mockConditionOccurrence(recordPerson = 2)
#'
#' # View the generated condition occurrence data
#' print(cdm$condition_occurrence)
#' }
mockConditionOccurrence <- function(cdm,
                                    recordPerson = 1,
                                    seed = NULL) {
  # Adds a simulated 'condition_occurrence' table to `cdm`: for every standard
  # Condition concept found in `cdm$concept`, `recordPerson * <number of
  # persons>` records are drawn (with replacement) from the person ids, dated
  # via addCohortDates() and inserted into the cdm, which is then returned.
  checkInput(
    cdm = cdm,
    recordPerson = recordPerson,
    seed = seed
  )

  # The person, observation_period and concept tables must all be present and
  # contain rows. NULL is tested first so that `||` never receives a
  # zero-length operand (an error since R 4.3).
  isEmptyTable <- function(x) {
    is.null(x) || isTRUE(nrow(x) == 0)
  }
  if (isEmptyTable(cdm$person) ||
    isEmptyTable(cdm$observation_period) ||
    isEmptyTable(cdm$concept)) {
    cli::cli_abort(
      "person, observation_period and concept table cannot be empty"
    )
  }

  if (!is.null(seed)) {
    set.seed(seed = seed)
  }

  # Standard condition concepts are the only ones records are generated for.
  conceptId <- cdm$concept |>
    dplyr::filter(
      .data$domain_id == "Condition" & .data$standard_concept == "S"
    ) |>
    dplyr::pull("concept_id") |>
    unique()

  # Without any candidate concept the pipeline below would fail later with an
  # unrelated "column doesn't exist" error; fail early with a clear message.
  if (length(conceptId) == 0) {
    cli::cli_abort(
      "concept table must contain at least one standard Condition concept"
    )
  }

  # Hoisted out of the per-concept loop: the pool of ids does not change.
  personId <- cdm$person |>
    dplyr::pull("person_id")

  # Number of rows generated per concept_id. `|>` binds tighter than `*`, so
  # the rounding has to wrap the whole product explicitly.
  numberRows <- round(recordPerson * length(personId))

  con <- lapply(seq_along(conceptId), function(i) {
    dplyr::tibble(
      condition_concept_id = conceptId[i],
      # Index through sample.int() rather than sample(x = personId): with a
      # single numeric id `k`, sample() would draw from 1:k instead of
      # returning `k`. For more than one id both forms draw identically.
      subject_id = personId[
        sample.int(n = length(personId), size = numberRows, replace = TRUE)
      ]
    ) |>
      # Project helper; presumably places the start/end dates inside each
      # person's observation period -- confirm against its definition.
      addCohortDates(
        start = "condition_start_date",
        end = "condition_end_date",
        observationPeriod = cdm$observation_period
      )
  })

  # Stack the per-concept tables, assign sequential record ids and hand the
  # result to the project helpers that complete and type the table.
  con <- con |>
    dplyr::bind_rows() |>
    dplyr::mutate(
      condition_occurrence_id = dplyr::row_number(),
      condition_type_concept_id = 1
    ) |>
    dplyr::rename(person_id = "subject_id") |>
    addOtherColumns(tableName = "condition_occurrence") |>
    correctCdmFormat(tableName = "condition_occurrence")

  cdm <- omopgenerics::insertTable(
    cdm = cdm,
    name = "condition_occurrence",
    table = con
  )

  return(cdm)
}