-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmockObservation.R
113 lines (100 loc) · 3.95 KB
/
mockObservation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#' Generates a mock observation table and integrates it into an existing CDM
#' object.
#'
#' For every standard concept of the 'Observation' domain found in the
#' 'concept' table, this function draws
#' `round(recordPerson * number of persons)` records, sampling persons with
#' replacement from the 'person' table. Dates are assigned by the internal
#' helper `addCohortDates()` using the 'observation_period' table, so records
#' are expected to fall within each person's observation period. The result is
#' inserted in the cdm as the 'observation' table.
#'
#' @param cdm A `cdm_reference` object that must already include 'person',
#' 'observation_period', and 'concept' tables. The 'person' and
#' 'observation_period' tables must be populated, and the 'concept' table must
#' contain at least one standard (`standard_concept == "S"`) concept of the
#' 'Observation' domain.
#'
#' @param recordPerson A number giving the expected number of records to
#' generate per person **for each observation concept**. The total number of
#' records per concept is `round(recordPerson * number of persons)`; because
#' persons are sampled with replacement, individual persons may get more or
#' fewer records than this value.
#'
#' @param seed An optional integer used to set the seed for random number
#' generation, ensuring reproducibility of the generated data. If `NULL`, the
#' seed is not set, which can lead to different outputs on each run.
#'
#' @return The `cdm` object with the new 'observation' table added. Each
#' record has a sequential `observation_id`, a `person_id` taken from the
#' 'person' table, and `observation_type_concept_id` set to 1.
#'
#' @export
#'
#' @examples
#' library(omock)
#'
#' # Create a mock CDM reference and add observation records
#' cdm <- mockCdmReference() |>
#'   mockPerson() |>
#'   mockObservationPeriod() |>
#'   mockObservation(recordPerson = 3)
#'
#' # View the generated observation data
#' print(cdm$observation)
mockObservation <- function(cdm,
                            recordPerson = 1,
                            seed = NULL) {
  checkInput(
    cdm = cdm,
    recordPerson = recordPerson,
    seed = seed
  )

  if (!is.null(seed)) {
    set.seed(seed = seed)
  }

  # The concept table supplies the observation concepts to simulate.
  if (is.null(cdm$concept)) {
    cli::cli_abort(
      "concept table is required to generate the observation table"
    )
  }
  # Scalar condition: use `||`, not the element-wise `|`.
  if (nrow(cdm$person) == 0 || nrow(cdm$observation_period) == 0) {
    cli::cli_abort("person and observation_period table cannot be empty")
  }

  # Standard concepts of the Observation domain.
  conceptIds <- cdm$concept |>
    dplyr::filter(
      .data$domain_id == "Observation" & .data$standard_concept == "S"
    ) |>
    dplyr::pull("concept_id") |>
    unique()

  # Without concepts there is nothing to simulate; fail with a clear message
  # instead of an obscure error further down the pipeline.
  if (length(conceptIds) == 0) {
    cli::cli_abort(
      "no standard concepts of the Observation domain found in the concept table"
    )
  }

  personIds <- cdm$person |> dplyr::pull("person_id")
  nPersons <- length(personIds)

  # Number of rows per concept. The product is wrapped in round() explicitly:
  # `a * b |> round()` would only round `b`, as `|>` binds tighter than `*`.
  numberRows <- round(recordPerson * nPersons)

  observation <- lapply(seq_along(conceptIds), function(i) {
    dplyr::tibble(
      observation_concept_id = conceptIds[i],
      # Index through sample.int(): sample(x, ...) would draw from 1:x when
      # `x` is a single number, i.e. when there is only one person.
      subject_id = personIds[
        sample.int(n = nPersons, size = numberRows, replace = TRUE)
      ]
    ) |>
      addCohortDates(
        start = "observation_start_date",
        end = "observation_end_date",
        observationPeriod = cdm$observation_period
      )
  })

  observation <- observation |>
    dplyr::bind_rows() |>
    dplyr::mutate(
      observation_id = dplyr::row_number(),
      observation_type_concept_id = 1
    ) |>
    dplyr::rename(
      person_id = "subject_id",
      observation_date = "observation_start_date"
    ) |>
    # Observations are recorded on a single date; the end date is not kept.
    dplyr::select(-"observation_end_date") |>
    addOtherColumns(tableName = "observation") |>
    correctCdmFormat(tableName = "observation")

  omopgenerics::insertTable(
    cdm = cdm,
    name = "observation",
    table = observation
  )
}