/
utilities-units.R
787 lines (692 loc) · 24.8 KB
/
utilities-units.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
#' Dimension existence
#'
#' @param dimension String name of the dimension.
#' @details Returns `TRUE` if the provided dimension is supported otherwise `FALSE`
#' @export
hasDimension <- function(dimension) {
validateIsString(dimension)
dimensionTask <- .getNetTaskFromCache("DimensionTask")
rClr::clrCall(dimensionTask, "HasDimension", enc2utf8(dimension))
}
#' Validate dimension
#'
#' @param dimension String name of the dimension.
#' @details Check if the provided dimension is supported. If not, throw an error
#' @export
validateDimension <- function(dimension) {
validateIsString(dimension)
if (!hasDimension(dimension)) {
stop(messages$errorDimensionNotSupported(dimension))
}
}
#' Unit existence
#'
#' @param unit String name of the unit
#' @param dimension String name of the dimension.
#' @details Check if the unit is valid for the dimension.
#' @export
hasUnit <- function(unit, dimension) {
validateIsString(unit)
validateDimension(dimension)
dimensionTask <- .getNetTaskFromCache("DimensionTask")
rClr::clrCall(dimensionTask, "HasUnit", enc2utf8(dimension), .encodeUnit(unit))
}
#' Validate unit
#'
#' @param unit String name of the unit
#' @param dimension String name of the dimension.
#' @details Check if the unit is valid for the dimension. If not, throw an error
#' @export
validateUnit <- function(unit, dimension) {
if (!hasUnit(unit, dimension)) {
stop(messages$errorUnitNotSupported(unit, dimension))
}
}
#' Check if quantity can be represented in the unit
#'
#' @param quantity `Quantity` object
#' @param unit Unit name to check for
#'
#' @return
#' If validations are successful, `NULL` is returned. Otherwise, error is
#' signaled.
.validateHasUnit <- function(quantity, unit) {
validateIsOfType(quantity, "Quantity")
validateIsString(unit)
if (quantity$hasUnit(unit)) {
return()
}
stop(messages$errorUnitNotDefined(quantity$name, quantity$dimension, unit))
}
#' Get base unit of a dimension
#'
#' @param quantityOrDimension Instance of a quantity from which the dimension will be retrieved or name of dimension
#'
#' @return String name of the base unit.
#' @export
getBaseUnit <- function(quantityOrDimension) {
if (isOfType(quantityOrDimension, "Quantity")) {
dimension <- quantityOrDimension$dimension
} else {
dimension <- quantityOrDimension
}
validateDimension(dimension)
dimensionTask <- .getNetTaskFromCache("DimensionTask")
rClr::clrCall(dimensionTask, "BaseUnitFor", enc2utf8(dimension))
}
#' Converts a value given in a specified unit into the base unit of a quantity
#'
#' @param quantityOrDimension Instance of a quantity from which the dimension will be retrieved or name of dimension
#' @param values Value in unit (single or vector)
#' @param unit Unit of value
#' @param molWeight Optional molecule weight to use when converting, for example, from molar to mass amount or concentration. If `molWeightUnit` is not specified, `molWeight` is assumed to be in kg/µmol
#' @param molWeightUnit Unit of the molecular weight value. If `NULL` (default), kg/µmol is assumed.
#' @examples
#' simPath <- system.file("extdata", "simple.pkml", package = "ospsuite")
#' sim <- loadSimulation(simPath)
#' par <- getParameter("Organism|Liver|Volume", sim)
#'
#' # Converts the value in unit (1000 ml) to the base unit (l) => 1
#' valueInBaseUnit <- toBaseUnit(par, 1000, "ml")
#'
#' valuesInBaseUnit <- toBaseUnit(par, c(1000, 2000, 3000), "ml")
#' @export
toBaseUnit <- function(quantityOrDimension, values, unit, molWeight = NULL, molWeightUnit = NULL) {
validateIsOfType(quantityOrDimension, c("Quantity", "character"))
# Get the base unit of the dimension and call `toUnit()`
baseUnit <- getBaseUnit(quantityOrDimension)
toUnit(
quantityOrDimension = quantityOrDimension,
values = values,
targetUnit = baseUnit,
sourceUnit = unit,
molWeight = molWeight,
molWeightUnit = molWeightUnit
)
}
#' Converts a value given in base unit of a quantity into a target unit
#'
#' @param quantityOrDimension Instance of a quantity from which the dimension will be retrieved or name of dimension
#' @param values Values to convert (single or vector). If `sourceUnit` is not specified, `values` are in the base unit of the dimension
#' @param targetUnit Unit to convert to
#' @param sourceUnit Optional Name of the unit to convert from. If `NULL` (default), the values are assumed to be in base unit.
#' @param molWeight Optional molecular weight to use when converting, for example, from molar to mass amount or concentration. If `molWeightUnit` is not specified, `molWeight` is assumed to be in kg/µmol
#' @param molWeightUnit Optional Unit of the molecular weight value. If `NULL` (default), kg/µmol is assumed.
#'
#' @examples
#' simPath <- system.file("extdata", "simple.pkml", package = "ospsuite")
#' sim <- loadSimulation(simPath)
#' par <- getParameter("Organism|Liver|Volume", sim)
#'
#' # Converts the value in base unit (1L) to ml => 1000
#' valueInMl <- toUnit(par, 1, "ml")
#'
#' valuesInMl <- toUnit(par, c(1, 5, 5), "ml")
#'
#' # Converts a numerical value in from mmol/l to mg/dl
#' valuesInMgDl <- toUnit(ospDimensions$`Concentration (molar)`, 5,
#' targetUnit = "mmol/l",
#' sourceUnit = "mg/dl", molWeight = 180, molWeightUnit = "g/mol"
#' )
#' @export
toUnit <- function(quantityOrDimension,
values,
targetUnit,
sourceUnit = NULL,
molWeight = NULL,
molWeightUnit = NULL) {
validateIsOfType(quantityOrDimension, c("Quantity", "character"))
validateIsNumeric(values, nullAllowed = TRUE)
validateIsNumeric(molWeight, nullAllowed = TRUE)
# covers all NULL or NA
if (all(is.na(values))) {
return(values)
}
targetUnit <- .encodeUnit(targetUnit)
if (!is.null(sourceUnit)) {
sourceUnit <- .encodeUnit(sourceUnit)
# If source and target units are equal, return early
if (sourceUnit == targetUnit) {
return(values)
}
}
dimension <- quantityOrDimension
if (isOfType(quantityOrDimension, "Quantity")) {
dimension <- quantityOrDimension$dimension
}
baseUnit <- getBaseUnit(dimension)
# Return early
# If no source unit is defined and target is the base unit
if (is.null(sourceUnit) && targetUnit == baseUnit) {
return(values)
}
if (all(is.na(molWeight))) {
molWeight <- NULL
}
dimensionTask <- .getNetTaskFromCache("DimensionTask")
# ensure that we are dealing with an list of values seen as number (and not integer)
values <- as.numeric(c(values))
# Case - no molecular weight is provided
if (is.null(molWeight)) {
# Convert values to base unit first if the source unit is provided
if (!is.null(sourceUnit)) {
values <- rClr::clrCall(dimensionTask, "ConvertToBaseUnit", dimension, sourceUnit, values)
}
# Return early if target unit is the base unit
if (targetUnit == baseUnit) {
return(values)
}
return(rClr::clrCall(dimensionTask, "ConvertToUnit", dimension, targetUnit, values))
}
# Case - molecular weight is provided
# Convert molWeight value to base unit if a unit is provided
if (!is.null(molWeightUnit)) {
molWeight <- rClr::clrCall(dimensionTask, "ConvertToBaseUnit", ospDimensions$`Molecular weight`, molWeightUnit, molWeight)
}
# Convert values to base unit first if the source unit is provided
if (!is.null(sourceUnit)) {
values <- rClr::clrCall(dimensionTask, "ConvertToBaseUnit", dimension, sourceUnit, values, molWeight)
}
# Return early if target unit is the base unit
if (targetUnit == baseUnit) {
return(values)
}
rClr::clrCall(dimensionTask, "ConvertToUnit", dimension, targetUnit, values, molWeight)
}
#' @title Convert base unit to display unit
#'
#' @description
#'
#' Converts a value given in base unit of a quantity into the display unit of a quantity
#'
#' @param quantity Instance of a quantity from which the base unit will be retrieved
#' @param values Value in base unit (single or vector)
#'
#' @examples
#' simPath <- system.file("extdata", "simple.pkml", package = "ospsuite")
#' sim <- loadSimulation(simPath)
#' par <- getParameter("Organism|Liver|Volume", sim)
#'
#' # Converts the value in base unit (1L) to display unit
#' valueInMl <- toDisplayUnit(par, 1)
#'
#' valuesInDisplayUnit <- toDisplayUnit(par, c(1, 5, 5))
#' @export
toDisplayUnit <- function(quantity, values) {
validateIsOfType(quantity, "Quantity")
toUnit(quantity, values, quantity$displayUnit)
}
#' @title List all available dimensions in the `OSPSuite` platform
#'
#' @return
#'
#' Returns the names of all available dimensions defined in the `OSPSuite`
#' platform.
#'
#' @examples
#'
#' allAvailableDimensions()
#' @export
allAvailableDimensions <- function() {
dimensionTask <- .getNetTaskFromCache("DimensionTask")
rClr::clrCall(dimensionTask, "AllAvailableDimensionNames")
}
#' @title Get dimension for a given unit
#'
#' @return
#'
#' Returns the name of dimension that can be used to support the given unit or
#' `NULL` if the dimension cannot be found.
#'
#' @param unit Unit used to find the corresponding dimension.
#'
#' @examples
#'
#' getDimensionForUnit("mg")
#' @export
getDimensionForUnit <- function(unit) {
validateIsString(unit)
unit <- .encodeUnit(unit)
dimensionTask <- .getNetTaskFromCache("DimensionTask")
dim <- rClr::clrCall(dimensionTask, "DimensionForUnit", unit)
ifNotNull(dim, rClr::clrGet(dim, "Name"))
}
#' @title Get units for a given dimension
#'
#' @return
#'
#' Returns a vector containing all units defined in the dimension
#'
#' @param dimension Name of dimension for which units should be returned
#'
#' @examples
#'
#' getUnitsForDimension("Mass")
#' @export
getUnitsForDimension <- function(dimension) {
validateIsString(dimension)
dimensionTask <- .getNetTaskFromCache("DimensionTask")
rClr::clrCall(dimensionTask, "AllAvailableUnitNamesFor", enc2utf8(dimension))
}
#' @title Get dimension by name
#'
#' @return
#'
#' Returns the an instance of the dimension with the given name if found or `NULL`
#' otherwise.
#'
#' @param name Name of dimension that should be retrieved
#'
#' @examples
#'
#' getDimensionByName("Time")
#' @export
getDimensionByName <- function(name) {
validateIsString(name)
dimensionTask <- .getNetTaskFromCache("DimensionTask")
rClr::clrCall(dimensionTask, "DimensionByName", enc2utf8(name))
}
#' @title Create a list of all units available for each dimension
#'
#' @details
#'
#' Loop through dimensions and build a list containing an enum of all units
#' available for each dimension
#'
#' @return enum of all units for each dimension
#'
#' @examples
#'
#' ospsuite:::.getUnitsEnum()
#' @keywords internal
.getUnitsEnum <- function() {
dimensions <- allAvailableDimensions()
errors <- c()
units <- lapply(dimensions, function(dimension) {
x <- tryCatch(
{
# on some systems, we have issues loading units because of encoding
# see https://github.com/Open-Systems-Pharmacology/OSPSuite-R/issues/923#issuecomment-1119442789
getUnitsForDimension(dimension = dimension)
},
error = function(cond) {
errors <<- c(errors, dimension)
# making sure that in this case, the user sees that something went wrong
return(c("Unavailable"))
}
)
return(enum(replace(x, x == "", "Unitless")))
})
if (length(errors) > 0L) {
message(messages$errorLoadingUnitsForDimension(errors))
}
names(units) <- sapply(dimensions, function(str) {
str <- gsub(pattern = "[(]", replacement = "[", x = str)
str <- gsub(pattern = "[)]", replacement = "]", x = str)
})
return(units)
}
#' @title Function to return an enum of all available dimensions
#'
#' @return enum of all dimensions
#'
#' @examples
#'
#' ospsuite:::.getDimensionsEnum()
#' @keywords internal
.getDimensionsEnum <- function() {
enum(allAvailableDimensions())
}
#' @title Supported dimensions defined as a named list
#'
#' @details
#' ospDimensions$Mass => "Mass"
#'
#' @export
ospDimensions <- NULL
#' Supported units defined as a named list of lists
#'
#' ospUnits$Mass$kg => "kg"
#' @export
ospUnits <- NULL
#' parse OSPSuite.Dimensions.xml containing dimensions and units
#'
#' @return An XML document
#' @import xml2
.parseDimensionsXML <- function() {
# Read the XML file
xmlFile <- system.file("lib/OSPSuite.Dimensions.xml", package = "ospsuite")
xmlData <- xml2::read_xml(xmlFile)
return(xmlData)
}
#' get OSP Dimensions from OSPSuite.Dimensions.xml data
#'
#' @param xmlData XML data from `.parseDimensionsXML()`
#'
#' @return a list of supported dimensions
.getOspDimensions <- function(xmlData) {
ospDimensions <- list()
dimensionsNodes <- xmlData %>%
xml2::xml_find_all(".//Dimension")
for (dimNode in dimensionsNodes) {
name <- xml2::xml_attr(dimNode, "name")
ospDimensions[[name]] <- name
}
ospDimensions[["Dimensionless"]] <- "Dimensionless"
ospDimensions <- ospDimensions[order(names(ospDimensions))]
return(ospDimensions)
}
#' get OSP Units from OSPSuite.Dimensions.xml data
#'
#' @param xmlData XML data from `.parseDimensionsXML()`
#'
#' @return a list of supported units
.getOspUnits <- function(xmlData) {
# Extract information from the XML
dimensionsNodes <- xmlData %>%
xml2::xml_find_all(".//Dimension")
ospUnits <- list()
for (dim in dimensionsNodes) {
dim_name <- dim %>%
xml2::xml_attr("name") %>%
gsub(pattern = "[(]", replacement = "[") %>%
gsub(pattern = "[)]", replacement = "]")
dim_units <- dim %>%
xml2::xml_find_all(".//Unit")
unit_list <- list()
for (unit in dim_units) {
unit_name <- unit %>% xml2::xml_attr("name")
# if unit_name equals "" replace by Unitless
if (unit_name == "") {
unit_name <- "Unitless"
}
unit_list[[unit_name]] <- unit_name
}
ospUnits[[dim_name]] <- unit_list
}
ospUnits[["Dimensionless"]] <- list("Unitless" = "Unitless")
ospUnits <- ospUnits[order(names(ospUnits))]
return(ospUnits)
}
.initializeDimensionAndUnitLists <- function() {
# This initializes the two lists in the parent environment which is the package environments
xmlData <- .parseDimensionsXML()
utils::assignInMyNamespace("ospDimensions", .getOspDimensions(xmlData))
utils::assignInMyNamespace("ospUnits", .getOspUnits(xmlData))
}
#' Convert a data frame to common units
#'
#' @param data A data frame (or a tibble) from `DataCombined$toDataFrame()`.
#' @inheritParams convertUnits
#'
#' @seealso toUnit
#'
#' @examples
#'
#' # small dataframe to illustrate the conversion
#' (df <- dplyr::tibble(
#' dataType = c(rep("simulated", 3), rep("observed", 3)),
#' xValues = c(0, 14.482, 28.965, 0, 1, 2),
#' xUnit = "min",
#' xDimension = "Time",
#' yValues = c(1, 1, 1, 1, 1, 1),
#' yUnit = c("mol", "mol", "mol", "g", "g", "g"),
#' yDimension = c("Amount", "Amount", "Amount", "Mass", "Mass", "Mass"),
#' yErrorValues = c(2.747, 2.918, 2.746, NA, NA, NA),
#' yErrorUnit = c("mol", "mol", "mol", "g", "g", "g"),
#' molWeight = c(10, 10, 20, 20, 20, 10)
#' ))
#'
#' # default conversion
#' ospsuite:::.unitConverter(df)
#'
#' # customizing conversion with specified unit(s)
#' ospsuite:::.unitConverter(df, xUnit = ospUnits$Time$h)
#' ospsuite:::.unitConverter(df, yUnit = ospUnits$Mass$kg)
#' ospsuite:::.unitConverter(df, xUnit = ospUnits$Time$s, yUnit = ospUnits$Amount$mmol)
#' @keywords internal
.unitConverter <- function(data, xUnit = NULL, yUnit = NULL) {
# No validation of inputs for this non-exported function.
# All validation will take place in the `DataCombined` class itself.
# early return --------------------------
# Return early if there are only unique units present in the provided data and
# `xUnit` and `yUnit` arguments are `NULL`. This helps avoid expensive and
# redundant computations.
#
# *DO NOT* use short-circuiting `&&` logical operator here.
if (length(unique(data$xUnit)) == 1L & is.null(xUnit) &
length(unique(data$yUnit)) == 1L & is.null(yUnit)) {
return(data)
}
# target units --------------------------
# The observed and simulated data should have the same units for
# visual/graphical comparison.
#
# Therefore, if target units are not specified by the user, we need to choose
# one ourselves. The most frequent units will be selected: one for X-axis, and
# one for Y-axis. If multiple units are tied in terms of their frequency, the
# first will be selected.
xTargetUnit <- xUnit %||% .extractMostFrequentUnit(data, unitColumn = "xUnit")
yTargetUnit <- yUnit %||% .extractMostFrequentUnit(data, unitColumn = "yUnit")
# Strategy --------------------------
# The strategy is to split the data frame (using `split()`) for each source
# unit and carry out conversion separately per data frame. This is the most
# performant option since there can only be as many expensive calls to
# `toUnit()`/`{rClr}` as there are source units.
#
# The problem occurs when source units are missing (`NA`). The `toUnit()`
# function can handle them but not `split()`, which would drop the entire
# section of the data frame corresponding to `NA` and thus there will be loss
# of data when a data frame is split into a list of data frames.
#
# The trick is to create copies of source unit and molecular weight columns
# and fill in the missing values with something other than `NA`. Using these
# new columns with `split()` makes sure that the parts of a data frame where
# source units are missing won't be dropped. Note that the original columns
# containing source units remain unchanged.
#
# These newly created columns are removed before the converted data frame is
# returned to the user.
# internal --------------------------
# `yErrorUnit` column won't be present when only simulated datasets are
# entered, but it can be assumed to be the same as `yUnit`.
#
# If there is no `yErrorValues` column in the entered data frame, it doesn't
# make sense for this function to introduce a new column called `yErrorUnit`.
if ((any(colnames(data) == "yErrorValues")) &&
!(any(colnames(data) == "yErrorUnit"))) {
data <- dplyr::mutate(data, yErrorUnit = yUnit)
}
# Add suffix `Split` to the following columns:
# `xUnit`, `yUnit`, `yErrorUnit`, `molWeight`
data <- dplyr::mutate(
data,
dplyr::across(
.cols = dplyr::matches("Unit$|Weight$"), # use pattern matching to select columns
.fns = as.character,
.names = "{.col}Split" # = original column name + Split suffix
)
)
# Replace missing values in these new columns with `"missing"`, so that
# `split()` won't remove the corresponding portion of the data frame.
data <- dplyr::mutate(
data,
dplyr::across(
.cols = dplyr::matches("Split$"), # use pattern matching to select columns
.fns = function(x) tidyr::replace_na(x, "missing")
)
)
# `split()` will change the row order of the data frame depending on the
# alphabetical order of the levels of the variable used to split the data
# frame into a list.
#
# Therefore, an internal row identifier is kept to restore the original
# data frame row order before the data is returned.
data <- dplyr::mutate(data, .rowidInternal = dplyr::row_number())
# splitting data frames and unit conversions --------------------------
# Split data frame to a list, mutate the unit column using the corresponding
# `*UnitConverter()`, and then rebind.
#
# The `_dfr` variant of `purrr::map()` signals this intent:
# It will return a single data frame. This data frame is created by binding
# row-wise resulting data frames from mapping the given function `.f` to each
# element data frame in the list provided to `.x`.
# xUnit
xDataList <- .removeEmptyDataFrame(split(data, data$xUnitSplit))
data <- purrr::map_dfr(
.x = xDataList,
.f = function(data) .xUnitConverter(data, xTargetUnit)
)
# yUnit
yDataList <- .removeEmptyDataFrame(split(data, list(data$yUnitSplit, data$molWeightSplit)))
data <- purrr::map_dfr(
.x = yDataList,
.f = function(data) .yUnitConverter(data, yTargetUnit)
)
# yUnit error
if (any(colnames(data) == "yErrorValues")) {
yErrorDataList <- .removeEmptyDataFrame(split(data, list(data$yErrorUnitSplit, data$molWeightSplit)))
data <- purrr::map_dfr(
.x = yErrorDataList,
.f = function(data) .yErrorUnitConverter(data, yTargetUnit)
)
} else {
# For some reason, if the user dataset doesn't have error values, but
# still have columns about error units, update them as well. The quantity
# and its error should always have the same unit in the final data frame.
if (any(colnames(data) == "yErrorUnit")) {
data <- dplyr::mutate(data, yErrorUnit = yUnit)
}
}
# clean up and return --------------------------
# Restore the original row order using the internal row id
data <- dplyr::arrange(data, .rowidInternal)
# Remove all columns that were added only for internal workings of the function.
data <- dplyr::select(data, -dplyr::matches("Split$|.rowidInternal"))
return(data)
}
#' Remove empty data frames from a list of data frames
#'
#' @description
#'
#' Remove empty data frames sometimes produced due to the non-existent
#' combination of source unit and molecular weight.
#'
#' @param x A list of data frames.
#'
#' @examples
#'
#' # Create a list of data frames
#' (ls <- split(mtcars, list(mtcars$vs, mtcars$cyl)))
#'
#' # Remove element data frames with 0 rows
#' ospsuite:::.removeEmptyDataFrame(ls)
#' @keywords internal
.removeEmptyDataFrame <- function(x) purrr::keep(x, function(data) nrow(data) > 0L)
#' @keywords internal
#' @noRd
.xUnitConverter <- function(xData, xTargetUnit) {
xData$xValues <- toUnit(
quantityOrDimension = xData$xDimension[[1]],
values = xData$xValues,
targetUnit = xTargetUnit,
sourceUnit = xData$xUnit[[1]]
)
xData$xUnit <- xTargetUnit
return(xData)
}
#' @keywords internal
#' @noRd
.yUnitConverter <- function(yData, yTargetUnit) {
yData$yValues <- toUnit(
quantityOrDimension = yData$yDimension[[1]],
values = yData$yValues,
targetUnit = yTargetUnit,
sourceUnit = yData$yUnit[[1]],
molWeight = yData$molWeight[[1]],
molWeightUnit = ospUnits$`Molecular weight`$`g/mol`
)
if (any(colnames(yData) == "lloq")) {
yData$lloq <- toUnit(
quantityOrDimension = yData$yDimension[[1]],
values = yData$lloq,
targetUnit = yTargetUnit,
sourceUnit = yData$yUnit[[1]],
molWeight = yData$molWeight[[1]],
molWeightUnit = ospUnits$`Molecular weight`$`g/mol`
)
}
yData$yUnit <- yTargetUnit
return(yData)
}
#' @keywords internal
#' @noRd
.yErrorUnitConverter <- function(yData, yTargetUnit) {
# If error type is geometric, conversion of `yValues` to different units
# should not trigger conversion of error values (and units)
if (any(colnames(yData) == "yErrorType") &&
!is.na(unique(yData$yErrorType)) &&
unique(yData$yErrorType) == DataErrorType$GeometricStdDev) {
return(yData)
}
yData$yErrorValues <- toUnit(
quantityOrDimension = yData$yDimension[[1]],
values = yData$yErrorValues,
targetUnit = yTargetUnit,
sourceUnit = yData$yErrorUnit[[1]],
molWeight = yData$molWeight[[1]],
molWeightUnit = ospUnits$`Molecular weight`$`g/mol`
)
yData$yErrorUnit <- yTargetUnit
return(yData)
}
#' Find the most common units
#'
#' @inheritParams .unitConverter
#' @param unitColumn The name of the column containing units (e.g. `xUnit`).
#'
#' @examples
#'
#' df <- dplyr::tibble(
#' xValues = c(15, 30, 60),
#' xUnit = "min",
#' xDimension = "Time",
#' yValues = c(0.25, 45, 78),
#' yUnit = c("", "%", "%"),
#' yErrorUnit = c("", "%", "%"),
#' yDimension = "Fraction",
#' molWeight = 10
#' )
#'
#' ospsuite:::.extractMostFrequentUnit(df, unitColumn = "xUnit")
#' ospsuite:::.extractMostFrequentUnit(df, unitColumn = "yUnit")
#'
#' @keywords internal
.extractMostFrequentUnit <- function(data, unitColumn) {
# Converting to argument to symbol makes sure that both ways of specifying
# arguments will be treated the same way:
# - unquoted (`unitColumn = xUnit`)
# - quoted (`unitColumn = "xUnit"`)
unitColumn <- rlang::ensym(unitColumn)
# Create a new data frame with frequency for each unit
unitUsageFrequency <- data %>%
# The embrace operator (`{{`) captures the user input and evaluates it in
# the current data frame.
dplyr::group_by({{ unitColumn }}) %>%
dplyr::tally(name = "unitFrequency")
mostFrequentUnit <- unitUsageFrequency %>%
# Select only the row(s) with maximum frequency.
#
# In case of ties, there can be more than one row. In such cases, setting
# `with_ties = FALSE` make sure that only the first row (and the
# corresponding) unit will be selected.
#
# Do *not* select randomly as that would introduce randomness in plotting
# functions with each run of the plotting function defaulting to a different
# unit.
dplyr::slice_max(unitFrequency, n = 1L, with_ties = FALSE) %>%
# Remove the frequency column, which is not useful outside the context of
# this function.
dplyr::select(-unitFrequency)
return(mostFrequentUnit[[1]])
}