-
Notifications
You must be signed in to change notification settings - Fork 16
/
edata_transform.R
executable file
·151 lines (138 loc) · 5.76 KB
/
edata_transform.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#' Apply a Transformation to the Data
#'
#' This function applies a transformation to the e_data element of omicsData
#'
#' @param omicsData an object of the class 'pepData', 'proData', 'metabData',
#' 'lipidData', or 'nmrData', created by
#' \code{\link{as.pepData}}, \code{\link{as.proData}},
#' \code{\link{as.metabData}}, \code{\link{as.lipidData}}, or
#' \code{\link{as.nmrData}}, respectively.
#' @param data_scale a character string indicating the type of transformation to
#' be applied to the data. Valid values for 'pepData', 'proData', 'metabData',
#' 'lipidData', or 'nmrData': 'log2', 'log', 'log10', or 'abundance'. A value
#' of 'abundance' indicates the data has previously undergone one of the log
#' transformations and should be transformed back to raw values with no
#' transformation applied.
#'
#' 'seqData' objects are not accepted by this function (see Details). The
#' 'seqData' transformations -- 'lcpm' (log2 counts per million), 'upper'
#' (upper quartile of non-zero counts), and 'median' (median of non-zero
#' counts) -- are performed within the pertinent seqData functions instead.
#'
#' @details For all but seqData, this function is intended to be used before
#' analysis of the data begins, and data are typically analyzed on a log
#' scale. This function is not applicable to seqData objects, as any
#' transformations needed e.g. to allow more meaningful visualization of
#' seqData objects are performed within the pertinent functions.
#'
#' @return data object of the same class as omicsData
#'
#' @examplesIf requireNamespace("pmartRdata", quietly = TRUE)
#' library(pmartRdata)
#' mymetab <- edata_transform(omicsData = metab_object, data_scale = "log2")
#' attr(mymetab, "data_info")$data_scale
#'
#' @author Kelly Stratton, Natalie Heller
#'
#' @export
#'
edata_transform <- function(omicsData, data_scale) {
  # Initial checks -------------------------------------------------------------

  # Only these omics classes are supported; any other class is rejected here.
  if (!inherits(omicsData, c(
    "pepData", "proData", "metabData",
    "lipidData", "nmrData"
  ))) {
    stop(paste("omicsData must be of class 'pepData', 'proData', 'metabData',",
      "'lipidData', or 'nmrData'",
      sep = " "
    ))
  }

  # The scales this function can convert between.
  valid_scales <- c("log2", "log10", "log", "abundance")

  # data_scale is used as an `if` condition and as a lookup key below, so it
  # must be exactly one value. Without this guard a zero-length or multi-element
  # input fails later with a cryptic "condition has length" error.
  if (length(data_scale) != 1L) {
    stop("'data_scale' must be a single character string.")
  }

  # Check that data_scale is one of the acceptable options.
  if (!(data_scale %in% valid_scales)) {
    stop(paste(data_scale, "is not a valid option for 'data_scale'.",
      "See details of as.pepData for specifics.",
      sep = " "
    ))
  }

  # Scale the data is currently on, as recorded on the object.
  current_scale <- get_data_scale(omicsData)

  # An unrecognized current scale cannot be undone. Fail loudly instead of
  # relabelling the object without actually transforming the values.
  if (!isTRUE(current_scale %in% valid_scales)) {
    stop(paste("The current data_scale of omicsData is not one of 'log2',",
      "'log10', 'log', or 'abundance'; the data cannot be transformed.",
      sep = " "
    ))
  }

  # Nothing to do if the data is already on the requested scale.
  if (current_scale == data_scale) {
    stop(paste("Data is already on", data_scale, "scale.", sep = " "))
  }

  # Locate the identifier column so it can be excluded from the transformation.
  id_col <- which(names(omicsData$e_data) == get_edata_cname(omicsData))

  # A negative index built from an empty vector selects *no* columns, which
  # would leave the values untouched while the scale label is still changed.
  if (length(id_col) == 0L) {
    stop("The column named by edata_cname was not found in e_data.")
  }

  # Perform the transformation --------------------------------------------------

  # Every conversion is "undo the current scale, then apply the target scale".
  # The identity entries make conversions to or from 'abundance' one real step.
  to_abundance <- list(
    abundance = identity,
    log = exp,
    log2 = function(x) 2^x,
    log10 = function(x) 10^x
  )
  from_abundance <- list(
    abundance = identity,
    log = log,
    log2 = log2,
    log10 = log10
  )

  # as.character() ensures lookup by name even if a scale arrives as a factor
  # (a factor would otherwise index the list by its integer code).
  undo_current <- to_abundance[[as.character(current_scale)]]
  apply_target <- from_abundance[[as.character(data_scale)]]

  # Transform every column except the identifier column.
  omicsData$e_data[, -id_col] <-
    apply_target(undo_current(omicsData$e_data[, -id_col]))

  # Record the new scale in the data_info attribute; all other attributes are
  # left as they were.
  attr(omicsData, "data_info")$data_scale <- data_scale

  # Return the transformed omics object.
  omicsData
}