/
map_orthologs.R
120 lines (119 loc) Β· 4.22 KB
/
map_orthologs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#' Map orthologs
#'
#' Map orthologs from one species to another.
#'
#' \code{map_orthologs()} is a core function within
#' \code{convert_orthologs()}, but does not have many
#' of the extra checks, such as \code{non121_strategy}
#' and \code{drop_nonorths}.
#'
#' @param genes can be a mixture of any format
#' (HGNC, Ensembl, RefSeq, UniProt, etc.)
#' and will be automatically converted to
#' standardised HGNC symbol format.
#' @inheritParams convert_orthologs
#' @inheritParams aggregate_mapped_genes
#' @inheritParams gprofiler2::gorth
#'
#' @return Ortholog map \code{data.frame} with at
#' least the columns "input_gene" and "ortholog_gene".
#' @export
#'
#' @examples
#' data("exp_mouse")
#' gene_map <- map_orthologs(
#' genes = rownames(exp_mouse),
#' input_species = "mouse")
map_orthologs <- function(genes,
                          standardise_genes = FALSE,
                          input_species,
                          output_species = "human",
                          method = c("gprofiler",
                                     "homologene",
                                     "babelgene"),
                          mthreshold = Inf,
                          #### Used only when gene_map supplied ####
                          gene_map = NULL,
                          input_col = "input_gene",
                          output_col = "ortholog_gene",
                          verbose = TRUE,
                          ...) {
  # Only the first entry of `method` is used (lower-cased). When the caller
  # hands in their own gene_map, the method label is replaced so that none of
  # the database back-ends below is expected to match it.
  method <- tolower(method)[1]
  has_custom_map <- !is.null(gene_map)
  if (has_custom_map) {
    method <- "user-supplied gene_map"
  }
  messager("Converting", input_species, "==>", output_species,
           "orthologs using:", method, v = verbose)

  # isTRUE()/isFALSE() are kept as two separate flags on purpose: a value
  # such as NA satisfies neither of them.
  standardise_on <- isTRUE(standardise_genes)
  standardise_off <- isFALSE(standardise_genes)

  #### Optional pre-step: standardise the input gene names ####
  if (standardise_on) {
    messager("Standardising gene names first.", v = verbose)
    synonym_map <- map_genes(
      genes = genes,
      species = input_species,
      drop_na = TRUE,
      verbose = verbose
    )
    # From here on, work with the standardised names.
    genes <- synonym_map$name
  }

  #### Build the ortholog map (skipped entirely when there are no genes) ####
  # Every branch below is expected to yield a data.frame with at least the
  # columns "input_gene" and "ortholog_gene".
  if (!is.null(genes)) {
    ## User-supplied mapping
    if (has_custom_map) {
      gene_map <- map_orthologs_custom(
        gene_map = gene_map,
        input_species = input_species,
        output_species = output_species,
        input_col = input_col,
        output_col = output_col,
        verbose = verbose
      )
    }
    ## gprofiler
    use_gprofiler <- methods_opts(method = method, gprofiler_opts = TRUE)
    if (use_gprofiler) {
      gene_map <- map_orthologs_gprofiler(
        genes = genes,
        input_species = input_species,
        output_species = output_species,
        mthreshold = mthreshold,
        verbose = verbose,
        ...
      )
    }
    ## homologene
    use_homologene <- methods_opts(method = method, homologene_opts = TRUE)
    if (use_homologene) {
      gene_map <- map_orthologs_homologene(
        genes = genes,
        input_species = input_species,
        output_species = output_species,
        verbose = verbose,
        ...
      )
    }
    ## babelgene
    use_babelgene <- methods_opts(method = method, babelgene_opts = TRUE)
    if (use_babelgene) {
      gene_map <- map_orthologs_babelgene(
        genes = genes,
        input_species = input_species,
        output_species = output_species,
        verbose = verbose,
        ...
      )
    }
  }

  #### Nothing was mapped and no standardisation was requested ####
  if (is.null(gene_map) && standardise_off) {
    messager("input_species already formatted as output species.",
             "Returning input data directly.", v = verbose)
    return(NULL)
  }

  #### Re-attach the original (pre-standardisation) gene names ####
  if (standardise_on && exists("synonym_map")) {
    gene_map <- add_synonyms(
      gene_map = gene_map,
      syn_map = synonym_map
    )
  }
  gene_map
}