From 0616e98d96c2eb166442283c2b41a1430d03f8ad Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Fri, 16 Feb 2024 18:56:25 +0100
Subject: [PATCH 1/3] `generate_homologs`: use OMA instead of HomoloGene

---
 NAMESPACE       |  2 +-
 R/liana_ortho.R | 65 ++++++++++++++++++++++++++++++-------------------
 2 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 046a4e0e..cd783ae5 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -76,9 +76,9 @@ importFrom(ComplexHeatmap,Heatmap)
 importFrom(ComplexHeatmap,HeatmapAnnotation)
 importFrom(ComplexHeatmap,anno_barplot)
 importFrom(ComplexHeatmap,rowAnnotation)
-importFrom(OmnipathR,homologene_download)
 importFrom(OmnipathR,homologene_raw)
 importFrom(OmnipathR,import_omnipath_intercell)
+importFrom(OmnipathR,oma_pairwise)
 importFrom(SeuratObject,GetAssayData)
 importFrom(SeuratObject,Idents)
 importFrom(dplyr,arrange)
diff --git a/R/liana_ortho.R b/R/liana_ortho.R
index ff526fa1..661809cb 100644
--- a/R/liana_ortho.R
+++ b/R/liana_ortho.R
@@ -20,7 +20,7 @@
 #' Hence, increasing the number of matches, but likely introducing some
 #' mismatches.
 #'
-#' @param symbols_dict `NULL` by default, then `get_homologene_dict` is called
+#' @param symbols_dict `NULL` by default, then `homology_dict` is called
 #' to generate a dictionary from OmniPathR's homologene resource. Alternatively,
 #' one can pass their own symbols_dictionary.
 #'
@@ -30,6 +30,8 @@
 #'
 #' @param verbose logical for verbosity
 #'
+#' @param mappings Character vector: orthology mapping types to keep, any of `1:1`, `1:m`, `n:1`, `n:m` (all by default).
+#'
 #' @return a converted ligand-receptor resource
 #'
 #' @export
@@ -40,7 +42,8 @@ generate_homologs <- function(op_resource,
                               symbols_dict = NULL,
                               columns = c("source_genesymbol",
                                           "target_genesymbol"),
-                              verbose = TRUE){
+                              verbose = TRUE,
+                              mappings = c("1:1", "1:m", "n:1", "n:m")){
 
     op_resource %<>% mutate(across(all_of(columns),
                                    ~str_replace(., "COMPLEX:", "")))
@@ -55,8 +58,12 @@ generate_homologs <- function(op_resource,
     entities <- purrr::reduce(map(columns, function(col) decomp[[col]]), union)
 
     # generate homology geneset
-    symbols_dict <- get_homologene_dict(entities = entities,
-                                        target_organism = target_organism)
+    symbols_dict <-
+        homology_dict(
+            entities = entities,
+            target_organism = target_organism,
+            mappings = mappings
+        )
 
 
     # Remove any missing antities
@@ -67,7 +74,7 @@ generate_homologs <- function(op_resource,
                                     names(symbols_dict))
 
         liana_message(
-            str_glue("Entries without homologs:
+            str_glue("Entries without homologs ({length(missing_entities)}):
                      {paste(missing_entities, collapse = '; ')}"),
             verbose = verbose
         )
@@ -100,7 +107,12 @@ generate_homologs <- function(op_resource,
         pull(genesymbol_source)
 
     liana_message(
-        stringr::str_glue("One-to-many homolog matches: {paste(entity_2many, collapse = '; ')}"),
+        stringr::str_glue(
+            paste0(
+                "One-to-many homolog matches ({length(entity_2many)}): ",
+                "{paste(entity_2many, collapse = '; ')}"
+            )
+        ),
         verbose = verbose
     )
 
@@ -386,28 +398,31 @@ recode.character2 <- function(.x,
 
 #' Helper function to get homologene dictionary
 #'
-#' @param entities genes to be converted - function will return a dictionary
-#' with only those.
-#'
-#' @param target_organism target organism (obtain tax id from `show_homologene`)
+#' @param entities Character vector: symbols of genes to be converted - this
+#'     function returns a dictionary restricted to these genes.
+#' @param target_organism Character or numeric: name or NCBI Taxonomy ID of the
+#'     target organism.
+#' @param mappings Character vector: orthology mapping types to keep, any of `1:1`, `1:m`, `n:1`, `n:m` (all by default).
 #'
 #' @keywords internal
 #'
-#' @importFrom OmnipathR homologene_download
-get_homologene_dict <- function(entities,
-                                target_organism,
-                                id_type = "genesymbol"){
-
-    # Load homology geneset
-    hg_gs <- homologene_download(target = !!target_organism,
-                                 source = 9606L, # always human
-                                 id_type = !!id_type) %>%
-        select(-hgroup) %>%
-        # Limit to the universe of the resource
-        filter(.data[[str_glue("{id_type}_source")]] %in% entities)
-
-    # Convert to dictionary
-    return(hg_gs %>% deframe())
+#' @importFrom OmnipathR oma_pairwise
+homology_dict <- function(
+        entities,
+        target_organism,
+        id_type = "genesymbol",
+        mappings = c("1:1", "1:m", "n:1", "n:m")
+    ){
+
+    oma_pairwise(
+        organism_b = target_organism,
+        id_type = id_type,
+        mappings = mappings
+    ) %>%
+    # Limit to the universe of the resource
+    filter(.data[["id_organism_a"]] %in% entities) %>%
+    deframe
+
 }
 
 

From e94d61a54020a638f929eeaaaeee04adf711c0ca Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Fri, 16 Feb 2024 18:56:51 +0100
Subject: [PATCH 2/3] constrain OmnipathR version to >= 3.11.3

---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 90720b7b..f71d0e40 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -57,7 +57,7 @@ Imports:
     dplyr,
     readr,
     rlang,
-    OmnipathR,
+    OmnipathR (>= 3.11.3),
     SingleCellExperiment,
     scran,
     scater,
@@ -91,5 +91,5 @@ Suggests:
     broom
 biocViews: scater, scran, SingleCellExperiment, ComplexHeatmap, muscData, ExperimentHub
 Remotes: sqjin/CellChat, saezlab/OmnipathR, saezlab/decoupleR, LTLA/basilisk.utils, LTLA/basilisk
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.0
 Config/testthat/edition: 3

From 6bb8ab83f85871083ee532d46ac0a0f83af39348 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Fri, 16 Feb 2024 18:59:07 +0100
Subject: [PATCH 3/3] version bump (0.1.14)

---
 DESCRIPTION |  2 +-
 NEWS.md     | 82 ++++++++++++++++++++++++++++-------------------------
 2 files changed, 44 insertions(+), 40 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f71d0e40..c9ae0c30 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: liana
 Type: Package
 Title: LIANA: a LIgand-receptor ANalysis frAmework
-Version: 0.1.13
+Version: 0.1.14
 Authors@R: c(
     person(
         given = "Daniel",
diff --git a/NEWS.md b/NEWS.md
index af9754b4..65e86632 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,10 @@
+# LIANA 0.1.14 (15.02.24)
+
+- Orthology translation (`generate_homologs`) now uses OMA instead of the outdated NCBI HomoloGene; requires OmnipathR >= 3.11.3
+
 # LIANA 0.1.13 (03.11.23)
 
-- Changed the way that `max` rank is imputed when NAs are presented, or when `return_all` is true. 
+- Changed the way that `max` rank is imputed when NAs are presented, or when `return_all` is true.
 Essentially, `RobustRankAggregate` will use the max rank in the matrix, rather than the size of the dataframe.
 
 - Fixed a bug related to newer versions of CellChat with unused argument #75
@@ -17,9 +21,9 @@ aggregates. Essentially runs `liana_aggregate` twice with different `aggregate_h
 parameters and joins.
 - Added `invert_specificity`, `invert_magnitude`, `invert_function` parameters
 to `liana_dotplot`.
-- Added `decompose_tensor` as a function to run only the decomposition on a 
+- Added `decompose_tensor` as a function to run only the decomposition on a
 pre-built Tensor.
-- Aggregation can now be performed also via `liana_bysample`, takes `aggregate_how` 
+- Aggregation can now be performed also via `liana_bysample`, takes `aggregate_how`
 parameter, which allows `magnitude`, `specificity`, or `both`.
 - Added `preprocess_scores` function that handles the conversion of liana's
 scores to Tensor format.
@@ -73,8 +77,8 @@ by specificity and magnitude scores.
 
 
 # LIANA 0.1.8 (08.11.22)
-## New Implementations 
-- Untargeted between-condition (context/sample) decomposition of cell-cell 
+## New Implementations
+- Untargeted between-condition (context/sample) decomposition of cell-cell
 communication latent patterns /w `tensor_cell2cell`. Makes use of `basilisk` to
 automatically set-up a conda env for liana.
 - added `min_cells` parameter to `liana_wrap`, to exclude any cell identity
@@ -82,7 +86,7 @@ which does not pass a minimum cells threshold.
 
 ## Changes
 - Mouse Consensus resource is now provided by default.
-- The intracellular OmniPath vignette was removed. An updated and more user-friendly one 
+- The intracellular OmniPath vignette was removed. An updated and more user-friendly one
 will be provided in next updates. In the meantime, the old one can still be downloaded
 from [drive](https://drive.google.com/file/d/1lqxHhmz0Jq7eIuQAe0SxvInGgo2U-RlC/view?usp=share_link)
 - Source and Target titles are now plotted by the `liana_dotplot`
@@ -118,7 +122,7 @@ are additional - no need to account for complexes (e.g. also `global_mean`).
 ## Changes
 - Fixed an issue where interactions with complexes will not filtered be according to
 `expr_prop` for some methods. I now filter twice - once via `.filt_liana_pipe`
-for computational speed, and once after `recomplexify` to also remove the 
+for computational speed, and once after `recomplexify` to also remove the
 complexes with `expr_prop` <= X. Will now also filter `crosstalk_scores` to `expr_prop`.
 
 - In the edgecase of complexes with subunits with equal expression, LIANA's internal
@@ -129,7 +133,7 @@ discarded by `liana_aggregate`.
 
 - `liana_doplot` function is now more explicit in the way interactions are
 selected. Will now take `topn` and return the highest ranked interactions.
-Size of dots is also more distinguishable by default and the user can now 
+Size of dots is also more distinguishable by default and the user can now
 pass a customizable value for the size range.
 
 - Added a `rank_method` helper function to rank single methods according to
@@ -161,16 +165,16 @@ complicated cases, such as complex subunits with one-to-many mapping homologs.
 NATMI suggest for between-condition comparisons. Add NATMI to the housekeeping
 aggregate ranking.
 
-- Enable weighing of interactions by cell pairs (using a DF in which each 
+- Enable weighing of interactions by cell pairs (using a DF in which each
 cell pair has  an assigned weight). This would typically be done by spatial
 constraints, etc. These weights can also be used to mask any cell-pair interactions
 which are not relevant (by assigning weights of 0). This currently assumes that
 the weights would be between 0 to 1 - to be extended. Tutorial on this /w appropriate
-spatial weight generation to be written. 
+spatial weight generation to be written.
 
 
 ## Minor Changes
-- By default, the base for logFC will now be automatically assigned depending 
+- By default, the base for logFC will now be automatically assigned depending
 on the object passed to LIANA, i.e. `.antilog1m` for SCE will use 2 as base,
 and Euler's number for Seurat. One could also pass the base they wish to use
 via `liana_wrap`.
@@ -183,16 +187,16 @@ via `liana_wrap`.
 - Changed the aggregation columns of `liana_aggregate`, as in some cases
 methods would assign different subunits as the minimum, which results in
 redundancies for the same complex. As such, `liana_aggregate` will now
-return only the complex columns, nevertheless, the methods will still return 
+return only the complex columns, nevertheless, the methods will still return
 both the minimum (lowest expressed subunit) and it's corresponding complex.
 
-- `base` used to calculate logFC (via `get_log2FC`) can now be passed as 
+- `base` used to calculate logFC (via `get_log2FC`) can now be passed as
 a parameter to `liana_wrap` via `liana_pipe.params`.
 Passing `NaN` to base would result in log2FC calculation using the raw counts
 without any pre-processing (e.g. no batch correction, etc).
 
 The base is by default set to 2, assuming that log2 transformation is performed
-following library size normalization, and thus preserving the normalization, 
+following library size normalization, and thus preserving the normalization,
 while reverting back to ~counts.
 
 ## Minor Changes
@@ -221,7 +225,7 @@ This heatmap was inspired by CellPhoneDB and CellChat.
 # LIANA 0.1.1 (26.04.22)
 
 ## Changes
-- Change the order of  non-expressed genes and empty droplet filtering. 
+- Change the order of  non-expressed genes and empty droplet filtering.
 I now appropriately filter cells in the `sce` object *after* limiting the gene
 universe to ligands and receptors in the resource.
 
@@ -294,7 +298,7 @@ idents/colLabels for SCE and Seurat, respectively.
 ## Changes
 
 - LIANA will now use the `Consensus` resource by default. This is a highly-literature supported resource, generated using similar
-filtering steps as the 'OmniPath' (old default) resource. This resource is similar in size (~4,700 interactions), but contains a 
+filtering steps as the 'OmniPath' (old default) resource. This resource is similar in size (~4,700 interactions), but contains a
 higher complex and curation content.
 
 - All resources might show some very minor changes related to an update of UniProt IDs and homology-conversion improvements.
@@ -325,9 +329,9 @@ and hence returns a all of those columns
 
 - Complexes with missing subunits are not correctly assigned as 'missing' and hence filtered/treated as non-expressed.
 
-- Fixed a bug where LIANA will return the minimum subunit expression, instead of the mean for some methods. 
+- Fixed a bug where LIANA will return the minimum subunit expression, instead of the mean for some methods.
   This stemmed from not properly passing the incorrect `complex_policy` to certain methods, i.e. they were getting a hard-coded value instead.
-  
+
 - Remove `decomplexify` logical from `liana_call` and `liana_pipe` -> redundant.
 
 - edge case fix: liana_aggregate should now rank interactions with the same subunits, but coming from different complexes seperately
@@ -344,7 +348,7 @@ the BioConductor single-cell framework (for all internal methods).
 
 - added `liana_dotplot` as a basic, but flexible, dotplot function for LIANA output. (+ tests)
 
-## Changes  
+## Changes
 
 - LIANA will now perform a basic filtering step, where all genes and cells with 0 summed counts are removed.
 
@@ -353,7 +357,7 @@ the BioConductor single-cell framework (for all internal methods).
 - `assay.type` in `liana_pipe` was passed to `get_logFC` would
 result in using the logcounts, rather than the library-normalized counts.
 
-## Bug Fixes  
+## Bug Fixes
 
 - Fixed a bug where incorrectly passing method names in different cases results in an error.
 
@@ -362,7 +366,7 @@ result in using the logcounts, rather than the library-normalized counts.
 
 - External LIANA methods (i.e. `call_`) are now deprecated. The pipelines will be maintained solely for power users,
 who intend to benchmark the original implementations, but will not be the focus of any downstream analyses.
-These will be solely developed for the internal (or re-implemented methods). These still rely on a `SeuratObject` as 
+These will be solely developed for the internal (or re-implemented methods). These still rely on a `SeuratObject` as
 interface, but will now accept both sce and seurat as input.
 
 
@@ -376,7 +380,7 @@ would be simply NAs and 0s for Crosstalk scores. Should do the same for Connecto
 * `CellChat` and Crosstalk scores/`cytotalk` will no longer by called by default by liana_wrap.
 However, it both are available as an option to be passed to the `method` parameter.
 
-* I now filter all methods by `expr_prop`. This is done in a slightly different manner for Connectome's 
+* I now filter all methods by `expr_prop`. This is done in a slightly different manner for Connectome's
 scaled weights and crosstalk scores, since they require all pairs/clusters to be present to appropriately
 calculate their scores. Thus, for them we filter after we calculate the scores, while for the others methods
 we pre-filter.
@@ -391,7 +395,7 @@ how to [combine LIANA with NicheNet](https://saezlab.github.io/liana/articles/li
 
 * `CellCall` and `Cellinker` resources were added, while talklr was removed. The OmniPath resources itself was filtered further and 1,000
 lower quality interactions were excluded. Further improvements were made to all resources,
-most of which were minor. Changes worth mentioning were made to ICELLNET (updated to latest resource version), 
+most of which were minor. Changes worth mentioning were made to ICELLNET (updated to latest resource version),
 CellPhoneDB (was filtered for ambigous interactions), and CellChatDB was filtered for mislaballed interactions.
 
 
@@ -399,16 +403,16 @@ CellPhoneDB (was filtered for ambigous interactions), and CellChatDB was filtere
 ## Improvements
 * The R re-implementation of CellPhoneDBv2's permutation algorithm was optimized
 to work with sparse matrices (and is now uqicker), and set as the default option
-in LIANA (replacing the re-implementation of the same algorithm from squidpy)  
+in LIANA (replacing the re-implementation of the same algorithm from squidpy)
 
 * Custom proportion filtering - Connectome and CytoTalk are now not filtered by
-expr_prop as this affects the way that their scores are calculated, since they 
+expr_prop as this affects the way that their scores are calculated, since they
 require all clusters/cluster pairs to be present to appropriately scale or
 normalize their scores.
 
 
 ## Bug Fixes
-* Fixed an issue where logFC was assigned only the value of the ligand   
+* Fixed an issue where logFC was assigned only the value of the ligand
 
 
 
@@ -420,15 +424,15 @@ In contrast to the CytoTalk, in our calculation CTS with ligand or receptor with
 PEM of 0 are assigned 0 CTS. Furthermore, we use the inverse of the non-self-talk
 scores calculated in CytoTalk to also allow for autocrine signalling interactions,
 and thus make Crosstalk scores comparable to the rest of the methods in LIANA.
-Finally, as part of LIANA, CytoTalk's re-implemented scores would not take 
+Finally, as part of LIANA, CytoTalk's re-implemented scores would not take
 account of complexes and we also apply liana-specifc filtering such as according
 to `expr_prop`. Worth noting, we only re-implement the cross-talk scores, but we
 don't include the intracellular part of Cytotalk.
 
 ## Changes
 
-* Changed `expr_thresh` to 0.1, based on lack of improvement in performance when using 0.2, hence opted out for the less conservative threshold as default   
-* Changed the way that default parameters are passed to each method  
+* Changed `expr_thresh` to 0.1, based on lack of improvement in performance when using 0.2, hence opted out for the less conservative threshold as default
+* Changed the way that default parameters are passed to each method
 * Enabled housekeeping score aggregation for external methods (needed for revisions) via `.score_housekeep`
 * Fixed Bug where external methods could not be called with their default DB. The resource is now always decomplexified
 * Seurat Testdata is now properly normalized
@@ -442,12 +446,12 @@ don't include the intracellular part of Cytotalk.
 
 `liana_wrap` and `liana_aggragate` as the two highest level functions to run all the methods in liana and aggragate them, respectively.
 
-### Re-implemented the following scores in LIANA:   
+### Re-implemented the following scores in LIANA:
 
-* logFC  
-* NATMI specificity edges  
-* Connectome scaled_weights    
-* CellPhoneDB algorithm   
+* logFC
+* NATMI specificity edges
+* Connectome scaled_weights
+* CellPhoneDB algorithm
 * SingleCellSignalR LRScore
 
 each called via `liana_call`, which leverages the statistics provided by `liana_pipe`,
@@ -456,14 +460,14 @@ each called via `liana_call`, which leverages the statistics provided by `liana_
 
 * Not re-implemented method score names now start with `call_*`
 
-* `decomplexify` and `recomplexify` as functions used to dissociate complexes in resources and 
-account for complexes of the re-implemented methods above   
+* `decomplexify` and `recomplexify` as functions used to dissociate complexes in resources and
+account for complexes of the re-implemented methods above
 
-* `liana_aggragate` - a handy wrapper to aggregate results   
+* `liana_aggragate` - a handy wrapper to aggregate results
 
-* `LIANA` and `LIANA++` are now the user-friendly and benchmark version of LIANA, respectively   
+* `LIANA` and `LIANA++` are now the user-friendly and benchmark version of LIANA, respectively
 
-* A webpage with vignettes showing the validity of the re-implemented methods, a developer/benchmark-focused vignette, and a vignette to customize OmniPath  
+* A webpage with vignettes showing the validity of the re-implemented methods, a developer/benchmark-focused vignette, and a vignette to customize OmniPath
 
 ## Bug fixes