put column_matches_exact back in

basic tests for combine_lipidData
pmartR · Mar 14, 2022 · 6084612 · 6084612
1 parent a72c919
commit 6084612
Show file tree

Hide file tree

Showing 3 changed files with 101 additions and 9 deletions.
diff --git a/R/combine_omicsdata.R b/R/combine_omicsdata.R
@@ -85,6 +85,12 @@ combine_lipidData <- function(obj_1, obj_2, retain_groups = FALSE, retain_filter
       ))
   )
 
+  molnames <- new_edata[,get_edata_cname(obj_1)]
+
+  if(length(molnames) != length(unique(molnames))) {
+    message("Duplicate molecule identifiers were found in your combined data.")
+  }
+
   # Combined f_data is simply a left join, since we require the sample names
   # are the same.
   new_fdata <- obj_1$f_data %>% 
@@ -98,7 +104,7 @@ combine_lipidData <- function(obj_1, obj_2, retain_groups = FALSE, retain_filter
     new_emeta <- dplyr::bind_rows(
       obj_1$e_meta, 
       obj_2$e_meta %>% 
-        rename(setNames(
+        dplyr::rename(setNames(
           get_emeta_cname(obj_2), 
           get_emeta_cname(obj_1)
         ))
@@ -112,8 +118,7 @@ combine_lipidData <- function(obj_1, obj_2, retain_groups = FALSE, retain_filter
 
     if (length(unique(new_emeta_ids)) != 
         length(unique(emeta_ids_1)) + length(unique(emeta_ids_2))) {
-      wrap_warning("There were e_meta identifiers that occurred in both datasets,
-              they have been duplicated in the new object's e_meta.")
+      warning("There were e_meta identifiers that occurred in both datasets, they have been duplicated in the new object's e_meta.")
     }
 
   } else{
@@ -167,7 +172,11 @@ combine_lipidData <- function(obj_1, obj_2, retain_groups = FALSE, retain_filter
     }) %>% unlist()
 
     covariate_matches = if(!is.null(attr(group_df, "covariates"))) {
-      lapply(attr(group_df, "covariates"), function(x) {
+      covnames = attr(group_df, "covariates") %>% 
+        dplyr::select(-one_of(get_fdata_cname(obj_1))) %>% 
+        colnames()
+
+      lapply(covnames, function(x) {
         column_matches_exact(samp_info, tmp_fdata[,x])[1]
       }) %>% unlist()
     } else NULL

diff --git a/R/helper_fn.R b/R/helper_fn.R
@@ -979,13 +979,32 @@ get_group_table <- function (omicsObject) {
 
 }
 
-#' Custom message functions to pretty-print text with newlines so you can follow
+#'Helper to find the names of columns of a data.frame that contain exactly all
+#'the elements of an input column
+#'
+#'@param df A data.frame whose columns we want to match to some query column.
+#'@param col Vector of values which will be compared to a column in df.
+#'
+#'@return vector of column names of df that contain exactly all the elements of
+#'the input column
+#'
+#'@keywords internal
+column_matches_exact <- function(df, col) {
+  diffs = lapply(df, function(df_col) {
+    length(setdiff(
+      union(df_col, col),
+      intersect(df_col, col)
+    ))
+  })
+
+  matched_cnames = names(diffs)[which(diffs == 0)] 
+
+  return(matched_cnames)
+}
+
+#' Custom message function to pretty-print text with newlines so you can follow
 #' character limit guidelines in source code.
 #' @noRd
 wrap_message <- function(..., prefix = " ", initial = ""){
   message(strwrap(..., prefix = prefix, initial = initial))
 }
-
-wrap_warning <- function(..., prefix = " ", initial = ""){
-  warning(strwrap(..., prefix = prefix, initial = initial))
-}
diff --git a/tests/testthat/test_combine_omicsData.R b/tests/testthat/test_combine_omicsData.R
@@ -0,0 +1,64 @@
+source(system.file('testdata', 'load_data.R', package = 'pmartR'))
+
+obj1 <- edata_transform(ldata, "log2")
+obj1 <- normalize_global(obj1, "all", "median", apply_norm = T)
+
+fake_cov <- c(rep("A", 5), rep("B", 6))
+obj1$f_data["cov"] <- fake_cov
+
+obj1 <- group_designation(obj1, "Condition", covariates = "cov")
+obj2 <- obj1
+
+# Some fake edata ID's to make it unique
+obj2$e_data[,get_edata_cname(obj2)] <- paste0("obj2_", obj2$e_data[,get_edata_cname(obj2)])
+obj2$e_meta[,get_edata_cname(obj2)] <- paste0("obj2_", obj2$e_meta[,get_edata_cname(obj2)])
+
+obj1 <- applyFilt(molecule_filter(obj1),obj1, min_num = 2)
+obj2 <- applyFilt(cv_filter(obj2),obj2, cv_thresh = 60)
+
+suppressWarnings({
+  combn1 <- combine_lipidData(obj1, obj2)
+  combn2 <- combine_lipidData(obj1, obj2, retain_groups = T)
+  combn3 <- combine_lipidData(obj1, obj2, retain_groups = F, retain_filters = T)
+  combn4 <- combine_lipidData(obj1, obj2, retain_groups = T, retain_filters = T)
+})
+
+test_that("attributes correctly stored", {
+  expect_true(all(
+    is.null(attr(combn1, "group_DF")),
+    length(attr(combn1, "filters")) == 0
+  ))
+
+  # drop filters and grouping info
+  expect_true(all(
+    !is.null(attr(combn2, "group_DF")),
+    length(attr(combn2, "filters")) == 0
+  ))
+
+  # no filters, keep groups
+  expect_true(all(
+    !is.null(attr(combn2, "group_DF")),
+    length(attr(combn2, "filters")) == 0
+  ))
+
+  ### no groups, keep filters
+  ftypes <- attr(combn3, "filters") %>% 
+    lapply(function(x) x$type)
+
+  expect_true(all(
+    is.null(attr(combn3, "group_DF")),
+    length(ftypes) == 2,
+    all(ftypes == c("moleculeFilt", "cvFilt"))
+  ))
+
+  ftypes <- attr(combn4, "filters") %>% 
+    lapply(function(x) x$type)
+
+  # keep both filters and groups
+  expect_true(all(
+    !is.null(attr(combn4, "group_DF")),
+    length(ftypes) == 2,
+    all(ftypes == c("moleculeFilt", "cvFilt"))
+  ))
+})
+