a lot done

rscherrer · Jun 8, 2021 · 38cb018 · 38cb018
1 parent 124b3f3
commit 38cb018
Show file tree

Hide file tree

Showing 63 changed files with 580 additions and 214 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -15,6 +15,7 @@ export(give_edges_coordinates)
 export(guess_nedges)
 export(guess_nloci)
 export(guess_nrows)
+export(interpret_variable_names)
 export(is_complete)
 export(is_extant)
 export(is_extinct)
@@ -31,15 +32,20 @@ export(read_arch)
 export(read_arch_file)
 export(read_arch_genome)
 export(read_arch_network)
+export(read_architecture)
 export(read_binary)
 export(read_bitset)
 export(read_data)
+export(read_edges)
 export(read_genome)
 export(read_indiv_loci)
+export(read_individual_genomes)
 export(read_individuals)
+export(read_loci)
 export(read_network)
 export(read_param)
 export(read_param_file)
+export(read_parameters)
 export(read_pedigree)
 export(read_pop)
 export(read_population)
@@ -48,6 +54,7 @@ export(read_slurm_status)
 export(read_speciome)
 export(read_this)
 export(rename_str)
+export(rm_plural_colnames)
 export(set_param_file)
 export(smoothen_data)
 export(trait_colors)

diff --git a/R/interpret_variable_names.R b/R/interpret_variable_names.R
@@ -0,0 +1,81 @@
+#' Interpret variable names
+#'
+#' Adds labels to variable names that are provided without labels
+#'
+#' @param x Vector of names of variables
+#' @param type The label to add (either of "locus", "edge", "individual" or "trait")
+#'
+#' @details The function adds the specified label to the variable names. For example,
+#' "Fst" becomes "locus_Fst" if \code{type = "locus"}. It will not add a label
+#' if it detects that the label is already there, i.e. "locus_Fst" will stay "locus_Fst".
+#'
+#' @return A vector of strings
+#'
+#' @note In the case of \code{type = "trait"} some variable names will be considered
+#' exceptions to which not to add a label. These are the variables that are saved
+#' per time point and/or per ecotype or habitat but not per trait
+#' ("EI", "RI", "SI", "population_sizes", "ecotype_population_sizes" and "habitat_resources")
+#'
+#' @examples
+#'
+#' interpret_variable_names(c("Fst", "Qst"), "locus")
+#' interpret_variable_names(c("corbreed", "corfreq"), "edge")
+#'
+#' @export
+
+interpret_variable_names <- function(x, type) {
+
+  if (type == "locus") return(interpret_variable_names_locus(x))
+  if (type == "edge") return(interpret_variable_names_edge(x))
+  if (type == "individual") return(interpret_variable_names_individual(x))
+  if (type == "trait") return(interpret_variable_names_trait(x))
+
+}
+
+interpret_variable_names_locus <- function(x) {
+
+  # Add locus label to the rest that do not have it
+  is_locus <- stringr::str_detect(x, "locus_")
+  x[!is_locus] <- paste0("locus_", x[!is_locus])
+
+  return(x)
+
+}
+
+interpret_variable_names_edge <- function(x) {
+
+  # Add edge label to the variables that do not have it
+  is_edge <- stringr::str_detect(x, "edge_")
+  x[!is_edge] <- paste0("edge_", x[!is_edge])
+
+  return(x)
+
+}
+
+interpret_variable_names_individual <- function(x) {
+
+  # Add individual label to the variables that do not have it
+  is_individual <- stringr::str_detect(x, "individual_")
+  x[!is_individual] <- paste0("individual_", x[!is_individual])
+
+  return(x)
+
+}
+
+interpret_variable_names_trait <- function(x) {
+
+  # Add trait label to the variables that do not have it
+  is_trait <- stringr::str_detect(x, "trait_")
+
+  # Except for these exceptions
+  exceptions <- c(
+    "EI", "RI", "SI", "population_sizes", "ecotype_population_sizes",
+    "habitat_resources"
+  )
+
+  is_trait[x %in% exceptions] <- TRUE
+  x[!is_trait] <- paste0("trait_", x[!is_trait])
+
+  return(x)
+
+}
diff --git a/R/read_architecture.R b/R/read_architecture.R
@@ -11,7 +11,7 @@
 #'
 #' @examples
 #'
-#' root <- "inst/extdata/sim-example"
+#' root <- system.file("extdata", "sim-example", package = "speciomer")
 #' read_architecture(root)
 #'
 #' @export
@@ -58,8 +58,9 @@ read_architecture <- function(root) {
 
   # Store locus-wise fields in a tibble
   nodes <- with(arch, tibble::tibble(
+    locus = seq(length(traits)),
     chromosome = purrr::map_int(locations, ~ which(.x < chromosomes)[1]),
-    trait = traits + 1,
+    trait = as.integer(traits + 1),
     location = locations,
     effect = effects,
     dominance = dominances
@@ -78,15 +79,35 @@ read_architecture <- function(root) {
     curr_network <- purrr::map_dfc(arch[is_curr_network], ~ .x)
 
     # Rename the columns
-    colnames(curr_network) <- c("from", "to", "weights")
+    colnames(curr_network) <- c("from", "to", "weight")
+
+    # Correct indexing of loci
+    curr_network <- curr_network %>% dplyr::mutate(from = from + 1, to = to + 1)
 
     # Add a column for the trait
-    curr_network <- curr_network %>% dplyr::mutate(trait = curr_trait + 1)
+    curr_network <- curr_network %>%
+      dplyr::mutate(trait = as.integer(curr_trait + 1))
 
     return(curr_network)
 
   })
 
+  # Add edge identifier
+  edges <- edges %>%
+    tibble::add_column(edge = seq(nrow(edges)), .before = "from")
+
+  # Additional locus-wise data from the network
+  nodes_extra <- edges %>%
+    tidyr::pivot_longer(cols = c(from, to), values_to = "locus") %>%
+    dplyr::group_by(locus) %>%
+    dplyr::summarize(
+      degree = dplyr::n(),
+      max_abs_weight = max(abs(weight))
+    )
+
+  # Add them
+  nodes <- nodes %>% dplyr::right_join(nodes_extra)
+
   # Make a list of the two tibbles
   arch <- list(nodes = nodes, edges = edges)
 

diff --git a/R/read_binary.R b/R/read_binary.R
@@ -8,7 +8,8 @@
 #'
 #' @examples
 #'
-#' read_binary("inst/extdata/sim-example/time.dat")
+#' root <- system.file("extdata", "sim-example", package = "speciomer")
+#' read_binary(paste0(root, "/time.dat"))
 #'
 #' @export
 

diff --git a/R/read_bitset.R b/R/read_bitset.R
@@ -1,23 +1,15 @@
-#' Read bitset
+#' Read a bitset
 #'
-#' Read a binary file bit-wise and return a vector of integers (0/1)
-#' (e.g. whole genomes of the whole population)
+#' Reads a binary file bit-wise and returns a vector of binary integers (0 or 1)
 #'
 #' @param filename Path to the file to read
 #'
 #' @return A vector of integers
 #'
 #' @examples
 #'
-#' \dontrun{
-#'
-#' # Location of the simulation folder
-#' root <- "data/example_1"
-#'
-#' f <- file.path(root, "time.dat")
-#' read_bitset(f)
-#'
-#' }
+#' root <- system.file("extdata", "sim-indiv-genomes", package = "speciomer")
+#' read_bitset(paste0(root, "/individual_whole_genomes.dat"))
 #'
 #' @export
 

diff --git a/R/read_edges.R b/R/read_edges.R
@@ -4,7 +4,7 @@
 #' edge per time point.
 #'
 #' @param root Path to the simulation folder
-#' @param variables Vector of names of variable to read
+#' @param variables Vector of names of variable to read (will be interpreted using \code{interpret_variable_names})
 #' @param architecture Whether to append edge-wise genetic architecture parameters
 #' (see \code{?read_architecture}).
 #'
@@ -20,13 +20,16 @@
 #'
 #' @examples
 #'
-#' root <- "inst/extdata/sim-example"
+#' root <- system.file("extdata", "sim-example", package = "speciomer")
 #' read_edges(root, "edge_corbreed")
 #'
 #' @export
 
 read_edges <- function(root, variables, architecture = TRUE) {
 
+  # Add an edge prefix to the variable names if needed
+  variables <- interpret_variable_names(variables, type = "edge")
+
   # Count the number of edges
   parameters <- read_parameters(root)
   nedges <- parameters[["nedges"]]
@@ -43,18 +46,20 @@ read_edges <- function(root, variables, architecture = TRUE) {
   # Remove the prefix "edge", now redundant
   colnames(data) <- stringr::str_remove(colnames(data), "edge_")
 
+  # Add an edge identifier
+  data <- data %>%
+    tibble::add_column(edge = 1, .after = "time") %>%
+    dplyr::group_by(time) %>%
+    dplyr::mutate(edge = seq(dplyr::n())) %>%
+    dplyr::ungroup()
+
   if (!architecture) return(data)
 
   # Read the genetic architecture
   arch <- read_architecture(root)[["edges"]]
 
   # Append it to each time point
-  data <- data %>%
-    dplyr::group_by(time) %>%
-    tidyr::nest() %>%
-    dplyr::mutate(arch = purrr::map(time, ~ arch)) %>%
-    tidyr::unnest(cols = c(data, arch)) %>%
-    dplyr::ungroup()
+  data <- data %>% dplyr::left_join(arch)
 
   return(data)
 

diff --git a/R/read_individual_genomes.R b/R/read_individual_genomes.R
@@ -20,20 +20,17 @@
 #'
 #' @examples
 #'
-#' root <- "inst/extdata/sim-example/"
+#' root <- system.file("extdata", "sim-indiv-genomes", package = "speciomer")
 #' read_individual_genomes(root)
-#' read_individual_genomes(
-#'   root, individual_variables = c("individual_traits", "individual_ecotypes"),
-#'   individual_ncols = c(3, 1)
-#' )
+#' read_individual_genomes(root, "individual_ecotypes")
 #' read_individual_genomes(root, locus_variables = "locus_Fst")
 #'
 #' @export
 
 read_individual_genomes <- function(
 
   root, individual_variables = NULL, individual_ncols = NULL,
-  locus_variables = NULL, locus_architecture = TRUE
+  locus_variables = NULL, locus_architecture = FALSE
 
 ) {
 
@@ -99,11 +96,7 @@ read_individual_genomes <- function(
     individual_data <- read_individuals(root, individual_variables, individual_ncols)
 
     # Add them
-    data <- data %>%
-      dplyr::group_by(individual, time) %>%
-      tidyr::nest() %>%
-      dplyr::bind_cols(individual_data %>% dplyr::select(-time)) %>%
-      tidyr::unnest(cols = data)
+    data <- data %>% dplyr::right_join(individual_data)
 
   }
 
@@ -113,11 +106,7 @@ read_individual_genomes <- function(
     locus_data <- read_loci(root, locus_variables, locus_architecture)
 
     # Add them
-    data <- data %>%
-      dplyr::group_by(locus, time) %>%
-      tidyr::nest() %>%
-      dplyr::bind_cols(locus_data %>% dplyr::select(-time)) %>%
-      tidyr::unnest(cols = data)
+    data <- data %>% dplyr::right_join(locus_data)
 
   }
 
@@ -127,11 +116,7 @@ read_individual_genomes <- function(
     arch <- read_architecture(root)[["nodes"]]
 
     # Append it
-    data <- data %>%
-      dplyr::group_by(locus) %>%
-      tidyr::nest() %>%
-      dplyr::bind_cols(arch) %>%
-      tidyr::unnest(data)
+    data <- data %>% dplyr::right_join(arch)
 
   }
 

diff --git a/R/read_individuals.R b/R/read_individuals.R
@@ -21,14 +21,17 @@
 #'
 #' @examples
 #'
-#' root <- "inst/extdata/sim-example"
+#' root <- system.file("extdata", "sim-example", package = "speciomer")
 #' read_individuals(root, "individual_ecotypes")
 #' read_individuals(root, c("individual_traits", "individual_ecotypes"), ncols = c(3, 1))
 #'
 #' @export
 
 read_individuals <- function(root, variables, ncols = NULL) {
 
+  # Add an individual prefix to the variable names if needed
+  variables <- interpret_variable_names(variables, type = "individual")
+
   if (is.null(ncols)) ncols <- rep(1, length(variables))
 
   # Add time to the list of variables to read
@@ -45,6 +48,9 @@ read_individuals <- function(root, variables, ncols = NULL) {
   # Remove the prefix "individual" from the column names
   colnames(data) <- stringr::str_remove(colnames(data), "individual_")
 
+  # Add an individual identifier
+  data <- data %>% tibble::add_column(individual = seq(nrow(data)), .after = "time")
+
   # Remove plurals in column names
   colnames(data) <- rm_plural_colnames(colnames(data))