pachterlab · andrewrech · Jun 27, 2018 · Jun 27, 2018 · Jun 28, 2018 · Feb 4, 2019
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -19,7 +19,6 @@ Imports:
     dplyr,
     data.table,
     tidyr,
-    reshape2,
     rhdf5,
     parallel,
     lazyeval,

diff --git a/NAMESPACE b/NAMESPACE
@@ -31,6 +31,7 @@ export(design_matrix)
 export(enclosed_brush)
 export(excluded_ids)
 export(extract_model)
+export(gene_from_gene)
 export(get_bootstrap_summary)
 export(get_bootstraps)
 export(get_quantile)
@@ -40,6 +41,7 @@ export(log_transform)
 export(melt_bootstrap_sleuth)
 export(models)
 export(norm_factors)
+export(head)
 export(plot_bootstrap)
 export(plot_fld)
 export(plot_group_density)
@@ -86,3 +88,4 @@ importFrom(lazyeval,interp)
 importFrom(lazyeval,lazy)
 importFrom(rhdf5,h5write)
 importFrom(rhdf5,h5write.default)
+importFrom(utils, head)
diff --git a/R/bootstrap.R b/R/bootstrap.R
@@ -485,6 +485,7 @@ process_bootstrap <- function(i, samp_name, kal_path,
                                           mappings)
     # this step undoes the tidying to get back a matrix format
     # target_ids here are now the aggregation column ids
+    scaled_bs <- data.table::as.data.table(scaled_bs)
     bs_mat <- data.table::dcast(scaled_bs, sample ~ target_id,
                                 value.var = "scaled_reads_per_base")
     # this now has the same format as the transcript matrix

diff --git a/R/fix_head.R b/R/fix_head.R
@@ -0,0 +1,6 @@
+
+## ---- head
+#' Bind \code{head} to \code{utils::head} and export it (fixes the unexported head error).
+#'
+#' @export head
+head <- utils::head
diff --git a/R/matrix.R b/R/matrix.R
@@ -24,12 +24,14 @@
 #' @param which_df character vector of length one. Which type of data to use
 #' ("obs_norm" or "obs_raw")
 #' @param which_units character vector of length one. Which units to use ("tpm"
-#' or "est_counts")
-#' @return a matrix which contains a matrix of target_ids and transcript expression in \code{which_units}
+#' or "est_counts" (for transcript-level analyses) or "scaled_reads_per_base" (for gene-level analyses))
+#' @return a matrix which contains a matrix of target_ids and transcript (or gene) expression in \code{which_units}.
+#'   Note this currently does not support returning raw values for gene-level counts or TPMs.
 #' @examples
 #' sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm')
 #' head(sleuth_matrix) # look at first 5 transcripts, sorted by name
 #' @export
+#' @importFrom utils head
 sleuth_to_matrix <- function(obj, which_df, which_units) {
   if ( !(which_df %in% c("obs_norm", "obs_raw")) ) {
     stop("Invalid object")

diff --git a/R/plots.R b/R/plots.R
@@ -1035,13 +1035,13 @@ plot_transcript_heatmap <- function(obj,
 
   if (units == 'tpm') {
     tabd_df <- dplyr::select(tabd_df, target_id, sample, tpm)
-    tabd_df <- reshape2::dcast(tabd_df, target_id ~sample, value.var = 'tpm')
+    tabd_df <- data.table::dcast(tabd_df, target_id ~sample, value.var = 'tpm')
   } else if (units == 'est_counts') {
     tabd_df <- dplyr::select(tabd_df, target_id, sample, est_counts)
-    tabd_df <- reshape2::dcast(tabd_df, target_id ~sample, value.var = 'est_counts')
+    tabd_df <- data.table::dcast(tabd_df, target_id ~sample, value.var = 'est_counts')
   } else if (units == 'scaled_reads_per_base') {
     tabd_df <- dplyr::select(tabd_df, target_id, sample, scaled_reads_per_base)
-    tabd_df <- reshape2::dcast(tabd_df, target_id ~sample,
+    tabd_df <- data.table::dcast(tabd_df, target_id ~sample,
                                value.var = 'scaled_reads_per_base')
   } else {
     stop("Didn't recognize the following unit: ", units)

diff --git a/R/sleuth.R b/R/sleuth.R
@@ -1007,7 +1007,8 @@ kallisto_table <- function(obj,
 # @return a matrix with the appropriate names
 obs_to_matrix <- function(obj, value_name) {
 
-  obs_counts <- reshape2::dcast(obj$obs_norm, target_id ~ sample,
+  obj$obs_norm <- data.table::as.data.table(obj$obs_norm)
+  obs_counts <- data.table::dcast(obj$obs_norm, target_id ~ sample,
     value.var = value_name)
 
   obs_counts <- as.data.frame(obs_counts)
@@ -1091,6 +1092,7 @@ summary.sleuth <- function(obj, covariates = TRUE) {
 #' head(sleuth_genes) # show info for first 5 genes
 #' sleuth_genes[1:5, 6] # show transcripts for first 5 genes
 #' @export
+#' @importFrom utils head
 sleuth_gene_table <- function(obj, test, test_type = 'lrt', which_model = 'full', which_group = 'ens_gene') {
 
   if (is.null(obj$target_mapping)) {
@@ -1150,6 +1152,52 @@ transcripts_from_gene <- function(obj, test, test_type,
   table$target_id[table[, 2] == gene_name]
 }
 
+#' Get the gene ID using other gene identifiers
+#'
+#' Look up the gene-level \code{target_id} (the value stored under
+#' \code{obj$gene_column}, e.g. the ensembl gene ID ENSG00000111640) from a
+#' more easily remembered identifier, such as a gene symbol (e.g. "GAPDH").
+#'
+#' @param obj a \code{sleuth} object in gene mode; an error is raised otherwise.
+#' @param gene_colname the name of the column of \code{obj$target_mapping} in which
+#'   \code{gene_name} appears. Check the available columns using
+#'   \code{colnames(obj$target_mapping)}; an unknown column raises an error.
+#' @param gene_name a string containing the name of the gene you are interested in.
+#' @return the matching identifier found under \code{obj$gene_column} within
+#'   \code{obj$target_mapping}. If \code{gene_colname} is the same as
+#'   \code{obj$gene_column} and \code{gene_name} is found, \code{gene_name} itself is
+#'   returned. An error is raised when \code{gene_name} is not found; when several
+#'   gene IDs match, a warning is issued and the first one is returned.
+#' @examples
+#'   \dontrun{gene_from_gene(obj, "gene_symbol", "GAPDH")}
+#' @export
+gene_from_gene <- function(obj, gene_colname, gene_name) {
+
+  if (!obj$gene_mode) {
+    stop("this sleuth object is in transcript mode. Please use 'transcripts_from_gene' instead.")
+  }
+
+  table <- as.data.frame(obj$target_mapping)
+  if (!(gene_colname %in% colnames(table))) {
+    stop("Couldn't find column ", gene_colname, " in obj$target_mapping")
+  }
+
+  # Columns are indexed by name because 'table' is a plain data.frame.
+  # which() drops NA comparisons, so rows with a missing identifier can
+  # neither match nor contribute an NA hit.
+  rows <- which(table[[gene_colname]] == gene_name)
+  if (length(rows) == 0) {
+    stop("Couldn't find gene ", gene_name)
+  }
+  if (gene_colname == obj$gene_column) {
+    return(gene_name)
+  }
+  hits <- unique(table[[obj$gene_column]][rows])
+  if (length(hits) > 1) {
+    warning("there was more than one gene ID that matched this identifier; taking the first one")
+  }
+  hits[1]
+}
 
 #' Change sleuth transform counts function
 #'

diff --git a/man/gene_from_gene.Rd b/man/gene_from_gene.Rd
diff --git a/man/sleuth_to_matrix.Rd b/man/sleuth_to_matrix.Rd