From 9acf16e38a8dd2fa177c7e7ddae85edaae4b711f Mon Sep 17 00:00:00 2001
From: warrenmcg <warren-mcgee@fsm.northwestern.edu>
Date: Wed, 27 Jun 2018 14:23:01 -0500
Subject: [PATCH 01/11] add back in deleted 'gene_from_gene' method to fix #190

---
 NAMESPACE             |  1 +
 R/sleuth.R            | 46 +++++++++++++++++++++++++++++++++++++++++++
 man/gene_from_gene.Rd | 34 ++++++++++++++++++++++++++++++++
 3 files changed, 81 insertions(+)
 create mode 100644 man/gene_from_gene.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 01d495b..bb5f80d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -31,6 +31,7 @@ export(design_matrix)
 export(enclosed_brush)
 export(excluded_ids)
 export(extract_model)
+export(gene_from_gene)
 export(get_bootstrap_summary)
 export(get_bootstraps)
 export(get_quantile)
diff --git a/R/sleuth.R b/R/sleuth.R
index fea2305..1c1e6bd 100644
--- a/R/sleuth.R
+++ b/R/sleuth.R
@@ -1150,6 +1150,52 @@ transcripts_from_gene <- function(obj, test, test_type,
   table$target_id[table[, 2] == gene_name]
 }
 
+#' Get the gene ID using other gene identifiers
+#'
+#' Get the \code{target_id} of a gene using other gene identifiers.
+#' The identifiers found under the \code{obj$gene_column} are often
+#' difficult to remember (e.g. ensembl gene ID, ENSG00000111640).
+#' This function allows a user to find that difficult-to-remember
+#' identifier using more-easily-remembered identifiers, such as
+#' gene symbol (e.g. "GAPDH").
+#'
+#' @param obj a \code{sleuth} object
+#' @param gene_colname the name of the column containing 'gene_name'.
+#'   This parameter refers to the name of the column that the gene you are searching for appears in.
+#'   Check the column names using \code{colnames(obj$target_mapping)}.
+#' @param gene_name a string containing the name of the gene you are interested in.
+#' @return a character vector containing the \code{target_id} of the gene, found under
+#'   \code{obj$gene_column} within \code{obj$target_mapping}.
+#'   If the column name provided is the same as \code{obj$gene_column}, and the
+#'   gene_name used is found, that gene_name will be returned.
+#' @examples
+#'   \dontrun{gene_from_gene(obj, "gene_symbol", "GAPDH")}
+#' @export
+gene_from_gene <- function(obj, gene_colname, gene_name) {
+  # This lookup only applies to gene-mode objects; transcript mode is rejected.
+  if (!obj$gene_mode) {
+    stop("this sleuth object is in transcript mode. Please use 'transcripts_from_gene' instead.")
+  }
+  table <- as.data.frame(obj$target_mapping)
+  if (gene_colname == obj$gene_column) {
+    # Index the column by name: eval(parse()) would look for a variable, not a column.
+    if (!(gene_name %in% table[[obj$gene_column]])) {
+      stop("Couldn't find gene ", gene_name)
+    }
+    return(gene_name)
+  }
+
+  table <- unique(table[, c(obj$gene_column, gene_colname), drop = FALSE])
+  if (!(gene_name %in% table[[2]])) {
+    stop("Couldn't find gene ", gene_name)
+  }
+  # '%in%' never returns NA, so rows with a missing identifier cannot produce NA hits.
+  hits <- unique(table[table[[2]] %in% gene_name, 1])
+  if (length(hits) > 1) {
+    warning("there was more than one gene ID that matched this identifier; taking the first one")
+  }
+  hits[1]
+}
 
 #' Change sleuth transform counts function
 #'
diff --git a/man/gene_from_gene.Rd b/man/gene_from_gene.Rd
new file mode 100644
index 0000000..0f7077e
--- /dev/null
+++ b/man/gene_from_gene.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sleuth.R
+\name{gene_from_gene}
+\alias{gene_from_gene}
+\title{Get the gene ID using other gene identifiers}
+\usage{
+gene_from_gene(obj, gene_colname, gene_name)
+}
+\arguments{
+\item{obj}{a \code{sleuth} object}
+
+\item{gene_colname}{the name of the column containing 'gene_name'.
+This parameter refers to the name of the column that the gene you are searching for appears in.
+Check the column names using \code{colnames(obj$target_mapping)}.}
+
+\item{gene_name}{a string containing the name of the gene you are interested in.}
+}
+\value{
+a character vector containing the \code{target_id} of the gene, found under
+  \code{obj$gene_column} within \code{obj$target_mapping}.
+  If the column name provided is the same as \code{obj$gene_column}, and the
+  gene_name used is found, that gene_name will be returned.
+}
+\description{
+Get the \code{target_id} of a gene using other gene identifiers.
+The identifiers found under the \code{obj$gene_column} are often
+difficult to remember (e.g. ensembl gene ID, ENSG00000111640).
+This function allows a user to find that difficult-to-remember
+identifier using more-easily-remembered identifiers, such as
+gene symbol (e.g. "GAPDH").
+}
+\examples{
+  \dontrun{gene_from_gene(obj, "gene_symbol", "GAPDH")}
+}

From 50b35d0629c7f46fa8303b08b3ce30efce462b31 Mon Sep 17 00:00:00 2001
From: warrenmcg <warren-mcgee@fsm.northwestern.edu>
Date: Wed, 27 Jun 2018 14:23:27 -0500
Subject: [PATCH 02/11] fix outdated documentation for 'sleuth_to_matrix'

---
 R/matrix.R              | 5 +++--
 man/sleuth_to_matrix.Rd | 8 ++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/R/matrix.R b/R/matrix.R
index 4ffecf3..b0fff75 100644
--- a/R/matrix.R
+++ b/R/matrix.R
@@ -24,8 +24,9 @@
 #' @param which_df character vector of length one. Which type of data to use
 #' ("obs_norm" or "obs_raw")
 #' @param which_units character vector of length one. Which units to use ("tpm"
-#' or "est_counts")
-#' @return a matrix which contains a matrix of target_ids and transcript expression in \code{which_units}
+#' or "est_counts" (for transcript-level analyses) or "scaled_reads_per_base" (for gene-level analyses))
+#' @return a matrix which contains a matrix of target_ids and transcript (or gene) expression in \code{which_units}.
+#'   Note this currently does not support returning raw values for gene-level counts or TPMs.
 #' @examples
 #' sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm')
 #' head(sleuth_matrix) # look at first 5 transcripts, sorted by name
diff --git a/man/sleuth_to_matrix.Rd b/man/sleuth_to_matrix.Rd
index bc4bc30..c5e0f6d 100644
--- a/man/sleuth_to_matrix.Rd
+++ b/man/sleuth_to_matrix.Rd
@@ -13,16 +13,16 @@ sleuth_to_matrix(obj, which_df, which_units)
 ("obs_norm" or "obs_raw")}
 
 \item{which_units}{character vector of length one. Which units to use ("tpm"
-or "est_counts")}
+or "est_counts" (for transcript-level analyses) or "scaled_reads_per_base" (for gene-level analyses))}
 }
 \value{
-a \code{list} with an attribute 'data', which contains a matrix of target_ids
-        and transcript expression in \code{which_units}
+a matrix which contains a matrix of target_ids and transcript (or gene) expression in \code{which_units}.
+  Note this currently does not support returning raw values for gene-level counts or TPMs.
 }
 \description{
 Convert a sleuth object to a matrix with the condition names.
 }
 \examples{
 sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm')
-head(sleuth_matrix$data) # look at first 5 transcripts, sorted by name
+head(sleuth_matrix) # look at first 5 transcripts, sorted by name
 }

From f94976bf1f1c7aaae3268c1372e12bd30541e784 Mon Sep 17 00:00:00 2001
From: "Andrew J. Rech" <rech@rech.io>
Date: Mon, 4 Feb 2019 18:22:55 -0500
Subject: [PATCH 03/11] Improve obs_to_matrix with data.table

Replace reshape2::dcast with data.table::dcast for a speed gain and to avoid an issue where long sample names cause a vector return error in reshape2 but not in data.table:

Using reshape2:

```
 obs_counts <- dcast(obj$obs_norm, target_id ~ sample, value.var = value_name)
Aggregation function missing: defaulting to length
Error during wrapup: dims [product 868328] do not match the length of object [41674225]
```

But no error using data.table.

[`obj_norm`](https://s3.amazonaws.com/rech-ul/obj_norm.RDS?AWSAccessKeyId=AKIAI6SXE4VOIPIZJI6Q&Expires=1601162547&Signature=3lPIt6yZ2UPFhMioUwwRopzo8eM%3D) to reproduce:
---
 R/sleuth.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/sleuth.R b/R/sleuth.R
index 1c1e6bd..08523fe 100644
--- a/R/sleuth.R
+++ b/R/sleuth.R
@@ -1007,7 +1007,8 @@ kallisto_table <- function(obj,
 # @return a matrix with the appropriate names
 obs_to_matrix <- function(obj, value_name) {
 
-  obs_counts <- reshape2::dcast(obj$obs_norm, target_id ~ sample,
+  data.table::as.data.table(obj$obs_norm) <- data.table::as.data.table(obj$obs_norm)
+  obs_counts <- data.table::dcast(obj$obs_norm, target_id ~ sample,
     value.var = value_name)
 
   obs_counts <- as.data.frame(obs_counts)

From 6a1e85d1b364e57f35b10bc0a60d29cb933a61d1 Mon Sep 17 00:00:00 2001
From: "Andrew J. Rech" <rech@rech.io>
Date: Mon, 4 Feb 2019 18:54:34 -0500
Subject: [PATCH 04/11] typo

---
 R/sleuth.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/sleuth.R b/R/sleuth.R
index 08523fe..4ec6a8b 100644
--- a/R/sleuth.R
+++ b/R/sleuth.R
@@ -1007,7 +1007,7 @@ kallisto_table <- function(obj,
 # @return a matrix with the appropriate names
 obs_to_matrix <- function(obj, value_name) {
 
-  data.table::as.data.table(obj$obs_norm) <- data.table::as.data.table(obj$obs_norm)
+  obj$obs_norm <- data.table::as.data.table(obj$obs_norm)
   obs_counts <- data.table::dcast(obj$obs_norm, target_id ~ sample,
     value.var = value_name)
 

From ea5a3904d92efa15d56002bbc5e28b7bf57be70b Mon Sep 17 00:00:00 2001
From: andrewrech <rech@rech.io>
Date: Mon, 4 Feb 2019 19:12:49 -0500
Subject: [PATCH 05/11] data.table::dcast used on data frame

---
 R/bootstrap.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/bootstrap.R b/R/bootstrap.R
index 6e1d11f..67edaac 100644
--- a/R/bootstrap.R
+++ b/R/bootstrap.R
@@ -485,6 +485,7 @@ process_bootstrap <- function(i, samp_name, kal_path,
                                           mappings)
     # this step undoes the tidying to get back a matrix format
     # target_ids here are now the aggregation column ids
+    scaled_bs <- data.table::as.data.table(scaled_bs)
     bs_mat <- data.table::dcast(scaled_bs, sample ~ target_id,
                                 value.var = "scaled_reads_per_base")
     # this now has the same format as the transcript matrix

From 395bcd8c2fa205ba92195fc0d3ba7568c6d8d60a Mon Sep 17 00:00:00 2001
From: andrewrech <rech@rech.io>
Date: Sat, 23 Feb 2019 19:57:04 -0500
Subject: [PATCH 06/11] Bandaid for infernal issue
 https://github.com/pachterlab/sleuth/issues/135

---
 NAMESPACE    | 1 +
 R/fix_head.R | 6 ++++++
 2 files changed, 7 insertions(+)
 create mode 100644 R/fix_head.R

diff --git a/NAMESPACE b/NAMESPACE
index bb5f80d..b76159c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -41,6 +41,7 @@ export(log_transform)
 export(melt_bootstrap_sleuth)
 export(models)
 export(norm_factors)
+export(head)
 export(plot_bootstrap)
 export(plot_fld)
 export(plot_group_density)
diff --git a/R/fix_head.R b/R/fix_head.R
new file mode 100644
index 0000000..65fdc0d
--- /dev/null
+++ b/R/fix_head.R
@@ -0,0 +1,6 @@
+
+## ---- head
+#' Re-export \code{utils::head} to fix the unexported head error.
+#'
+#' @export head
+head <- utils::head

From d05cd9b9c48b85a5a5b132681982fb10005b1a08 Mon Sep 17 00:00:00 2001
From: "Andrew J. Rech" <rech@rech.io>
Date: Sat, 16 Mar 2019 11:03:42 -0400
Subject: [PATCH 07/11] importFrom utils head

---
 R/sleuth.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/sleuth.R b/R/sleuth.R
index 4ec6a8b..d387818 100644
--- a/R/sleuth.R
+++ b/R/sleuth.R
@@ -1092,6 +1092,7 @@ summary.sleuth <- function(obj, covariates = TRUE) {
 #' head(sleuth_genes) # show info for first 5 genes
 #' sleuth_genes[1:5, 6] # show transcripts for first 5 genes
 #' @export
+#' @importFrom utils head
 sleuth_gene_table <- function(obj, test, test_type = 'lrt', which_model = 'full', which_group = 'ens_gene') {
 
   if (is.null(obj$target_mapping)) {

From ed95e72ccdec960c0a5c2746e677c53ee8c95996 Mon Sep 17 00:00:00 2001
From: "Andrew J. Rech" <rech@rech.io>
Date: Sat, 16 Mar 2019 11:04:51 -0400
Subject: [PATCH 08/11] importFrom utils head

---
 R/matrix.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/matrix.R b/R/matrix.R
index b0fff75..ee17e41 100644
--- a/R/matrix.R
+++ b/R/matrix.R
@@ -31,6 +31,7 @@
 #' sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm')
 #' head(sleuth_matrix) # look at first 5 transcripts, sorted by name
 #' @export
+#' @importFrom utils head
 sleuth_to_matrix <- function(obj, which_df, which_units) {
   if ( !(which_df %in% c("obs_norm", "obs_raw")) ) {
     stop("Invalid object")

From e4341e6d78ed64dc7864db7b711eafb2912d0434 Mon Sep 17 00:00:00 2001
From: "Andrew J. Rech" <rech@rech.io>
Date: Sat, 16 Mar 2019 11:06:53 -0400
Subject: [PATCH 09/11] utils::head

For d05cd9b ed95e7
---
 NAMESPACE | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NAMESPACE b/NAMESPACE
index b76159c..03f118b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -88,3 +88,4 @@ importFrom(lazyeval,interp)
 importFrom(lazyeval,lazy)
 importFrom(rhdf5,h5write)
 importFrom(rhdf5,h5write.default)
+importFrom(utils, head)

From 5ca29dc05bb541ab7ffd3e296feb1bb491dab63d Mon Sep 17 00:00:00 2001
From: "Andrew J. Rech" <rech@rech.io>
Date: Sat, 16 Mar 2019 11:12:12 -0400
Subject: [PATCH 10/11] convert plots.R reshape2 calls to data.table

If `tabd_df` is not a data table, it is converted. Don't need `as.data.table`.
---
 R/plots.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/plots.R b/R/plots.R
index 5077857..5d6d0e1 100644
--- a/R/plots.R
+++ b/R/plots.R
@@ -1035,13 +1035,13 @@ plot_transcript_heatmap <- function(obj,
 
   if (units == 'tpm') {
     tabd_df <- dplyr::select(tabd_df, target_id, sample, tpm)
-    tabd_df <- reshape2::dcast(tabd_df, target_id ~sample, value.var = 'tpm')
+    tabd_df <- data.table::dcast(tabd_df, target_id ~sample, value.var = 'tpm')
   } else if (units == 'est_counts') {
     tabd_df <- dplyr::select(tabd_df, target_id, sample, est_counts)
-    tabd_df <- reshape2::dcast(tabd_df, target_id ~sample, value.var = 'est_counts')
+    tabd_df <- data.table::dcast(tabd_df, target_id ~sample, value.var = 'est_counts')
   } else if (units == 'scaled_reads_per_base') {
     tabd_df <- dplyr::select(tabd_df, target_id, sample, scaled_reads_per_base)
-    tabd_df <- reshape2::dcast(tabd_df, target_id ~sample,
+    tabd_df <- data.table::dcast(tabd_df, target_id ~sample,
                                value.var = 'scaled_reads_per_base')
   } else {
     stop("Didn't recognize the following unit: ", units)

From b076a022a738c2181368b740a237b18de5f238ec Mon Sep 17 00:00:00 2001
From: "Andrew J. Rech" <rech@rech.io>
Date: Sat, 16 Mar 2019 11:17:42 -0400
Subject: [PATCH 11/11] Remove reshape2 import

---
 DESCRIPTION | 1 -
 1 file changed, 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 2a567bc..3a2e896 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -19,7 +19,6 @@ Imports:
     dplyr,
     data.table,
     tidyr,
-    reshape2,
     rhdf5,
     parallel,
     lazyeval,