From 9acf16e38a8dd2fa177c7e7ddae85edaae4b711f Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Wed, 27 Jun 2018 14:23:01 -0500 Subject: [PATCH 01/11] add back in deleted 'gene_from_gene' method to fix #190 --- NAMESPACE | 1 + R/sleuth.R | 46 +++++++++++++++++++++++++++++++++++++++++++ man/gene_from_gene.Rd | 34 ++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 man/gene_from_gene.Rd diff --git a/NAMESPACE b/NAMESPACE index 01d495b..bb5f80d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,6 +31,7 @@ export(design_matrix) export(enclosed_brush) export(excluded_ids) export(extract_model) +export(gene_from_gene) export(get_bootstrap_summary) export(get_bootstraps) export(get_quantile) diff --git a/R/sleuth.R b/R/sleuth.R index fea2305..1c1e6bd 100644 --- a/R/sleuth.R +++ b/R/sleuth.R @@ -1150,6 +1150,52 @@ transcripts_from_gene <- function(obj, test, test_type, table$target_id[table[, 2] == gene_name] } +#' Get the gene ID using other gene identifiers +#' +#' Get the \code{target_id} of a gene using other gene identifiers. +#' The identifiers found under the \code{obj$gene_column} are often +#' difficult to remember (e.g. ensembl gene ID, ENSG00000111640). +#' This function allows a user to find that difficult-to-remember +#' identifier using more-easily-remembered identifiers, such as +#' gene symbol (e.g. "GAPDH"). +#' +#' @param obj a \code{sleuth} object +#' @param gene_colname the name of the column containing 'gene_name'. +#' This parameter refers to the name of the column that the gene you are searching for appears in. +#' Check the column names using \code{colnames(obj$target_mapping)}. +#' @param gene_name a string containing the name of the gene you are interested in. +#' @return a character vector containing the \code{target_id} of the gene, found under +#' \code{obj$gene_column} within \code{obj$target_mapping}. 
+#' If the column name provided is the same as \code{obj$gene_column}, and the +#' gene_name used is found, that gene_name will be returned. +#' @examples +#' \dontrun{gene_from_gene(obj, "gene_symbol", "GAPDH")} +#' @export +gene_from_gene <- function(obj, gene_colname, gene_name) { + + if (!obj$gene_mode) { + stop("this sleuth object is in transcript mode. Please use 'transcripts_from_gene' instead.") + } + + table <- as.data.frame(obj$target_mapping) + if (gene_colname == obj$gene_column) { + if (!(gene_name %in% table[[obj$gene_column]])) { + stop("Couldn't find gene ", gene_name) + } else { + return(gene_name) + } + } + + table <- unique(table[, c(obj$gene_column, gene_colname)]) + if (!(gene_name %in% table[, 2])) { + stop("Couldn't find gene ", gene_name) + } + hits <- unique(table[table[,2] == gene_name, 1]) + if (length(hits) > 1) { + warning("there was more than one gene ID that matched this identifier; taking the first one") + } + hits[1] + } #' Change sleuth transform counts function #' diff --git a/man/gene_from_gene.Rd b/man/gene_from_gene.Rd new file mode 100644 index 0000000..0f7077e --- /dev/null +++ b/man/gene_from_gene.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sleuth.R +\name{gene_from_gene} +\alias{gene_from_gene} +\title{Get the gene ID using other gene identifiers} +\usage{ +gene_from_gene(obj, gene_colname, gene_name) +} +\arguments{ +\item{obj}{a \code{sleuth} object} + +\item{gene_colname}{the name of the column containing 'gene_name'. +This parameter refers to the name of the column that the gene you are searching for appears in. +Check the column names using \code{colnames(obj$target_mapping)}.} + +\item{gene_name}{a string containing the name of the gene you are interested in.} +} +\value{ +a character vector containing the \code{target_id} of the gene, found under + \code{obj$gene_column} within \code{obj$target_mapping}. 
+ If the column name provided is the same as \code{obj$gene_column}, and the + gene_name used is found, that gene_name will be returned. +} +\description{ +Get the \code{target_id} of a gene using other gene identifiers. +The identifiers found under the \code{obj$gene_column} are often +difficult to remember (e.g. ensembl gene ID, ENSG00000111640). +This function allows a user to find that difficult-to-remember +identifier using more-easily-remembered identifiers, such as +gene symbol (e.g. "GAPDH"). +} +\examples{ + \dontrun{gene_from_gene(obj, "gene_symbol", "GAPDH")} +} From 50b35d0629c7f46fa8303b08b3ce30efce462b31 Mon Sep 17 00:00:00 2001 From: warrenmcg Date: Wed, 27 Jun 2018 14:23:27 -0500 Subject: [PATCH 02/11] fix outdated documentation for 'sleuth_to_matrix' --- R/matrix.R | 5 +++-- man/sleuth_to_matrix.Rd | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/R/matrix.R b/R/matrix.R index 4ffecf3..b0fff75 100644 --- a/R/matrix.R +++ b/R/matrix.R @@ -24,8 +24,9 @@ #' @param which_df character vector of length one. Which type of data to use #' ("obs_norm" or "obs_raw") #' @param which_units character vector of length one. Which units to use ("tpm" -#' or "est_counts") -#' @return a matrix which contains a matrix of target_ids and transcript expression in \code{which_units} +#' or "est_counts" (for transcript-level analyses) or "scaled_reads_per_base" (for gene-level analyses)) +#' @return a matrix which contains a matrix of target_ids and transcript (or gene) expression in \code{which_units}. +#' Note this currently does not support returning raw values for gene-level counts or TPMs. 
#' @examples #' sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm') #' head(sleuth_matrix) # look at first 5 transcripts, sorted by name diff --git a/man/sleuth_to_matrix.Rd b/man/sleuth_to_matrix.Rd index bc4bc30..c5e0f6d 100644 --- a/man/sleuth_to_matrix.Rd +++ b/man/sleuth_to_matrix.Rd @@ -13,16 +13,16 @@ sleuth_to_matrix(obj, which_df, which_units) ("obs_norm" or "obs_raw")} \item{which_units}{character vector of length one. Which units to use ("tpm" -or "est_counts")} +or "est_counts" (for transcript-level analyses) or "scaled_reads_per_base" (for gene-level analyses))} } \value{ -a \code{list} with an attribute 'data', which contains a matrix of target_ids - and transcript expression in \code{which_units} +a matrix which contains a matrix of target_ids and transcript (or gene) expression in \code{which_units}. + Note this currently does not support returning raw values for gene-level counts or TPMs. } \description{ Convert a sleuth object to a matrix with the condition names. } \examples{ sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm') -head(sleuth_matrix$data) # look at first 5 transcripts, sorted by name +head(sleuth_matrix) # look at first 5 transcripts, sorted by name } From f94976bf1f1c7aaae3268c1372e12bd30541e784 Mon Sep 17 00:00:00 2001 From: "Andrew J. Rech" Date: Mon, 4 Feb 2019 18:22:55 -0500 Subject: [PATCH 03/11] Improve obs_to_matrix with data.table Replace reshape2::dcast with data.table::dcast for speed gain and to avoid an issue with long sample names causing vector return error in reshape2 but not data.table: Using reshape2: ``` obs_counts <- dcast(obj$obs_norm, target_id ~ sample, value.var = value_name) Aggregation function missing: defaulting to length Error during wrapup: dims [product 868328] do not match the length of object [41674225] ``` But no error using data.table. 
[`obj_norm`](https://s3.amazonaws.com/rech-ul/obj_norm.RDS?AWSAccessKeyId=AKIAI6SXE4VOIPIZJI6Q&Expires=1601162547&Signature=3lPIt6yZ2UPFhMioUwwRopzo8eM%3D) to reproduce: --- R/sleuth.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/sleuth.R b/R/sleuth.R index 1c1e6bd..08523fe 100644 --- a/R/sleuth.R +++ b/R/sleuth.R @@ -1007,7 +1007,8 @@ kallisto_table <- function(obj, # @return a matrix with the appropriate names obs_to_matrix <- function(obj, value_name) { - obs_counts <- reshape2::dcast(obj$obs_norm, target_id ~ sample, + data.table::as.data.table(obj$obs_norm) <- data.table::as.data.table(obj$obs_norm) + obs_counts <- data.table::dcast(obj$obs_norm, target_id ~ sample, value.var = value_name) obs_counts <- as.data.frame(obs_counts) From 6a1e85d1b364e57f35b10bc0a60d29cb933a61d1 Mon Sep 17 00:00:00 2001 From: "Andrew J. Rech" Date: Mon, 4 Feb 2019 18:54:34 -0500 Subject: [PATCH 04/11] typo --- R/sleuth.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/sleuth.R b/R/sleuth.R index 08523fe..4ec6a8b 100644 --- a/R/sleuth.R +++ b/R/sleuth.R @@ -1007,7 +1007,7 @@ kallisto_table <- function(obj, # @return a matrix with the appropriate names obs_to_matrix <- function(obj, value_name) { - data.table::as.data.table(obj$obs_norm) <- data.table::as.data.table(obj$obs_norm) + obj$obs_norm <- data.table::as.data.table(obj$obs_norm) obs_counts <- data.table::dcast(obj$obs_norm, target_id ~ sample, value.var = value_name) From ea5a3904d92efa15d56002bbc5e28b7bf57be70b Mon Sep 17 00:00:00 2001 From: andrewrech Date: Mon, 4 Feb 2019 19:12:49 -0500 Subject: [PATCH 05/11] data.table::dcast used on data frame --- R/bootstrap.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/bootstrap.R b/R/bootstrap.R index 6e1d11f..67edaac 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -485,6 +485,7 @@ process_bootstrap <- function(i, samp_name, kal_path, mappings) # this step undoes the tidying to get back a matrix format # target_ids here are now 
the aggregation column ids + scaled_bs <- data.table::as.data.table(scaled_bs) bs_mat <- data.table::dcast(scaled_bs, sample ~ target_id, value.var = "scaled_reads_per_base") # this now has the same format as the transcript matrix From 395bcd8c2fa205ba92195fc0d3ba7568c6d8d60a Mon Sep 17 00:00:00 2001 From: andrewrech Date: Sat, 23 Feb 2019 19:57:04 -0500 Subject: [PATCH 06/11] Bandaid for infernal issue https://github.com/pachterlab/sleuth/issues/135 --- NAMESPACE | 1 + R/fix_head.R | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 R/fix_head.R diff --git a/NAMESPACE b/NAMESPACE index bb5f80d..b76159c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -41,6 +41,7 @@ export(log_transform) export(melt_bootstrap_sleuth) export(models) export(norm_factors) +export(head) export(plot_bootstrap) export(plot_fld) export(plot_group_density) diff --git a/R/fix_head.R b/R/fix_head.R new file mode 100644 index 0000000..65fdc0d --- /dev/null +++ b/R/fix_head.R @@ -0,0 +1,6 @@ + +## ---- head +#' Fix unexported head error. +#' +#' @export head +head <- utils::head From d05cd9b9c48b85a5a5b132681982fb10005b1a08 Mon Sep 17 00:00:00 2001 From: "Andrew J. Rech" Date: Sat, 16 Mar 2019 11:03:42 -0400 Subject: [PATCH 07/11] importFrom utils head --- R/sleuth.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/sleuth.R b/R/sleuth.R index 4ec6a8b..d387818 100644 --- a/R/sleuth.R +++ b/R/sleuth.R @@ -1092,6 +1092,7 @@ summary.sleuth <- function(obj, covariates = TRUE) { #' head(sleuth_genes) # show info for first 5 genes #' sleuth_genes[1:5, 6] # show transcripts for first 5 genes #' @export +#' @importFrom utils head sleuth_gene_table <- function(obj, test, test_type = 'lrt', which_model = 'full', which_group = 'ens_gene') { if (is.null(obj$target_mapping)) { From ed95e72ccdec960c0a5c2746e677c53ee8c95996 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Rech" Date: Sat, 16 Mar 2019 11:04:51 -0400 Subject: [PATCH 08/11] importFrom utils head --- R/matrix.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/matrix.R b/R/matrix.R index b0fff75..ee17e41 100644 --- a/R/matrix.R +++ b/R/matrix.R @@ -31,6 +31,7 @@ #' sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm') #' head(sleuth_matrix) # look at first 5 transcripts, sorted by name #' @export +#' @importFrom utils head sleuth_to_matrix <- function(obj, which_df, which_units) { if ( !(which_df %in% c("obs_norm", "obs_raw")) ) { stop("Invalid object") From e4341e6d78ed64dc7864db7b711eafb2912d0434 Mon Sep 17 00:00:00 2001 From: "Andrew J. Rech" Date: Sat, 16 Mar 2019 11:06:53 -0400 Subject: [PATCH 09/11] utils::head For d05cd9b ed95e7 --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index b76159c..03f118b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -88,3 +88,4 @@ importFrom(lazyeval,interp) importFrom(lazyeval,lazy) importFrom(rhdf5,h5write) importFrom(rhdf5,h5write.default) +importFrom(utils, head) From 5ca29dc05bb541ab7ffd3e296feb1bb491dab63d Mon Sep 17 00:00:00 2001 From: "Andrew J. Rech" Date: Sat, 16 Mar 2019 11:12:12 -0400 Subject: [PATCH 10/11] convert plots.R reshape2 calls to data.table `data.table::dcast` only has a method for data tables, so `tabd_df` is converted with `as.data.table` first. The result is converted back to a data frame, which is what `reshape2::dcast` returned. 
--- R/plots.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/plots.R b/R/plots.R index 5077857..5d6d0e1 100644 --- a/R/plots.R +++ b/R/plots.R @@ -1035,13 +1035,13 @@ plot_transcript_heatmap <- function(obj, if (units == 'tpm') { tabd_df <- dplyr::select(tabd_df, target_id, sample, tpm) - tabd_df <- reshape2::dcast(tabd_df, target_id ~sample, value.var = 'tpm') + tabd_df <- as.data.frame(data.table::dcast(data.table::as.data.table(tabd_df), target_id ~sample, value.var = 'tpm')) } else if (units == 'est_counts') { tabd_df <- dplyr::select(tabd_df, target_id, sample, est_counts) - tabd_df <- reshape2::dcast(tabd_df, target_id ~sample, value.var = 'est_counts') + tabd_df <- as.data.frame(data.table::dcast(data.table::as.data.table(tabd_df), target_id ~sample, value.var = 'est_counts')) } else if (units == 'scaled_reads_per_base') { tabd_df <- dplyr::select(tabd_df, target_id, sample, scaled_reads_per_base) - tabd_df <- reshape2::dcast(tabd_df, target_id ~sample, - value.var = 'scaled_reads_per_base') + tabd_df <- as.data.frame(data.table::dcast(data.table::as.data.table(tabd_df), target_id ~sample, + value.var = 'scaled_reads_per_base')) } else { stop("Didn't recognize the following unit: ", units) From b076a022a738c2181368b740a237b18de5f238ec Mon Sep 17 00:00:00 2001 From: "Andrew J. Rech" Date: Sat, 16 Mar 2019 11:17:42 -0400 Subject: [PATCH 11/11] Remove reshape2 import --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2a567bc..3a2e896 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,6 @@ Imports: dplyr, data.table, tidyr, - reshape2, rhdf5, parallel, lazyeval,