Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ Imports:
scales,
SummarizedExperiment,
GenomicRanges,
methods
methods,
S4Vectors
Suggests:
BiocStyle,
testthat,
Expand All @@ -53,7 +54,6 @@ Suggests:
Seurat,
KernSmooth,
Rtsne,
S4Vectors,
ggplot2,
widyr,
clusterProfiler,
Expand Down Expand Up @@ -82,7 +82,7 @@ Biarch: true
biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.2
RoxygenNote: 7.2.0
LazyDataCompression: xz
URL: https://github.com/stemangiola/tidybulk
BugReports: https://github.com/stemangiola/tidybulk/issues
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ import(readr)
import(tibble)
import(tidyr)
importFrom(GenomicRanges,makeGRangesListFromDataFrame)
importFrom(S4Vectors,metadata)
importFrom(SummarizedExperiment,SummarizedExperiment)
importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
Expand Down
8 changes: 4 additions & 4 deletions R/dplyr_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ rowwise.tidybulk <- function(data, ...)
#'
#' @examples
#'`%>%` = magrittr::`%>%`
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU")
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU")
#' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% left_join(annotation)
#'
#' @rdname dplyr-methods
Expand Down Expand Up @@ -763,7 +763,7 @@ left_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x",
#'
#' @examples
#'`%>%` = magrittr::`%>%`
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU")
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU")
#' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% inner_join(annotation)
#'
#' @rdname join-methods
Expand Down Expand Up @@ -802,7 +802,7 @@ inner_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x",
#'
#' @examples
#'`%>%` = magrittr::`%>%`
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU")
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU")
#' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% right_join(annotation)
#'
#' @rdname join-methods
Expand Down Expand Up @@ -843,7 +843,7 @@ right_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x",
#'
#' @examples
#'`%>%` = magrittr::`%>%`
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU")
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU")
#' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% full_join(annotation)
#'
#' @rdname join-methods
Expand Down
63 changes: 22 additions & 41 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ setOldClass("tidybulk")
#'
#' @examples
#'
#' my_tt = tidybulk(tidybulk::se_mini)
#' tidybulk(tidybulk::se_mini)
#'
#'
#' @docType methods
Expand Down Expand Up @@ -1353,9 +1353,7 @@ setMethod("remove_redundancy", "tidybulk", .remove_redundancy)
#' cm$batch = 0
#' cm$batch[colnames(cm) %in% c("SRR1740035", "SRR1740043")] = 1
#'
#' res =
#' cm %>%
#' tidybulk(sample, transcript, count) |>
#' identify_abundant() |>
#' adjust_abundance( ~ condition + batch )
#'
Expand Down Expand Up @@ -1675,7 +1673,7 @@ setMethod("aggregate_duplicates", "tidybulk", .aggregate_duplicates)
#' library(dplyr)
#'
#' # Subsetting for time efficiency
#' tidybulk::se_mini |> tidybulk() |>filter(sample=="SRR1740034") |> deconvolve_cellularity(sample, feature, count, cores = 1)
#' tidybulk::se_mini |> deconvolve_cellularity(cores = 1)
#'
#'
#' @docType methods
Expand Down Expand Up @@ -1815,7 +1813,10 @@ setMethod("deconvolve_cellularity",
#'
#' @examples
#'
#' tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = feature, .sample = sample)
#' # This function was designed for data.frame
#' # Convert from SummarizedExperiment for this example. It is NOT recommended.
#'
#' tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = .feature, .sample = .sample)
#'
#' @export
#'
Expand Down Expand Up @@ -2014,7 +2015,10 @@ setMethod("describe_transcript", "tidybulk", .describe_transcript)
#'
#' library(dplyr)
#'
#' tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(feature)
#' # This function was designed for data.frame
#' # Convert from SummarizedExperiment for this example. It is NOT recommended.
#'
#' tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(.feature)
#'
#'
#'
Expand Down Expand Up @@ -2882,8 +2886,10 @@ setMethod("keep_abundant", "tidybulk", .keep_abundant)
#' @examples
#' \dontrun{
#'
#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample)
#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count)
#' library(SummarizedExperiment)
#' se = tidybulk::se_mini
#' rowData( se)$entrez = rownames(se )
#' df_entrez = aggregate_duplicates(se,.transcript = entrez )
#'
#' library("EGSEA")
#'
Expand Down Expand Up @@ -3075,9 +3081,8 @@ setMethod("test_gene_enrichment",
#'
#' @examples
#'
#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample)
#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count)
#' df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7"))
#' #se_mini = aggregate_duplicates(tidybulk::se_mini, .transcript = entrez)
#' #df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7"))
#'
#' \dontrun{
#' test_gene_overrepresentation(
Expand Down Expand Up @@ -3245,15 +3250,14 @@ setMethod("test_gene_overrepresentation",
#'
#' \dontrun{
#'
#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample)
#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count)
#' df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7"))
#' df_entrez = tidybulk::se_mini
#' df_entrez = mutate(df_entrez, do_test = .feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7"))
#' df_entrez = df_entrez %>% test_differential_abundance(~ condition)
#'
#'
#' test_gene_rank(
#' df_entrez,
#' .sample = sample,
#' .sample = .sample,
#' .entrez = entrez,
#' species="Homo sapiens",
#' gene_sets =c("C2"),
Expand Down Expand Up @@ -3591,7 +3595,7 @@ setMethod("pivot_transcript",
#'
#' @examples
#'
#' tidybulk::se_mini |> tidybulk() |> fill_missing_abundance( fill_with = 0)
#' # tidybulk::se_mini |> fill_missing_abundance( fill_with = 0)
#'
#'
#' @docType methods
Expand Down Expand Up @@ -3862,19 +3866,8 @@ setMethod("impute_missing_abundance", "tidybulk", .impute_missing_abundance)
#' )
#'
#' # Cox regression - multiple
#' library(dplyr)
#' library(tidyr)
#'
#' tidybulk::se_mini |>
#' tidybulk() |>
#'
#' # Add survival data
#' nest(data = -sample) |>
#' mutate(
#' days = c(1, 10, 500, 1000, 2000),
#' dead = c(1, 1, 1, 0, 1)
#' ) %>%
#' unnest(data) |>
#'
#' # Test
#' test_differential_cellularity(
Expand Down Expand Up @@ -4019,15 +4012,6 @@ setMethod("test_differential_cellularity",
#' library(tidyr)
#'
#' tidybulk::se_mini |>
#' tidybulk() |>
#'
#' # Add survival data
#' nest(data = -sample) |>
#' mutate(
#' days = c(1, 10, 500, 1000, 2000),
#' dead = c(1, 1, 1, 0, 1)
#' ) %>%
#' unnest(data) |>
#' test_stratification_cellularity(
#' survival::Surv(days, dead) ~ .,
#' cores = 1
Expand Down Expand Up @@ -4138,10 +4122,8 @@ setMethod("test_stratification_cellularity",
#'
#' @examples
#'
#' # Define tidybulk tibble
#' df = tidybulk(tidybulk::se_mini)
#'
#' get_bibliography(df)
#' get_bibliography(tidybulk::se_mini)
#'
#'
#'
Expand Down Expand Up @@ -4236,9 +4218,8 @@ setMethod("get_bibliography",
#'
#' @examples
#'
#' library(dplyr)
#'
#' tidybulk::se_mini |> tidybulk() |> select(feature, count) |> head() |> as_matrix(rownames=feature)
#' tibble(.feature = "CD3G", count=1) |> as_matrix(rownames=.feature)
#'
#' @export
as_matrix <- function(tbl,
Expand Down
68 changes: 19 additions & 49 deletions R/methods_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,41 +24,12 @@
~ as.symbol(.x),
~ NULL)

sample_info <-
colData(.data) %>%
.as_tibble_optimised(.data) %>%

# If reserved column names are present add .x
change_reserved_column_names() %>%

# Convert to tibble
tibble::as_tibble(rownames="sample")


range_info <-
get_special_datasets(.data) %>%
reduce(left_join, by="coordinate")

gene_info <-
rowData(.data) %>%

# If reserved column names are present add .x
change_reserved_column_names() %>%

# Convert to tibble
tibble::as_tibble(rownames="feature")

count_info <- get_count_datasets(.data)

# Return
count_info %>%
left_join(sample_info, by="sample") %>%
left_join(gene_info, by="feature") %>%
when(nrow(range_info) > 0 ~ (.) %>% left_join(range_info) %>% suppressMessages(), ~ (.)) %>%

mutate_if(is.character, as.factor) %>%
# mutate_if(is.character, as.factor) %>%
tidybulk(
sample,
feature,
!!as.symbol(sample__$name),
!!as.symbol(feature__$name),
!!as.symbol(SummarizedExperiment::assays(.data)[1] %>% names ),
!!norm_col # scaled counts if any
)
Expand Down Expand Up @@ -787,23 +758,22 @@ setMethod("adjust_abundance",

collapse_function = function(x){ x %>% unique() %>% paste(collapse = "___") }

feature_column_name = ".feature"

# Row data
new_row_data =
.data %>%
rowData() %>%
as_tibble(rownames = feature_column_name) %>%
as_tibble(rownames = feature__$name) %>%
group_by(!!as.symbol(quo_name(.transcript))) %>%
summarise(
across(everything(), ~ .x %>% collapse_function()),
merged.transcripts = n()
) %>%
arrange(!!as.symbol(feature_column_name)) %>%
arrange(!!as.symbol(feature__$name)) %>%
as.data.frame()

rownames(new_row_data) = new_row_data[,feature_column_name]
new_row_data = new_row_data %>% select(-feature_column_name)
rownames(new_row_data) = new_row_data[,feature__$name]
new_row_data = new_row_data %>% select(-feature__$name)

# Counts
new_count_data =
Expand All @@ -824,7 +794,7 @@ setMethod("adjust_abundance",
)

# GRanges
columns_to_collapse = .data %>% rowData() %>% colnames() %>% setdiff(quo_name(.transcript)) %>% c(feature_column_name)
columns_to_collapse = .data %>% rowData() %>% colnames() %>% setdiff(quo_name(.transcript)) %>% c(feature__$name)

rr = rowRanges(.data)

Expand All @@ -834,27 +804,27 @@ setMethod("adjust_abundance",
as_tibble() %>%
# Add names
when(
is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature_column_name) := group_name),
~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAME)
is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature__$name) := group_name),
~ mutate(., !!as.symbol(feature__$name) := rr@ranges@NAME)
) %>%
left_join(
rowData(.data) %>%
as.data.frame() %>%
select(!!as.symbol(quo_name(.transcript))) %>%
as_tibble(rownames =feature_column_name),
by = feature_column_name
as_tibble(rownames =feature__$name),
by = feature__$name
) %>%
group_by(!!as.symbol(quo_name(.transcript))) %>%
mutate(
across(columns_to_collapse, ~ .x %>% collapse_function()),
merged.transcripts = n()
) %>%
arrange(!!as.symbol(feature_column_name)) %>%
arrange(!!as.symbol(feature__$name)) %>%

select(-one_of("group_name", "group")) %>%
suppressWarnings() %>%

makeGRangesListFromDataFrame( split.field = feature_column_name,
makeGRangesListFromDataFrame( split.field = feature__$name,
keep.extra.columns = TRUE) %>%

.[match(rownames(new_count_data[[1]]), names(.))]
Expand Down Expand Up @@ -1894,7 +1864,7 @@ setMethod("test_gene_rank",
) %>%

# Convert to tibble
tibble::as_tibble(rownames="sample")
tibble::as_tibble(rownames=sample__$name)



Expand Down Expand Up @@ -1934,7 +1904,7 @@ setMethod("pivot_sample",

range_info <-
get_special_datasets(.data) %>%
reduce(left_join, by="feature")
reduce(left_join, by=feature__$name)

gene_info <-
rowData(.data) %>%
Expand All @@ -1946,11 +1916,11 @@ setMethod("pivot_sample",
) %>%

# Convert to tibble
tibble::as_tibble(rownames="feature")
tibble::as_tibble(rownames=feature__$name)

gene_info %>%
when(
nrow(range_info) > 0 ~ (.) %>% left_join(range_info, by="feature"),
nrow(range_info) > 0 ~ (.) %>% left_join(range_info, by=feature__$name),
~ (.)
)
}
Expand Down
Loading