Ran github actions

smped · Jan 22, 2024 · 317999b · 317999b
1 parent d1bceca
commit 317999b
Show file tree

Hide file tree

Showing 17 changed files with 208 additions and 55 deletions.
diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml
@@ -52,8 +52,8 @@ jobs:
       matrix:
         config:
           - { os: ubuntu-latest, r: '4.3', bioc: '3.18', cont: "bioconductor/bioconductor_docker:RELEASE_3_18", rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest" }
-          - { os: macOS-latest, r: '4.3', bioc: '3.18', cont: "bioconductor/bioconductor_docker:RELEASE_3_18", rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest" }
-          - { os: windows-latest, r: '4.3', bioc: '3.18', cont: "bioconductor/bioconductor_docker:RELEASE_3_18", rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest" }
+          - { os: macOS-latest, r: '4.3', bioc: '3.18'}
+          - { os: windows-latest, r: '4.3', bioc: '3.18'}
 ## Dockers for macOS and Windows can't handle r: 'next'
 #          - { os: ubuntu-latest, r: 'next', bioc: 'devel', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest" }
 #          - { os: macOS-latest, r: 'next', bioc: 'devel'}
@@ -65,6 +65,7 @@ jobs:
       TZ: UTC
       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
 
     steps:
 
@@ -254,7 +255,7 @@ jobs:
           git config --global user.email "actions@github.com"
           git config --global user.name "GitHub Actions"
           git config --global --add safe.directory /__w/transmogR/transmogR
-          Rscript -e "pkgdown::deploy_to_branch(new_process = FALSE)"
+          Rscript -e "pkgdown::deploy_to_branch(clean = TRUE)"
         shell: bash {0}
         ## Note that you need to run pkgdown::deploy_to_branch(new_process = FALSE)
         ## at least once locally before this will work. This creates the gh-pages

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: transmogR
 Type: Package
 Title: Modify a set of reference sequences using a set of variants
-Version: 0.1.7
+Version: 0.1.8
 Authors@R: person("Stevie", "Pederson", 
     email = "stephen.pederson.au@gmail.com", 
     role = c("aut", "cre"),
@@ -48,5 +48,5 @@ biocViews: Alignment, GenomicVariation, Sequencing, TranscriptomeVariant
 BiocType: Software
 VignetteBuilder: knitr
 Roxygen: list(markdown = TRUE)  
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.0
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -24,14 +24,15 @@ importFrom(Biostrings,getSeq)
 importFrom(ComplexUpset,intersection_size)
 importFrom(ComplexUpset,upset)
 importFrom(ComplexUpset,upset_set_size)
-importFrom(GenomeInfoDb,'seqinfo<-')
-importFrom(GenomeInfoDb,'seqlevels<-')
+importFrom(GenomeInfoDb,"genome<-")
+importFrom(GenomeInfoDb,"seqinfo<-")
+importFrom(GenomeInfoDb,"seqlevels<-")
 importFrom(GenomeInfoDb,seqinfo)
 importFrom(GenomeInfoDb,seqlevels)
 importFrom(GenomeInfoDb,seqlevelsInUse)
 importFrom(GenomeInfoDb,seqnames)
 importFrom(GenomicFeatures,extractTranscriptSeqs)
-importFrom(IRanges,'width<-')
+importFrom(IRanges,"width<-")
 importFrom(IRanges,Views)
 importFrom(IRanges,end)
 importFrom(IRanges,findOverlaps)
@@ -40,14 +41,14 @@ importFrom(IRanges,start)
 importFrom(IRanges,subsetByOverlaps)
 importFrom(IRanges,width)
 importFrom(InteractionSet,GInteractions)
-importFrom(S4Vectors,'mcols<-')
+importFrom(S4Vectors,"mcols<-")
 importFrom(S4Vectors,DataFrame)
 importFrom(S4Vectors,mcols)
 importFrom(S4Vectors,queryHits)
 importFrom(S4Vectors,splitAsList)
 importFrom(S4Vectors,subjectHits)
 importFrom(SummarizedExperiment,rowRanges)
-importFrom(VariantAnnotation,'vcfWhich<-')
+importFrom(VariantAnnotation,"vcfWhich<-")
 importFrom(VariantAnnotation,ScanVcfParam)
 importFrom(VariantAnnotation,readVcf)
 importFrom(ggplot2,aes)
@@ -63,7 +64,7 @@ importFrom(ggplot2,ylab)
 importFrom(methods,as)
 importFrom(methods,is)
 importFrom(parallel,mclapply)
-importFrom(rlang,':=')
+importFrom(rlang,":=")
 importFrom(rlang,list2)
 importFrom(stats,aggregate)
 importFrom(stats,as.formula)
diff --git a/NEWS.md b/NEWS.md
@@ -36,3 +36,7 @@
 
 * Renamed mogrify*() to `transmogrify()` and `genomogrify()`
 * Added masking to `genomogrify()`
+
+# transmogR 0.1.8
+
+* Switched `parY()` to S4 methods
diff --git a/R/genomogrify.R b/R/genomogrify.R
@@ -1,7 +1,19 @@
 #' @title Mogrify a genome using a set of variants
+#'
 #' @description
 #' Use a set of SNPs, insertions and deletions to modify a reference genome
 #'
+#' @details
+#' This function is designed to create a variant-modified reference genome,
+#' intended to be included as a set of decoys when using salmon in selective
+#' alignment mode.
+#' Sequence lengths will change if InDels are included and any coordinate-based
+#' information will be lost on the output of this function.
+#'
+#' Tags are able to be added to any modified sequence to assist identifying any
+#' changes that have been made to a sequence.
+#'
+#'
 #' @param x A DNAStringSet or BSgenome
 #' @param var GRanges object containing the variants, or a
 #' [VariantAnnotation::VcfFile]

diff --git a/R/helpers.R b/R/helpers.R
@@ -1,6 +1,6 @@
 #' @keywords internal
 #' @importFrom Biostrings IUPAC_CODE_MAP
-#' @importFrom S4Vectors mcols 'mcols<-'
+#' @importFrom S4Vectors mcols mcols<-
 .checkAlts <- function(var, alt_col) {
 
     alt_col <- match.arg(alt_col, colnames(mcols(var)))
@@ -25,8 +25,8 @@
 #' @keywords internal
 #' @import GenomicRanges
 #' @importClassesFrom VariantAnnotation ScanVcfParam
-#' @importFrom VariantAnnotation readVcf ScanVcfParam 'vcfWhich<-'
-#' @importFrom S4Vectors mcols 'mcols<-'
+#' @importFrom VariantAnnotation readVcf ScanVcfParam vcfWhich<-
+#' @importFrom S4Vectors mcols mcols<-
 #' @importFrom SummarizedExperiment rowRanges
 #' @importFrom GenomeInfoDb seqinfo
 #' @importFrom methods is
@@ -62,7 +62,7 @@
     args
 }
 
-#' @importFrom S4Vectors mcols 'mcols<-'
+#' @importFrom S4Vectors mcols mcols<-
 #' @importFrom InteractionSet GInteractions
 #' @keywords internal
 .giFromSj <- function(sj, tx_col, rank_col) {

diff --git a/R/indelcator.R b/R/indelcator.R
@@ -4,6 +4,9 @@
 #' Modify one or more sequences to include Insertions or Deletions
 #'
 #' @details
+#' This is a lower-level function relied on by both [transmogrify()] and
+#' [genomogrify()].
+#'
 #' Takes an [Biostrings::XString] or [Biostrings::XStringSet] object and
 #' modifies the sequence to incorporate InDels.
 #' The expected types of data determine the behaviour, with the following
@@ -27,7 +30,7 @@
 #' @param names passed to [BSgenome::getSeq] when x is a BSgenome object
 #' @param ... Passed to [parallel::mclapply]
 #'
-#' @seealso [transmogrify()]
+#' @seealso [transmogrify()] [genomogrify()]
 #'
 #' @examples
 #' ## Start with a DNAStringSet
@@ -58,7 +61,7 @@ setGeneric(
 #' @import GenomicRanges
 #' @importFrom methods is as
 #' @importFrom IRanges subsetByOverlaps findOverlaps
-#' @importFrom S4Vectors mcols 'mcols<-' queryHits subjectHits DataFrame
+#' @importFrom S4Vectors mcols mcols<- queryHits subjectHits DataFrame
 #' @importFrom stats aggregate
 #' @rdname indelcator-methods
 #' @aliases indelcator
@@ -132,8 +135,8 @@ setMethod(
 #' @import Biostrings
 #' @import GenomicRanges
 #' @importFrom GenomeInfoDb seqnames seqinfo seqlevelsInUse
-#' @importFrom S4Vectors splitAsList mcols 'mcols<-'
-#' @importFrom IRanges width Views start end 'width<-'
+#' @importFrom S4Vectors splitAsList mcols mcols<-
+#' @importFrom IRanges width Views start end width<-
 #' @importFrom parallel mclapply
 #' @rdname indelcator-methods
 #' @aliases indelcator

diff --git a/R/owl.R b/R/owl.R
@@ -12,8 +12,11 @@
 #' @param ... Passed to [Biostrings::replaceLetterAt()]
 #'
 #' @details
-#' If providing a BSgenome object, this will first be coerced to a DNAStringSet
-#' which can be time consuming
+#' This is a lower-level function called by [transmogrify()] and
+#' [genomogrify()], but can also be called directly by the user if needed.
+#'
+#' Note that when providing a BSgenome object, this will first be coerced to a
+#' DNAStringSet which can be time consuming.
 #'
 #' @return An object of the same class as the original object, but with SNPs
 #' inserted at the supplied positions
@@ -29,7 +32,7 @@
 #' @import Biostrings
 #' @importFrom S4Vectors mcols
 #' @importFrom methods is
-#' @importFrom GenomeInfoDb seqinfo 'seqinfo<-' seqlevels seqnames 'seqlevels<-'
+#' @importFrom GenomeInfoDb seqinfo seqinfo<- seqlevels seqnames seqlevels<-
 #' @importFrom BSgenome getSeq
 #'
 #' @export
@@ -41,7 +44,7 @@ setGeneric(
 #' @import Biostrings
 #' @importFrom S4Vectors mcols
 #' @importFrom methods is
-#' @importFrom GenomeInfoDb seqinfo 'seqinfo<-' seqlevels seqnames 'seqlevels<-'
+#' @importFrom GenomeInfoDb seqinfo seqinfo<- seqlevels seqnames seqlevels<-
 #' @rdname owl-methods
 #' @aliases owl
 #' @export

diff --git a/R/parY.R b/R/parY.R
@@ -9,11 +9,15 @@
 #' variations, create a GRanges object with the Pseudo-Autosomal Regions from
 #' the Y chromosome for that build.
 #' The length of the Y chromosome on the seqinfo object is used to determine
-#' the correct genome build
+#' the correct genome build when passing a Seqinfo object.
+#' Otherwise, the build is determined from the name supplied.
 #'
 #' An additional mcols column called PAR will indicate PAR1 and PAR2
 #'
-#' @param x A Seqinfo object
+#' @param x A Seqinfo object or the name of a supported build. If passing
+#' a character vector, [match.arg()] will be used to match the build.
+#' @param prefix Optional prefix to place before chromosome names. Can only be
+#' NULL, "" or "chr"
 #' @param ... Not used
 #'
 #' @examples
@@ -29,30 +33,74 @@
 #' )
 #' parY(sq)
 #'
+#' ## Or just call by name
+#' parY("GRCh38", prefix = "chr")
 #'
+#'
+#' @export
+#' @name parY
+#' @rdname parY-methods
+setGeneric(
+    "parY", function(x, ...){standardGeneric("parY")} ## methods dispatch on x
+)
 #' @import GenomicRanges
 #' @importFrom GenomeInfoDb seqnames
-#' @export
-parY <- function(x, ...){
-
-    ## Retain the seqnames but estimate the genome build.
-    stopifnot(is(x, "Seqinfo"))
-    hasChr <- any(grepl("^chrY",seqnames(x)))
-    y <- "Y"
-    if (hasChr) y <- "chrY"
-    sq <- as.data.frame(x)
-    len <- sq[y, "seqlengths"]
-    par_df <- data.frame(
-        build = c("hg19", "hg38", "chm13v2.0"),
-        length = c(59373566, 57227415, 62460029),
-        par1 = c("10001-2781479", "10001-2781479", "1-2458320"),
-        par2 = c("59034050-59363566", "56887903-57217415", "62122809-62460029")
-    )
-    par_df <- subset(par_df, length == len)
-    if (nrow(par_df) == 0) stop("Invalid length for ", y)
-    rng <- paste0(y, ":", unlist(par_df[c("par1", "par2")]))
-    gr <- GRanges(rng, seqinfo = x)
-    gr$PAR <- paste0("PAR", seq_len(2))
-    gr
-
-}
+#' @rdname parY-methods
+#' @aliases parY-methods
+setMethod(
+    "parY", signature = signature(x = "Seqinfo"),
+    function(x, ...){
+        ## Build a GRanges of the Y-chromosome PARs using the seqinfo of x
+        ## Retain the seqnames but estimate the genome build.
+        stopifnot(is(x, "Seqinfo"))
+        hasChr <- any(grepl("^chrY",seqnames(x)))
+        y <- "Y"
+        if (hasChr) y <- "chrY"
+        sq <- as.data.frame(x)
+        len <- sq[y, "seqlengths"] ## length of the Y chromosome in x
+        par_df <- data.frame( ## Y lengths & PAR coordinates for each build
+            build = c("hg19", "hg38", "chm13v2.0"),
+            length = c(59373566, 57227415, 62460029),
+            par1 = c("10001-2649520", "10001-2781479", "1-2458320"),
+            par2 = c("59034050-59363566", "56887903-57217415", "62122809-62460029")
+        )
+        par_df <- subset(par_df, length == len) ## keep the build matching len
+        if (nrow(par_df) == 0) stop("Invalid length for ", y)
+        rng <- paste0(y, ":", unlist(par_df[c("par1", "par2")]))
+        gr <- GRanges(rng, seqinfo = x)
+        gr$PAR <- paste0("PAR", seq_len(2))
+        gr
+
+    }
+)
+#' @import GenomicRanges
+#' @importFrom GenomeInfoDb genome<-
+#' @rdname parY-methods
+#' @aliases parY-methods
+setMethod(
+    "parY", signature = signature(x = "character"),
+    function(x, prefix = NULL, ...) {
+        ## Build the Y-chromosome PARs as a GRanges from a named genome build
+        poss_builds <- c(
+            "hg19", "hg38", "chm13v2.0", "GRCh37", "GRCh38", "CHM13"
+        )
+        x <- match.arg(x, poss_builds)
+        if (!is.null(prefix)) stopifnot(prefix %in% c("chr", ""))
+
+        ## Now match the options. Synonymous names share the same coordinates
+        if (x %in% c("hg19", "GRCh37"))
+            gr <- paste0(prefix, c("Y:10001-2649520", "Y:59034050-59363566"))
+
+        if (x %in% c("hg38", "GRCh38"))
+            gr <- paste0(prefix, c("Y:10001-2781479", "Y:56887903-57217415"))
+        ## Must be "chm13v2.0" as in poss_builds, else gr is never defined
+        if (x %in% c("chm13v2.0", "CHM13"))
+            gr <- paste0(prefix, c("Y:1-2458320", "Y:62122809-62460029"))
+        ## Convert the range strings to a GRanges, label each PAR & set the build
+        gr <- GRanges(gr)
+        gr$PAR <- paste0("PAR", seq_len(2))
+        genome(gr) <- x
+        gr
+
+    }
+)
diff --git a/R/sjFromExons.R b/R/sjFromExons.R
@@ -56,7 +56,7 @@
 #' sjFromExons(gtf, as = "GInteractions")
 #'
 #'
-#' @importFrom S4Vectors mcols 'mcols<-'
+#' @importFrom S4Vectors mcols mcols<-
 #' @importFrom methods is
 #' @importFrom stats aggregate
 #' @export

diff --git a/R/upsetVarByCol.R b/R/upsetVarByCol.R
@@ -46,7 +46,7 @@
 #' @importFrom ComplexUpset intersection_size upset_set_size upset
 #' @importFrom ggplot2 aes geom_bar geom_text after_stat ggtitle position_stack
 #' @importFrom ggplot2 scale_y_reverse scale_y_continuous expansion ylab
-#' @importFrom rlang list2 ':='
+#' @importFrom rlang list2 :=
 #' @export
 upsetVarByCol <- function(
         gr, var, alt_col = "ALT", mcol = "transcript_id", ...,

diff --git a/README.md b/README.md
@@ -3,12 +3,14 @@
 <!-- badges: start -->
 [![Build Status](https://github.com/smped/transmogR/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/smped/transmogR/actions)
 [![Repo Status](https://img.shields.io/badge/repo%20status-Active-green.svg)](https://shields.io/)
-[![Codecov test coverage](https://codecov.io/gh/smped/transmogR/branch/devel/graph/badge.svg)](https://codecov.io/gh/smped/transmogR?branch=devel)
+[![Codecov test coverage](https://codecov.io/gh/smped/transmogR/branch/gh-actions/graph/badge.svg)](https://codecov.io/gh/smped/transmogR?branch=gh-actions)
 <!-- badges: end -->
 
+
 This package contains functions for creating a variant-modified, or mogrified, 
 reference genome or transcriptome.
 SNPs, Insertions and Deletions are all supported.
 
 With a generous tip of the hat and deep appreciation to Bill Watterson. 
+The inspiration for many function names can be found [here](https://www.gocomics.com/calvinandhobbes/1988/02/14).
 Results are expected to be more predictable than for prototype transmogrifiers.
diff --git a/man/genomogrify-methods.Rd b/man/genomogrify-methods.Rd