diff --git a/DESCRIPTION b/DESCRIPTION index 71aabfd..ebd611d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,13 @@ Package: Rseb Type: Package Title: A package for NGS data managing and visualization -Version: 0.1.2 +Version: 0.1.3 Author: Sebastian Gregoricchio Maintainer: Sebastian Gregoricchio Description: An R-package for daily tasks required to handle biological data as well as avoid re-coding of small functions for quick but necessary data managing. License: GNU GENERAL PUBLIC LICENSE version 3 Depends: - R, BiocManager, Biostrings, biomaRt, GO.db, rtracklayer, cowplot, data.table, dplyr, ggplot2, ggrepel, matrixStats, plyr, purrr, robustbase, stringr, tidyr, tools + R (>= 3.2.0), BiocManager, Biostrings, biomaRt, GO.db, rtracklayer, cowplot, data.table, dplyr, ggplot2, ggrepel, matrixStats, plyr, purrr, robustbase, stringr, tidyr, tools biocViews: Imports: Biostrings, biomaRt, GO.db, rtracklayer, cowplot, data.table, dplyr, ggplot2, ggrepel, matrixStats, plyr, purrr, robustbase, stringr, tidyr, tools diff --git a/NAMESPACE b/NAMESPACE index 677f581..e985d6c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(DE.status) export(GSEA.to.GOnumber) export(IGVsnap) +export(build.bed) export(calculate.mode) export(cmyk) export(combine.lists) @@ -32,21 +33,3 @@ export(store_packages) export(substract.bw) export(update_pkgs) export(volcano) -import(Biostrings) -import(GO.db) -import(biomaRt) -import(dplyr) -import(ggplot2) -import(ggrepel) -import(rtracklayer) -import(tidyr) -importFrom(BiocManager,install) -importFrom(cowplot,plot_grid) -importFrom(data.table,fread) -importFrom(matrixStats,colSds) -importFrom(plyr,ddply) -importFrom(purrr,reduce) -importFrom(robustbase,colMedians) -importFrom(stringr,str_split) -importFrom(tidyr,separate) -importFrom(tools,toTitleCase) diff --git a/NEWS.md b/NEWS.md index 5182cdf..8c391b8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,3 +12,8 @@ First release. 
* Optimization of `IGVsnap` function; * Added the function `pkg.check`. +## v0.1.3 - January 11th 2021 +* Optimization of `volcano` function: parameter 'font_size' added; +* Bugs fixed in `computeMatrix.deeptools` +* Added function `build.bed`. +* Added R version dependence (set to R >= 3.2.0) to be compatible with `dplyr`. diff --git a/R/GSEA.to.GOnumber.R b/R/GSEA.to.GOnumber.R index 0f44b2b..8327e68 100644 --- a/R/GSEA.to.GOnumber.R +++ b/R/GSEA.to.GOnumber.R @@ -14,8 +14,8 @@ #' #' @export GSEA.to.GOnumber #' -#' @import GO.db -#' @import dplyr +# @import GO.db +# @import dplyr GSEA.to.GOnumber = function(input_terms, diff --git a/R/IGVsnap.R b/R/IGVsnap.R index 47c427e..eff584f 100644 --- a/R/IGVsnap.R +++ b/R/IGVsnap.R @@ -1,174 +1,174 @@ -#' @title Script generator for Integrative Genomics Viewer (IGV) batch tasks. -#' -#' @description The function builds a script file that can be run on IGV to generate multiple screenshots at specific genomic regions. -#' -#' @param loci_vector Either a gene name vector (e.g. \code{c("Gapdh", "Spi1", ...)}) or a regions vector (eg. \code{c('chr1:253000-256503', ...)}. All IGV formats are allowed. -#' @param input_type Define the input type. Allowed values are \code{genes} and \code{regions}. -#' @param biomart Defines the \code{biomart} parameter for \code{biomaRt} package, by default \code{ensembl}. -#' @param dataset Defines the \code{dataset} parameter for \code{biomaRt} package, by default \code{mmusculus_gene_ensembl}. -#' @param reference_genome [optional] Defines the genome to use, e.g. "mm10", "hg19", ... . By default \code{NULL}. -#' @param fivePrime Numeric value to define how many bases [bp] exapand from full gene position at it's 5'-end, default 1000bp. -#' @param threePrime Numeric value to define how many bases [bp] exapand from full gene position at it's 3'-end, default 1000bp. -#' @param snap_names [optional] String vector to define the names of images (without extention), by default uses \code{loci_vector}. 
-#' @param IGV_batch_file String for the batch_script_file_name/path, by default \code{/IGV_batch.txt}. -#' @param snap_image_format String to define the format of the images, e.g. "png", "jpeg", "svg", ... . By default \code{png}. -#' @param snap_directory String for the output directory for the snapshoots. By default . -#' @param maxPanelHeight Numeric value to define the height in pixel of the IGV pannel that will be captured on IGV. -#' @param session [optional] FULL path to an IGV session file (session.xml) to use for the images. By default \code{NULL}. -#' @param exit Logical value to indicate whether exit IGV after image capture ended. By default \code{FALSE}. -#' -#' @return Exports a .txt file ready-to-use on IGV. -#' -#' @details To run the script on IGV: Tools > Run Batch Script... > choose the .txt output file from this function. \cr For more info on how batch tasks work on IGV see: \cr \url{https://software.broadinstitute.org/software/igv/PortCommands}. -#' -#' @export IGVsnap -#' -#' @import biomaRt -#' @import dplyr - -###################### -## IGVsnap function ## -###################### -IGVsnap = function(loci_vector, - input_type, # 'genes' or 'regions' - biomart = "ensembl", - dataset = "mmusculus_gene_ensembl", - reference_genome = NULL, - fivePrime = 1000, #bp - threePrime = 1000, - snap_names = NULL, - IGV_batch_file = paste(getwd(), "/IGV_batch.txt", sep = ""), - snap_image_format = "png", - snap_directory = getwd(), - maxPanelHeight = 1000, - session = NULL, - exit = FALSE) { - - # check parameters - help_message = c( - "Help for 'IGVsnap' function from 'Rseb' package: \n", "\n", - " loci_vector either a gene name vector [e.g. c('Gapdh', 'Spi1', ...)] or a regions vector [eg. c('chr1:253000-256503', ...). All IGV formats are allowed. \n", - " input_type define whether 'genes' or 'regions'. \n", - " biomart 'biomart' parameter for biomaRt package, by default 'ensembl'. 
\n", - " dataset 'dataset' parameter for biomaRt package, by default 'mmusculus_gene_ensembl'. \n", - " reference_genome [optional] to define the genome to use 'mm10', 'hg19, ..., 'NULL' by default. \n", - " fivePrime how many bases [bp] exapand from full gene position at it's 5'-end, default 1000bp. \n", - " threePrime how many bases [bp] exapand from full gene position at it's 3'-end, default 1000bp. \n", - " snap_names [optional] names of images (without extention), by default uses 'loci_vector'. \n", - " IGV_batch_file batch_script_file name/position, by default /IGV_batch.txt. \n", - "snap_image_format image format such as 'png', 'jpeg', 'svg', ..., by default 'png'. \n", - " snap_directory output directory for snapshoots, by default . \n", - " maxPanelHeight heigth in pixel of the IGV pannel that will be captured. \n", - " session [optional] define an IGV session file .xml to use for the images, 'NULL' by default - USE FULL PATH -. \n", - " exit logical to indicate whether exit IGV after image capture ended, FALSE by default. \n", - " help logical to indicate whether display the help, FALSE by default. 
\n", - "\n", "More info at https://software.broadinstitute.org/software/igv/PortCommands") - - if (exists("input_type") == F | exists("loci_vector") == F | (!(input_type %in% c("genes", "regions")))) {return(message(help_message))} - - # Retrieves regions - if (input_type == "genes") { - loci_vector = sort(unique(loci_vector)) - - # Install packages from bioconductor - pkg = "biomaRt" - if (!require(pkg, character.only = TRUE)) { - BiocManager::install(pkg) - if(!require(pkg, character.only = TRUE)) stop(paste(pkg,"package not found."))} - - require(biomaRt) - # to list all the datasets availables - # listDatasets(useMart("ensembl")) - - # for getBM(): - # EXAMPLES - # mm10 = useMart("ensembl", dataset="mmusculus_gene_ensembl") - # listFilters(mm10) /to filter output - # listAttributes(mm10) /output columns - - genome = useMart(biomart = biomart, - dataset = dataset) - - # loading gene list - require(dplyr) - gene_positions = getBM(mart = genome, # mart object, see "useMart" function - attributes = c("chromosome_name", "start_position", "end_position", "strand", "external_gene_name"), # columns in output - filters = "external_gene_name", # select only certain values, - values = loci_vector) %>% # values for the filter - arrange(factor(external_gene_name, levels = loci_vector)) - - gene_positions$start_position = gene_positions$start_position-fivePrime - gene_positions$end_position = gene_positions$end_position+threePrime - - IGV_positions = data.frame(position = paste("chr", gene_positions$chromosome_name, ":", - gene_positions$start_position, "-", - gene_positions$end_position, - sep = ""), - gene = gene_positions$external_gene_name) - - if (is.null(snap_names)) {snap_names = IGV_positions$gene} - list = IGV_positions$position - - } else if (input_type == "regions") { - if (is.null(snap_names)) {snap_names = loci_vector} - list = unique(loci_vector) - } - - # Initialization/Creation of the batch_file - write(file = IGV_batch_file, - x = paste("# IGV_batch_file,", 
as.character(date()))) - - # Insert loading exiting session if required - if (!is.null(session)) { - write(file = IGV_batch_file, - x = paste("load", session), - append = T) - } else if (!is.null(reference_genome)){ - write(file = IGV_batch_file, - x = paste("genome", reference_genome), - append = T) - } - - - # Set pannel size (height in pixel) - write(file = IGV_batch_file, - x = paste("maxPanelHeight", maxPanelHeight), - append = T) - - - # Insert output directory for the snapshots - write(file = IGV_batch_file, - x = paste("snapshotDirectory", snap_directory), - append = T) - - for (i in 1:length(list)) { - # Write the position where to go - write(file = IGV_batch_file, - x = paste("goto", list[i]), - append = T) - - # Write the command to take a snapshot with the name of the gene/position with the chosen extension - write(file = IGV_batch_file, - x = paste("snapshot ", snap_names[i], "_snapshot.", snap_image_format, sep =""), - append = T) - } - - # Add command to go to whole genome (to indicate that finished) - write(file = IGV_batch_file, - x = "goto all", - append = T) - - # Exit command if required - if (exit == TRUE) { - write(file = IGV_batch_file, - x = "exit", - append = T) - } - - message("The following batch script have been generated: \n", - IGV_batch_file, - "\n", "\n", - "The final snapshoot will be generated in the following folder: \n", - snap_directory) - -} # END function +#' @title Script generator for Integrative Genomics Viewer (IGV) batch tasks. +#' +#' @description The function builds a script file that can be run on IGV to generate multiple screenshots at specific genomic regions. +#' +#' @param loci_vector Either a gene name vector (e.g. \code{c("Gapdh", "Spi1", ...)}) or a regions vector (eg. \code{c('chr1:253000-256503', ...)}. All IGV formats are allowed. +#' @param input_type Define the input type. Allowed values are \code{genes} and \code{regions}. 
+#' @param biomart Defines the \code{biomart} parameter for \code{biomaRt} package, by default \code{ensembl}. +#' @param dataset Defines the \code{dataset} parameter for \code{biomaRt} package, by default \code{mmusculus_gene_ensembl}. +#' @param reference_genome [optional] Defines the genome to use, e.g. "mm10", "hg19", ... . By default \code{NULL}. +#' @param fivePrime Numeric value to define how many bases [bp] exapand from full gene position at it's 5'-end, default 1000bp. +#' @param threePrime Numeric value to define how many bases [bp] exapand from full gene position at it's 3'-end, default 1000bp. +#' @param snap_names [optional] String vector to define the names of images (without extention), by default uses \code{loci_vector}. +#' @param IGV_batch_file String for the batch_script_file_name/path, by default \code{/IGV_batch.txt}. +#' @param snap_image_format String to define the format of the images, e.g. "png", "jpeg", "svg", ... . By default \code{png}. +#' @param snap_directory String for the output directory for the snapshoots. By default . +#' @param maxPanelHeight Numeric value to define the height in pixel of the IGV pannel that will be captured on IGV. +#' @param session [optional] FULL path to an IGV session file (session.xml) to use for the images. By default \code{NULL}. +#' @param exit Logical value to indicate whether exit IGV after image capture ended. By default \code{FALSE}. +#' +#' @return Exports a .txt file ready-to-use on IGV. +#' +#' @details To run the script on IGV: Tools > Run Batch Script... > choose the .txt output file from this function. \cr For more info on how batch tasks work on IGV see: \cr \url{https://software.broadinstitute.org/software/igv/PortCommands}. 
+#' +#' @export IGVsnap +#' +# @import biomaRt +# @import dplyr + +###################### +## IGVsnap function ## +###################### +IGVsnap = function(loci_vector, + input_type, # 'genes' or 'regions' + biomart = "ensembl", + dataset = "mmusculus_gene_ensembl", + reference_genome = NULL, + fivePrime = 1000, #bp + threePrime = 1000, + snap_names = NULL, + IGV_batch_file = paste(getwd(), "/IGV_batch.txt", sep = ""), + snap_image_format = "png", + snap_directory = getwd(), + maxPanelHeight = 1000, + session = NULL, + exit = FALSE) { + + # check parameters + help_message = c( + "Help for 'IGVsnap' function from 'Rseb' package: \n", "\n", + " loci_vector either a gene name vector [e.g. c('Gapdh', 'Spi1', ...)] or a regions vector [eg. c('chr1:253000-256503', ...). All IGV formats are allowed. \n", + " input_type define whether 'genes' or 'regions'. \n", + " biomart 'biomart' parameter for biomaRt package, by default 'ensembl'. \n", + " dataset 'dataset' parameter for biomaRt package, by default 'mmusculus_gene_ensembl'. \n", + " reference_genome [optional] to define the genome to use 'mm10', 'hg19, ..., 'NULL' by default. \n", + " fivePrime how many bases [bp] exapand from full gene position at it's 5'-end, default 1000bp. \n", + " threePrime how many bases [bp] exapand from full gene position at it's 3'-end, default 1000bp. \n", + " snap_names [optional] names of images (without extention), by default uses 'loci_vector'. \n", + " IGV_batch_file batch_script_file name/position, by default /IGV_batch.txt. \n", + "snap_image_format image format such as 'png', 'jpeg', 'svg', ..., by default 'png'. \n", + " snap_directory output directory for snapshoots, by default . \n", + " maxPanelHeight heigth in pixel of the IGV pannel that will be captured. \n", + " session [optional] define an IGV session file .xml to use for the images, 'NULL' by default - USE FULL PATH -. \n", + " exit logical to indicate whether exit IGV after image capture ended, FALSE by default. 
\n", + " help logical to indicate whether display the help, FALSE by default. \n", + "\n", "More info at https://software.broadinstitute.org/software/igv/PortCommands") + + if (exists("input_type") == F | exists("loci_vector") == F | (!(input_type %in% c("genes", "regions")))) {return(message(help_message))} + + # Retrieves regions + if (input_type == "genes") { + loci_vector = sort(unique(loci_vector)) + + # Install packages from bioconductor + pkg = "biomaRt" + if (!require(pkg, character.only = TRUE)) { + BiocManager::install(pkg) + if(!require(pkg, character.only = TRUE)) stop(paste(pkg,"package not found."))} + + require(biomaRt) + # to list all the datasets availables + # listDatasets(useMart("ensembl")) + + # for getBM(): + # EXAMPLES + # mm10 = useMart("ensembl", dataset="mmusculus_gene_ensembl") + # listFilters(mm10) /to filter output + # listAttributes(mm10) /output columns + + genome = useMart(biomart = biomart, + dataset = dataset) + + # loading gene list + require(dplyr) + gene_positions = getBM(mart = genome, # mart object, see "useMart" function + attributes = c("chromosome_name", "start_position", "end_position", "strand", "external_gene_name"), # columns in output + filters = "external_gene_name", # select only certain values, + values = loci_vector) %>% # values for the filter + arrange(factor(external_gene_name, levels = loci_vector)) + + gene_positions$start_position = gene_positions$start_position-fivePrime + gene_positions$end_position = gene_positions$end_position+threePrime + + IGV_positions = data.frame(position = paste("chr", gene_positions$chromosome_name, ":", + gene_positions$start_position, "-", + gene_positions$end_position, + sep = ""), + gene = gene_positions$external_gene_name) + + if (is.null(snap_names)) {snap_names = IGV_positions$gene} + list = IGV_positions$position + + } else if (input_type == "regions") { + if (is.null(snap_names)) {snap_names = loci_vector} + list = unique(loci_vector) + } + + # Initialization/Creation of 
the batch_file + write(file = IGV_batch_file, + x = paste("# IGV_batch_file,", as.character(date()))) + + # Insert loading exiting session if required + if (!is.null(session)) { + write(file = IGV_batch_file, + x = paste("load", session), + append = T) + } else if (!is.null(reference_genome)){ + write(file = IGV_batch_file, + x = paste("genome", reference_genome), + append = T) + } + + + # Set pannel size (height in pixel) + write(file = IGV_batch_file, + x = paste("maxPanelHeight", maxPanelHeight), + append = T) + + + # Insert output directory for the snapshots + write(file = IGV_batch_file, + x = paste("snapshotDirectory", snap_directory), + append = T) + + for (i in 1:length(list)) { + # Write the position where to go + write(file = IGV_batch_file, + x = paste("goto", list[i]), + append = T) + + # Write the command to take a snapshot with the name of the gene/position with the chosen extension + write(file = IGV_batch_file, + x = paste("snapshot ", snap_names[i], "_snapshot.", snap_image_format, sep =""), + append = T) + } + + # Add command to go to whole genome (to indicate that finished) + write(file = IGV_batch_file, + x = "goto all", + append = T) + + # Exit command if required + if (exit == TRUE) { + write(file = IGV_batch_file, + x = "exit", + append = T) + } + + message("The following batch script have been generated: \n", + IGV_batch_file, + "\n", "\n", + "The final snapshoot will be generated in the following folder: \n", + snap_directory) + +} # END function
diff --git a/R/build.bed.R b/R/build.bed.R
new file mode 100644
index 0000000..b2c5254
--- /dev/null
+++ b/R/build.bed.R
@@ -0,0 +1,228 @@
+#' @title Bed generator
+#'
+#' @description Function that helps the building of a bed file providing the columns. It enables also the specification of the track line for software such as IGV in order to pre-define colors, track name, etc.
+#'
+#' @param chr String vector containing the name of the chromosome (e.g. chr3, chrY, chr2_random) or scaffold (e.g. scaffold10671).
+#' @param start Numeric vector indicating the starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0.
+#' @param end Numeric vector indicating the ending position of the feature in the chromosome or scaffold.
+#' @param name String vector defining the name of the BED line. This label is displayed to the left of the BED line in the Genome Browser window when the track is open to full display mode or directly to the left of the item in pack mode. If set as \code{NULL} (default), the names will correspond to the mid-point of the region.
+#' @param score A single value or a numeric vector with a score between 0 and 1000. If the track line \code{useScore} attribute is set as \code{TRUE} for this annotation data set, the score value will determine the level of gray in which this feature is displayed (higher numbers = darker gray). By default 0.
+#' @param strand A single character or a string vector defining the strand: either "." (=no strand) or "+" or "-". By default ".".
+#' @param thickStart A numeric vector indicating the starting position at which the feature is drawn thickly (for example, the start codon in gene displays). When it is \code{NULL} (default) and \code{itemRgb} is provided, the \code{start} value is used.
+#' @param thickEnd A numeric vector indicating the ending position at which the feature is drawn thickly (for example, the stop codon in gene displays). When it is \code{NULL} (default) and \code{itemRgb} is provided, the \code{end} value is used.
+#' @param itemRgb A single value or a string vector containing the colors for each feature. It can be expressed as an RGB value of the form R,G,B (e.g. "255,0,0") or as any other R-supported color name (it will be converted automatically to RGB version). By default \code{NULL}. If the track line \code{itemRgb.ON} attribute is set as \code{TRUE}, this color value will determine the display color of the data contained in this BED line. NOTE: It is recommended that a simple color scheme (eight colors or less) be used with this attribute to avoid overwhelming the color resources of the Genome Browser and your Internet browser.
+#' @param blockCount A single number or a numeric vector indicating the number of blocks (exons) in the BED line. By default \code{NULL}.
+#' @param blockSizes A vector containing a comma-separated list of the block sizes. The number of items in this list should correspond to \code{blockCount}. By default \code{NULL}.
+#' @param blockStarts A vector containing a comma-separated list of block starts. All of the \code{blockStart} positions should be calculated relative to \code{start}. The number of items in this list should correspond to \code{blockCount}. By default \code{NULL}.
+#'
+#' @param track.name A string defining the track label that will be displayed to the left of the track in the Genome Browser window, and also the label of the track control at the bottom of the screen. The name can consist of up to 15 characters. It is recommended that the track_label be restricted to alpha-numeric characters and spaces to avoid potential parsing problems. By default \code{NULL}.
+#' @param display.mode A string that defines the initial display mode of the annotation track. Values for \code{display.mode} include: "hide", "dense", "full", "pack", "squish". Any other value is ignored. By default \code{NULL}.
+#' @param itemRgb.ON Logic value to define whether this attribute should be set to "On": the Genome Browser will use the RGB value shown in the \code{itemRgb} field in each data line of the associated BED track to determine the display color of the data on that line. If the \code{itemRgb} values are not provided, this parameter will be ignored. By default \code{TRUE}.
+#' @param useScore Logic value to define if the \code{score} field in each of the track's data lines should be used to determine the level of shading in which the data is displayed. The attribute is added to the track line only when \code{score} contains more than one value. By default \code{FALSE}.
+#' @param colorByStrand A vector composed by two strings for two colors, either in RGB comma separated format (eg. "0,250,30") or any R-supported color string (they will be converted automatically to RGB format). The order of color sets is c("strand +", "strand -"). Parameter ignored when \code{itemRgb} is active/provided. By default \code{NULL}.
+#' @param track.base.color A single string defining the main color for the annotation track. The track color consists of three comma-separated RGB values from 0-255 (eg. "0,250,30") or any R-supported color string (it will be converted automatically to RGB format). Parameter ignored when \code{itemRgb} or \code{colorByStrand} are active/provided. By default \code{NULL}.
+#'
+#' @param sort Logic value to define whether to sort the bed using the function \link{sort.bed}. By default \code{TRUE}.
+#' @param bed.file.name If a string with a full path to a bed_file is provided, the function will export the bed as a txt file. By default \code{NULL}.
+#' @param export.track.line Logic value to define if the track line should be exported. When \code{bed.file.name = NULL} this parameter is ignored. By default \code{TRUE}.
+#' @param return.data.frame Logic value to define whether to return the data.frame corresponding to the bed (it will show the columns names). By default \code{FALSE}.
+#' @param force.generation Force the generation of bed even when certain errors occur (eg. score > 1000, start > end). By default \code{FALSE}.
+#'
+#'
+#' @return If required the function can export a bed file with or without the track line, return a data.frame (with column names) corresponding to the bed generated, or both. The bed file could be automatically sorted setting the parameter \code{sort = TRUE}. When one of the input checks fails, a warning is raised and no bed is generated.
+#'
+#' @references
+#' \itemize{
+#'   \item More information about bed format are available at the following link: \url{https://genome.ucsc.edu/FAQ/FAQformat.html#format1}.
+#'   \item More information about track line parameters are available at the following link: \url{https://genome.ucsc.edu/goldenPath/help/hgTracksHelp.html#lines}.
+#' }
+#'
+#' @export build.bed
+
+
+build.bed = function(
+  # bed columns
+  chr,
+  start,
+  end,
+  name = NULL,
+  score = 0,
+  strand = ".",
+  thickStart = NULL,
+  thickEnd = NULL,
+  itemRgb = NULL,
+  blockCount = NULL,
+  blockSizes = NULL,
+  blockStarts = NULL,
+
+  # track lines
+  track.name = NULL,
+  display.mode = NULL,
+  itemRgb.ON = T,
+  useScore = F,
+  colorByStrand = NULL,
+  track.base.color = NULL,
+
+  # export parameters
+  sort = T,
+  bed.file.name = NULL,
+  export.track.line = TRUE,
+  return.data.frame = F,
+  force.generation = F
+  ) { # BEGIN function
+
+  # -------------------------------------------------------------------------- #
+
+  # Helper: convert colors to the "R,G,B" format. Values that already contain
+  # a comma are assumed to be RGB triplets and are returned unchanged.
+  to.rgb = function(colors) {
+    vapply(X = as.character(colors),
+           FUN = function(x) {if (grepl(",", x)) {x} else {paste(as.vector(col2rgb(x)), collapse = ",")}},
+           FUN.VALUE = character(1),
+           USE.NAMES = F)
+  }
+
+  # Check size of vectors for bed generation
+  ##############################################################################
+  row.number = length(chr)
+
+  # A parameter is valid when it is NULL, a single value, or one value per row
+  param.check = function(param, row.number){
+    length(param) <= 1 || length(param) == row.number
+  }
+
+  length.check = c(
+    length(start) == row.number,
+    length(end) == row.number,
+    param.check(name, row.number),
+    param.check(score, row.number),
+    param.check(strand, row.number),
+    param.check(thickStart, row.number),
+    param.check(thickEnd, row.number),
+    param.check(itemRgb, row.number),
+    param.check(blockCount, row.number),
+    param.check(blockSizes, row.number),
+    param.check(blockStarts, row.number)
+  )
+
+  # Every check must pass: comparing the number of unique outcomes would
+  # wrongly accept the case in which all the checks fail
+  if (!all(length.check)) {return(warning(paste("The expected number of elements in each colmun/parameter must be", row.number, "or 1.")))}
+
+
+  # Check that START is before END and force if required
+  if (-1 %in% sign(end - start) && force.generation == F) {
+    return(warning("At least one value in END vector is lower than the relative START value. If you want to force the bed generation set the parameter 'force.generation' as TRUE."))
+  }
+
+  # Check that the scores do not exceed 1000 and force if required
+  if (-1 %in% sign(1000 - score) && force.generation == F) {
+    return(warning("At least one value in SCORE vector is grater than 1000 (allowed values between 0-1000). If you want to force the bed generation set the parameter 'force.generation' as TRUE."))
+  }
+
+  # Convert non-RGB colors (if required)
+  if (!is.null(itemRgb)) {itemRgb = to.rgb(itemRgb)}
+
+
+  # Names generation
+  if (is.null(name)) {name = round((start + end)/2)}
+
+  # Define thickStart and thickEnd if RGB is defined
+  if (!is.null(itemRgb)) {
+    if (is.null(thickStart)) {thickStart = start}
+    if (is.null(thickEnd)) {thickEnd = end}
+  }
+
+  # Check blocks
+  blocks.check = c(!is.null(blockCount), !is.null(blockSizes), !is.null(blockStarts))
+  if (length(unique(blocks.check)) > 1) {
+    return(warning("If you want to define blocks, you need to provide all the 3 vectors for 'blockCount', 'blockSizes', 'blockStarts'."))
+  }
+
+  # Check track label (the length is tested first to always get a single logical value)
+  if (!is.null(track.name)) {
+    if (length(track.name) != 1 || nchar(track.name) > 15) {return(warning("The label of the track.name must be a string with a number of characters <=15."))}
+  }
+
+
+  # TRACK LINE BUILDING
+  ##############################################################################
+  # Always defined, so that the export step can test it even when no track line is required
+  track.line = ""
+
+  if (!is.null(bed.file.name) && export.track.line == TRUE) {
+    track.attributes = c()
+
+    if (!is.null(track.name)) {track.attributes = c(track.attributes, paste('name="', track.name, '"', sep = ""))}
+
+    if (!is.null(display.mode)) {
+      if (length(display.mode) == 1 && display.mode %in% c("hide", "dense", "full", "pack", "squish")) {
+        track.attributes = c(track.attributes, paste('visibility="', display.mode, '"', sep = ""))
+      }
+    }
+
+    # itemRgb.ON is taken into account only when the itemRgb values are provided
+    rgb.active = itemRgb.ON == TRUE && !is.null(itemRgb)
+    if (rgb.active) {track.attributes = c(track.attributes, 'itemRgb="on"')}
+    if (useScore == TRUE && length(score) > 1) {track.attributes = c(track.attributes, 'useScore="1"')}
+
+    # Track-level colors are used only when the per-feature colors are not active
+    if (!rgb.active) {
+      if (!is.null(colorByStrand)) {
+        if (length(colorByStrand) == 2) {
+          strand.colors = paste(to.rgb(colorByStrand), collapse = " ")
+          track.attributes = c(track.attributes, paste('colorByStrand="', strand.colors, '"', sep = ""))
+        }
+      } else if (!is.null(track.base.color)) {
+        if (length(track.base.color) == 1) {
+          track.attributes = c(track.attributes, paste('color="', to.rgb(track.base.color), '"', sep = ""))
+        }
+      } # END if for track-level colors
+    } # END if for itemRGB OFF
+
+    # The line must start with the 'track' keyword, also when no track.name is given
+    if (length(track.attributes) > 0) {track.line = paste("track", paste(track.attributes, collapse = " "))}
+  } # END if for track line
+
+  # DATA.FRAME BUILDING
+  ##############################################################################
+  # Select the columns required for the bed
+  columns_list = list(chr = chr,
+                      start = start,
+                      end = end,
+                      name = name,
+                      score = score,
+                      strand = strand,
+                      thickStart = thickStart,
+                      thickEnd = thickEnd,
+                      itemRgb = itemRgb,
+                      blockCount = blockCount,
+                      blockSizes = blockSizes,
+                      blockStarts = blockStarts)
+
+  # build data.frame (NULL columns are dropped) and sort if required
+  bed = data.frame(Filter(Negate(is.null), columns_list))
+  if (sort == T) {bed = Rseb::sort.bed(bed)}
+
+
+  # OUTPUT GENERATION
+  ##############################################################################
+  # Export file if required
+  if (!is.null(bed.file.name)) {
+    if (length(bed.file.name) == 1) {
+      if (track.line != "") {
+        write(x = track.line, file = bed.file.name)
+        write.table(x = bed, file = bed.file.name,
+                    quote = F, sep = "\t", row.names = F, col.names = F, append = T)
+      } else {
+        write.table(x = bed, file = bed.file.name,
+                    quote = F, sep = "\t", row.names = F, col.names = F)
+      }
+      message(paste("Bed file exported as -> ", bed.file.name, sep = ""))
+    }
+  }
+
+  # Return data.frame if required
+  if (return.data.frame == T) {return(bed)}
+
+} # END function
diff --git a/R/computeMatrix.deeptools.R b/R/computeMatrix.deeptools.R index 374f953..7651bea 100644 --- a/R/computeMatrix.deeptools.R +++ b/R/computeMatrix.deeptools.R @@ -227,11 +227,11 @@ computeMatrix.deeptools = ###### Add common optional parameters - if (!is.null(sortUsingSamples)) {command = paste(command, paste(sortUsingSamples, collapse = " "))} - if (!is.null(minThreshold)) {command = paste(command, paste(minThreshold, collapse = " "))} - if (!is.null(maxThreshold)) {command = paste(command, paste(maxThreshold, collapse = " "))} - if (!is.null(blackListFileName)) {command = paste(command, add.quotes(blackListFileName))} - if (!is.null(samplesLabel)) {command = paste(command, paste(add.quotes(samplesLabel), collapse = " "))} + if (!is.null(sortUsingSamples)) {command = paste(command, "--sortUsingSamples", paste(sortUsingSamples, collapse = " "))} + if (!is.null(minThreshold)) {command = paste(command, "--minThreshold", paste(minThreshold, collapse = " "))} + if (!is.null(maxThreshold)) {command = paste(command, "--maxThreshold", paste(maxThreshold, collapse = " "))} + if (!is.null(blackListFileName)) {command = paste(command, "--blackListFileName", add.quotes(blackListFileName))} + if (!is.null(samplesLabel)) {command = paste(command, "--samplesLabel", paste(add.quotes(samplesLabel), collapse = " "))} if (missingDataAsZero == T) {command = paste(command, "--missingDataAsZero")} if (skipZeros == T) {command = paste(command, "--skipZeros")} diff --git a/R/convert_sequence.R b/R/convert_sequence.R index
a68663f..2aa0318 100644 --- a/R/convert_sequence.R +++ b/R/convert_sequence.R @@ -15,7 +15,7 @@ #' #' @export convert_sequence #' -#' @import Biostrings +# @import Biostrings convert_sequence = function(sequence = NULL, mode = "not specified", diff --git a/R/data.summary.R b/R/data.summary.R index 881f7fa..6361ed0 100644 --- a/R/data.summary.R +++ b/R/data.summary.R @@ -13,7 +13,7 @@ #' #' @export data.summary #' -#' @importFrom plyr ddply +# @importFrom plyr ddply data.summary = function(data, diff --git a/R/density_plot.R b/R/density_plot.R index d2a2b6d..1026084 100644 --- a/R/density_plot.R +++ b/R/density_plot.R @@ -1,123 +1,123 @@ -#' @title Plot density signal of NGS data. -#' -#' @description Plots the density profile of NGS data (e.g. ChIP-seq, ATAC-seq, MeDIP-seq, etc.). Used by the function \code{\link{plot.density.profile}}. -#' -#' @param samples A character vector containing the samples list. -#' @param scores A numeric vector containing the scores for the Y-axis. -#' @param positions A numeric vector containing the position for the X-axis. -#' @param variance_scores A numeric vector containing the variance/error value at each position. -#' @param xlab A string containing the label for the X-axis. By default "Distance from regions center [bp]". -#' @param ylab A string containing the label for the Y-axis. By default "Average density signal". -#' @param line_type Vector to define each line type. Both numeric and string codes are accepted. if only one element is given this will be applied to all the lines. By default "solid". \cr Example 1: \code{c("solid", "dashed")}. \cr Example 2: \code{c(1, 2)} -#' @param x_lim List of numeric vectors with two elements each to define the range of the X-axis. To set only one side use NA for the side to leave automatic. If only one range is given this one will be applied to all the plots. By default \code{NULL}, the range will be defined automatically. 
\cr Example \code{list(c(0, 20), c(NA, 30), c(0, NA), c(NA, NA))}., -#' @param y_lim List of numeric vectors with two elements each to define the range of the Y-axis. To set only one side use NA for the side to leave automatic. If only one range is given this one will be applied to all the plots. By default \code{NULL}, the range will be defined automatically. \cr Example \code{list(c(0, 20), c(NA, 30), c(0, NA), c(NA, NA))}., -#' @param x_intercept A vector indicating the X intercepts for the vertical lines. By default 0. -#' @param colors Vector to define the line and error area colors. If only one value is provided or the number of values is lower than the required ones only the first value will be used. All standard R.colors values are accepted. By default \code{c("blue", "red", "purple", "orange", "green")}. -#' @param title A string containing the label for the X-axis. By default "Density profile". -#' @param text_size Numeric value to define the size of the text for the labels of all the plots. By default 12. -#' @param variance Logic value to define whether to plot the error/variance around the signal. By default \code{TRUE}. -#' @param print_plot Logic value to define whether to print the plot once generated or not. By default \code{FALSE}. -#' @param line_width Numeric value to define the line width for all the plots. By default 1., -#' @param variance_opacity Numeric value to define the alpha/transparency of the error/variance. By default 0.25. Parameter considered only when \code{variance = TRUE)}. -#' -#' @return Returns a plot in ggplot2 format. 
-#' -#' @export density_plot -#' -#' @import ggplot2 - - -density_plot = function( - samples, - scores, - positions, - variance_scores, - xlab = "Distance from regions center [bp]", - ylab = "Average density signal", - line_type = "solid", - y_lim = NULL, - x_lim = NULL, - x_intercept = 0, - colors = c("blue", "red", "purple", "orange", "green"), - title = "Density profile", - text_size = 12, - variance = T, - print_plot = F, - line_width = 1, - variance_opacity = 0.25) - -{ # BEGIN - # Create a matrix - matrix = - data.frame(sample = samples, - position = positions, - score = scores, - variance = variance_scores) - - # assigns 0 to the alpha of the ribbon if variance == F - if (variance == T) (variance_value = variance_opacity) else (variance_value = 0) - - # scaling size of parameters - n_samples = length(unique(matrix$sample)) - - # scaling number of colors/line.type to be equal to number samples - scaling = function(x, n_samples) { - if (length(x) < n_samples & - length(x) == 1) { - v = c(rep(x, n_samples)) - } else if (length(x) > 1 & - length(x) < n_samples) { - v = c(rep(x[1], n_samples)) - message("Number of colors and/or line.type lower than number of samples --> The first value is applied to all samples/groups.") - } else (v = as.vector(x)) - return(v) - } - - line_type = scaling(line_type, n_samples) - colors = scaling(colors, n_samples) - - require(ggplot2) - - # building the plot - plot = - ggplot(data = matrix, - aes(x = position, - y = score, - ymin = score-variance, - ymax = score+variance, - group = sample, - color = sample, - fill = sample, - linetype = sample)) + - geom_line(size = line_width) + - scale_color_manual(values = as.vector(colors[1:n_samples])) + - scale_linetype_manual(values = as.vector(line_type[1:n_samples])) + - geom_ribbon(alpha = variance_value, - color = NA) + - scale_fill_manual(values = as.vector(colors[1:n_samples])) + - xlab(xlab) + - ylab(ylab) + - ggtitle(title) + - geom_vline(xintercept = x_intercept, - linetype = 
"dashed", - col = "gray70") + - theme_classic() + - theme(text = element_text(size = text_size), - axis.text = element_text(color = "#000000"), - axis.title = element_text(color = "#000000"), - axis.line = element_line(color = "#000000"), - axis.ticks = element_line(color = "#000000"), - title = element_text(color = "#000000"), - legend.title = element_text(color = "#000000"), - legend.text = element_text(color = "#000000")) - - - if (!(is.null(y_lim))) plot = plot + ylim(y_lim) - if (!(is.null(x_lim))) plot = plot + xlim(x_lim) - - if (print_plot == T) { - (print(plot))} - - return(plot) - -} # END +#' @title Plot density signal of NGS data. +#' +#' @description Plots the density profile of NGS data (e.g. ChIP-seq, ATAC-seq, MeDIP-seq, etc.). Used by the function \code{\link{plot.density.profile}}. +#' +#' @param samples A character vector containing the samples list. +#' @param scores A numeric vector containing the scores for the Y-axis. +#' @param positions A numeric vector containing the position for the X-axis. +#' @param variance_scores A numeric vector containing the variance/error value at each position. +#' @param xlab A string containing the label for the X-axis. By default "Distance from regions center [bp]". +#' @param ylab A string containing the label for the Y-axis. By default "Average density signal". +#' @param line_type Vector to define each line type. Both numeric and string codes are accepted. if only one element is given this will be applied to all the lines. By default "solid". \cr Example 1: \code{c("solid", "dashed")}. \cr Example 2: \code{c(1, 2)} +#' @param x_lim List of numeric vectors with two elements each to define the range of the X-axis. To set only one side use NA for the side to leave automatic. If only one range is given this one will be applied to all the plots. By default \code{NULL}, the range will be defined automatically. 
\cr Example \code{list(c(0, 20), c(NA, 30), c(0, NA), c(NA, NA))}. +#' @param y_lim List of numeric vectors with two elements each to define the range of the Y-axis. To set only one side use NA for the side to leave automatic. If only one range is given this one will be applied to all the plots. By default \code{NULL}, the range will be defined automatically. \cr Example \code{list(c(0, 20), c(NA, 30), c(0, NA), c(NA, NA))}. +#' @param x_intercept A vector indicating the X intercepts for the vertical lines. By default 0. +#' @param colors Vector to define the line and error area colors. If only one value is provided or the number of values is lower than the required ones only the first value will be used. All standard R.colors values are accepted. By default \code{c("blue", "red", "purple", "orange", "green")}. +#' @param title A string containing the title of the plot. By default "Density profile". +#' @param text_size Numeric value to define the size of the text for the labels of all the plots. By default 12. +#' @param variance Logic value to define whether to plot the error/variance around the signal. By default \code{TRUE}. +#' @param print_plot Logic value to define whether to print the plot once generated or not. By default \code{FALSE}. +#' @param line_width Numeric value to define the line width for all the plots. By default 1. +#' @param variance_opacity Numeric value to define the alpha/transparency of the error/variance. By default 0.25. Parameter considered only when \code{variance = TRUE}. +#' +#' @return Returns a plot in ggplot2 format.
+#' +#' @export density_plot +#' +# @import ggplot2 + + +density_plot = function( + samples, + scores, + positions, + variance_scores, + xlab = "Distance from regions center [bp]", + ylab = "Average density signal", + line_type = "solid", + y_lim = NULL, + x_lim = NULL, + x_intercept = 0, + colors = c("blue", "red", "purple", "orange", "green"), + title = "Density profile", + text_size = 12, + variance = T, + print_plot = F, + line_width = 1, + variance_opacity = 0.25) + +{ # BEGIN + # Create a matrix + matrix = + data.frame(sample = samples, + position = positions, + score = scores, + variance = variance_scores) + + # assigns 0 to the alpha of the ribbon if variance == F + if (variance == T) (variance_value = variance_opacity) else (variance_value = 0) + + # scaling size of parameters + n_samples = length(unique(matrix$sample)) + + # scaling number of colors/line.type to be equal to number samples + scaling = function(x, n_samples) { + if (length(x) < n_samples & + length(x) == 1) { + v = c(rep(x, n_samples)) + } else if (length(x) > 1 & + length(x) < n_samples) { + v = c(rep(x[1], n_samples)) + message("Number of colors and/or line.type lower than number of samples --> The first value is applied to all samples/groups.") + } else (v = as.vector(x)) + return(v) + } + + line_type = scaling(line_type, n_samples) + colors = scaling(colors, n_samples) + + require(ggplot2) + + # building the plot + plot = + ggplot(data = matrix, + aes(x = position, + y = score, + ymin = score-variance, + ymax = score+variance, + group = sample, + color = sample, + fill = sample, + linetype = sample)) + + geom_line(size = line_width) + + scale_color_manual(values = as.vector(colors[1:n_samples])) + + scale_linetype_manual(values = as.vector(line_type[1:n_samples])) + + geom_ribbon(alpha = variance_value, + color = NA) + + scale_fill_manual(values = as.vector(colors[1:n_samples])) + + xlab(xlab) + + ylab(ylab) + + ggtitle(title) + + geom_vline(xintercept = x_intercept, + linetype = 
"dashed", + col = "gray70") + + theme_classic() + + theme(text = element_text(size = text_size), + axis.text = element_text(color = "#000000"), + axis.title = element_text(color = "#000000"), + axis.line = element_line(color = "#000000"), + axis.ticks = element_line(color = "#000000"), + title = element_text(color = "#000000"), + legend.title = element_text(color = "#000000"), + legend.text = element_text(color = "#000000")) + + + if (!(is.null(y_lim))) plot = plot + ylim(y_lim) + if (!(is.null(x_lim))) plot = plot + xlim(x_lim) + + if (print_plot == T) { + (print(plot))} + + return(plot) + +} # END diff --git a/R/intersect.bedtools.R b/R/intersect.bedtools.R index 3636181..f93bb27 100644 --- a/R/intersect.bedtools.R +++ b/R/intersect.bedtools.R @@ -61,7 +61,7 @@ #' #' @export intersect.bedtools #' -#' @importFrom data.table fread +# @importFrom data.table fread diff --git a/R/pkg.check.R b/R/pkg.check.R index 82397cc..30b64f9 100644 --- a/R/pkg.check.R +++ b/R/pkg.check.R @@ -1,53 +1,53 @@ -#' @title Check package installation. -#' -#' @description Function to check if a package is installed. It works with bioconductor or CRAN packages. -#' -#' @param package A single string indicating the name of the package to check. -#' @param archive A single string indicating the type of archive. Possible values "CRAN" and "bioconductor" (not case sensitive). Parameter without default.. -#' -#' @return If the pkg is not already installed it will be installed. 
-#' -#' @examples -#' pkg.check("ggplot2", "cran") -#' -#' pkg.check("biomaRt", "bioconductor") -#' -#' @export pkg.check -#' -#' @importFrom BiocManager install - - -###################### -pkg.check = function(package, - archive) { - - ### check parameters - if (class(package) != "character" | length(package) != 1) { - return(warning("The 'package' parameter must be a single string.")) - } - - - archive = tolower(archive) - if (class(archive) != "character" | length(archive) != 1 | !(archive %in% c("cran", "bioconductor"))) { - return(warning("The 'archive' parameter must be a single string. Possibile values: 'cran', 'bioconductor' (not case sensitive).")) - } - - - - ### install package - if (archive == "cran") { - # Install packages from CRAN - if (!require(package, character.only = TRUE)) { - install.packages(package) - if(!require(package, character.only = TRUE)) return(warning((paste(package,"package not found."))))} - - } else { - - # Install packages from bioconductor - if (!require(package, character.only = TRUE)) { - BiocManager::install(package) - if(!require(package, character.only = TRUE)) return(warning((paste(package,"package not found."))))} - } - - -} # END function +#' @title Check package installation. +#' +#' @description Function to check if a package is installed. It works with bioconductor or CRAN packages. +#' +#' @param package A single string indicating the name of the package to check. +#' @param archive A single string indicating the type of archive. Possible values "CRAN" and "bioconductor" (not case sensitive). Parameter without default.. +#' +#' @return If the pkg is not already installed it will be installed. 
+#' +#' @examples +#' pkg.check("ggplot2", "cran") +#' +#' pkg.check("biomaRt", "bioconductor") +#' +#' @export pkg.check +#' +# @importFrom BiocManager install + + +###################### +pkg.check = function(package, + archive) { + + ### check parameters + if (class(package) != "character" | length(package) != 1) { + return(warning("The 'package' parameter must be a single string.")) + } + + + archive = tolower(archive) + if (class(archive) != "character" | length(archive) != 1 | !(archive %in% c("cran", "bioconductor"))) { + return(warning("The 'archive' parameter must be a single string. Possibile values: 'cran', 'bioconductor' (not case sensitive).")) + } + + + + ### install package + if (archive == "cran") { + # Install packages from CRAN + if (!require(package, character.only = TRUE)) { + install.packages(package) + if(!require(package, character.only = TRUE)) return(warning((paste(package,"package not found."))))} + + } else { + + # Install packages from bioconductor + if (!require(package, character.only = TRUE)) { + BiocManager::install(package) + if(!require(package, character.only = TRUE)) return(warning((paste(package,"package not found."))))} + } + + +} # END function diff --git a/R/plot.density.profile.R b/R/plot.density.profile.R index eff957a..7c4ee53 100644 --- a/R/plot.density.profile.R +++ b/R/plot.density.profile.R @@ -57,16 +57,16 @@ #' #' @export plot.density.profile #' -#' @import tidyr -#' @import dplyr -#' @import ggplot2 -#' @importFrom data.table fread -#' @importFrom stringr str_split -#' @importFrom robustbase colMedians -#' @importFrom matrixStats colSds -#' @importFrom purrr reduce -#' @importFrom cowplot plot_grid -#' @importFrom tools toTitleCase +# @import tidyr +# @import dplyr +# @import ggplot2 +# @importFrom data.table fread +# @importFrom stringr str_split +# @importFrom robustbase colMedians +# @importFrom matrixStats colSds +# @importFrom purrr reduce +# @importFrom cowplot plot_grid +# @importFrom tools toTitleCase 
plot.density.profile = function( matrix.file, diff --git a/R/read.computeMatrix.file.R b/R/read.computeMatrix.file.R index 8db1aed..cf8d8a6 100644 --- a/R/read.computeMatrix.file.R +++ b/R/read.computeMatrix.file.R @@ -14,10 +14,10 @@ #' } #' This list can be passed as it is to the function \link{plot.density.profile}. #' -#' @import dplyr -#' @importFrom data.table fread -#' @importFrom stringr str_split -#' @importFrom tidyr separate +# @import dplyr +# @importFrom data.table fread +# @importFrom stringr str_split +# @importFrom tidyr separate read.computeMatrix.file = function(matrix.file) { diff --git a/R/sort.bed.R b/R/sort.bed.R index fb04753..2eb61c2 100644 --- a/R/sort.bed.R +++ b/R/sort.bed.R @@ -16,7 +16,7 @@ #' #' @export sort.bed #' -#' @import dplyr +# @import dplyr sort.bed = function(bed, diff --git a/R/substract.bw.R b/R/substract.bw.R index 948e2c0..657747c 100644 --- a/R/substract.bw.R +++ b/R/substract.bw.R @@ -12,7 +12,7 @@ #' #' @export substract.bw #' -#' @import rtracklayer +# @import rtracklayer substract.bw = function(bw1, diff --git a/R/volcano.R b/R/volcano.R index 82f16ac..148f1ee 100644 --- a/R/volcano.R +++ b/R/volcano.R @@ -36,13 +36,14 @@ #' @param threshold_line_color String to define the color of the threshold lines. By default \code{"gray70"} #' @param threshold_line_type String or numeric value to define the threshold lines type. Both numeric and string standard R codes are accepted. By default \code{"dotted"}, equivalent to \code{2}. #' @param font_family String to define the font family to use in the plot writings. By default \code{"Helvetica"}. +#' @param font_size Numeric value to define the font size. By default 12. #' #' @return A plot in ggplot2 format. 
#' #' @export volcano #' -#' @import ggplot2 -#' @import ggrepel +# @import ggplot2 +# @import ggrepel volcano = function(log2FC_data, padj_data, @@ -102,11 +103,13 @@ volcano = function(log2FC_data, add_threshold_lines = T, threshold_line_color = "gray70", threshold_line_type = "dotted", - font_family = "Helvetica", ...) { + + # font parameters + font_family = "Helvetica", + font_size = 12) { if (length(log2FC_data) != length(padj_data)) { - warning("ERROR: length(log2FC_data) is different from length(padj_data)") - return(NULL) + return(warning("ERROR: length(log2FC_data) is different from length(padj_data).")) } # Generate a table containing the data and their status @@ -143,7 +146,14 @@ volcano = function(log2FC_data, xlab(x_label) + ylab(y_label) + theme_classic() + - theme(text = element_text(family = font_family)) + theme(text = element_text(family = font_family, size = font_size), + axis.text = element_text(color = "#000000"), + axis.title = element_text(color = "#000000"), + axis.line = element_line(color = "#000000"), + axis.ticks = element_line(color = "#000000"), + title = element_text(color = "#000000"), + legend.title = element_text(color = "#000000"), + legend.text = element_text(color = "#000000")) # remove legend if necessary diff --git a/README.md b/README.md index 66ecc21..30a5385 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ devtools::install_github("sebastian-gregoricchio/Rseb") ``` ## Documentation -With the package a [PDF manual](https://sebastian-gregoricchio.github.io/Rseb/Rseb_0.1.2_manual.pdf) is available. +With the package a [PDF manual](https://sebastian-gregoricchio.github.io/Rseb/Rseb_0.1.3_manual.pdf) is available. 
## Package history and releases @@ -57,7 +57,7 @@ The changeLog could be found [here](https://github.com/sebastian-gregoricchio/Rs **Old releases** * [Rseb v0.1.0](https://github.com/sebastian-gregoricchio/Rseb/releases/tag/0.1.0) * [Rseb v0.1.1](https://github.com/sebastian-gregoricchio/Rseb/releases/tag/0.1.1) - +* [Rseb v0.1.2](https://github.com/sebastian-gregoricchio/Rseb/releases/tag/0.1.2) ----------------- ## Contact diff --git a/Rseb_0.1.3_manual.pdf b/Rseb_0.1.3_manual.pdf new file mode 100644 index 0000000..b9df8e3 Binary files /dev/null and b/Rseb_0.1.3_manual.pdf differ diff --git a/_config.yaml b/_config.yaml index ac2180d..a739d40 100644 --- a/_config.yaml +++ b/_config.yaml @@ -1,4 +1,4 @@ -title: Rseb v0.1.2 +title: Rseb v0.1.3 show_downloads: true google_analytics: theme: jekyll-theme-slate diff --git a/index.md b/index.md index ecb36a0..5000399 100644 --- a/index.md +++ b/index.md @@ -44,7 +44,7 @@ devtools::install_github("sebastian-gregoricchio/Rseb") ``` ## Documentation -With the package a [PDF manual](https://sebastian-gregoricchio.github.io/Rseb/Rseb_0.1.2_manual.pdf) is available. +With the package a [PDF manual](https://sebastian-gregoricchio.github.io/Rseb/Rseb_0.1.3_manual.pdf) is available. 
## Package history and releases @@ -53,6 +53,8 @@ The changeLog could be found [here](https://github.com/sebastian-gregoricchio/Rs **Old releases** * [Rseb v0.1.0](https://github.com/sebastian-gregoricchio/Rseb/releases/tag/0.1.0) * [Rseb v0.1.1](https://github.com/sebastian-gregoricchio/Rseb/releases/tag/0.1.1) +* [Rseb v0.1.2](https://github.com/sebastian-gregoricchio/Rseb/releases/tag/0.1.2) + diff --git a/man/build.bed.Rd b/man/build.bed.Rd new file mode 100644 index 0000000..064bdf1 --- /dev/null +++ b/man/build.bed.Rd @@ -0,0 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build.bed.R +\name{build.bed} +\alias{build.bed} +\title{Bed generator} +\usage{ +build.bed( + chr, + start, + end, + name = NULL, + score = 0, + strand = ".", + thickStart = NULL, + thickEnd = NULL, + itemRgb = NULL, + blockCount = NULL, + blockSizes = NULL, + blockStarts = NULL, + track.name = NULL, + display.mode = NULL, + itemRgb.ON = T, + useScore = F, + colorByStrand = NULL, + track.base.color = NULL, + sort = T, + bed.file.name = NULL, + export.track.line = TRUE, + return.data.frame = F, + force.generation = F +) +} +\arguments{ +\item{chr}{String vector containing the name of the chromosome (e.g. chr3, chrY, chr2_random) or scaffold (e.g. scaffold10671).} + +\item{start}{Numeric vector indicating the starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0.} + +\item{end}{Numeric vector indicating the ending position of the feature in the chromosome or scaffold.} + +\item{name}{String vector defining the name of the BED line. This label is displayed to the left of the BED line in the Genome Browser window when the track is open to full display mode or directly to the left of the item in pack mode. 
If set as \code{NULL} (default) and the column is required, the names will correspond to the mid-point of the region.} + +\item{score}{A single value or a numeric vector with a score between 0 and 1000. If the track line \code{useScore} attribute is set as \code{TRUE} for this annotation data set, the score value will determine the level of gray in which this feature is displayed (higher numbers = darker gray). By default 0.} + +\item{strand}{A single character or a string vector defining the strand: either "." (=no strand) or "+" or "-". By default ".".} + +\item{thickStart}{A numeric vector indicating the starting position at which the feature is drawn thickly (for example, the start codon in gene displays). When there is no thick part (default value, \code{thickStart = NULL}) the \code{start} value will be used.} + +\item{thickEnd}{A numeric vector indicating the ending position at which the feature is drawn thickly (for example, the stop codon in gene displays). When there is no thick part (default value, \code{thickEnd = NULL}) the \code{end} value will be used.} + +\item{itemRgb}{A single value or a string vector containing the colors for each feature. It can be expressed as an RGB value of the form R,G,B (e.g. "255,0,0") or as any other R-supported color name (it will be converted automatically to RGB version). By default \code{NULL}. If the track line \code{itemRgb.ON} attribute is set as \code{TRUE}, this color value will determine the display color of the data contained in this BED line. NOTE: It is recommended that a simple color scheme (eight colors or less) be used with this attribute to avoid overwhelming the color resources of the Genome Browser and your Internet browser.} + +\item{blockCount}{A single number or a numeric vector indicating the number of blocks (exons) in the BED line. By default \code{NULL}.} + +\item{blockSizes}{A vector containing a comma-separated list of the block sizes.
The number of items in this list should correspond to \code{blockCount}. By default \code{NULL}.} + +\item{blockStarts}{A vector containing a comma-separated list of block starts. All of the \code{blockStart} positions should be calculated relative to \code{start}. The number of items in this list should correspond to \code{blockCount}. By default \code{NULL}.} + +\item{track.name}{A string defining the track label that will be displayed to the left of the track in the Genome Browser window, and also the label of the track control at the bottom of the screen. The name can consist of up to 15 characters. It is recommended that the track_label be restricted to alpha-numeric characters and spaces to avoid potential parsing problems. By default \code{NULL}.} + +\item{display.mode}{A string that defines the initial display mode of the annotation track. Values for \code{display.mode} include: "hide", "dense", "full", "pack", "squish". By default \code{NULL}.} + +\item{itemRgb.ON}{Logic value to define whether this attribute should be set to "On", the Genome Browser will use the RGB value shown in the \code{itemRgb} field in each data line of the associated BED track to determine the display color of the data on that line. If the \code{itemRgb} values are not provided, this parameter will be ignored. By default \code{TRUE}.} + +\item{useScore}{Logic value to define if the \code{score} field in each of the track's data lines should be used to determine the level of shading in which the data is displayed. By default \code{FALSE}.} + +\item{colorByStrand}{A vector composed by two strings for two colors, either in RGB comma separated format (eg. "0,250,30") or any R-supported color string (they will be converted automatically to RGB format). The order of color sets is c("strand +", "strand -"). Parameter ignored when \code{itemRgb} is active/provided. By default \code{NULL}.} + +\item{track.base.color}{A single string defining the main color for the annotation track. 
The track color consists of three comma-separated RGB values from 0-255 (eg. "0,250,30") or any R-supported color string (it will be converted automatically to RGB format). Parameter ignored when \code{itemRgb} or \code{colorByStrand} are active/provided. By default \code{NULL}.} + +\item{sort}{Logic value to define whether to sort the bed using the function \link{sort.bed}. By default \code{TRUE}.} + +\item{bed.file.name}{If a string with a full path to a bed_file is provided, the function will export the bed as a txt file. By default \code{NULL}.} + +\item{export.track.line}{Logic value to define if the track line should be exported. When \code{bed.file.name = NULL} this parameter is ignored. By default \code{TRUE}.} + +\item{return.data.frame}{Logic value to define whether to return the data.frame corresponding to the bed (it will show the columns names). By default \code{FALSE}.} + +\item{force.generation}{Force the generation of bed even when certain errors occur (eg. score > 1000, start > end). By default \code{FALSE}.} +} +\value{ +If required the function can export a bed file with or without the track line, return a data.frame (with column names) corresponding to the bed generated, or both. The bed file could be automatically sorted setting the parameter \code{sort = TRUE}. +} +\description{ +Function that helps the building of a bed file providing the columns. It enables also the specification of the track line for software such as IGV in order to pre-define colors, track name, etc. +} +\references{ +\itemize{ + \item More information about bed format are available at the following link: \url{https://genome.ucsc.edu/FAQ/FAQformat.html#format1}. + \item More information about track line parameters are available at the following link: \url{https://genome.ucsc.edu/goldenPath/help/hgTracksHelp.html#lines}.
+} +} diff --git a/man/intersect.bedtools.Rd b/man/intersect.bedtools.Rd index 87710fc..4e1c0dc 100644 --- a/man/intersect.bedtools.Rd +++ b/man/intersect.bedtools.Rd @@ -4,7 +4,7 @@ \alias{intersect.bedtools} \title{Intersect two or more bed files (by \code{bedtools intersect} function).} \usage{ -\method{intersect}{bedtools}( +intersect.bedtools( a, b, outputFileName = paste(getwd(), "intersected.bed", sep = "/"), diff --git a/man/plot.density.profile.Rd b/man/plot.density.profile.Rd index ee25f8d..7da404b 100644 --- a/man/plot.density.profile.Rd +++ b/man/plot.density.profile.Rd @@ -4,7 +4,7 @@ \alias{plot.density.profile} \title{Plot of NGS density signal at specific regions from deepTools matrices.} \usage{ -plot.density.profile( +\method{plot}{density.profile}( matrix.file, plot.by.group = T, missing.data.as.zero = NULL, diff --git a/man/sort.bed.Rd b/man/sort.bed.Rd index b33c24d..e33a75e 100644 --- a/man/sort.bed.Rd +++ b/man/sort.bed.Rd @@ -4,7 +4,7 @@ \alias{sort.bed} \title{Sorter function for .bed files.} \usage{ -sort.bed( +\method{sort}{bed}( bed, bed.header = F, sep = "\\t", diff --git a/man/volcano.Rd b/man/volcano.Rd index f6f0669..19a19e9 100644 --- a/man/volcano.Rd +++ b/man/volcano.Rd @@ -39,7 +39,7 @@ volcano( threshold_line_color = "gray70", threshold_line_type = "dotted", font_family = "Helvetica", - ... + font_size = 12 ) } \arguments{ @@ -110,6 +110,8 @@ volcano( \item{threshold_line_type}{String or numeric value to define the threshold lines type. Both numeric and string standard R codes are accepted. By default \code{"dotted"}, equivalent to \code{2}.} \item{font_family}{String to define the font family to use in the plot writings. By default \code{"Helvetica"}.} + +\item{font_size}{Numeric value to define the font size. By default 12.} } \value{ A plot in ggplot2 format.