Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
katwre committed Aug 13, 2015
2 parents 2a7857d + 53e94b2 commit 7f0cb2c
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 34 deletions.
40 changes: 20 additions & 20 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: genomation
Type: Package
Title: Summary, annotation and visualization of genomic data
Version: 1.1.11
Version: 1.1.12
Author: Altuna Akalin [aut, cre], Vedran Franke [aut, cre], Katarzyna Wreczycka [aut], Liz Ing-Simmons [ctb]
Maintainer: Altuna Akalin <aakalin@gmail.com>, Vedran Franke <vedran.franke@gmail.com>
Description: A package for summary and annotation of genomic intervals. Users can visualize and
Expand All @@ -20,30 +20,30 @@ BugReports: https://github.com/BIMSBbioinfo/genomation/issues
Depends:
R (>= 3.0.0),grid
Imports:
GenomicRanges,
data.table,
GenomicRanges,
GenomicAlignments,
IRanges,
Rsamtools,
readr,
data.table,
plyr,
reshape2,
ggplot2,
methods,
rtracklayer,
ggplot2,
gridBase,
impute,
impute,
IRanges,
matrixStats,
methods,
parallel,
plotrix,
matrixStats
plyr,
readr,
reshape2,
Rsamtools,
rtracklayer
Suggests:
RUnit,
knitr,
RColorBrewer,
genomationData,
BiocGenerics,
rmarkdown,
knitrBootstrap
genomationData,
knitr,
knitrBootstrap,
RColorBrewer,
rmarkdown,
RUnit
Collate:
'documentData.R'
'genomation-classes.R'
Expand All @@ -56,4 +56,4 @@ Collate:
'scoreMatrix.R'
'scoreMatrixBin.R'
'scoreMatrixList.R'
'test_genomation_package.R'
'test_genomation_package.R'
7 changes: 7 additions & 0 deletions NEWS
@@ -1,3 +1,10 @@
genomation 1.1.12
--------------

IMPROVEMENTS AND BUG FIXES

* gffToGRanges parses column 9 of the gff file correctly; added ensembl=TRUE to prepend chr to seqlevels

genomation 1.1.11
--------------

Expand Down
31 changes: 20 additions & 11 deletions R/readData.R
Expand Up @@ -466,12 +466,15 @@ setMethod("readTranscriptFeatures",
#' The file can end in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip}
#' and/or start with \code{http://} or \code{ftp://}. If the file is not compressed
#' it can also start with \code{https://} or \code{ftps://}.
#' @param track.line the number of track lines to skip, "auto" to detect them automatically
#' or FALSE(default) if the bed file doesn't have track lines
#' @param track.line Can be an integer specifying the number of track lines to skip,
#' "auto" to detect the header lines automatically
#' or FALSE(default) if the bed file doesn't have track lines.
#' "auto" detects both UCSC header lines and lines starting with #
#' @param split.group boolean, whether to split the 9th column of the file
#' @param split.char character that is used as a separator of the 9th column. ';' by default
#' @param filter a character designating which elements to retain from the gff file (e.g. exon, CDS, ...)
#' @param zero.based \code{boolean} whether the coordinates are 0 or 1 based. 0 is the default
#' @param ensembl \code{boolean} if TRUE, add the chr prefix to seqlevels. FALSE by default
#' @return returns a \code{GenomicRanges} object
#'
#' @examples
Expand All @@ -482,7 +485,7 @@ setMethod("readTranscriptFeatures",
#' @docType methods
#' @export
gffToGRanges = function(gff.file, track.line=FALSE, split.group=FALSE, split.char=';',filter=NULL,
zero.based=FALSE){
zero.based=FALSE, ensembl=FALSE){

gff = readGeneric(gff.file,
chr=1,
Expand All @@ -500,15 +503,18 @@ gffToGRanges = function(gff.file, track.line=FALSE, split.group=FALSE, split.cha
if(split.group){
message('splitting the group.column...')
group = strsplit(gff$group, '\\s+')
gnames = group[[1]][seq(1,length(group[[1]]),2)]
gids = seq(2,length(group[[1]]),2)
group = data.frame(do.call(rbind,
lapply(group,
function(x)gsub(split.char,'',x[gids]))),
stringsAsFactors=FALSE)
colnames(group) = gnames
group = lapply(group, function(x){
vals = x[seq(2,length(x),2)]
vals = sub(split.char, '', vals)
vals = sub('^"', '', vals)
vals = sub('"$', '', vals)
d = data.table(t(vals))
data.table::setnames(d, x[seq(1,length(x),2)])
d
})
group = data.table::rbindlist(group, fill=TRUE)
gff$group = NULL
values(gff) = cbind(values(gff), group)
values(gff) = cbind(values(gff), as.data.frame(group))
}

if(!is.null(filter)){
Expand All @@ -520,5 +526,8 @@ gffToGRanges = function(gff.file, track.line=FALSE, split.group=FALSE, split.cha
}
}

if(ensembl)
seqlevels(gff) = paste('chr',seqlevels(gff),sep='')

return(gff)
}
8 changes: 8 additions & 0 deletions inst/unitTests/test.gtf
@@ -0,0 +1,8 @@
#!genome-build GRCh37.p13
#!genome-version GRCh37
#!genome-date 2009-02
#!genome-build-accession NCBI:GCA_000001405.14
#!genebuild-last-updated 2013-09
1 pseudogene gene 11869 14412 . + . gene_id "ENSG00000223972"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene";
1 processed_transcript transcript 11869 14409 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana";
1 processed_transcript exon 11869 12227 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "1"; gene_name "DDX11L1"; gene_source "ensembl_havana"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; exon_id "ENSE00002234944";
14 changes: 14 additions & 0 deletions inst/unitTests/test_readData.R
Expand Up @@ -118,4 +118,18 @@ test_readGeneric = function()
checkIdentical(g8, r10)
}

test_gffToGRanges = function()
{
library(GenomicRanges)
tab.test = system.file('unitTests/test.gtf', package='genomation')
gff1 = gffToGRanges(tab.test, track.line='auto')
checkIdentical(length(gff1), 3L)
checkIdentical(ncol(values(gff1)), 5L)

gff2 = gffToGRanges(tab.test, track.line='auto', split.group=TRUE)
checkIdentical(ncol(values(gff2)), 13L)

gff3 = gffToGRanges(tab.test, track.line='auto',ensembl=TRUE)
checkIdentical(as.character(seqlevels(gff3)), 'chr1')
}

10 changes: 7 additions & 3 deletions man/gffToGRanges.Rd
Expand Up @@ -7,16 +7,18 @@
The GenomicRanges object needs to be properly formated for the function to work.}
\usage{
gffToGRanges(gff.file, track.line = FALSE, split.group = FALSE,
split.char = ";", filter = NULL, zero.based = FALSE)
split.char = ";", filter = NULL, zero.based = FALSE, ensembl = FALSE)
}
\arguments{
\item{gff.file}{path to a gff formatted file.
The file can end in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip}
and/or start with \code{http://} or \code{ftp://}. If the file is not compressed
it can also start with \code{https://} or \code{ftps://}.}

\item{track.line}{the number of track lines to skip, "auto" to detect them automatically
or FALSE(default) if the bed file doesn't have track lines}
\item{track.line}{Can be an integer specifying the number of track lines to skip,
"auto" to detect the header lines automatically
or FALSE(default) if the bed file doesn't have track lines.
"auto" detects both UCSC header lines and lines starting with #}
\item{split.group}{boolean, whether to split the 9th column of the file}
Expand All @@ -25,6 +27,8 @@ or FALSE(default) if the bed file doesn't have track lines}
\item{filter}{a character designating which elements to retain from the gff file (e.g. exon, CDS, ...)}
\item{zero.based}{\code{boolean} whether the coordinates are 0 or 1 based. 0 is the default}
\item{ensembl}{\code{boolean} if TRUE, add the chr prefix to seqlevels. FALSE by default}
}
\value{
returns a \code{GenomicRanges} object
Expand Down

0 comments on commit 7f0cb2c

Please sign in to comment.