diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 000000000..7167553f5 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,29 @@ +codecov: + branch: master + +coverage: + precision: 3 + round: nearest + range: "50...100" + + status: + project: + default: + target: auto + threshold: .01 + branches: null + + patch: + default: + target: auto + branches: null + + changes: + default: + branches: null + + +comment: + layout: "header, diff, changes, sunburst, uncovered, tree" + branches: null + behavior: default diff --git a/.travis.yml b/.travis.yml index 1e9259942..a1c1b2bcf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,11 +10,12 @@ cache: - $HOME/.m2 jdk: - oraclejdk8 -script: ./gradlew jacocoTestReport testSRA; + - openjdk8 +script: ./gradlew test jacocoTestReport; after_success: + - bash <(curl -s https://codecov.io/bash) - echo "TRAVIS_BRANCH='$TRAVIS_BRANCH'"; echo "JAVA_HOME='$JAVA_HOME'"; - ./gradlew coveralls; if [ "$TRAVIS_BRANCH" == "master" ]; then ./gradlew uploadArchives; fi diff --git a/README.md b/README.md index 0e468d334..afe901e05 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ -[![Coverage Status](https://coveralls.io/repos/github/samtools/htsjdk/badge.svg?branch=master)](https://coveralls.io/github/samtools/htsjdk?branch=master) +[![Coverage Status](https://codecov.io/gh/samtools/htsjdk/branch/master/graph/badge.svg)](https://codecov.io/gh/samtools/htsjdk) [![Build Status](https://travis-ci.org/samtools/htsjdk.svg?branch=master)](https://travis-ci.org/samtools/htsjdk) [![Maven Central](https://maven-badges.herokuapp.com/maven-central/com.github.samtools/htsjdk/badge.svg)](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.github.samtools%22%20AND%20a%3A%22htsjdk%22) [![License](http://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/samtools/htsjdk) [![Language](http://img.shields.io/badge/language-java-brightgreen.svg)](https://www.java.com/) +[![Join the chat at 
https://gitter.im/samtools/htsjdk](https://badges.gitter.im/samtools/htsjdk.svg)](https://gitter.im/samtools/htsjdk?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) Status of downstream projects automatically built on top of the current htsjdk master branch. See [gatk-jenkins](https://gatk-jenkins.broadinstitute.org/view/HTSJDK%20Release%20Tests/) for detailed logs. Failure may indicate problems in htsjdk, but may also be due to expected incompatibilities between versions, or unrelated failures in downstream projects. - [Picard](https://github.com/broadinstitute/picard): [![Build Status](https://gatk-jenkins.broadinstitute.org/buildStatus/icon?job=picard-on-htsjdk-master)](https://gatk-jenkins.broadinstitute.org/job/picard-on-htsjdk-master/) @@ -15,11 +16,19 @@ common file formats, such as [SAM][1] and [VCF][2], used for high-throughput sequencing data. There are also an number of useful utilities for manipulating HTS data. -Please see the [HTSJDK Documentation](http://samtools.github.io/htsjdk) for more information. - > **NOTE: _HTSJDK does not currently support the latest Variant Call Format Specification (VCFv4.3 and BCFv2.2)._** -#### Building HTSJDK +### Documentation & Getting Help + +API documentation for all versions of HTSJDK since `1.128` are available through [javadoc.io](http://www.javadoc.io/doc/com.github.samtools/htsjdk). + +If you believe you have found a bug or have an issue with the library please a) search the open and recently closed issues to ensure it has not already been reported, then b) log an issue. + +The project has a [gitter chat room](https://gitter.im/samtools/htsjdk) if you would like to chat with the developers and others involved in the project. + +To receive announcements of releases and other significant project news please subscribe to the [htsjdk-announce](https://groups.google.com/forum/#!forum/htsjdk-announce) google group. 
+ +### Building HTSJDK HTSJDK is now built using [gradle](http://gradle.org/). @@ -73,7 +82,7 @@ Example gradle usage from the htsjdk root directory: ./gradlew tasks ``` -#### Create an HTSJDK project in IntelliJ +### Create an HTSJDK project in IntelliJ To create a project in IntelliJ IDE for htsjdk do the following: 1. Select fom the menu: `File -> New -> Project from Existing Sources` @@ -82,13 +91,17 @@ To create a project in IntelliJ IDE for htsjdk do the following: From time to time if dependencies change in htsjdk you may need to refresh the project from the `View -> Gradle` menu. -#### Licensing Information +### Licensing Information -Not all sub-packages of htsjdk are subject to the same license, so a license notice is included in each source file or sub-package as appropriate. Please check the relevant license notice whenever you start working with a part of htsjdk that you have not previously worked with to avoid any surprises. +Not all sub-packages of htsjdk are subject to the same license, so a license notice is included in each source file or sub-package as appropriate. +Please check the relevant license notice whenever you start working with a part of htsjdk that you have not previously worked with to avoid any surprises. +Broadly speaking the majority of the code is covered under the MIT license with the following notable exceptions: -#### Java Minimum Version Support Policy +* Much of the CRAM code is under the Apache License, Version 2 +* Core `tribble` code (underlying VCF reading/writing amongst other things) is under LGPL +* Code supporting the reading/writing of SRA format is uncopyrighted & public domain -> **NOTE: _Effective November 24th 2015, HTSJDK has ended support of Java 7 and previous versions. 
Java 8 is now required_.** +### Java Minimum Version Support Policy We will support all Java SE versions supported by Oracle until at least six months after Oracle's Public Updates period has ended ([see this link](http://www.oracle.com/technetwork/java/eol-135779.html)). @@ -96,9 +109,8 @@ Java SE Major Release | End of Java SE Oracle Public Updates | Proposed End of S ---- | ---- | ---- | ---- 6 | Feb 2013 | Aug 2013 | Oct 2015 7 | Apr 2015 | Oct 2015 | Oct 2015 -8* | Mar 2017 | Sep 2017 | Sep 2017 +8 | Jul 2018 | Jul 2018 | TBD -* to be finalized HTSJDK is migrating to semantic versioning (http://semver.org/). We will eventually adhere to it strictly and bump our major version whenever there are breaking changes to our API, but until we more clearly define what constitutes our official API, clients should assume that every release potentially contains at least minor changes to public methods. diff --git a/build.gradle b/build.gradle index 9e8f35154..96811bbeb 100644 --- a/build.gradle +++ b/build.gradle @@ -5,13 +5,14 @@ buildscript { } plugins { - id "java" + id 'java' + id 'scala' id 'maven' id 'signing' id 'jacoco' id 'com.palantir.git-version' version '0.5.1' id 'com.github.johnrengelman.shadow' version '1.2.3' - id "com.github.kt3k.coveralls" version "2.6.3" + id 'com.github.maiflai.scalatest' version '0.15' } repositories { @@ -19,21 +20,16 @@ repositories { } jacocoTestReport { - dependsOn test group = "Reporting" description = "Generate Jacoco coverage reports after running tests." 
additionalSourceDirs = files(sourceSets.main.allJava.srcDirs) reports { - xml.enabled = true // coveralls plugin depends on xml format report + xml.enabled = true // codecov depends on xml format report html.enabled = true } } -jacoco { - toolVersion = "0.7.5.201505241946" -} - dependencies { compile "org.apache.commons:commons-jexl:2.1.1" compile "commons-logging:commons-logging:1.1.1" @@ -42,7 +38,11 @@ dependencies { compile "org.tukaani:xz:1.5" compile "gov.nih.nlm.ncbi:ngs-java:1.2.4" + testCompile "org.scala-lang:scala-library:2.12.1" + testCompile "org.scalatest:scalatest_2.12:3.0.1" + testRuntime 'org.pegdown:pegdown:1.4.2' // Necessary for generating HTML reports with ScalaTest testCompile "org.testng:testng:6.9.9" + testCompile "com.google.jimfs:jimfs:1.1" } sourceCompatibility = 1.8 @@ -67,76 +67,57 @@ jar { import org.gradle.internal.os.OperatingSystem; -tasks.withType(Test) { - outputs.upToDateWhen { false } // tests will always rerun - useTestNG() +tasks.withType(Test) { task -> + task.outputs.upToDateWhen { false } // tests will always rerun - // set heap size for the test JVM(s) - minHeapSize = "1G" - maxHeapSize = "2G" + // Always run serially because there are some very badly behaved tests in HTSJDK that + // will cause errors and even deadlocks if run multi-threaded + task.maxParallelForks = 1 - jvmArgs '-Djava.awt.headless=true' //this prevents awt from displaying a java icon while the tests are running + // set heap size for the test JVM(s) + task.minHeapSize = "1G" + task.maxHeapSize = "2G" - if (System.env.CI == "true") { //if running under a CI output less into the logs - int count = 0 + task.jvmArgs '-Djava.awt.headless=true' //this prevents awt from displaying a java icon while the tests are running +} - beforeTest { descriptor -> - count++ - if( count % 100 == 0) { - logger.lifecycle("Finished "+ Integer.toString(count++) + " tests") - } - } - } else { - // show standard out and standard error of the test JVM(s) on the console - 
testLogging.showStandardStreams = true - beforeTest { descriptor -> - logger.lifecycle("Running Test: " + descriptor) - } +task findScalaAndJavaTypes(type: Exec) { + description = "Check that Scala files only exist in the scala test dir and that java files do not reside in the scala test dir." + commandLine './scripts/checkScalaAndJavaFiles.sh' +} - // listen to standard out and standard error of the test JVM(s) - onOutput { descriptor, event -> - logger.lifecycle("Test: " + descriptor + " produced standard out/err: " + event.message ) - } - } +test { + description = "Runs the unit tests other than the SRA tests" testLogging { - testLogging { - events "skipped", "failed" - exceptionFormat = "full" - } - afterSuite { desc, result -> - if (!desc.parent) { // will match the outermost suite - println "Results: ${result.resultType} (${result.testCount} tests, ${result.successfulTestCount} successes, ${result.failedTestCount} failures, ${result.skippedTestCount} skipped)" - } - } + events "failed", "skipped" } -} -test { - description = "Runs the unit tests other than the SRA tests" + if (System.env.CI == "true") { + jvmArgs += '-Dsamjdk.sra_libraries_download=true' + } - useTestNG { - if( OperatingSystem.current().isUnix() ){ - excludeGroups "slow", "broken", "sra" - } else { - excludeGroups "slow", "broken", "unix", "sra" - } + tags { + exclude "slow" + exclude "broken" + if (System.env.CI == "false") exclude "sra" + if (!OperatingSystem.current().isUnix()) exclude "unix" } -} +} dependsOn findScalaAndJavaTypes task testSRA(type: Test) { - jvmArgs '-Dsamjdk.sra_libraries_download=true' + description = "Run the SRA tests" + jvmArgs += '-Dsamjdk.sra_libraries_download=true' - description "Run the SRA tests" - useTestNG { - configFailurePolicy 'continue' - includeGroups "sra" + tags { + exclude "slow" + exclude "broken" } } task wrapper(type: Wrapper) { description = "Regenerate the gradle wrapper" - gradleVersion = '2.13' + gradleVersion = '3.2.1' } // This is a hack to 
disable the java 8 default javadoc lint until we fix the html formatting @@ -188,7 +169,7 @@ uploadArchives { authentication(userName: project.findProperty("sonatypeUsername"), password: project.findProperty("sonatypePassword")) } - snapshotRepository(url: "https://artifactory.broadinstitute.org/artifactory/libs-snapshot-local/") { + snapshotRepository(url: "https://broadinstitute.jfrog.io/broadinstitute/libs-snapshot-local/") { authentication(userName: System.env.ARTIFACTORY_USERNAME, password: System.env.ARTIFACTORY_PASSWORD) } diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index ca78035ef..6ffa23784 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index aad2b2428..f08cd01bf 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ -#Fri May 13 14:00:35 EDT 2016 +#Fri Jan 20 17:10:11 EST 2017 distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-2.13-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-all.zip diff --git a/gradlew b/gradlew index 27309d923..9aa616c27 100755 --- a/gradlew +++ b/gradlew @@ -161,4 +161,9 @@ function splitJvmOpts() { eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" +# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong +if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then + cd "$(dirname "$0")" +fi + exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" diff --git a/scripts/checkScalaAndJavaFiles.sh b/scripts/checkScalaAndJavaFiles.sh new file mode 
100755 index 000000000..adadfd31c --- /dev/null +++ b/scripts/checkScalaAndJavaFiles.sh @@ -0,0 +1,17 @@ +#/bin/bash + +# Check that Scala files only exist in the scala test dir and +# that java files do not reside in the scala test dir + +if `find src | grep -v '^src/test/scala' | grep -q '\.scala$' ` ; then + echo 'Found scala file(s) outside of scala test directory'; + find src | grep -v '^src/test/scala' | grep '\.scala$' + exit 1; +fi + +if `find src/test/scala | grep -q '\.java$' ` ; then + echo 'Found java file(s) in scala test directory'; + find src/test/scala | grep '\.java$' + exit 1; +fi + diff --git a/scripts/release_picard.sh b/scripts/release_picard.sh deleted file mode 100755 index 732234ab1..000000000 --- a/scripts/release_picard.sh +++ /dev/null @@ -1,152 +0,0 @@ -#! /bin/bash - -# The MIT License -# -# Copyright (c) $today.year The Broad Institute -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - -PROGNAME=`basename $0` -USERNAME=alecw - -function usage () { - echo "USAGE: $PROGNAME " >&2 - echo "Tags Github Picard source, checks out and builds sources, uploads build results to Sourceforge.">&2 - echo "-t Build in . Default: $TMPDIR." >&2 - echo "-u Sourceforge username. Default: $USERNAME." >&2 -} - -function tag_exists() { - git tag | grep -q "$1$" - if test $? = 0 - then return 0 - else return 1 - fi -} - -function remote_does_not_exist() { - git ls-remote $1 2>/dev/null 1>/dev/null - if test $? = 0 - then return 1 - else return 0 - fi -} - -function remote_tag_does_not_exist() { - git ls-remote --tags $2 | grep -q "$1$"; - if test $? = 0 - then return 0 - else return 1 - fi -} - -set -e - -while getopts "ht:u:" options; do - case $options in - u ) USERNAME=$OPTARG;; - t ) TMPDIR=$OPTARG;; - h ) usage;; - \? ) usage - exit 1;; - * ) usage - exit 1;; - - esac -done -shift $(($OPTIND - 1)) - -if (( $# != 1 )) - then echo "ERROR: Incorrect number of arguments." >&2 - usage - exit 1 -fi - -if [[ x"$EDITOR" == x ]] -then echo "EDITOR environment variable must be set." >&2 - exit 1 -fi - -# Require actual Java 1.6. This is not necessary for compiling, because can run 1.7 with -target 1.6, -# but this is necessary in order to force unit tests to run with 1.6. -(echo $JAVA_HOME | fgrep -q 1.6 ) || { echo "JAVA_HOME $JAVA_HOME is not 1.6" ; exit 1; } -java_version=`java -version 2>&1 | fgrep -i version` -(echo $java_version | fgrep -q 1.6. ) || { echo "java -version: $java_version is not 1.6"; exit 1; } - -GITROOT=git@github.com:samtools/htsjdk.git -REMOTE=origin - -RELEASE_ID=$1 - -# Since releases are lexically sorted, need to filter in order to have 1.1xx be at the bottom. 
-PREV_RELEASE_ID=`git ls-remote --tags | grep -v "{}$" | awk '{print $2}' | sed -e "s_.*/__g" | egrep '[.]\d\d\d' | tail -1` - -if [[ -e $TMPDIR/htsjdk ]] -then echo "$TMPDIR/htsjdk already exists. Please remove or specify a different TMPDIR." >&2 - exit 1 -fi -cd $TMPDIR - -# clone -git clone $GITROOT htsjdk -cd htsjdk -ant clean # Shouldn't be necessary, but no harm - -# tag must not exist -if tag_exists $RELEASE_ID -then echo "ERROR: Tag $RELEASE_ID locally already exists" - exit 1 -fi - -# remote must exist -if remote_does_not_exist $REMOTE -then echo "ERROR: Remote $REMOTE does not exist" - exit 1 -fi - -# tag at remote must not exist -if remote_tag_does_not_exist $RELEASE_ID $REMOTE -then echo "ERROR: Tag $RELEASE_ID at remote $REMOTE already exists" - exit 1 -fi - -# tag the branch locally then push to remote -echo Tagging master as $tag and pushing the tag to $remote -# NB: we could use annotated tags in the future to store release notes, etc. -git tag $tag -git push $remote $tag # TODO: should we check this return value in case someone made a tag since we last checked? - -ant -lib lib/ant test - -ant -lib lib/ant clean all javadoc - -mkdir -p deploy/picard-tools/$RELEASE_ID - -mkdir -p deploy/htsjdk/$RELEASE_ID -cp dist/htsjdk-$RELEASE_ID.jar deploy/htsjdk/$RELEASE_ID/ - -# Make all files to be pushed to Sourceforge writable by group so that another Picard admin can overwrite them. 
- -chmod -R gu+rw javadoc deploy dist - -find javadoc deploy dist -type d -exec chmod g+s '{}' ';' - -scp -p -r javadoc $USERNAME,picard@web.sourceforge.net:htdocs - -cd deploy -scp -p -r htsjdk/$RELEASE_ID $USERNAME,picard@web.sourceforge.net:/home/frs/project/p/pi/picard/htsjdk/ diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 000000000..95584da4a --- /dev/null +++ b/settings.gradle @@ -0,0 +1 @@ +rootProject.name = "htsjdk" diff --git a/src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java b/src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java index 6bf28ef29..724e73c62 100644 --- a/src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java +++ b/src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java @@ -88,6 +88,7 @@ protected AbstractBAMFileIndex(final File file, final SAMSequenceDictionary dict /** * Close this index and release any associated resources. */ + @Override public void close() { mIndexBuffer.close(); } @@ -170,6 +171,7 @@ public int getNumberOfReferences() { * @return The file offset of the first record in the last linear bin, or -1 * if there are no elements in linear bins (i.e. no mapped reads). 
*/ + @Override public long getStartOfLastLinearBin() { seek(4); @@ -206,6 +208,7 @@ public long getStartOfLastLinearBin() { * @param reference the reference of interest * @return meta data for the reference */ + @Override public BAMIndexMetaData getMetaData(final int reference) { seek(4); diff --git a/src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java b/src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java index 769a7a735..0c3d48420 100644 --- a/src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java +++ b/src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java @@ -23,13 +23,12 @@ */ package htsjdk.samtools; +import javax.xml.bind.annotation.XmlTransient; import java.io.Serializable; import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; -import javax.xml.bind.annotation.XmlTransient; - /** * Base class for the various concrete records in a SAM header, providing uniform * access to the attributes. @@ -60,8 +59,6 @@ public void setAttribute(final String key, final Object value) { /** * Set the given value for the attribute named 'key'. Replaces an existing value, if any. * If value is null, the attribute is removed. - * Supported types are Character, Integer, Float and String. Byte and Short may also be - * passed in but they will be converted to Integer. * @param key attribute name * @param value attribute value */ @@ -72,6 +69,7 @@ public void setAttribute(final String key, final String value) { mAttributes.put(key, value); } } + /** * Returns the Set of attributes. */ @@ -113,4 +111,10 @@ protected int attributesHashCode() { @Override public String toString() { return getClass().getSimpleName() + this.mAttributes.toString(); } + + /** + * Returns the record in the SAM line-based text format. Fields are + * separated by '\t' characters. The String is NOT terminated by '\n'. 
+ */ + abstract public String getSAMString(); } diff --git a/src/main/java/htsjdk/samtools/AsyncSAMFileWriter.java b/src/main/java/htsjdk/samtools/AsyncSAMFileWriter.java index ab5b8d0b1..1a860f29b 100644 --- a/src/main/java/htsjdk/samtools/AsyncSAMFileWriter.java +++ b/src/main/java/htsjdk/samtools/AsyncSAMFileWriter.java @@ -48,11 +48,13 @@ public void setProgressLogger(final ProgressLoggerInterface progress) { * Adds an alignment to the queue to be written. Will re-throw any exception that was received when * writing prior record(s) to the underlying SAMFileWriter. */ + @Override public void addAlignment(final SAMRecord alignment) { write(alignment); } /** Returns the SAMFileHeader from the underlying SAMFileWriter. */ + @Override public SAMFileHeader getFileHeader() { return this.underlyingWriter.getFileHeader(); } diff --git a/src/main/java/htsjdk/samtools/BAMFileReader.java b/src/main/java/htsjdk/samtools/BAMFileReader.java index 98bb74f63..1d9110ad6 100644 --- a/src/main/java/htsjdk/samtools/BAMFileReader.java +++ b/src/main/java/htsjdk/samtools/BAMFileReader.java @@ -25,12 +25,8 @@ import htsjdk.samtools.seekablestream.SeekableStream; -import htsjdk.samtools.util.BinaryCodec; -import htsjdk.samtools.util.BlockCompressedInputStream; -import htsjdk.samtools.util.CloseableIterator; -import htsjdk.samtools.util.CoordMath; -import htsjdk.samtools.util.RuntimeIOException; -import htsjdk.samtools.util.StringLineReader; +import htsjdk.samtools.util.*; +import htsjdk.samtools.util.zip.InflaterFactory; import java.io.DataInputStream; import java.io.File; @@ -44,7 +40,7 @@ /** * Class for reading and querying BAM files. */ -class BAMFileReader extends SamReader.ReaderImplementation { +public class BAMFileReader extends SamReader.ReaderImplementation { // True if reading from a File rather than an InputStream private boolean mIsSeekable = false; @@ -67,10 +63,6 @@ // If true, all SAMRecords are fully decoded as they are read. 
private boolean eagerDecode; - // If true, the BAMFileReader will use asynchronous IO. - // Note: this field currently has no effect (is not hooked up anywhere), but will be in the future. See https://github.com/samtools/htsjdk/pull/576 - private final boolean useAsynchronousIO; - // For error-checking. private ValidationStringency mValidationStringency; @@ -95,41 +87,94 @@ /** * Prepare to read BAM from a stream (not seekable) * @param stream source of bytes. + * @param indexFile BAM index file * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @throws IOException */ BAMFileReader(final InputStream stream, final File indexFile, final boolean eagerDecode, final boolean useAsynchronousIO, final ValidationStringency validationStringency, - final SAMRecordFactory factory) - throws IOException { + final SAMRecordFactory samRecordFactory) + throws IOException { + this(stream, indexFile, eagerDecode, useAsynchronousIO, validationStringency, samRecordFactory, + BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * Prepare to read BAM from a stream (not seekable) + * @param stream source of bytes. + * @param indexFile BAM index file + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param validationStringency Controls how to handle invalidate reads or header lines. 
+ * @param samRecordFactory SAM record factory + * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream + * @throws IOException + */ + BAMFileReader(final InputStream stream, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory, + final InflaterFactory inflaterFactory) + throws IOException { mIndexFile = indexFile; mIsSeekable = false; - this.useAsynchronousIO = useAsynchronousIO; - mCompressedInputStream = new BlockCompressedInputStream(stream); + mCompressedInputStream = useAsynchronousIO ? new AsyncBlockCompressedInputStream(stream, inflaterFactory) : new BlockCompressedInputStream(stream, inflaterFactory); mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); this.eagerDecode = eagerDecode; this.mValidationStringency = validationStringency; - this.samRecordFactory = factory; + this.samRecordFactory = samRecordFactory; this.mFileHeader = readHeader(this.mStream, this.mValidationStringency, null); } /** * Prepare to read BAM from a file (seekable) * @param file source of bytes. + * @param indexFile BAM index file + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @throws IOException + */ + BAMFileReader(final File file, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory) + throws IOException { + this(file, indexFile, eagerDecode, useAsynchronousIO, validationStringency, samRecordFactory, BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * Prepare to read BAM from a file (seekable) + * @param file source of bytes. 
+ * @param indexFile BAM index file * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream + * @throws IOException */ BAMFileReader(final File file, final File indexFile, final boolean eagerDecode, final boolean useAsynchronousIO, final ValidationStringency validationStringency, - final SAMRecordFactory factory) + final SAMRecordFactory samRecordFactory, + final InflaterFactory inflaterFactory) throws IOException { - this(new BlockCompressedInputStream(file), indexFile!=null ? indexFile : SamFiles.findIndex(file), eagerDecode, useAsynchronousIO, file.getAbsolutePath(), validationStringency, factory); + this(useAsynchronousIO ? new AsyncBlockCompressedInputStream(file, inflaterFactory) : new BlockCompressedInputStream(file, inflaterFactory), + indexFile!=null ? indexFile : SamFiles.findIndex(file), eagerDecode, useAsynchronousIO, file.getAbsolutePath(), validationStringency, samRecordFactory); if (mIndexFile != null && mIndexFile.lastModified() < file.lastModified()) { System.err.println("WARNING: BAM index file " + mIndexFile.getAbsolutePath() + " is older than BAM " + file.getAbsolutePath()); @@ -138,62 +183,148 @@ mStream.setInputFileName(file.getAbsolutePath()); } + /** + * Prepare to read BAM from a stream (seekable) + * @param strm source of bytes + * @param indexFile BAM index file + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param validationStringency Controls how to handle invalidate reads or header lines. 
+ * @param samRecordFactory SAM record factory + * @throws IOException + */ + BAMFileReader(final SeekableStream strm, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory) + throws IOException { + this(strm, indexFile, eagerDecode, useAsynchronousIO, validationStringency, samRecordFactory, BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * Prepare to read BAM from a stream (seekable) + * @param strm source of bytes + * @param indexFile BAM index file + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream + * @throws IOException + */ BAMFileReader(final SeekableStream strm, final File indexFile, final boolean eagerDecode, final boolean useAsynchronousIO, final ValidationStringency validationStringency, - final SAMRecordFactory factory) + final SAMRecordFactory samRecordFactory, + final InflaterFactory inflaterFactory) throws IOException { - this(new BlockCompressedInputStream(strm), indexFile, eagerDecode, useAsynchronousIO, strm.getSource(), validationStringency, factory); + this(useAsynchronousIO ? new AsyncBlockCompressedInputStream(strm, inflaterFactory) : new BlockCompressedInputStream(strm, inflaterFactory), + indexFile, eagerDecode, useAsynchronousIO, strm.getSource(), validationStringency, samRecordFactory); } + /** + * Prepare to read BAM from a stream (seekable) + * @param strm source of bytes + * @param indexStream BAM index stream + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. 
+ * @param useAsynchronousIO if true, use asynchronous I/O + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @throws IOException + */ BAMFileReader(final SeekableStream strm, final SeekableStream indexStream, final boolean eagerDecode, final boolean useAsynchronousIO, final ValidationStringency validationStringency, - final SAMRecordFactory factory) + final SAMRecordFactory samRecordFactory) throws IOException { - this(new BlockCompressedInputStream(strm), indexStream, eagerDecode, useAsynchronousIO, strm.getSource(), validationStringency, factory); + this(strm, indexStream, eagerDecode, useAsynchronousIO, validationStringency, samRecordFactory, BlockGunzipper.getDefaultInflaterFactory()); } + /** + * Prepare to read BAM from a stream (seekable) + * @param strm source of bytes + * @param indexStream BAM index stream + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream + * @throws IOException + */ + BAMFileReader(final SeekableStream strm, + final SeekableStream indexStream, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory, + final InflaterFactory inflaterFactory) + throws IOException { + this(useAsynchronousIO ? 
new AsyncBlockCompressedInputStream(strm, inflaterFactory) : new BlockCompressedInputStream(strm, inflaterFactory), + indexStream, eagerDecode, useAsynchronousIO, strm.getSource(), validationStringency, samRecordFactory); + } + + /** + * Prepare to read BAM from a compressed stream (seekable) + * @param compressedInputStream source of bytes + * @param indexFile BAM index file + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param source string used when reporting errors + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @throws IOException + */ private BAMFileReader(final BlockCompressedInputStream compressedInputStream, final File indexFile, final boolean eagerDecode, final boolean useAsynchronousIO, final String source, final ValidationStringency validationStringency, - final SAMRecordFactory factory) + final SAMRecordFactory samRecordFactory) throws IOException { mIndexFile = indexFile; mIsSeekable = true; mCompressedInputStream = compressedInputStream; mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); this.eagerDecode = eagerDecode; - this.useAsynchronousIO = useAsynchronousIO; this.mValidationStringency = validationStringency; - this.samRecordFactory = factory; + this.samRecordFactory = samRecordFactory; this.mFileHeader = readHeader(this.mStream, this.mValidationStringency, source); mFirstRecordPointer = mCompressedInputStream.getFilePointer(); - } + } + /** + * Prepare to read BAM from a compressed stream (seekable) + * @param compressedInputStream source of bytes + * @param indexStream BAM index stream + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. 
+ * @param useAsynchronousIO if true, use asynchronous I/O + * @param source string used when reporting errors + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @throws IOException + */ private BAMFileReader(final BlockCompressedInputStream compressedInputStream, final SeekableStream indexStream, final boolean eagerDecode, final boolean useAsynchronousIO, final String source, final ValidationStringency validationStringency, - final SAMRecordFactory factory) + final SAMRecordFactory samRecordFactory) throws IOException { mIndexStream = indexStream; mIsSeekable = true; mCompressedInputStream = compressedInputStream; mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); this.eagerDecode = eagerDecode; - this.useAsynchronousIO = useAsynchronousIO; this.mValidationStringency = validationStringency; - this.samRecordFactory = factory; + this.samRecordFactory = samRecordFactory; this.mFileHeader = readHeader(this.mStream, this.mValidationStringency, source); mFirstRecordPointer = mCompressedInputStream.getFilePointer(); } @@ -210,6 +341,7 @@ static long findVirtualOffsetOfFirstRecord(final File bam) throws IOException { * If true, writes the source of every read into the source SAMRecords. * @param enabled true to write source information into each SAMRecord. */ + @Override void enableFileSource(final SamReader reader, final boolean enabled) { this.mReader = enabled ? reader : null; } @@ -218,6 +350,7 @@ void enableFileSource(final SamReader reader, final boolean enabled) { * If true, uses the caching version of the index reader. * @param enabled true to use the caching version of the reader. 
*/ + @Override protected void enableIndexCaching(final boolean enabled) { if(mIndex != null) throw new SAMException("Unable to turn on index caching; index file has already been loaded."); @@ -229,6 +362,7 @@ protected void enableIndexCaching(final boolean enabled) { * This is slower but more scalable when accessing large numbers of BAM files sequentially. * @param enabled True to use memory mapping, false to use regular I/O. */ + @Override protected void enableIndexMemoryMapping(final boolean enabled) { if (mIndex != null) { throw new SAMException("Unable to change index memory mapping; index file has already been loaded."); @@ -240,7 +374,7 @@ protected void enableIndexMemoryMapping(final boolean enabled) { this.mCompressedInputStream.setCheckCrcs(enabled); } - @Override void setSAMRecordFactory(final SAMRecordFactory factory) { this.samRecordFactory = factory; } + @Override void setSAMRecordFactory(final SAMRecordFactory samRecordFactory) { this.samRecordFactory = samRecordFactory; } @Override public SamReader.Type type() { @@ -250,6 +384,7 @@ protected void enableIndexMemoryMapping(final boolean enabled) { /** * @return true if ths is a BAM file, and has an index */ + @Override public boolean hasIndex() { return mIsSeekable && ((mIndexFile != null) || (mIndexStream != null)); } @@ -258,6 +393,7 @@ public boolean hasIndex() { * Retrieves the index for the given file type. Ensure that the index is of the specified type. * @return An index of the given type. */ + @Override public BAMIndex getIndex() { if(!hasIndex()) throw new SAMException("No index is available for this BAM file."); @@ -294,6 +430,7 @@ public void close() { mIndex = null; } + @Override public SAMFileHeader getFileHeader() { return mFileHeader; } @@ -301,10 +438,12 @@ public SAMFileHeader getFileHeader() { /** * Set error-checking level for subsequent SAMRecord reads. 
*/ + @Override void setValidationStringency(final ValidationStringency validationStringency) { this.mValidationStringency = validationStringency; } + @Override public ValidationStringency getValidationStringency() { return this.mValidationStringency; } @@ -317,6 +456,7 @@ public ValidationStringency getValidationStringency() { * getIterator() begins its iteration where the last one left off. That is the best that can be * done in that situation. */ + @Override public CloseableIterator getIterator() { if (mStream == null) { throw new IllegalStateException("File reader is closed"); @@ -421,6 +561,7 @@ public SAMFileSpan getFilePointerSpanningReads() { * @return Iterator for the matching SAMRecords * @see QueryInterval#optimizeIntervals(QueryInterval[]) */ + @Override public CloseableIterator query(final QueryInterval[] intervals, final boolean contained) { if (mStream == null) { throw new IllegalStateException("File reader is closed"); @@ -451,6 +592,7 @@ public SAMFileSpan getFilePointerSpanningReads() { * @param start Alignment start sought. * @return Iterator for the matching SAMRecords. */ + @Override public CloseableIterator queryAlignmentStart(final String sequence, final int start) { if (mStream == null) { throw new IllegalStateException("File reader is closed"); @@ -477,6 +619,7 @@ public SAMFileSpan getFilePointerSpanningReads() { * * @return Iterator for the matching SAMRecords. 
*/ + @Override public CloseableIterator queryUnmapped() { if (mStream == null) { throw new IllegalStateException("File reader is closed"); @@ -579,6 +722,7 @@ private static SAMSequenceRecord readSequenceRecord(final BinaryCodec stream, fi private boolean isClosed = false; + @Override public void close() { if (!isClosed) { if (mCurrentIterator != null && this != mCurrentIterator) { @@ -593,6 +737,7 @@ protected void assertOpen() { if (isClosed) throw new AssertionError("Iterator has been closed"); } + @Override public void remove() { throw new UnsupportedOperationException("Not supported: remove"); } @@ -639,11 +784,13 @@ public SAMRecord next() { } } + @Override public boolean hasNext() { assertOpen(); return (mNextRecord != null); } + @Override public SAMRecord next() { assertOpen(); final SAMRecord result = mNextRecord; @@ -738,25 +885,56 @@ private void assertIntervalsOptimized(final QueryInterval[] intervals) { } } - private CloseableIterator createIndexIterator(final QueryInterval[] intervals, - final boolean contained) { - - assertIntervalsOptimized(intervals); - - // Hit the index to determine the chunk boundaries for the required data. + /** + * Use the index to determine the chunk boundaries for the required intervals. 
+ * @param intervals the intervals to restrict reads to + * @param fileIndex the BAM index to use + * @return file pointer pairs corresponding to chunk boundaries + */ + public static BAMFileSpan getFileSpan(QueryInterval[] intervals, BAMIndex fileIndex) { final BAMFileSpan[] inputSpans = new BAMFileSpan[intervals.length]; - final BAMIndex fileIndex = getIndex(); for (int i = 0; i < intervals.length; ++i) { final QueryInterval interval = intervals[i]; final BAMFileSpan span = fileIndex.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end); inputSpans[i] = span; } - final long[] filePointers; + final BAMFileSpan span; if (inputSpans.length > 0) { - filePointers = BAMFileSpan.merge(inputSpans).toCoordinateArray(); + span = BAMFileSpan.merge(inputSpans); } else { - filePointers = null; + span = null; } + return span; + } + + private CloseableIterator createIndexIterator(final QueryInterval[] intervals, + final boolean contained) { + + assertIntervalsOptimized(intervals); + + BAMFileSpan span = getFileSpan(intervals, getIndex()); + + // Create an iterator over the above chunk boundaries. + final BAMFileIndexIterator iterator = new BAMFileIndexIterator(span == null ? null : span.toCoordinateArray()); + + // Add some preprocessing filters for edge-case reads that don't fit into this + // query type. + return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained)); + } + + /** + * Prepare to iterate through SAMRecords that match the given intervals. 
+ * @param intervals the intervals to restrict reads to + * @param contained if true, return records that are strictly + * contained in the intervals, otherwise return records that overlap + * @param filePointers file pointer pairs corresponding to chunk boundaries for the + * intervals + */ + public CloseableIterator createIndexIterator(final QueryInterval[] intervals, + final boolean contained, + final long[] filePointers) { + + assertIntervalsOptimized(intervals); // Create an iterator over the above chunk boundaries. final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers); @@ -785,6 +963,7 @@ private void assertIntervalsOptimized(final QueryInterval[] intervals) { advance(); } + @Override SAMRecord getNextRecord() throws IOException { // Advance to next file block if necessary @@ -827,6 +1006,7 @@ public BAMQueryFilteringIterator(final CloseableIterator iterator, /** * Returns true if a next element exists; false otherwise. */ + @Override public boolean hasNext() { assertOpen(); return mNextRecord != null; @@ -836,6 +1016,7 @@ public boolean hasNext() { * Gets the next record from the given iterator. * @return The next SAM record in the iterator. */ + @Override public SAMRecord next() { if(!hasNext()) throw new NoSuchElementException("BAMQueryFilteringIterator: no next element available"); diff --git a/src/main/java/htsjdk/samtools/BAMFileSpan.java b/src/main/java/htsjdk/samtools/BAMFileSpan.java index 193e44376..d99760d2a 100644 --- a/src/main/java/htsjdk/samtools/BAMFileSpan.java +++ b/src/main/java/htsjdk/samtools/BAMFileSpan.java @@ -78,6 +78,7 @@ public BAMFileSpan(final List chunks) { * Does this chunk list map to any position within the BAM file? * @return True iff the ChunkList points to any data within the BAM. */ + @Override public boolean isEmpty() { return chunks.isEmpty(); } @@ -86,6 +87,7 @@ public boolean isEmpty() { * Deep clone the given chunk list. * @return A copy of the chunk list. 
*/ + @Override public BAMFileSpan clone() { final BAMFileSpan clone = new BAMFileSpan(); for(final Chunk chunk: chunks) @@ -100,6 +102,7 @@ public BAMFileSpan clone() { * @param fileSpan The filespan before which to eliminate. * @return A new BAMFileSpan which contains the portion of the chunk list after the given chunk. */ + @Override public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) { if(fileSpan == null) return clone(); @@ -115,15 +118,55 @@ public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) { validateSorted(); final BAMFileSpan trimmedChunkList = new BAMFileSpan(); + final long chunkStart = bamFileSpan.chunks.get(0).getChunkStart(); for(final Chunk chunkToTrim: chunks) { - if(chunkToTrim.getChunkEnd() > chunkToTrim.getChunkStart()) { - if(chunkToTrim.getChunkStart() >= bamFileSpan.chunks.get(0).getChunkStart()) { + if(chunkToTrim.getChunkEnd() > chunkStart) { + if(chunkToTrim.getChunkStart() >= chunkStart) { // This chunk from the list is completely beyond the start of the filtering chunk. trimmedChunkList.add(chunkToTrim.clone()); } else { // This chunk from the list partially overlaps the filtering chunk and must be trimmed. - trimmedChunkList.add(new Chunk(bamFileSpan.chunks.get(0).getChunkStart(),chunkToTrim.getChunkEnd())); + trimmedChunkList.add(new Chunk(chunkStart,chunkToTrim.getChunkEnd())); + } + } + } + return trimmedChunkList; + } + + /** + * Creates a new file span by removing all chunks after the given file span ends. + * If a chunk in the chunk list starts before and ends after the given + * chunk, the second portion of the chunk will be deleted. + * @param fileSpan The filespan after which to eliminate. + * @return A new BAMFileSpan which contains the portion of the chunk list before the + * given chunk. 
+ */ + public SAMFileSpan removeContentsAfter(final SAMFileSpan fileSpan) { + if(fileSpan == null) + return clone(); + + if(!(fileSpan instanceof BAMFileSpan)) + throw new SAMException("Unable to compare "); + + final BAMFileSpan bamFileSpan = (BAMFileSpan)fileSpan; + + if(bamFileSpan.isEmpty()) + return clone(); + + validateSorted(); + + final BAMFileSpan trimmedChunkList = new BAMFileSpan(); + final long chunkEnd = bamFileSpan.chunks.get(bamFileSpan.chunks.size() - 1).getChunkEnd(); + for(final Chunk chunkToTrim: chunks) { + if(chunkToTrim.getChunkStart() < chunkEnd) { + if(chunkToTrim.getChunkEnd() <= chunkEnd) { + // This chunk from the list is completely before the end of the filtering chunk. + trimmedChunkList.add(chunkToTrim.clone()); + } + else { + // This chunk from the list partially overlaps the filtering chunk and must be trimmed. + trimmedChunkList.add(new Chunk(chunkToTrim.getChunkStart(),chunkEnd)); } } } @@ -134,6 +177,7 @@ public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) { * Gets a file span over the data immediately following this span. * @return The a pointer to data immediately following this span. 
*/ + @Override public SAMFileSpan getContentsFollowing() { if(chunks.isEmpty()) throw new SAMException("Unable to get the file pointer following this one: no data present."); diff --git a/src/main/java/htsjdk/samtools/BAMFileWriter.java b/src/main/java/htsjdk/samtools/BAMFileWriter.java index f6a474e2d..fc766ae7d 100644 --- a/src/main/java/htsjdk/samtools/BAMFileWriter.java +++ b/src/main/java/htsjdk/samtools/BAMFileWriter.java @@ -115,6 +115,7 @@ private BAMIndexer createBamIndex(final String path) { } } + @Override protected void writeAlignment(final SAMRecord alignment) { prepareToWriteAlignments(); @@ -135,10 +136,12 @@ protected void writeAlignment(final SAMRecord alignment) { } } + @Override protected void writeHeader(final String textHeader) { writeHeader(outputBinaryCodec, getFileHeader(), textHeader); } + @Override protected void finish() { outputBinaryCodec.close(); try { @@ -151,6 +154,7 @@ protected void finish() { } /** @return absolute path, or null if this writer does not correspond to a file. */ + @Override protected String getFilename() { return outputBinaryCodec.getOutputFileName(); } diff --git a/src/main/java/htsjdk/samtools/BAMIndex.java b/src/main/java/htsjdk/samtools/BAMIndex.java index 3663df9d0..62c69c79c 100644 --- a/src/main/java/htsjdk/samtools/BAMIndex.java +++ b/src/main/java/htsjdk/samtools/BAMIndex.java @@ -63,5 +63,6 @@ /** * Close the index and release any associated resources. 
*/ + @Override void close(); } diff --git a/src/main/java/htsjdk/samtools/BAMIndexWriter.java b/src/main/java/htsjdk/samtools/BAMIndexWriter.java index b036b684d..aafcb5fbf 100644 --- a/src/main/java/htsjdk/samtools/BAMIndexWriter.java +++ b/src/main/java/htsjdk/samtools/BAMIndexWriter.java @@ -49,6 +49,7 @@ /** * Any necessary processing at the end of the file */ + @Override public void close(); } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/BAMRecord.java b/src/main/java/htsjdk/samtools/BAMRecord.java index c45566f08..14b629595 100644 --- a/src/main/java/htsjdk/samtools/BAMRecord.java +++ b/src/main/java/htsjdk/samtools/BAMRecord.java @@ -113,6 +113,7 @@ protected BAMRecord(final SAMFileHeader header, /** * Force all the lazily-initialized attributes to be decoded. */ + @Override protected void eagerDecode() { getReadName(); getCigar(); @@ -341,7 +342,12 @@ private String decodeReadName() { return NULL_SEQUENCE; } final int basesOffset = readNameSize() + cigarSize(); - return SAMUtils.compressedBasesToBytes(mReadLength, mRestOfBinaryData, basesOffset); + try { + return SAMUtils.compressedBasesToBytes(mReadLength, mRestOfBinaryData, basesOffset); + } catch ( final IllegalArgumentException ex ) { + final String msg = ex.getMessage() + " in read: " + getReadName(); + throw new IllegalStateException(msg, ex); + } } /* methods for computing disk size of variably-sized elements, in order to locate diff --git a/src/main/java/htsjdk/samtools/BAMRecordCodec.java b/src/main/java/htsjdk/samtools/BAMRecordCodec.java index dc1ca8196..e363a5b95 100644 --- a/src/main/java/htsjdk/samtools/BAMRecordCodec.java +++ b/src/main/java/htsjdk/samtools/BAMRecordCodec.java @@ -49,6 +49,7 @@ public BAMRecordCodec(final SAMFileHeader header, final SAMRecordFactory factory this.samRecordFactory = factory; } + @Override public BAMRecordCodec clone() { // Do not clone the references to codecs, as they must be distinct for each instance. 
return new BAMRecordCodec(this.header, this.samRecordFactory); @@ -56,6 +57,7 @@ public BAMRecordCodec clone() { /** Sets the output stream that records will be written to. */ + @Override public void setOutputStream(final OutputStream os) { this.binaryCodec.setOutputStream(os); } @@ -67,6 +69,7 @@ public void setOutputStream(final OutputStream os, final String filename) { } /** Sets the input stream that records will be read from. */ + @Override public void setInputStream(final InputStream is) { this.binaryCodec.setInputStream(is); } @@ -85,6 +88,7 @@ public void setInputStream(final InputStream is, final String filename) { * * @param alignment Record to be written. */ + @Override public void encode(final SAMRecord alignment) { // Compute block size, as it is the first element of the file representation of SAMRecord final int readLength = alignment.getReadLength(); @@ -150,7 +154,12 @@ public void encode(final SAMRecord alignment) { // that it is specced as a uint. this.binaryCodec.writeInt(cigarElement); } - this.binaryCodec.writeBytes(SAMUtils.bytesToCompressedBases(alignment.getReadBases())); + try { + this.binaryCodec.writeBytes(SAMUtils.bytesToCompressedBases(alignment.getReadBases())); + } catch ( final IllegalArgumentException ex ) { + final String msg = ex.getMessage() + " in read: " + alignment.getReadName(); + throw new IllegalStateException(msg, ex); + } byte[] qualities = alignment.getBaseQualities(); if (qualities.length == 0) { qualities = new byte[alignment.getReadLength()]; @@ -171,6 +180,7 @@ public void encode(final SAMRecord alignment) { * @return null if no more records. Should throw exception if EOF is encountered in the middle of * a record. 
*/ + @Override public SAMRecord decode() { int recordLength = 0; try { diff --git a/src/main/java/htsjdk/samtools/Bin.java b/src/main/java/htsjdk/samtools/Bin.java index 1ac572400..f199d0a87 100644 --- a/src/main/java/htsjdk/samtools/Bin.java +++ b/src/main/java/htsjdk/samtools/Bin.java @@ -105,6 +105,7 @@ public boolean containsChunks() { * @param other Other bin to which this bin should be compared. * @return -1 if this < other, 0 if this == other, 1 if this > other. */ + @Override public int compareTo(final Bin other) { if(other == null) throw new ClassCastException("Cannot compare to a null object"); diff --git a/src/main/java/htsjdk/samtools/BinList.java b/src/main/java/htsjdk/samtools/BinList.java index e7107d44f..2111ba403 100644 --- a/src/main/java/htsjdk/samtools/BinList.java +++ b/src/main/java/htsjdk/samtools/BinList.java @@ -60,6 +60,7 @@ protected BinList(final int referenceSequence, final BitSet bins) { * Gets an iterator over all selected bins. * @return An iterator over all selected bins. */ + @Override public Iterator iterator() { return new BinIterator(); } @@ -95,6 +96,7 @@ public BinIterator() { * Are there more bins in this set, waiting to be returned? * @return True if more bins are remaining. */ + @Override public boolean hasNext() { return nextBin >= 0; } @@ -103,6 +105,7 @@ public boolean hasNext() { * Gets the next bin in the provided BinList. * @return the next available bin in the BinList. 
*/ + @Override public Bin next() { if(!hasNext()) throw new NoSuchElementException("This BinIterator is currently empty"); @@ -111,6 +114,7 @@ public Bin next() { return new Bin(referenceSequence,currentBin); } + @Override public void remove() { throw new UnsupportedOperationException("Unable to remove from a bin iterator"); } diff --git a/src/main/java/htsjdk/samtools/BinaryBAMIndexWriter.java b/src/main/java/htsjdk/samtools/BinaryBAMIndexWriter.java index 35a22f7ac..5719aecf5 100644 --- a/src/main/java/htsjdk/samtools/BinaryBAMIndexWriter.java +++ b/src/main/java/htsjdk/samtools/BinaryBAMIndexWriter.java @@ -78,6 +78,7 @@ public BinaryBAMIndexWriter(final int nRef, final OutputStream output) { /** * Write this content as binary output */ + @Override public void writeReference(final BAMIndexContent content) { if (content == null) { @@ -147,6 +148,7 @@ public void writeReference(final BAMIndexContent content) { * * @param count */ + @Override public void writeNoCoordinateRecordCount(final Long count) { codec.writeLong(count == null ? 0 : count); } @@ -154,6 +156,7 @@ public void writeNoCoordinateRecordCount(final Long count) { /** * Any necessary processing at the end of the file */ + @Override public void close() { codec.close(); } diff --git a/src/main/java/htsjdk/samtools/BinningIndexContent.java b/src/main/java/htsjdk/samtools/BinningIndexContent.java index 9e32601c2..124353e27 100644 --- a/src/main/java/htsjdk/samtools/BinningIndexContent.java +++ b/src/main/java/htsjdk/samtools/BinningIndexContent.java @@ -171,6 +171,7 @@ int getNumberOfNonNullBins() { /** * @return An iterator over all non-empty bins. */ + @Override public Iterator iterator() { return new BinIterator(); } @@ -190,6 +191,7 @@ public BinIterator() { * * @return True if more bins are remaining. 
*/ + @Override public boolean hasNext() { while (nextBin <= maxBinNumber) { if (getBin(nextBin) != null) return true; @@ -203,6 +205,7 @@ public boolean hasNext() { * * @return the next available bin in the BinList. */ + @Override public Bin next() { if (!hasNext()) throw new NoSuchElementException("This BinIterator is currently empty"); @@ -211,6 +214,7 @@ public Bin next() { return result; } + @Override public void remove() { throw new UnsupportedOperationException("Unable to remove from a bin iterator"); } diff --git a/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java b/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java index 4707b7bcc..c588bdb46 100644 --- a/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java +++ b/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java @@ -17,6 +17,7 @@ import htsjdk.samtools.cram.structure.Slice; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.RuntimeIOException; +import htsjdk.samtools.util.SequenceUtil; import java.io.IOException; import java.io.OutputStream; @@ -437,7 +438,13 @@ protected void flushContainer() throws IllegalArgumentException, IllegalAccessEx final SAMRecord restoredSamRecord = f.create(cramRecords.get(i)); assert (restoredSamRecord.getAlignmentStart() == samRecords.get(i).getAlignmentStart()); assert (restoredSamRecord.getReferenceName().equals(samRecords.get(i).getReferenceName())); - assert (restoredSamRecord.getReadString().equals(samRecords.get(i).getReadString())); + + if (!restoredSamRecord.getReadString().equals(samRecords.get(i).getReadString())) { + // try to fix the original read bases by normalizing them to BAM set: + final byte[] originalReadBases = samRecords.get(i).getReadString().getBytes(); + final String originalReadBasesUpperCaseIupacNoDot = new String(SequenceUtil.toBamReadBasesInPlace(originalReadBases)); + assert (restoredSamRecord.getReadString().equals(originalReadBasesUpperCaseIupacNoDot)); + } assert 
(restoredSamRecord.getBaseQualityString().equals(samRecords.get(i).getBaseQualityString())); } } diff --git a/src/main/java/htsjdk/samtools/CRAMFileReader.java b/src/main/java/htsjdk/samtools/CRAMFileReader.java index 9a29d367f..a7a40889c 100644 --- a/src/main/java/htsjdk/samtools/CRAMFileReader.java +++ b/src/main/java/htsjdk/samtools/CRAMFileReader.java @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright 2013 EMBL-EBI + * Copyright 2013-2016 EMBL-EBI * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -466,9 +466,8 @@ void enableFileSource(final SamReader reader, final boolean enabled) { iterator.setFileSource(enabled ? reader : null); } - private class CRAMIntervalIterator - extends BAMQueryMultipleIntervalsIteratorFilter - implements SAMRecordIterator { + private class CRAMIntervalIterator extends BAMQueryMultipleIntervalsIteratorFilter + implements CloseableIterator { // the granularity of this iterator is the container, so the records returned // by it must still be filtered to find those matching the filter criteria @@ -507,11 +506,6 @@ public CRAMIntervalIterator(final QueryInterval[] queries, final boolean contain } @Override - public SAMRecordIterator assertSorted(final SortOrder sortOrder) { - return null; - } - - @Override public void close() { if (unfilteredIterator != null) { unfilteredIterator.close(); diff --git a/src/main/java/htsjdk/samtools/CRAMIterator.java b/src/main/java/htsjdk/samtools/CRAMIterator.java index f8179e689..33492df69 100644 --- a/src/main/java/htsjdk/samtools/CRAMIterator.java +++ b/src/main/java/htsjdk/samtools/CRAMIterator.java @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright 2013 EMBL-EBI + * Copyright 2013-2016 EMBL-EBI * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except 
in compliance with the License. @@ -292,7 +292,7 @@ public void close() { @Override public SAMRecordIterator assertSorted(final SortOrder sortOrder) { - throw new RuntimeException("Not implemented."); + return SamReader.AssertingIterator.of(this).assertSorted(sortOrder); } public SamReader getFileSource() { diff --git a/src/main/java/htsjdk/samtools/CachingBAMFileIndex.java b/src/main/java/htsjdk/samtools/CachingBAMFileIndex.java index 8010ce59e..5597832c2 100644 --- a/src/main/java/htsjdk/samtools/CachingBAMFileIndex.java +++ b/src/main/java/htsjdk/samtools/CachingBAMFileIndex.java @@ -61,6 +61,7 @@ public CachingBAMFileIndex(final File file, final SAMSequenceDictionary dictiona * in a range that can be scanned to find SAMRecords that overlap the given positions. * May return null if there is no content overlapping the region. */ + @Override public BAMFileSpan getSpanOverlapping(final int referenceIndex, final int startPos, final int endPos) { final BAMIndexContent queryResults = getQueryResults(referenceIndex); @@ -80,6 +81,7 @@ public BAMFileSpan getSpanOverlapping(final int referenceIndex, final int startP * @param endPos 1-based end of the desired interval, inclusive * @return a list of bins that contain relevant data. */ + @Override public BinList getBinsOverlapping(final int referenceIndex, final int startPos, final int endPos) { final BitSet regionBins = GenomicIndexUtil.regionToBins(startPos, endPos); if (regionBins == null) { @@ -93,6 +95,7 @@ public BinList getBinsOverlapping(final int referenceIndex, final int startPos, * @param bin The bin over which to perform an overlapping query. * @return The file pointers */ + @Override public BAMFileSpan getSpanOverlapping(final Bin bin) { if(bin == null) return null; @@ -138,6 +141,7 @@ public BAMFileSpan getSpanOverlapping(final Bin bin) { * @param referenceIndex The reference to load. CachingBAMFileIndex only stores index data for entire references. * @return The index information for this reference. 
*/ + @Override protected BAMIndexContent getQueryResults(final int referenceIndex) { // WeakHashMap is a bit weird in that its lookups are done via equals() equality, but expirations must be // handled via == equality. This implementation jumps through a few hoops to make sure that == equality still diff --git a/src/main/java/htsjdk/samtools/Chunk.java b/src/main/java/htsjdk/samtools/Chunk.java index 0d77b0cd3..dbe27c64d 100644 --- a/src/main/java/htsjdk/samtools/Chunk.java +++ b/src/main/java/htsjdk/samtools/Chunk.java @@ -38,6 +38,7 @@ public Chunk(final long start, final long end) { mChunkEnd = end; } + @Override public Chunk clone() { return new Chunk(mChunkStart,mChunkEnd); } @@ -58,6 +59,7 @@ protected void setChunkEnd(final long value) { mChunkEnd = value; } + @Override public int compareTo(final Chunk chunk) { int result = Long.signum(mChunkStart - chunk.mChunkStart); if (result == 0) { diff --git a/src/main/java/htsjdk/samtools/CigarElement.java b/src/main/java/htsjdk/samtools/CigarElement.java index c645e6cc2..016956c56 100644 --- a/src/main/java/htsjdk/samtools/CigarElement.java +++ b/src/main/java/htsjdk/samtools/CigarElement.java @@ -36,6 +36,7 @@ private final CigarOperator operator; public CigarElement(final int length, final CigarOperator operator) { + if (length < 0) throw new IllegalArgumentException(String.format("Cigar element being constructed with negative length: %d and operation: %s" , length, operator.name())); this.length = length; this.operator = operator; } diff --git a/src/main/java/htsjdk/samtools/ComparableSamRecordIterator.java b/src/main/java/htsjdk/samtools/ComparableSamRecordIterator.java index 06186a1d0..cb2da892c 100644 --- a/src/main/java/htsjdk/samtools/ComparableSamRecordIterator.java +++ b/src/main/java/htsjdk/samtools/ComparableSamRecordIterator.java @@ -63,6 +63,7 @@ public SamReader getReader() { * @param that another iterator to compare to * @return a negative, 0 or positive number as described in the Comparator 
interface */ + @Override public int compareTo(final ComparableSamRecordIterator that) { if (this.comparator.getClass() != that.comparator.getClass()) { throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " + diff --git a/src/main/java/htsjdk/samtools/CoordinateSortedPairInfoMap.java b/src/main/java/htsjdk/samtools/CoordinateSortedPairInfoMap.java index d892d655a..37c200cc5 100644 --- a/src/main/java/htsjdk/samtools/CoordinateSortedPairInfoMap.java +++ b/src/main/java/htsjdk/samtools/CoordinateSortedPairInfoMap.java @@ -202,6 +202,7 @@ public int sizeInRam() { * or removed from map when iteration is in progress, nor may a second iteration be started. * Iterator must be closed in order to allow normal access to the map. */ + @Override public CloseableIterator> iterator() { if (iterationInProgress) throw new IllegalStateException("Cannot be called when iteration is in progress"); iterationInProgress = true; @@ -238,11 +239,13 @@ private void createIteratorForMapInRam() { currentReferenceIterator = mapInRam.entrySet().iterator(); } + @Override public void close() { closed = true; iterationInProgress = false; } + @Override public boolean hasNext() { if (closed) throw new IllegalStateException("Iterator has been closed"); if (currentReferenceIterator != null && !currentReferenceIterator.hasNext()) @@ -250,6 +253,7 @@ public boolean hasNext() { return currentReferenceIterator != null; } + @Override public Map.Entry next() { if (closed) throw new IllegalStateException("Iterator has been closed"); if (!hasNext()) throw new NoSuchElementException(); @@ -258,6 +262,7 @@ public boolean hasNext() { return ret; } + @Override public void remove() { throw new UnsupportedOperationException(); } diff --git a/src/main/java/htsjdk/samtools/DefaultSAMRecordFactory.java b/src/main/java/htsjdk/samtools/DefaultSAMRecordFactory.java index 7e3848e3d..707cc6ec1 100644 --- a/src/main/java/htsjdk/samtools/DefaultSAMRecordFactory.java +++ 
b/src/main/java/htsjdk/samtools/DefaultSAMRecordFactory.java @@ -14,6 +14,7 @@ public static DefaultSAMRecordFactory getInstance() { } /** Create a new SAMRecord to be filled in */ + @Override public SAMRecord createSAMRecord(final SAMFileHeader header) { return new SAMRecord(header); } @@ -23,6 +24,7 @@ public SAMRecord createSAMRecord(final SAMFileHeader header) { * any value other than NO_ALIGNMENT_REFERENCE_INDEX, the values must be resolvable against the sequence * dictionary in the header argument. */ + @Override public BAMRecord createBAMRecord (final SAMFileHeader header, final int referenceSequenceIndex, final int alignmentStart, diff --git a/src/main/java/htsjdk/samtools/DiskBasedBAMFileIndex.java b/src/main/java/htsjdk/samtools/DiskBasedBAMFileIndex.java index b5d6f597a..1eddddde3 100644 --- a/src/main/java/htsjdk/samtools/DiskBasedBAMFileIndex.java +++ b/src/main/java/htsjdk/samtools/DiskBasedBAMFileIndex.java @@ -56,6 +56,7 @@ public DiskBasedBAMFileIndex(final File file, final SAMSequenceDictionary dictio * positions. The last position in each pair is a virtual file pointer to the first SAMRecord beyond * the range that may contain the indicated SAMRecords. 
*/ + @Override public BAMFileSpan getSpanOverlapping(final int referenceIndex, final int startPos, final int endPos) { final BAMIndexContent queryResults = query(referenceIndex,startPos,endPos); @@ -69,6 +70,7 @@ public BAMFileSpan getSpanOverlapping(final int referenceIndex, final int startP return new BAMFileSpan(chunkList); } + @Override protected BAMIndexContent getQueryResults(final int reference){ throw new UnsupportedOperationException(); // todo: there ought to be a way to support this using the first startPos for the reference and the last diff --git a/src/main/java/htsjdk/samtools/DuplicateScoringStrategy.java b/src/main/java/htsjdk/samtools/DuplicateScoringStrategy.java index 1abd51473..26c83a584 100644 --- a/src/main/java/htsjdk/samtools/DuplicateScoringStrategy.java +++ b/src/main/java/htsjdk/samtools/DuplicateScoringStrategy.java @@ -36,7 +36,7 @@ public enum ScoringStrategy { SUM_OF_BASE_QUALITIES, TOTAL_MAPPED_REFERENCE_LENGTH, - RANDOM, + RANDOM } /** Hash used for the RANDOM scoring strategy. */ @@ -46,8 +46,8 @@ private static enum Attr { DuplicateScore } /** Calculates a score for the read which is the sum of scores over Q15. */ - private static short getSumOfBaseQualities(final SAMRecord rec) { - short score = 0; + private static int getSumOfBaseQualities(final SAMRecord rec) { + int score = 0; for (final byte b : rec.getBaseQualities()) { if (b >= 15) score += b; } @@ -64,6 +64,8 @@ public static short computeDuplicateScore(final SAMRecord record, final ScoringS /** * Returns the duplicate score computed from the given fragment. + * value should be capped by Short.MAX_VALUE/2 since the score from two reads will be + * added and an overflow will be * * If true is given to assumeMateCigar, then any score that can use the mate cigar to compute the mate's score will return the score * computed on both ends. 
@@ -72,24 +74,40 @@ public static short computeDuplicateScore(final SAMRecord record, final ScoringS Short storedScore = (Short) record.getTransientAttribute(Attr.DuplicateScore); if (storedScore == null) { - short score = 0; - + short score=0; switch (scoringStrategy) { case SUM_OF_BASE_QUALITIES: - score += getSumOfBaseQualities(record); + // two (very) long reads worth of high-quality bases can go over Short.MAX_VALUE/2 + // and risk overflow. + score += (short) Math.min(getSumOfBaseQualities(record), Short.MAX_VALUE / 2); break; case TOTAL_MAPPED_REFERENCE_LENGTH: if (!record.getReadUnmappedFlag()) { - score += record.getCigar().getReferenceLength(); + // no need to remember the score since this scoring mechanism is symmetric + score = (short) Math.min(record.getCigar().getReferenceLength(), Short.MAX_VALUE / 2); } if (assumeMateCigar && record.getReadPairedFlag() && !record.getMateUnmappedFlag()) { - score += SAMUtils.getMateCigar(record).getReferenceLength(); + score += (short) Math.min(SAMUtils.getMateCigar(record).getReferenceLength(), Short.MAX_VALUE / 2); } break; + // The RANDOM score gives the same score to both reads so that they get filtered together. + // it's not critical do use the readName since the scores from both ends get added, but it seem + // to be clearer this way. case RANDOM: - score += (short) (hasher.hashUnencodedChars(record.getReadName()) >> 16); + // start with a random number between Short.MIN_VALUE/4 and Short.MAX_VALUE/4 + score += (short) (hasher.hashUnencodedChars(record.getReadName()) & 0b11_1111_1111_1111); + // subtract Short.MIN_VALUE/4 from it to end up with a number between + // 0 and Short.MAX_VALUE/2. This number can be then discounted in case the read is + // not passing filters. We need to stay far from overflow so that when we add the two + // scores from the two read mates we do not overflow since that could cause us to chose a + // failing read-pair instead of a passing one. 
+ score -= Short.MIN_VALUE / 4; } + // make sure that filter-failing records are heavily discounted. (the discount can happen twice, once + // for each mate, so need to make sure we do not subtract more than Short.MIN_VALUE overall.) + score += record.getReadFailsVendorQualityCheckFlag() ? (short) (Short.MIN_VALUE / 2) : 0; + storedScore = score; record.setTransientAttribute(Attr.DuplicateScore, storedScore); } @@ -110,7 +128,7 @@ public static int compare(final SAMRecord rec1, final SAMRecord rec2, final Scor int cmp; // always prefer paired over non-paired - if (rec1.getReadPairedFlag() != rec2.getReadPairedFlag()) return rec1.getReadPairedFlag() ? 1 : -1; + if (rec1.getReadPairedFlag() != rec2.getReadPairedFlag()) return rec1.getReadPairedFlag() ? -1 : 1; cmp = computeDuplicateScore(rec2, scoringStrategy, assumeMateCigar) - computeDuplicateScore(rec1, scoringStrategy, assumeMateCigar); @@ -125,7 +143,7 @@ public static int compare(final SAMRecord rec1, final SAMRecord rec2, final Scor } /** - * Compare two records based on their duplicate scores. The duplicate scores for each record is assume to be + * Compare two records based on their duplicate scores. The duplicate scores for each record is assumed to be * pre-computed by computeDuplicateScore and stored in the "DS" tag. If the scores are equal, we break * ties based on mapping quality (added to the mate's mapping quality if paired and mapped), then library/read name. * diff --git a/src/main/java/htsjdk/samtools/DuplicateSetIterator.java b/src/main/java/htsjdk/samtools/DuplicateSetIterator.java index 9a0c6f108..6e833035b 100644 --- a/src/main/java/htsjdk/samtools/DuplicateSetIterator.java +++ b/src/main/java/htsjdk/samtools/DuplicateSetIterator.java @@ -114,12 +114,13 @@ public DuplicateSetIterator(final CloseableIterator iterator, } @Deprecated - /** Do not use this method as the first duplicate set will not be compared with this scoring strategy. 
+ /** @deprecated Do not use this method as the first duplicate set will not be compared with this scoring strategy. * Instead, provide a comparator to the constructor that has the scoring strategy set. */ public void setScoringStrategy(final DuplicateScoringStrategy.ScoringStrategy scoringStrategy) { this.comparator.setScoringStrategy(scoringStrategy); } + @Override public DuplicateSet next() { DuplicateSet duplicateSet = null; @@ -161,12 +162,15 @@ public DuplicateSet next() { return duplicateSet; } + @Override public void close() { wrappedIterator.close(); } + @Override public boolean hasNext() { return (!duplicateSet.isEmpty() || wrappedIterator.hasNext()); } // Does nothing! + @Override public void remove() { } } diff --git a/src/main/java/htsjdk/samtools/MergingSamRecordIterator.java b/src/main/java/htsjdk/samtools/MergingSamRecordIterator.java index a294752de..45d002e3e 100644 --- a/src/main/java/htsjdk/samtools/MergingSamRecordIterator.java +++ b/src/main/java/htsjdk/samtools/MergingSamRecordIterator.java @@ -107,6 +107,7 @@ private void startIterationIfRequired() { /** * Close down all open iterators. */ + @Override public void close() { // Iterators not in the priority queue have already been closed; only close down the iterators that are still in the priority queue. for (CloseableIterator iterator : pq) @@ -114,12 +115,14 @@ public void close() { } /** Returns true if any of the underlying iterators has more records, otherwise false. */ + @Override public boolean hasNext() { startIterationIfRequired(); return !this.pq.isEmpty(); } /** Returns the next record from the top most iterator during merging. */ + @Override public SAMRecord next() { startIterationIfRequired(); @@ -163,6 +166,7 @@ private void addIfNotEmpty(final ComparableSamRecordIterator iterator) { } /** Unsupported operation. 
*/ + @Override public void remove() { throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()"); } @@ -176,10 +180,12 @@ private SAMRecordComparator getComparator() { // For unsorted build a fake comparator that compares based on object ID if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) { return new SAMRecordComparator() { + @Override public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) { return System.identityHashCode(lhs) - System.identityHashCode(rhs); } + @Override public int compare(final SAMRecord lhs, final SAMRecord rhs) { return fileOrderCompare(lhs, rhs); } @@ -206,6 +212,7 @@ public SAMFileHeader getMergedHeader() { private class MergedSequenceDictionaryCoordinateOrderComparator extends SAMRecordCoordinateComparator implements Serializable { private static final long serialVersionUID = 1L; + @Override public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) { final int referenceIndex1 = getReferenceIndex(samRecord1); final int referenceIndex2 = getReferenceIndex(samRecord2); diff --git a/src/main/java/htsjdk/samtools/QueryInterval.java b/src/main/java/htsjdk/samtools/QueryInterval.java index bdfb52c37..581e0f648 100644 --- a/src/main/java/htsjdk/samtools/QueryInterval.java +++ b/src/main/java/htsjdk/samtools/QueryInterval.java @@ -29,6 +29,7 @@ public QueryInterval(final int referenceIndex, final int start, final int end) { } + @Override public int compareTo(final QueryInterval other) { int comp = this.referenceIndex - other.referenceIndex; if (comp != 0) return comp; diff --git a/src/main/java/htsjdk/samtools/SAMFileHeader.java b/src/main/java/htsjdk/samtools/SAMFileHeader.java index 47543c2a6..eff595341 100644 --- a/src/main/java/htsjdk/samtools/SAMFileHeader.java +++ b/src/main/java/htsjdk/samtools/SAMFileHeader.java @@ -24,6 +24,8 @@ package htsjdk.samtools; +import htsjdk.samtools.util.CollectionUtil; +import htsjdk.samtools.util.Log; import 
htsjdk.samtools.util.StringLineReader; import java.io.StringWriter; @@ -47,15 +49,19 @@ public static final String SORT_ORDER_TAG = "SO"; public static final String GROUP_ORDER_TAG = "GO"; public static final String CURRENT_VERSION = "1.5"; - public static final Set ACCEPTABLE_VERSIONS = - new HashSet(Arrays.asList("1.0", "1.3", "1.4", "1.5")); + public static final Set ACCEPTABLE_VERSIONS = CollectionUtil.makeSet("1.0", "1.3", "1.4", "1.5"); + private SortOrder sortOrder = null; + private GroupOrder groupOrder = null; + + private static final Log log = Log.getInstance(SAMFileHeader.class); /** * These tags are of known type, so don't need a type field in the text representation. */ public static final Set STANDARD_TAGS = - new HashSet(Arrays.asList(VERSION_TAG, SORT_ORDER_TAG, GROUP_ORDER_TAG)); + new HashSet<>(Arrays.asList(VERSION_TAG, SORT_ORDER_TAG, GROUP_ORDER_TAG)); + @Override Set getStandardTags() { return STANDARD_TAGS; } @@ -64,11 +70,11 @@ * Ways in which a SAM or BAM may be sorted. */ public enum SortOrder { - unsorted(null), queryname(SAMRecordQueryNameComparator.class), coordinate(SAMRecordCoordinateComparator.class), - duplicate(SAMRecordDuplicateComparator.class); // NB: this is not in the SAM spec! + duplicate(SAMRecordDuplicateComparator.class), // NB: this is not in the SAM spec! 
+ unknown(null); private final Class comparator; @@ -105,16 +111,14 @@ public SAMRecordComparator getComparatorInstance() { none, query, reference } - private List mReadGroups = - new ArrayList(); - private List mProgramRecords = new ArrayList(); - private final Map mReadGroupMap = - new HashMap(); - private final Map mProgramRecordMap = new HashMap(); + private List mReadGroups = new ArrayList<>(); + private List mProgramRecords = new ArrayList<>(); + private final Map mReadGroupMap = new HashMap<>(); + private final Map mProgramRecordMap = new HashMap<>(); private SAMSequenceDictionary mSequenceDictionary = new SAMSequenceDictionary(); - final private List mComments = new ArrayList(); + final private List mComments = new ArrayList<>(); private String textHeader; - private final List mValidationErrors = new ArrayList(); + private final List mValidationErrors = new ArrayList<>(); public SAMFileHeader() { setAttribute(VERSION_TAG, CURRENT_VERSION); @@ -127,11 +131,11 @@ public SAMFileHeader(final SAMSequenceDictionary dict) { } public String getVersion() { - return (String) getAttribute("VN"); + return getAttribute(VERSION_TAG); } public String getCreator() { - return (String) getAttribute("CR"); + return getAttribute("CR"); } public SAMSequenceDictionary getSequenceDictionary() { @@ -248,26 +252,82 @@ public SAMProgramRecord createProgramRecord() { } public SortOrder getSortOrder() { - final String so = getAttribute("SO"); - if (so == null || so.equals("unknown")) { - return SortOrder.unsorted; + if (sortOrder == null) { + final String so = getAttribute(SORT_ORDER_TAG); + if (so == null) { + sortOrder = SortOrder.unsorted; + } else { + try { + return SortOrder.valueOf(so); + } catch (IllegalArgumentException e) { + log.warn("Found non conforming header SO tag: " + so + ". 
Treating as 'unknown'."); + sortOrder = SortOrder.unknown; + } + } } - return SortOrder.valueOf((String) so); + return sortOrder; } public void setSortOrder(final SortOrder so) { - setAttribute("SO", so.name()); + sortOrder = so; + super.setAttribute(SORT_ORDER_TAG, so.name()); } public GroupOrder getGroupOrder() { - if (getAttribute("GO") == null) { - return GroupOrder.none; + if (groupOrder == null) { + final String go = getAttribute(GROUP_ORDER_TAG); + if (go == null) { + groupOrder = GroupOrder.none; + } else { + try { + return GroupOrder.valueOf(go); + } catch (IllegalArgumentException e) { + log.warn("Found non conforming header GO tag: " + go + ". Treating as 'none'."); + groupOrder = GroupOrder.none; + } + } } - return GroupOrder.valueOf((String)getAttribute("GO")); + return groupOrder; } public void setGroupOrder(final GroupOrder go) { - setAttribute("GO", go.name()); + groupOrder = go; + super.setAttribute(GROUP_ORDER_TAG, go.name()); + } + + + /** + * Set the given value for the attribute named 'key'. Replaces an existing value, if any. + * If value is null, the attribute is removed. + * Otherwise, the value will be converted to a String with toString. + * @param key attribute name + * @param value attribute value + * @deprecated Use {@link #setAttribute(String, String) instead + */ + @Deprecated + @Override + public void setAttribute(final String key, final Object value) { + if (key.equals(SORT_ORDER_TAG) || key.equals(GROUP_ORDER_TAG)) { + this.setAttribute(key, value.toString()); + } else { + super.setAttribute(key, value); + } + } + + /** + * Set the given value for the attribute named 'key'. Replaces an existing value, if any. + * If value is null, the attribute is removed. 
+ * @param key attribute name + * @param value attribute value + */ + @Override + public void setAttribute(final String key, final String value) { + if (key.equals(SORT_ORDER_TAG)) { + this.sortOrder = null; + } else if (key.equals(GROUP_ORDER_TAG)) { + this.groupOrder = null; + } + super.setAttribute(key, value); } /** @@ -353,19 +413,25 @@ public int hashCode() { return result; } + @Override public final SAMFileHeader clone() { final SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); codec.setValidationStringency(ValidationStringency.SILENT); + return codec.decode(new StringLineReader(getSAMString()), "SAMFileHeader.clone"); + } + + @Override + public String getSAMString() { final StringWriter stringWriter = new StringWriter(); - codec.encode(stringWriter, this); - return codec.decode(new StringLineReader(stringWriter.toString()), "SAMFileHeader.clone"); + new SAMTextHeaderCodec().encode(stringWriter, this); + return stringWriter.toString(); } /** Little class to generate program group IDs */ public static class PgIdGenerator { private int recordCounter; - private final Set idsThatAreAlreadyTaken = new HashSet(); + private final Set idsThatAreAlreadyTaken = new HashSet<>(); public PgIdGenerator(final SAMFileHeader header) { for (final SAMProgramRecord pgRecord : header.getProgramRecords()) { @@ -393,7 +459,6 @@ public String getNonCollidingId(final String recordId) { idsThatAreAlreadyTaken.add(newId); return newId; } - } } } diff --git a/src/main/java/htsjdk/samtools/SAMFileWriter.java b/src/main/java/htsjdk/samtools/SAMFileWriter.java index fe99591f0..24936a0c1 100644 --- a/src/main/java/htsjdk/samtools/SAMFileWriter.java +++ b/src/main/java/htsjdk/samtools/SAMFileWriter.java @@ -46,5 +46,6 @@ /** * Must be called to flush or file will likely be defective. 
*/ + @Override void close(); } diff --git a/src/main/java/htsjdk/samtools/SAMFileWriterFactory.java b/src/main/java/htsjdk/samtools/SAMFileWriterFactory.java index 61f1c9c13..30b36d7b3 100644 --- a/src/main/java/htsjdk/samtools/SAMFileWriterFactory.java +++ b/src/main/java/htsjdk/samtools/SAMFileWriterFactory.java @@ -86,6 +86,14 @@ public static void setDefaultCreateMd5File(final boolean createMd5File) { } /** + * Gets the default for whether to create md5Files for BAM files this factory. + * @see #setDefaultCreateMd5File(boolean) + */ + public static boolean getDefaultCreateMd5File() { + return defaultCreateMd5File; + } + + /** * Sets whether to create md5Files for BAMs from this factory. */ public SAMFileWriterFactory setCreateMd5File(final boolean createMd5File) { @@ -128,6 +136,14 @@ public static void setDefaultCreateIndexWhileWriting(final boolean setting) { } /** + * Gets the default for subsequent SAMFileWriterFactories that do not specify whether to create an index. + * @see #setDefaultCreateIndexWhileWriting + */ + public static boolean getDefaultCreateIndexWhileWriting() { + return defaultCreateIndexWhileWriting; + } + + /** * Convenience method allowing newSAMFileWriterFactory().setCreateIndex(true); * Equivalent to SAMFileWriterFactory.setDefaultCreateIndexWhileWriting(true); newSAMFileWriterFactory(); * If a BAM or CRAM (not SAM) file is created, the setting is true, and the file header specifies coordinate order, @@ -158,6 +174,14 @@ public SAMFileWriterFactory setMaxRecordsInRam(final int maxRecordsInRam) { } /** + * Gets the maximum number of records held in RAM before spilling to disk during sorting. + * @see #setMaxRecordsInRam(int) + */ + public int getMaxRecordsInRam() { + return maxRecordsInRam; + } + + /** * Turn on or off the use of asynchronous IO for writing output SAM and BAM files. If true then * each SAMFileWriter creates a dedicated thread which is used for compression and IO activities. 
*/ @@ -195,6 +219,14 @@ public SAMFileWriterFactory setTempDirectory(final File tmpDir) { } /** + * Gets the temporary directory that will be used when sorting data. + * @see #setTempDirectory(File) + */ + public File getTempDirectory() { + return tmpDir; + } + + /** * Set the flag output format only when writing text. * Default value: [[htsjdk.samtools.SAMTextWriter.samFlagFieldOutput.DECIMAL]] */ @@ -237,7 +269,6 @@ public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean pre if (this.createIndex && !createIndex) { log.warn("Cannot create index for BAM because output file is not a regular file: " + outputFile.getAbsolutePath()); } - if (this.tmpDir != null) ret.setTempDirectory(this.tmpDir); initializeBAMWriter(ret, header, presorted, createIndex); if (this.useAsyncIo) return new AsyncSAMFileWriter(ret, this.asyncOutputBufferSize); @@ -252,6 +283,7 @@ private void initializeBAMWriter(final BAMFileWriter writer, final SAMFileHeader if (maxRecordsInRam != null) { writer.setMaxRecordsInRam(maxRecordsInRam); } + if (this.tmpDir != null) writer.setTempDirectory(this.tmpDir); writer.setHeader(header); if (createIndex && writer.getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) { writer.enableBamIndexConstruction(); @@ -278,14 +310,7 @@ public SAMFileWriter makeSAMWriter(final SAMFileHeader header, final boolean pre ? 
new SAMTextWriter(new Md5CalculatingOutputStream(new FileOutputStream(outputFile, false), new File(outputFile.getAbsolutePath() + ".md5")), samFlagFieldOutput) : new SAMTextWriter(outputFile, samFlagFieldOutput); - ret.setSortOrder(header.getSortOrder(), presorted); - if (maxRecordsInRam != null) { - ret.setMaxRecordsInRam(maxRecordsInRam); - } - ret.setHeader(header); - - if (this.useAsyncIo) return new AsyncSAMFileWriter(ret, this.asyncOutputBufferSize); - else return ret; + return initWriter(header, presorted, ret); } catch (final IOException ioe) { throw new RuntimeIOException("Error opening file: " + outputFile.getAbsolutePath()); } @@ -308,7 +333,7 @@ public SAMFileWriter makeSAMWriter(final SAMFileHeader header, final boolean pre if (samFlagFieldOutput == SamFlagField.NONE) { samFlagFieldOutput = Defaults.SAM_FLAG_FIELD_FORMAT; } - return initWriter(header, presorted, false, new SAMTextWriter(stream, samFlagFieldOutput)); + return initWriter(header, presorted, new SAMTextWriter(stream, samFlagFieldOutput)); } /** @@ -322,24 +347,23 @@ public SAMFileWriter makeSAMWriter(final SAMFileHeader header, final boolean pre */ public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean presorted, final OutputStream stream) { - return initWriter(header, presorted, true, new BAMFileWriter(stream, null, this.getCompressionLevel(), this.deflaterFactory)); + return initWriter(header, presorted, new BAMFileWriter(stream, null, this.getCompressionLevel(), this.deflaterFactory)); } /** * Initialize SAMTextWriter or a BAMFileWriter and possibly wrap in AsyncSAMFileWriter - * * @param header entire header. Sort order is determined by the sortOrder property of this arg. * @param presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder. - * @param binary do we want to generate a BAM or a SAM * @param writer SAM or BAM writer to initialize and maybe wrap. 
*/ - private SAMFileWriter initWriter(final SAMFileHeader header, final boolean presorted, final boolean binary, + private SAMFileWriter initWriter(final SAMFileHeader header, final boolean presorted, final SAMFileWriterImpl writer) { writer.setSortOrder(header.getSortOrder(), presorted); if (maxRecordsInRam != null) { writer.setMaxRecordsInRam(maxRecordsInRam); } + if (this.tmpDir != null) writer.setTempDirectory(this.tmpDir); writer.setHeader(header); if (this.useAsyncIo) return new AsyncSAMFileWriter(writer, this.asyncOutputBufferSize); diff --git a/src/main/java/htsjdk/samtools/SAMFileWriterImpl.java b/src/main/java/htsjdk/samtools/SAMFileWriterImpl.java index 130ecea4a..31a8604dc 100644 --- a/src/main/java/htsjdk/samtools/SAMFileWriterImpl.java +++ b/src/main/java/htsjdk/samtools/SAMFileWriterImpl.java @@ -75,6 +75,7 @@ public static int getDefaultMaxRecordsInRam() { * Sets the progress logger used by this implementation. Setting this lets this writer emit log * messages as SAM records in a SortingCollection are being written to disk. */ + @Override public void setProgressLogger(final ProgressLoggerInterface progress) { this.progressLogger = progress; } @@ -110,7 +111,11 @@ void setMaxRecordsInRam(final int maxRecordsInRam) { } this.maxRecordsInRam = maxRecordsInRam; } - + + int getMaxRecordsInRam() { + return maxRecordsInRam; + } + /** * When writing records that are not presorted, specify the path of the temporary directory * for spilling to disk. Must be called before setHeader(). @@ -122,6 +127,10 @@ void setTempDirectory(final File tmpDir) { } } + File getTempDirectory() { + return tmpDir; + } + /** * Must be called before addAlignment. Header cannot be null. 
*/ @@ -153,6 +162,7 @@ public void setHeader(final SAMFileHeader header) } } + @Override public SAMFileHeader getFileHeader() { return header; } @@ -180,6 +190,7 @@ private SAMRecordComparator makeComparator() { * @throws IllegalArgumentException if the record's reference or mate reference indices cannot be * resolved against the writer's header using the current reference and mate reference names */ + @Override public void addAlignment(final SAMRecord alignment) { alignment.setHeaderStrict(header); // re-establish the record header and resolve reference indices @@ -206,6 +217,7 @@ private void assertPresorted(final SAMRecord alignment) { /** * Must be called or else file will likely be defective. */ + @Override public final void close() { if (!isClosed) { diff --git a/src/main/java/htsjdk/samtools/SAMProgramRecord.java b/src/main/java/htsjdk/samtools/SAMProgramRecord.java index 3bbecf90d..f5ddd964a 100644 --- a/src/main/java/htsjdk/samtools/SAMProgramRecord.java +++ b/src/main/java/htsjdk/samtools/SAMProgramRecord.java @@ -57,6 +57,7 @@ public SAMProgramRecord(final String id, SAMProgramRecord srcProgramRecord) { } } + @Override public String getId() { return getProgramGroupId(); } @@ -126,7 +127,14 @@ public int hashCode() { return result; } + @Override Set getStandardTags() { return STANDARD_TAGS; } + + + @Override + public String getSAMString() { + return new SAMTextHeaderCodec().getPGLine(this); + } } diff --git a/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java b/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java index fd81852a0..14f1c50e3 100644 --- a/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java +++ b/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java @@ -74,6 +74,7 @@ public SAMReadGroupRecord(final String id, final SAMReadGroupRecord srcProgramRe } } + @Override public String getId() { return getReadGroupId(); } public String getReadGroupId() { return mReadGroupId; } @@ -158,8 +159,14 @@ public int hashCode() { return 
mReadGroupId.hashCode(); } + @Override Set getStandardTags() { return STANDARD_TAGS; } + + @Override + public String getSAMString() { + return new SAMTextHeaderCodec().getRGLine(this); + } } diff --git a/src/main/java/htsjdk/samtools/SAMRecord.java b/src/main/java/htsjdk/samtools/SAMRecord.java index 13ec3860a..ec394ca17 100644 --- a/src/main/java/htsjdk/samtools/SAMRecord.java +++ b/src/main/java/htsjdk/samtools/SAMRecord.java @@ -238,7 +238,9 @@ public void setReadString(final String value) { mReadBases = NULL_SEQUENCE; } else { final byte[] bases = StringUtil.stringToBytes(value); - SAMUtils.normalizeBases(bases); + if (bases != null) { + SAMUtils.normalizeBases(bases); + } setReadBases(bases); } } @@ -262,7 +264,8 @@ public void setReadBases(final byte[] value) { * @return number of bases in the read. */ public int getReadLength() { - return getReadBases().length; + final byte[] readBases = getReadBases(); + return readBases == null ? 0 : readBases.length; } /** @@ -572,14 +575,16 @@ protected static String resolveNameFromIndex(final int referenceIndex, final SAM } /** - * @return 1-based inclusive leftmost position of the clipped sequence, or 0 if there is no position. + * @return 1-based inclusive leftmost position of the sequence remaining after clipping, or 0 + * if there is no position, e.g. for unmapped read. */ public int getAlignmentStart() { return mAlignmentStart; } /** - * @param value 1-based inclusive leftmost position of the clipped sequence, or 0 if there is no position. + * @param value 1-based inclusive leftmost position of the sequence remaining after clipping or 0 + * if there is no position, e.g. for unmapped read. */ public void setAlignmentStart(final int value) { mAlignmentStart = value; @@ -590,7 +595,8 @@ public void setAlignmentStart(final int value) { } /** - * @return 1-based inclusive rightmost position of the clipped sequence, or 0 read if unmapped. 
+ * @return 1-based inclusive rightmost position of the sequence remaining after clipping or 0 + * if there is no position, e.g. for unmapped read. */ public int getAlignmentEnd() { if (getReadUnmappedFlag()) { @@ -627,38 +633,44 @@ public int getUnclippedEnd() { /** - * @param offset 1-based location within the unclipped sequence or 0 if there is no position. - *

* Non static version of the static function with the same name. - * @return 1-based inclusive reference position of the unclipped sequence at a given offset, + * + * @param position 1-based location within the unclipped sequence + * @return 1-based reference position of the unclipped sequence at a given read position, + * or 0 if there is no position. */ - public int getReferencePositionAtReadPosition(final int offset) { - return getReferencePositionAtReadPosition(this, offset); + public int getReferencePositionAtReadPosition(final int position) { + return getReferencePositionAtReadPosition(this, position); } /** - * @param rec record to use - * @param offset 1-based location within the unclipped sequence - * @return 1-based inclusive reference position of the unclipped sequence at a given offset, - * or 0 if there is no position. + * Returns the 1-based reference position for the provided 1-based position in read. + * * For example, given the sequence NNNAAACCCGGG, cigar 3S9M, and an alignment start of 1, - * and a (1-based)offset 10 (start of GGG) it returns 7 (1-based offset starting after the soft clip. + * and a (1-based) position of 10 (start of GGG) it returns 7 (1-based position starting after + * the soft clip. + * * For example: given the sequence AAACCCGGGTTT, cigar 4M1D6M, an alignment start of 1, - * an offset of 4 returns reference position 4, an offset of 5 returns reference position 6. + * a position of 4, returns reference position 4, a position of 5 returns reference position 6. + * * Another example: given the sequence AAACCCGGGTTT, cigar 4M1I6M, an alignment start of 1, - * an offset of 4 returns reference position 4, an offset of 5 returns 0. + * a position of 4 returns reference position 4, an position of 5 returns 0. + * + * @param rec record to use + * @param position 1-based location within the unclipped sequence + * @return 1-based reference position of the unclipped sequence at a given read position, + * or 0 if there is no position. 
*/ - public static int getReferencePositionAtReadPosition(final SAMRecord rec, final int offset) { - - if (offset == 0) return 0; + public static int getReferencePositionAtReadPosition(final SAMRecord rec, final int position) { + if (position == 0) return 0; for (final AlignmentBlock alignmentBlock : rec.getAlignmentBlocks()) { - if (CoordMath.getEnd(alignmentBlock.getReadStart(), alignmentBlock.getLength()) < offset) { + if (CoordMath.getEnd(alignmentBlock.getReadStart(), alignmentBlock.getLength()) < position) { continue; - } else if (offset < alignmentBlock.getReadStart()) { + } else if (position < alignmentBlock.getReadStart()) { return 0; } else { - return alignmentBlock.getReferenceStart() + offset - alignmentBlock.getReadStart(); + return alignmentBlock.getReferenceStart() + position - alignmentBlock.getReadStart(); } } return 0; // offset not located in an alignment block @@ -666,8 +678,9 @@ public static int getReferencePositionAtReadPosition(final SAMRecord rec, final /** + * Returns the 1-based position in the read of the 1-based reference position provided. + * * @param pos 1-based reference position - * return the offset * @return 1-based (to match getReferencePositionAtReadPosition behavior) inclusive position into the * unclipped sequence at a given reference position, or 0 if there is no such position. * @@ -678,37 +691,43 @@ public int getReadPositionAtReferencePosition(final int pos) { } /** + * Non-static version of static function with the same name. See examples below. 
+ * * @param pos 1-based reference position - * @param returnLastBaseIfDeleted if positive, and reference position matches a deleted base in the read, function will - * return the offset + * @param returnLastBaseIfDeleted if true, and reference position matches a deleted base in the read, + * function will return the position of the last non-deleted base + * @return 1-based (to match getReferencePositionAtReadPosition behavior) inclusive position into the - * unclipped sequence at a given reference position, - * or 0 if there is no such position. If returnLastBaseIfDeleted is true deletions are assumed to "live" on the last read base - * in the preceding block. - * - * Non-static version of static function with the same name. + * unclipped sequence at a given reference position, or 0 if there is no such position. If + * returnLastBaseIfDeleted is true deletions are assumed to "live" on the last read base + * in the preceding block. */ public int getReadPositionAtReferencePosition(final int pos, final boolean returnLastBaseIfDeleted) { return getReadPositionAtReferencePosition(this, pos, returnLastBaseIfDeleted); } /** - * @param rec record to use - * @param pos 1-based reference position - * @param returnLastBaseIfDeleted if positive, and reference position matches a deleted base in the read, function will - * return the offset - * @return 1-based (to match getReferencePositionAtReadPosition behavior) inclusive position into the - * unclipped sequence at a given reference position, - * or 0 if there is no such position. If returnLastBaseIfDeleted is true deletions are assumed to "live" on the last read base - * in the preceding block. + * Returns the 1-based position in the read of the provided reference position, or 0 if no + * such position exists. + * * For example, given the sequence NNNAAACCCGGG, cigar 3S9M, and an alignment start of 1, - * and a (1-based)pos of 7 (start of GGG) it returns 10 (1-based offset including the soft clip.
+ * and a (1-based) pos of 7 (start of GGG) it returns 10 (1-based position including the soft clip). + * + * For example: given the sequence AAACCCGGGT, cigar 4M1D6M, an alignment start of 1, - * a reference position of 4 returns offset of 4, a reference of 5 also returns an offset 4 (using "left aligning") if returnLastBaseIfDeleted - * and 0 otherwise. + * a reference position of 4 returns read position 4, a reference position of 5 also returns a read + * position of 4 if returnLastBaseIfDeleted and 0 otherwise. + * * For example: given the sequence AAACtCGGGTT, cigar 4M1I6M, an alignment start of 1, - * a position 4 returns an offset 5, a position of 5 returns 6 (the inserted base is the 5th offset), a position of 11 returns 0 since - * that position in the reference doesn't overlap the read at all. + * a position of 4 returns a position of 5, a position of 5 returns 6 (the inserted base is the 5th read position), + * a position of 11 returns 0 since that position in the reference doesn't overlap the read at all. + * + * @param rec record to use + * @param pos 1-based reference position + * @param returnLastBaseIfDeleted if true, and reference position matches a deleted base in the read, + * function will return the position of the last non-deleted base + * @return 1-based (to match getReferencePositionAtReadPosition behavior) inclusive position into the + * unclipped sequence at a given reference position, or 0 if there is no such position. If + * returnLastBaseIfDeleted is true deletions are assumed to "live" on the last read base + * in the preceding block. 
* */ public static int getReadPositionAtReferencePosition(final SAMRecord rec, final int pos, final boolean returnLastBaseIfDeleted) { @@ -1500,7 +1519,7 @@ public SAMTagAndValue(final String tag, final Object value) { */ public List getAttributes() { SAMBinaryTagAndValue binaryAttributes = getBinaryAttributes(); - final List ret = new ArrayList(); + final List ret = new ArrayList<>(); while (binaryAttributes != null) { ret.add(new SAMTagAndValue(SAMTagUtil.getSingleton().makeStringTag(binaryAttributes.tag), binaryAttributes.value)); @@ -1750,7 +1769,7 @@ protected void eagerDecode() { /** * Run all validations of CIGAR. These include validation that the CIGAR makes sense independent of * placement, plus validation that CIGAR + placement yields all bases with M operator within the range of the reference. - * @param recordNumber For error reporting. -1 if not known. + * @param recordNumber For error reporting, the record number in the SAM/BAM file. -1 if not known. * @return List of errors, or null if no errors. 
*/ public List validateCigar(final long recordNumber) { @@ -1859,35 +1878,40 @@ public int hashCode() { ArrayList ret = null; if (!getReadPairedFlag()) { if (getProperPairFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_PROPER_PAIR, "Proper pair flag should not be set for unpaired read.", getReadName())); if (firstOnly) return ret; } if (getMateUnmappedFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_MATE_UNMAPPED, "Mate unmapped flag should not be set for unpaired read.", getReadName())); if (firstOnly) return ret; } if (getMateNegativeStrandFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_MATE_NEG_STRAND, "Mate negative strand flag should not be set for unpaired read.", getReadName())); if (firstOnly) return ret; } if (getFirstOfPairFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_FIRST_OF_PAIR, "First of pair flag should not be set for unpaired read.", getReadName())); if (firstOnly) return ret; } if (getSecondOfPairFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_SECOND_OF_PAIR, "Second of pair flag should not be set for unpaired read.", getReadName())); if (firstOnly) return ret; } if (null != getHeader() && getMateReferenceIndex() != NO_ALIGNMENT_REFERENCE_INDEX) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_MATE_REF_INDEX, "MRNM should not be set 
for unpaired read.", getReadName())); if (firstOnly) return ret; } + if (!getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) { + if (ret == null) ret = new ArrayList<>(); + ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_UNPAIRED_MATE_REFERENCE, "Unpaired read mate reference is " + getMateReferenceName() + " not " + SAMRecord.NO_ALIGNMENT_REFERENCE_NAME + " for unpaired read", getReadName())); + if (firstOnly) return ret; + } } else { final List errors = isValidReferenceIndexAndPosition(mMateReferenceIndex, mMateReferenceName, getMateAlignmentStart(), true, firstOnly); @@ -1918,23 +1942,23 @@ public int hashCode() { */ } if (getInferredInsertSize() > MAX_INSERT_SIZE || getInferredInsertSize() < -MAX_INSERT_SIZE) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_INSERT_SIZE, "Insert size out of range", getReadName())); if (firstOnly) return ret; } if (getReadUnmappedFlag()) { if (getNotPrimaryAlignmentFlag()) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_NOT_PRIM_ALIGNMENT, "Not primary alignment flag should not be set for unmapped read.", getReadName())); if (firstOnly) return ret; } if (getSupplementaryAlignmentFlag()) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_SUPPLEMENTARY_ALIGNMENT, "Supplementary alignment flag should not be set for unmapped read.", getReadName())); if (firstOnly) return ret; } if (getMappingQuality() != 0) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_MAPPING_QUALITY, "MAPQ should be 0 for unmapped read.", getReadName())); if (firstOnly) return ret; } @@ -1943,22 +1967,22 @@ public int 
hashCode() { TODO: PIC-97 This validation should be enabled, but probably at this point there are too many BAM files that have the proper pair flag set when read or mate is unmapped. if (getProperPairFlagUnchecked()) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_PROPER_PAIR, "Proper pair flag should not be set for unmapped read.", getReadName())); } */ } else { if (getMappingQuality() >= 256) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_MAPPING_QUALITY, "MAPQ should be < 256.", getReadName())); if (firstOnly) return ret; } if (getCigarLength() == 0) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, "CIGAR should have > zero elements for mapped read.", getReadName())); /* todo - will uncomment once unit tests are added } else if (getCigar().getReadLength() != getReadLength()) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, "CIGAR read length " + getCigar().getReadLength() + " doesn't match read length " + getReadLength(), getReadName())); */ if (firstOnly) return ret; @@ -1969,7 +1993,7 @@ public int hashCode() { if (firstOnly) return ret; } if (!hasReferenceName()) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_READ_UNMAPPED, "Mapped read should have valid reference name", getReadName())); if (firstOnly) return ret; } @@ -1987,14 +2011,14 @@ public int hashCode() { // Validate the RG ID is found in header final String rgId = (String)getAttribute(SAMTagUtil.getSingleton().RG); if (rgId != null && getHeader() != null && 
getHeader().getReadGroup(rgId) == null) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.READ_GROUP_NOT_FOUND, "RG ID on SAMRecord not found in header: " + rgId, getReadName())); if (firstOnly) return ret; } final List errors = isValidReferenceIndexAndPosition(mReferenceIndex, mReferenceName, getAlignmentStart(), false); if (errors != null) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.addAll(errors); if (firstOnly) return ret; } @@ -2005,7 +2029,7 @@ public int hashCode() { final String cq = (String)getAttribute(SAMTagUtil.getSingleton().CQ); final String cs = (String)getAttribute(SAMTagUtil.getSingleton().CS); if (cq == null || cq.isEmpty() || cs == null || cs.isEmpty()) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.EMPTY_READ, "Zero-length read without FZ, CS or CQ tag", getReadName())); if (firstOnly) return ret; @@ -2019,7 +2043,7 @@ public int hashCode() { } } if (!hasIndel) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.EMPTY_READ, "Colorspace read with zero-length bases but no indel", getReadName())); if (firstOnly) return ret; @@ -2028,7 +2052,7 @@ public int hashCode() { } } if (this.getReadLength() != getBaseQualities().length && !Arrays.equals(getBaseQualities(), NULL_QUALS)) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_QUALS_LENGTH, "Read length does not match quals length", getReadName())); if (firstOnly) return ret; @@ -2036,13 +2060,39 @@ public int hashCode() { if (this.getAlignmentStart() != NO_ALIGNMENT_START && this.getIndexingBin() != null && this.computeIndexingBin() != this.getIndexingBin()) { - 
if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_INDEXING_BIN, "bin field of BAM record does not equal value computed based on alignment start and end, and length of sequence to which read is aligned", getReadName())); if (firstOnly) return ret; } + if (getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME) && + getMateAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) { + if (ret == null) ret = new ArrayList<>(); + ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_UNALIGNED_MATE_START, + "The unaligned mate start position is " + getAlignmentStart() + ", should be " + SAMRecord.NO_ALIGNMENT_START, + getReadName())); + if (firstOnly) return ret; + } + + if (getCigar().getReadLength() != 0 && getCigar().getReadLength() != getReadLength()) { + if (ret == null) ret = new ArrayList<>(); + ret.add(new SAMValidationError(SAMValidationError.Type.MISMATCH_CIGAR_SEQ_LENGTH, + "CIGAR covers " + getCigar().getReadLength() + " bases but the sequence is " + getReadLength() + " read bases ", + getReadName())); + if (firstOnly) return ret; + } + + if (getBaseQualities().length != 0 && getReadLength() != getBaseQualities().length) { + if (ret == null) ret = new ArrayList<>(); + ret.add(new SAMValidationError( + SAMValidationError.Type.MISMATCH_SEQ_QUAL_LENGTH, + "Read length is " + getReadLength() + " bases but have " + mBaseQualities.length + " qualities ", + getReadName())); + if (firstOnly) return ret; + } + if (ret == null || ret.isEmpty()) { return null; } @@ -2080,13 +2130,13 @@ protected void setFileSource(final SAMFileSource fileSource) { ArrayList ret = null; if (!hasReference) { if (alignmentStart != 0) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_ALIGNMENT_START, buildMessage("Alignment start should be 0 because reference name = *.", isMate), 
getReadName())); if (firstOnly) return ret; } } else { if (alignmentStart == 0) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_ALIGNMENT_START, buildMessage("Alignment start should != 0 because reference name != *.", isMate), getReadName())); if (firstOnly) return ret; } @@ -2094,12 +2144,12 @@ protected void setFileSource(final SAMFileSource fileSource) { final SAMSequenceRecord sequence = (referenceIndex != null? getHeader().getSequence(referenceIndex): getHeader().getSequence(referenceName)); if (sequence == null) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_REFERENCE_INDEX, buildMessage("Reference sequence not found in sequence dictionary.", isMate), getReadName())); if (firstOnly) return ret; } else { if (alignmentStart > sequence.getSequenceLength()) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_ALIGNMENT_START, buildMessage("Alignment start (" + alignmentStart + ") must be <= reference sequence length (" + sequence.getSequenceLength() + ") on reference " + sequence.getSequenceName(), isMate), getReadName())); if (firstOnly) return ret; diff --git a/src/main/java/htsjdk/samtools/SAMRecordCoordinateComparator.java b/src/main/java/htsjdk/samtools/SAMRecordCoordinateComparator.java index e8887bc46..fe054b40b 100644 --- a/src/main/java/htsjdk/samtools/SAMRecordCoordinateComparator.java +++ b/src/main/java/htsjdk/samtools/SAMRecordCoordinateComparator.java @@ -43,6 +43,7 @@ public class SAMRecordCoordinateComparator implements SAMRecordComparator, Serializable { private static final long serialVersionUID = 1L; + @Override public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) { int cmp = fileOrderCompare(samRecord1, samRecord2); if 
(cmp != 0) { @@ -83,6 +84,7 @@ private int compareInts(int i1, int i2) { * * @return negative if samRecord1 < samRecord2, 0 if equal, else positive */ + @Override public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) { if (null == samRecord1.getHeader() || null == samRecord2.getHeader()) { diff --git a/src/main/java/htsjdk/samtools/SAMRecordDuplicateComparator.java b/src/main/java/htsjdk/samtools/SAMRecordDuplicateComparator.java index 4ed2bb52d..436ba3c0a 100644 --- a/src/main/java/htsjdk/samtools/SAMRecordDuplicateComparator.java +++ b/src/main/java/htsjdk/samtools/SAMRecordDuplicateComparator.java @@ -220,6 +220,7 @@ private boolean pairedEndAndBothMapped(final SAMRecord record) { * If both reads are paired and both ends mapped, always prefer the first end over the second end. This is needed to * properly choose the first end for optical duplicate identification when both ends are mapped to the same position etc. */ + @Override public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) { populateTransientAttributes(samRecord1, samRecord2); int cmp; @@ -357,6 +358,7 @@ public int duplicateSetCompare(final SAMRecord samRecord1, final SAMRecord samRe /** * Less stringent than duplicateSetCompare, such that two records are equal enough such that their ordering in a sorted SAM file would be arbitrary. 
*/ + @Override public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) { return fileOrderCompare(samRecord1, samRecord2, false, true); } diff --git a/src/main/java/htsjdk/samtools/SAMRecordQueryNameComparator.java b/src/main/java/htsjdk/samtools/SAMRecordQueryNameComparator.java index 7fd97f5b5..d2f7cdea9 100644 --- a/src/main/java/htsjdk/samtools/SAMRecordQueryNameComparator.java +++ b/src/main/java/htsjdk/samtools/SAMRecordQueryNameComparator.java @@ -31,6 +31,7 @@ public class SAMRecordQueryNameComparator implements SAMRecordComparator, Serializable { private static final long serialVersionUID = 1L; + @Override public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) { int cmp = fileOrderCompare(samRecord1, samRecord2); if (cmp != 0) { @@ -75,6 +76,7 @@ public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) { * * @return negative if samRecord1 < samRecord2, 0 if equal, else positive */ + @Override public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) { return compareReadNames(samRecord1.getReadName(), samRecord2.getReadName()); } diff --git a/src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java b/src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java index 2af91c30f..b55265f71 100644 --- a/src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java +++ b/src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java @@ -193,16 +193,21 @@ public void addRecord(final SAMRecord record) { } /** Returns a CloseableIterator over the collection of SAMRecords. */ + @Override public CloseableIterator iterator() { return new CloseableIterator() { private final Iterator iterator = records.iterator(); + @Override public void close() { /** Do nothing. 
*/} + @Override public boolean hasNext() { return this.iterator.hasNext(); } + @Override public SAMRecord next() { return this.iterator.next(); } + @Override public void remove() { this.iterator.remove(); } }; } @@ -358,13 +363,8 @@ public void addPair(final String name, final int contig, final int start1, final end1.setMappingQuality(255); end1.setReadPairedFlag(true); end1.setProperPairFlag(true); - end1.setMateReferenceIndex(contig); - end1.setAttribute(SAMTag.MC.name(), readLength + "M"); - end1.setMateAlignmentStart(start2); - end1.setMateNegativeStrandFlag(true); end1.setFirstOfPairFlag(end1IsFirstOfPair); end1.setSecondOfPairFlag(!end1IsFirstOfPair); - end1.setInferredInsertSize((int) CoordMath.getLength(start1, CoordMath.getEnd(start2, this.readLength))); end1.setAttribute(SAMTag.RG.name(), READ_GROUP_ID); if (programRecord != null) { end1.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId()); @@ -383,13 +383,8 @@ public void addPair(final String name, final int contig, final int start1, final end2.setMappingQuality(255); end2.setReadPairedFlag(true); end2.setProperPairFlag(true); - end2.setMateReferenceIndex(contig); - end2.setAttribute(SAMTag.MC.name(), readLength + "M"); - end2.setMateAlignmentStart(start1); - end2.setMateNegativeStrandFlag(false); end2.setFirstOfPairFlag(!end1IsFirstOfPair); end2.setSecondOfPairFlag(end1IsFirstOfPair); - end2.setInferredInsertSize(end1.getInferredInsertSize()); end2.setAttribute(SAMTag.RG.name(), READ_GROUP_ID); if (programRecord != null) { end2.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId()); @@ -399,6 +394,9 @@ public void addPair(final String name, final int contig, final int start1, final } fillInBasesAndQualities(end2); + // set mate info + SamPairUtil.setMateInfo(end1, end2, true); + this.records.add(end1); this.records.add(end2); } @@ -487,7 +485,7 @@ public void addUnmappedPair(final String name) { end1.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId()); } if 
(this.unmappedHasBasesAndQualities) { - fillInBasesAndQualities(end1); + fillInBasesAndQualities(end1); } end2.setReadName(name); @@ -503,7 +501,7 @@ public void addUnmappedPair(final String name) { end2.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId()); } if (this.unmappedHasBasesAndQualities) { - fillInBasesAndQualities(end2); + fillInBasesAndQualities(end2); } this.records.add(end1); diff --git a/src/main/java/htsjdk/samtools/SAMRecordUtil.java b/src/main/java/htsjdk/samtools/SAMRecordUtil.java index d778789d7..9435934c5 100644 --- a/src/main/java/htsjdk/samtools/SAMRecordUtil.java +++ b/src/main/java/htsjdk/samtools/SAMRecordUtil.java @@ -23,23 +23,28 @@ */ package htsjdk.samtools; -import htsjdk.samtools.util.SequenceUtil; -import htsjdk.samtools.util.StringUtil; - import java.util.Arrays; import java.util.Collection; import java.util.List; /** * - * Use {@link SAMRecord#reverseComplement()} instead, which defaults to making a copy of attributes for reverse - * complement rather than changing them in-place. - * * @author alecw@broadinstitute.org + * + * @deprecated 10/27/2016 Use {@link SAMRecord} constants and functions */ @Deprecated public class SAMRecordUtil { + /** + * @deprecated 6/5/2017 Use {@link SAMRecord#TAGS_TO_REVERSE_COMPLEMENT} + */ + @Deprecated public static List TAGS_TO_REVERSE_COMPLEMENT = Arrays.asList(SAMTag.E2.name(), SAMTag.SQ.name()); + + /** + * @deprecated 6/5/2017 Use {@link SAMRecord#TAGS_TO_REVERSE} + */ + @Deprecated public static List TAGS_TO_REVERSE = Arrays.asList(SAMTag.OQ.name(), SAMTag.U2.name()); /** @@ -48,7 +53,11 @@ * or attributes. If a copy is needed use {@link #reverseComplement(SAMRecord, boolean)}. * See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE} * for the default set of tags that are handled. 
+ * + * @deprecated 6/5/2017 Use {@link SAMRecord#reverseComplement} but note that the default behavior there is different + * It will default to making a copy, not reverse-complementing in-place! */ + @Deprecated public static void reverseComplement(final SAMRecord rec) { rec.reverseComplement(TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE, true); } @@ -61,7 +70,10 @@ public static void reverseComplement(final SAMRecord rec) { * * @param rec Record to reverse complement. * @param inplace Setting this to false will clone all attributes, bases and qualities before changing the values. + * + * @deprecated 6/5/2017 Use {@link SAMRecord#reverseComplement} */ + @Deprecated public static void reverseComplement(final SAMRecord rec, boolean inplace) { rec.reverseComplement(TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE, inplace); } @@ -70,7 +82,10 @@ public static void reverseComplement(final SAMRecord rec, boolean inplace) { * Reverse complement bases and reverse quality scores. In addition reverse complement any * non-null attributes specified by tagsToRevcomp and reverse and non-null attributes * specified by tagsToReverse. 
+ * + * @deprecated 6/5/2017 Use {@link SAMRecord#reverseComplement} */ + @Deprecated public static void reverseComplement(final SAMRecord rec, final Collection tagsToRevcomp, final Collection tagsToReverse, boolean inplace) { rec.reverseComplement(tagsToRevcomp, tagsToReverse, inplace); } diff --git a/src/main/java/htsjdk/samtools/SAMSequenceDictionary.java b/src/main/java/htsjdk/samtools/SAMSequenceDictionary.java index b7744d796..86ffa6c9f 100644 --- a/src/main/java/htsjdk/samtools/SAMSequenceDictionary.java +++ b/src/main/java/htsjdk/samtools/SAMSequenceDictionary.java @@ -29,7 +29,6 @@ import java.math.BigInteger; import java.security.MessageDigest; import java.util.*; -import java.util.stream.Collector; import java.util.stream.Collectors; import javax.xml.bind.annotation.XmlElement; @@ -50,8 +49,8 @@ getter because the later wraps the list into an unmodifiable List see http://tech.joshuacummings.com/2010/10/problems-with-defensive-collection.html */ @XmlElement(name="Reference") - private List mSequences = new ArrayList(); - private final Map mSequenceMap = new HashMap(); + private List mSequences = new ArrayList<>(); + private final Map mSequenceMap = new HashMap<>(); public SAMSequenceDictionary() { } @@ -150,7 +149,7 @@ public boolean isEmpty() { private static String DICT_MISMATCH_TEMPLATE = "SAM dictionaries are not the same: %s."; /** * Non-comprehensive {@link #equals(Object)}-assertion: instead of calling {@link SAMSequenceRecord#equals(Object)} on constituent - * {@link SAMSequenceRecord}s in this dictionary against its pair in the target dictionary, in order, call + * {@link SAMSequenceRecord}s in this dictionary against its pair in the target dictionary, in order, call * {@link SAMSequenceRecord#isSameSequence(SAMSequenceRecord)}. * Aliases are ignored. 
* @@ -161,20 +160,49 @@ public void assertSameDictionary(final SAMSequenceDictionary that) { final Iterator thatSequences = that.mSequences.iterator(); for (final SAMSequenceRecord thisSequence : mSequences) { - if (!thatSequences.hasNext()) + if (!thatSequences.hasNext()) { throw new AssertionError(String.format(DICT_MISMATCH_TEMPLATE, thisSequence + " is present in only one dictionary")); - else { + } else { final SAMSequenceRecord thatSequence = thatSequences.next(); - if(!thatSequence.isSameSequence(thisSequence)) + if(!thatSequence.isSameSequence(thisSequence)) { throw new AssertionError( String.format(DICT_MISMATCH_TEMPLATE, thatSequence + " was found when " + thisSequence + " was expected") ); + } } } if (thatSequences.hasNext()) throw new AssertionError(String.format(DICT_MISMATCH_TEMPLATE, thatSequences.next() + " is present in only one dictionary")); } + /** + * Non-comprehensive {@link #equals(Object)}-validation: instead of calling {@link SAMSequenceRecord#equals(Object)} on constituent + * {@link SAMSequenceRecord}s in this dictionary against its pair in the target dictionary, in order, call + * {@link SAMSequenceRecord#isSameSequence(SAMSequenceRecord)}. 
+ * + * @param that {@link SAMSequenceDictionary} to compare against + * @return true if the dictionaries are the same, false otherwise + * + */ + public boolean isSameDictionary(final SAMSequenceDictionary that) { + if (that == null || that.mSequences == null) return false; + if (this == that) return true; + + final Iterator thatSequences = that.mSequences.iterator(); + for (final SAMSequenceRecord thisSequence : mSequences) { + if (!thatSequences.hasNext()) { + return false; + } else { + final SAMSequenceRecord thatSequence = thatSequences.next(); + if (!thatSequence.isSameSequence(thisSequence)) { + return false; + } + } + } + + return !thatSequences.hasNext(); + } + /** returns true if the two dictionaries are the same, aliases are NOT considered */ @Override public boolean equals(Object o) { @@ -183,9 +211,7 @@ public boolean equals(Object o) { SAMSequenceDictionary that = (SAMSequenceDictionary) o; - if (!mSequences.equals(that.mSequences)) return false; - - return true; + return mSequences.equals(that.mSequences); } /** @@ -318,8 +344,8 @@ static public SAMSequenceDictionary mergeDictionaries(final SAMSequenceDictionar finalDict.addSequence(sMerged); final Set allTags = new HashSet<>(); - s1.getAttributes().stream().forEach(a -> allTags.add(a.getKey())); - s2.getAttributes().stream().forEach(a -> allTags.add(a.getKey())); + s1.getAttributes().forEach(a -> allTags.add(a.getKey())); + s2.getAttributes().forEach(a -> allTags.add(a.getKey())); for (final String tag : allTags) { final String value1 = s1.getAttribute(tag); diff --git a/src/main/java/htsjdk/samtools/SAMSequenceRecord.java b/src/main/java/htsjdk/samtools/SAMSequenceRecord.java index 6bca979cc..a4b4df236 100644 --- a/src/main/java/htsjdk/samtools/SAMSequenceRecord.java +++ b/src/main/java/htsjdk/samtools/SAMSequenceRecord.java @@ -23,6 +23,9 @@ */ package htsjdk.samtools; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlRootElement; +import 
javax.xml.bind.annotation.XmlValue; import java.math.BigInteger; import java.net.URI; import java.net.URISyntaxException; @@ -32,10 +35,6 @@ import java.util.Set; import java.util.regex.Pattern; -import javax.xml.bind.annotation.XmlAttribute; -import javax.xml.bind.annotation.XmlRootElement; -import javax.xml.bind.annotation.XmlValue; - /** * Header information about a reference sequence. Corresponds to @SQ header record in SAM text header. */ @@ -194,10 +193,12 @@ public int hashCode() { return mSequenceName != null ? mSequenceName.hashCode() : 0; } + @Override Set getStandardTags() { return STANDARD_TAGS; } + @Override public final SAMSequenceRecord clone() { final SAMSequenceRecord ret = new SAMSequenceRecord(this.mSequenceName, this.mSequenceLength); ret.mSequenceIndex = this.mSequenceIndex; @@ -244,5 +245,10 @@ public String toString() { getAssembly() ); } + + @Override + public String getSAMString() { + return new SAMTextHeaderCodec().getSQLine(this); + } } diff --git a/src/main/java/htsjdk/samtools/SAMTestUtil.java b/src/main/java/htsjdk/samtools/SAMTestUtil.java index 83766f367..ec85ce2da 100644 --- a/src/main/java/htsjdk/samtools/SAMTestUtil.java +++ b/src/main/java/htsjdk/samtools/SAMTestUtil.java @@ -23,6 +23,8 @@ */ package htsjdk.samtools; +import java.util.List; + /** * Misc methods for SAM-related unit tests. These are in the src tree rather than the tests tree * so that they will be included in sam.jar, and therefore can be used by tests outside of htsjdk.samtools. @@ -55,47 +57,21 @@ public void assertPairValid(final SAMRecord firstEnd, final SAMRecord secondEnd) } /** - * Basic sanity check for a SAMRecord. - * @throws SanityCheckFailedException if the sanity check failed + * Basic sanity check for a SAMRecord. Print errors to screen. 
+ * @param read SAM record + * @throws IllegalArgumentException if read is null + * @throws SanityCheckFailedException if errors */ - public void assertReadValid(final SAMRecord read) throws SanityCheckFailedException { - assertEquals(read.getReadBases().length, read.getBaseQualities().length); - // Note that it is possible to have an unmapped read that has a coordinate - if (read.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) { - assertEquals(read.getAlignmentStart(), SAMRecord.NO_ALIGNMENT_START); - assertTrue(read.getReadUnmappedFlag()); - } else { - assertNotSame(read.getAlignmentStart(), SAMRecord.NO_ALIGNMENT_START); - } - if (read.getReadUnmappedFlag()) { - assertEquals(read.getMappingQuality(), SAMRecord.NO_MAPPING_QUALITY); - assertEquals(read.getCigar().getCigarElements().size(), 0); - } else { - assertNotSame(read.getCigar().getCigarElements(), 0); + public static void assertReadValid(final SAMRecord read) throws SanityCheckFailedException { + if (read == null) { + throw new IllegalArgumentException("SAMRecord is null"); } - if (read.getReadPairedFlag()) { - if (read.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) { - assertEquals(read.getMateAlignmentStart(), SAMRecord.NO_ALIGNMENT_START); - assertTrue(read.getMateUnmappedFlag()); - } else { - // Even if the mate is unmapped, if it has a reference name, it should have a position. 
- assertNotSame(read.getMateAlignmentStart(), SAMRecord.NO_ALIGNMENT_START); - } - if (read.getReadUnmappedFlag() || read.getMateUnmappedFlag() || - !read.getReferenceName().equals(read.getMateReferenceName())) { - assertEquals(read.getInferredInsertSize(), 0); - } else { - assertNotSame(read.getInferredInsertSize(), 0); - } - if (!read.getReadUnmappedFlag() && !read.getMateUnmappedFlag()) { - assertNotSame(read.getReadNegativeStrandFlag(), read.getMateNegativeStrandFlag()); - assertNotSame(read.getMateNegativeStrandFlag(), - read.getReadName()); - } - } else { - assertEquals(read.getInferredInsertSize(), 0); + final List errors = read.isValid(false); + if ( errors != null) { + errors.forEach(v -> System.out.println(v.toString())); } + assertTrue(errors.isEmpty()); } private static void assertEquals(T a, T b) { diff --git a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java index fb4b02ac3..908e8360b 100644 --- a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java +++ b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java @@ -228,6 +228,25 @@ private void parseHDLine(final ParsedHeaderLine parsedHeaderLine) { if (!parsedHeaderLine.requireTag(SAMFileHeader.VERSION_TAG)) { return; } + + final String soString = parsedHeaderLine.getValue(SAMFileHeader.SORT_ORDER_TAG); + try { + if (soString != null) SAMFileHeader.SortOrder.valueOf(soString); + } catch (IllegalArgumentException e) { + reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() + + " line has non-conforming SO tag value: "+ soString + ".", + SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null); + } + + final String goString = parsedHeaderLine.getValue(SAMFileHeader.GROUP_ORDER_TAG); + try { + if (goString != null) SAMFileHeader.GroupOrder.valueOf(goString); + } catch (IllegalArgumentException e) { + reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() + + " line has non-conforming GO 
tag value: "+ goString + ".", + SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null); + } + transferAttributes(mFileHeader, parsedHeaderLine.mKeyValuePairs); } @@ -429,22 +448,27 @@ private void println(final String s) { } private void writePGLine(final SAMProgramRecord programRecord) { - if (programRecord == null) { - return; - } + println(getPGLine(programRecord)); + } + + protected String getPGLine(final SAMProgramRecord programRecord) { final String[] fields = new String[2 + programRecord.getAttributes().size()]; fields[0] = HEADER_LINE_START + HeaderRecordType.PG; fields[1] = SAMProgramRecord.PROGRAM_GROUP_ID_TAG + TAG_KEY_VALUE_SEPARATOR + programRecord.getProgramGroupId(); encodeTags(programRecord, fields, 2); - println(StringUtil.join(FIELD_SEPARATOR, fields)); + return StringUtil.join(FIELD_SEPARATOR, fields); } private void writeRGLine(final SAMReadGroupRecord readGroup) { - final String[] fields = new String[2 + readGroup.getAttributes().size()]; - fields[0] = HEADER_LINE_START + HeaderRecordType.RG; - fields[1] = SAMReadGroupRecord.READ_GROUP_ID_TAG + TAG_KEY_VALUE_SEPARATOR + readGroup.getReadGroupId(); - encodeTags(readGroup, fields, 2); - println(StringUtil.join(FIELD_SEPARATOR, fields)); + println(getRGLine(readGroup)); + } + + protected String getRGLine(final SAMReadGroupRecord readGroup) { + final String[] fields = new String[2 + readGroup.getAttributes().size()]; + fields[0] = HEADER_LINE_START + HeaderRecordType.RG; + fields[1] = SAMReadGroupRecord.READ_GROUP_ID_TAG + TAG_KEY_VALUE_SEPARATOR + readGroup.getReadGroupId(); + encodeTags(readGroup, fields, 2); + return StringUtil.join(FIELD_SEPARATOR, fields); } private void writeHDLine(final boolean keepExistingVersionNumber) { @@ -470,13 +494,17 @@ private void writeHDLine(final boolean keepExistingVersionNumber) { } private void writeSQLine(final SAMSequenceRecord sequenceRecord) { + println(getSQLine(sequenceRecord)); + } + + protected String getSQLine(final SAMSequenceRecord 
sequenceRecord) { final int numAttributes = sequenceRecord.getAttributes() != null ? sequenceRecord.getAttributes().size() : 0; final String[] fields = new String[3 + numAttributes]; fields[0] = HEADER_LINE_START + HeaderRecordType.SQ; fields[1] = SAMSequenceRecord.SEQUENCE_NAME_TAG + TAG_KEY_VALUE_SEPARATOR + sequenceRecord.getSequenceName(); fields[2] = SAMSequenceRecord.SEQUENCE_LENGTH_TAG + TAG_KEY_VALUE_SEPARATOR + Integer.toString(sequenceRecord.getSequenceLength()); encodeTags(sequenceRecord, fields, 3); - println(StringUtil.join(FIELD_SEPARATOR, fields)); + return StringUtil.join(FIELD_SEPARATOR, fields); } /** diff --git a/src/main/java/htsjdk/samtools/SAMTextReader.java b/src/main/java/htsjdk/samtools/SAMTextReader.java index 3968f1cc9..62f871752 100644 --- a/src/main/java/htsjdk/samtools/SAMTextReader.java +++ b/src/main/java/htsjdk/samtools/SAMTextReader.java @@ -79,22 +79,27 @@ public SAMTextReader(final InputStream stream, final File file, final Validation * * @param enabled true to write source information into each SAMRecord. */ + @Override public void enableFileSource(final SamReader reader, final boolean enabled) { this.mParentReader = enabled ? 
reader : null; } + @Override void enableIndexCaching(final boolean enabled) { throw new UnsupportedOperationException("Cannot enable index caching for a SAM text reader"); } + @Override void enableIndexMemoryMapping(final boolean enabled) { throw new UnsupportedOperationException("Cannot enable index memory mapping for a SAM text reader"); } + @Override void enableCrcChecking(final boolean enabled) { // Do nothing - this has no meaning for SAM reading } + @Override void setSAMRecordFactory(final SAMRecordFactory factory) { this.samRecordFactory = factory; } @@ -104,14 +109,17 @@ void setSAMRecordFactory(final SAMRecordFactory factory) { return SamReader.Type.SAM_TYPE; } + @Override public boolean hasIndex() { return false; } + @Override public BAMIndex getIndex() { throw new UnsupportedOperationException(); } + @Override public void close() { if (mReader != null) { try { @@ -122,14 +130,17 @@ public void close() { } } + @Override public SAMFileHeader getFileHeader() { return mFileHeader; } + @Override public ValidationStringency getValidationStringency() { return validationStringency; } + @Override public void setValidationStringency(final ValidationStringency stringency) { this.validationStringency = stringency; } @@ -141,6 +152,7 @@ public void setValidationStringency(final ValidationStringency stringency) { * * @return Iterator of SAMRecords in file order. */ + @Override public CloseableIterator getIterator() { if (mReader == null) { throw new IllegalStateException("File reader is closed"); @@ -158,6 +170,7 @@ public void setValidationStringency(final ValidationStringency stringency) { * @param fileSpan The file span. * @return An iterator over the given file span. 
*/ + @Override public CloseableIterator getIterator(final SAMFileSpan fileSpan) { throw new UnsupportedOperationException("Cannot directly iterate over regions within SAM text files."); } @@ -167,6 +180,7 @@ public void setValidationStringency(final ValidationStringency stringency) { * * @return An pointer to the first read in the file. */ + @Override public SAMFileSpan getFilePointerSpanningReads() { throw new UnsupportedOperationException("Cannot retrieve file pointers within SAM text files."); } @@ -186,10 +200,12 @@ public SAMFileSpan getFilePointerSpanningReads() { /** * Unsupported for SAM text files. */ + @Override public CloseableIterator queryAlignmentStart(final String sequence, final int start) { throw new UnsupportedOperationException("Cannot query SAM text files"); } + @Override public CloseableIterator queryUnmapped() { throw new UnsupportedOperationException("Cannot query SAM text files"); } @@ -220,14 +236,17 @@ private RecordIterator() { } } + @Override public void close() { SAMTextReader.this.close(); } + @Override public boolean hasNext() { return mCurrentLine != null; } + @Override public SAMRecord next() { if (!hasNext()) { throw new IllegalStateException("Cannot call next() on exhausted iterator"); @@ -239,6 +258,7 @@ public SAMRecord next() { } } + @Override public void remove() { throw new UnsupportedOperationException("Not supported: remove"); } diff --git a/src/main/java/htsjdk/samtools/SAMTextWriter.java b/src/main/java/htsjdk/samtools/SAMTextWriter.java index 0786d670c..70dd4a229 100644 --- a/src/main/java/htsjdk/samtools/SAMTextWriter.java +++ b/src/main/java/htsjdk/samtools/SAMTextWriter.java @@ -122,6 +122,7 @@ public SAMTextWriter(final OutputStream stream, final SamFlagField samFlagFieldO * * @param alignment SAMRecord. 
*/ + @Override public void writeAlignment(final SAMRecord alignment) { try { out.write(alignment.getReadName()); @@ -188,6 +189,7 @@ static synchronized String getSAMString(final SAMRecord alignment) { * * @param textHeader String containing the text to write. */ + @Override public void writeHeader(final String textHeader) { try { out.write(textHeader); @@ -199,6 +201,7 @@ public void writeHeader(final String textHeader) { /** * Do any required flushing here. */ + @Override public void finish() { try { out.close(); @@ -212,6 +215,7 @@ public void finish() { * * @return Output filename, or null if there isn't one. */ + @Override public String getFilename() { if (file == null) { return null; diff --git a/src/main/java/htsjdk/samtools/SAMTools.java b/src/main/java/htsjdk/samtools/SAMTools.java index 551f846d6..911198e0a 100644 --- a/src/main/java/htsjdk/samtools/SAMTools.java +++ b/src/main/java/htsjdk/samtools/SAMTools.java @@ -31,7 +31,10 @@ /** * Command line utility for manipulating SAM/BAM files. + * @deprecated since 07/2017. This class does not add anything to the HTSJDK library except an example of how to iterate over a SAM/BAM file. + * In addition, it is not tested. */ +@Deprecated public class SAMTools { private String mCommand = null; private File mInputFile = null; diff --git a/src/main/java/htsjdk/samtools/SAMUtils.java b/src/main/java/htsjdk/samtools/SAMUtils.java index 25b6799c7..5b81de979 100644 --- a/src/main/java/htsjdk/samtools/SAMUtils.java +++ b/src/main/java/htsjdk/samtools/SAMUtils.java @@ -43,14 +43,17 @@ import java.util.TreeMap; import java.util.regex.Pattern; - /** * Utilty methods. 
*/ public final class SAMUtils { - /** regex for semicolon, used in {@link SAMUtils#getOtherCanonicalAlignments(SAMRecord)} */ + /** + * regex for semicolon, used in {@link SAMUtils#getOtherCanonicalAlignments(SAMRecord)} + */ private static final Pattern SEMICOLON_PAT = Pattern.compile("[;]"); - /** regex for comma, used in {@link SAMUtils#getOtherCanonicalAlignments(SAMRecord)} */ + /** + * regex for comma, used in {@link SAMUtils#getOtherCanonicalAlignments(SAMRecord)} + */ private static final Pattern COMMA_PAT = Pattern.compile("[,]"); // Representation of bases, one for when in low-order nybble, one for when in high-order nybble. @@ -87,32 +90,31 @@ private static final byte COMPRESSED_K_HIGH = (byte) (COMPRESSED_K_LOW << 4); private static final byte COMPRESSED_D_HIGH = (byte) (COMPRESSED_D_LOW << 4); private static final byte COMPRESSED_B_HIGH = (byte) (COMPRESSED_B_LOW << 4); - - private static final byte [] COMPRESSED_LOOKUP_TABLE = - new byte[]{ - '=', - 'A', - 'C', - 'M', - 'G', - 'R', - 'S', - 'V', - 'T', - 'W', - 'Y', - 'H', - 'K', - 'D', - 'B', - 'N' - }; - + + private static final byte[] COMPRESSED_LOOKUP_TABLE = { + '=', + 'A', + 'C', + 'M', + 'G', + 'R', + 'S', + 'V', + 'T', + 'W', + 'Y', + 'H', + 'K', + 'D', + 'B', + 'N' + }; + public static final int MAX_PHRED_SCORE = 93; /** - * Convert from a byte array containing =AaCcGgTtNn represented as ASCII, to a byte array half as long, - * with =, A, C, G, T converted to 0, 1, 2, 4, 8, 15. + * Convert from a byte array containing =AaCcGgTtNnMmRrSsVvWwYyHhKkDdBb represented as ASCII, to a byte array half as long, + * with for example, =, A, C, G, T converted to 0, 1, 2, 4, 8, 15. * * @param readBases Bases as ASCII bytes. * @return New byte array with bases represented as nybbles, in BAM binary format. 
@@ -126,17 +128,17 @@ } // Last nybble if (i == readBases.length) { - compressedBases[i / 2] = charToCompressedBaseHigh((char) readBases[i - 1]); + compressedBases[i / 2] = charToCompressedBaseHigh(readBases[i - 1]); } return compressedBases; } /** - * Convert from a byte array with basese stored in nybbles, with =, A, C, G, T represented as 0, 1, 2, 4, 8, 15, + * Convert from a byte array with bases stored in nybbles, with for example,=, A, C, G, T, N represented as 0, 1, 2, 4, 8, 15, * to a a byte array containing =AaCcGgTtNn represented as ASCII. * - * @param length Number of bases (not bytes) to convert. - * @param compressedBases Bases represented as nybbles, in BAM binary format. + * @param length Number of bases (not bytes) to convert. + * @param compressedBases Bases represented as nybbles, in BAM binary format. * @param compressedOffset Byte offset in compressedBases to start. * @return New byte array with bases as ASCII bytes. */ @@ -158,10 +160,11 @@ /** * Convert from ASCII byte to BAM nybble representation of a base in low-order nybble. * - * @param base One of =AaCcGgTtNn. + * @param base One of =AaCcGgTtNnMmRrSsVvWwYyHhKkDdBb. * @return Low-order nybble-encoded equivalent. + * @throws IllegalArgumentException if the base is not one of =AaCcGgTtNnMmRrSsVvWwYyHhKkDdBb. */ - private static byte charToCompressedBaseLow(final int base) { + private static byte charToCompressedBaseLow(final byte base) { switch (base) { case '=': return COMPRESSED_EQUAL_LOW; @@ -214,17 +217,18 @@ private static byte charToCompressedBaseLow(final int base) { case 'b': return COMPRESSED_B_LOW; default: - throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); + throw new IllegalArgumentException("Bad base passed to charToCompressedBaseLow: " + Character.toString((char) base) + "(" + base + ")"); } } /** * Convert from ASCII byte to BAM nybble representation of a base in high-order nybble. * - * @param base One of =AaCcGgTtNn. 
+ * @param base One of =AaCcGgTtNnMmRrSsVvWwYyHhKkDdBb. * @return High-order nybble-encoded equivalent. + * @throws IllegalArgumentException if the base is not one of =AaCcGgTtNnMmRrSsVvWwYyHhKkDdBb. */ - private static byte charToCompressedBaseHigh(final int base) { + private static byte charToCompressedBaseHigh(final byte base) { switch (base) { case '=': return COMPRESSED_EQUAL_HIGH; @@ -277,20 +281,22 @@ private static byte charToCompressedBaseHigh(final int base) { case 'b': return COMPRESSED_B_HIGH; default: - throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); + throw new IllegalArgumentException("Bad base passed to charToCompressedBaseHigh: " + Character.toString((char) base) + "(" + base + ")"); } } - + /** * Returns the byte corresponding to a certain nybble + * * @param base One of COMPRESSED_*_LOW, a low-order nybble encoded base. - * @return ASCII base, one of ACGTN=. + * @return ASCII base, one of =ACGTNMRSVWYHKDB. + * @throws IllegalArgumentException if the base is not one of =ACGTNMRSVWYHKDB. */ - private static byte compressedBaseToByte(byte base){ - try{ + private static byte compressedBaseToByte(byte base) { + try { return COMPRESSED_LOOKUP_TABLE[base]; - }catch(IndexOutOfBoundsException e){ - throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); + } catch (IndexOutOfBoundsException e) { + throw new IllegalArgumentException("Bad base passed to charToCompressedBase: " + Character.toString((char) base) + "(" + base + ")"); } } @@ -301,7 +307,7 @@ private static byte compressedBaseToByte(byte base){ * @return ASCII base, one of ACGTN=. */ private static byte compressedBaseToByteLow(final int base) { - return compressedBaseToByte((byte)(base & 0xf)); + return compressedBaseToByte((byte) (base & 0xf)); } /** @@ -311,13 +317,13 @@ private static byte compressedBaseToByteLow(final int base) { * @return ASCII base, one of ACGTN=. 
*/ private static byte compressedBaseToByteHigh(final int base) { - return compressedBaseToByte((byte)((base >> 4) & 0xf)); + return compressedBaseToByte((byte) ((base >> 4) & 0xf)); } /** * Convert bases in place into canonical form, upper case, and with no-call represented as N. * - * @param bases + * @param bases byte array of bases to "normalize", in place. */ static void normalizeBases(final byte[] bases) { for (int i = 0; i < bases.length; ++i) { @@ -431,11 +437,11 @@ static int reg2bin(final int beg, final int end) { /** * Handle a list of validation errors according to the validation stringency. * - * @param validationErrors List of errors to report, or null if there are no errors. - * @param samRecordIndex Record number of the SAMRecord corresponding to the validation errors, or -1 if - * the record number is not known. + * @param validationErrors List of errors to report, or null if there are no errors. + * @param samRecordIndex Record number of the SAMRecord corresponding to the validation errors, or -1 if + * the record number is not known. * @param validationStringency If STRICT, throw a SAMFormatException. If LENIENT, print the validation - * errors to stderr. If SILENT, do nothing. + * errors to stderr. If SILENT, do nothing. 
*/ public static void processValidationErrors(final List validationErrors, final long samRecordIndex, @@ -461,11 +467,10 @@ public static void processValidationError(final SAMValidationError validationErr } else if (validationStringency == ValidationStringency.LENIENT) { System.err.println("Ignoring SAM validation error: " + validationError); } - } private static final SAMHeaderRecordComparator HEADER_RECORD_COMPARATOR = - new SAMHeaderRecordComparator( + new SAMHeaderRecordComparator<>( SAMReadGroupRecord.PLATFORM_UNIT_TAG, SAMReadGroupRecord.LIBRARY_TAG, SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG, @@ -473,7 +478,8 @@ public static void processValidationError(final SAMValidationError validationErr SAMReadGroupRecord.SEQUENCING_CENTER_TAG, SAMReadGroupRecord.PLATFORM_TAG, SAMReadGroupRecord.DESCRIPTION_TAG, - SAMReadGroupRecord.READ_GROUP_ID_TAG // We don't actually want to compare with ID but it's suitable + SAMReadGroupRecord.READ_GROUP_ID_TAG + // We don't actually want to compare with ID but it's suitable // "just in case" since it's the only one that's actually required ); @@ -494,11 +500,11 @@ public static String calculateReadGroupRecordChecksum(final File input, final Fi // Sort the read group records by their first final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(input); - final List sortedRecords = new ArrayList(reader.getFileHeader().getReadGroups()); + final List sortedRecords = new ArrayList<>(reader.getFileHeader().getReadGroups()); Collections.sort(sortedRecords, HEADER_RECORD_COMPARATOR); for (final SAMReadGroupRecord rgRecord : sortedRecords) { - final TreeMap sortedAttributes = new TreeMap(); + final TreeMap sortedAttributes = new TreeMap<>(); for (final Map.Entry attributeEntry : rgRecord.getAttributes()) { sortedAttributes.put(attributeEntry.getKey(), attributeEntry.getValue()); } @@ -536,7 +542,7 @@ public static void chainSAMProgramRecord(final SAMFileHeader header, final SAMPr final List pgs = 
header.getProgramRecords(); if (!pgs.isEmpty()) { - final List referencedIds = new ArrayList(); + final List referencedIds = new ArrayList<>(); for (final SAMProgramRecord pg : pgs) { if (pg.getPreviousProgramGroupId() != null) { referencedIds.add(pg.getPreviousProgramGroupId()); @@ -557,7 +563,7 @@ public static void chainSAMProgramRecord(final SAMFileHeader header, final SAMPr /** * Strip mapping information from a SAMRecord. - * + *

* WARNING: by clearing the secondary and supplementary flags, * this may have the affect of producing multiple distinct records with the * same read name and flags, which may lead to invalid SAM/BAM output. @@ -565,7 +571,7 @@ public static void chainSAMProgramRecord(final SAMFileHeader header, final SAMPr */ public static void makeReadUnmapped(final SAMRecord rec) { if (rec.getReadNegativeStrandFlag()) { - SAMRecordUtil.reverseComplement(rec); + rec.reverseComplement(true); rec.setReadNegativeStrandFlag(false); } rec.setDuplicateReadFlag(false); @@ -619,13 +625,13 @@ public static boolean cigarMapsNoBasesToRef(final Cigar cigar) { /** * Tests if the provided record is mapped entirely beyond the end of the reference (i.e., the alignment start is greater than the * length of the sequence to which the record is mapped). + * * @param record must not have a null SamFileHeader */ public static boolean recordMapsEntirelyBeyondEndOfReference(final SAMRecord record) { if (record.getHeader() == null) { throw new SAMException("A non-null SAMHeader is required to resolve the mapping position: " + record.getReadName()); - } - else { + } else { return record.getHeader().getSequence(record.getReferenceIndex()).getSequenceLength() < record.getAlignmentStart(); } } @@ -643,7 +649,6 @@ public static int compareMapqs(final int mapq1, final int mapq2) { else return mapq1 - mapq2; } - /** * Hokey algorithm for combining two MAPQs into values that are comparable, being cognizant of the fact * that in MAPQ world, 1 > 255 > 0. In this algorithm, 255 is treated as if it were 0.01, so that @@ -652,11 +657,17 @@ public static int compareMapqs(final int mapq1, final int mapq2) { * invocations of this method. 
*/ public static int combineMapqs(int m1, int m2) { - if (m1 == 255) m1 = 1; - else m1 *= 100; + if (m1 == 255) { + m1 = 1; + } else { + m1 *= 100; + } - if (m2 == 255) m2 = 1; - else m2 *= 100; + if (m2 == 255) { + m2 = 1; + } else { + m2 *= 100; + } return m1 + m2; @@ -679,15 +690,15 @@ public static long findVirtualOffsetOfFirstRecordInBam(final File bamFile) { * reference sequence. Note that clipped portions, and inserted and deleted bases (vs. the reference) * are not represented in the alignment blocks. * - * @param cigar The cigar containing the alignment information + * @param cigar The cigar containing the alignment information * @param alignmentStart The start (1-based) of the alignment - * @param cigarTypeName The type of cigar passed - for error logging. + * @param cigarTypeName The type of cigar passed - for error logging. * @return List of alignment blocks */ public static List getAlignmentBlocks(final Cigar cigar, final int alignmentStart, final String cigarTypeName) { if (cigar == null) return Collections.emptyList(); - final List alignmentBlocks = new ArrayList(); + final List alignmentBlocks = new ArrayList<>(); int readBase = 1; int refBase = alignmentStart; @@ -718,7 +729,7 @@ public static long findVirtualOffsetOfFirstRecordInBam(final File bamFile) { refBase += length; break; default: - throw new IllegalStateException("Case statement didn't deal with " + cigarTypeName + " op: " + e.getOperator()); + throw new IllegalStateException("Case statement didn't deal with " + cigarTypeName + " op: " + e.getOperator() + "in CIGAR: " + cigar); } } return Collections.unmodifiableList(alignmentBlocks); @@ -726,7 +737,7 @@ public static long findVirtualOffsetOfFirstRecordInBam(final File bamFile) { /** * @param alignmentStart The start (1-based) of the alignment - * @param cigar The cigar containing the alignment information + * @param cigar The cigar containing the alignment information * @return the alignment start (1-based, inclusive) adjusted for 
clipped bases. For example if the read * has an alignment start of 100 but the first 4 bases were clipped (hard or soft clipped) * then this method will return 96. @@ -750,7 +761,7 @@ public static int getUnclippedStart(final int alignmentStart, final Cigar cigar) /** * @param alignmentEnd The end (1-based) of the alignment - * @param cigar The cigar containing the alignment information + * @param cigar The cigar containing the alignment information * @return the alignment end (1-based, inclusive) adjusted for clipped bases. For example if the read * has an alignment end of 100 but the last 7 bases were clipped (hard or soft clipped) * then this method will return 107. @@ -788,7 +799,7 @@ public static String getMateCigarString(final SAMRecord rec) { /** * Returns the Mate Cigar or null if there is none. * - * @param rec the SAM record + * @param rec the SAM record * @param withValidation true if we are to validate the mate cigar before returning, false otherwise. * @return Cigar object for the read's mate, or null if there is none. 
*/ @@ -832,11 +843,11 @@ public static int getMateCigarLength(final SAMRecord rec) { */ public static int getMateAlignmentEnd(final SAMRecord rec) { if (rec.getMateUnmappedFlag()) { - throw new RuntimeException("getMateAlignmentEnd called on an unmapped mate."); + throw new RuntimeException("getMateAlignmentEnd called on an unmapped mate: " + rec); } final Cigar mateCigar = SAMUtils.getMateCigar(rec); if (mateCigar == null) { - throw new SAMException("Mate CIGAR (Tag MC) not found."); + throw new SAMException("Mate CIGAR (Tag MC) not found:" + rec); } return CoordMath.getEnd(rec.getMateAlignmentStart(), mateCigar.getReferenceLength()); } @@ -851,15 +862,14 @@ public static int getMateAlignmentEnd(final SAMRecord rec) { */ public static int getMateUnclippedStart(final SAMRecord rec) { if (rec.getMateUnmappedFlag()) - throw new RuntimeException("getMateUnclippedStart called on an unmapped mate."); + throw new RuntimeException("getMateUnclippedStart called on an unmapped mate: " + rec); final Cigar mateCigar = getMateCigar(rec); if (mateCigar == null) { - throw new SAMException("Mate CIGAR (Tag MC) not found."); + throw new SAMException("Mate CIGAR (Tag MC) not found: " + rec); } return SAMUtils.getUnclippedStart(rec.getMateAlignmentStart(), mateCigar); } - /** * @param rec the SAM record * @return the mate alignment end (1-based, inclusive) adjusted for clipped bases. 
For example if the mate @@ -870,20 +880,20 @@ public static int getMateUnclippedStart(final SAMRecord rec) { */ public static int getMateUnclippedEnd(final SAMRecord rec) { if (rec.getMateUnmappedFlag()) { - throw new RuntimeException("getMateUnclippedEnd called on an unmapped mate."); + throw new RuntimeException("getMateUnclippedEnd called on an unmapped mate: " + rec); } final Cigar mateCigar = SAMUtils.getMateCigar(rec); if (mateCigar == null) { - throw new SAMException("Mate CIGAR (Tag MC) not found."); + throw new SAMException("Mate CIGAR (Tag MC) not found: " + rec); } return SAMUtils.getUnclippedEnd(getMateAlignmentEnd(rec), mateCigar); } /** * @param rec the SAM record - * Returns blocks of the mate sequence that have been aligned directly to the - * reference sequence. Note that clipped portions of the mate and inserted and - * deleted bases (vs. the reference) are not represented in the alignment blocks. + * Returns blocks of the mate sequence that have been aligned directly to the + * reference sequence. Note that clipped portions of the mate and inserted and + * deleted bases (vs. the reference) are not represented in the alignment blocks. */ public static List getMateAlignmentBlocks(final SAMRecord rec) { return getAlignmentBlocks(getMateCigar(rec), rec.getMateAlignmentStart(), "mate cigar"); @@ -893,12 +903,12 @@ public static int getMateUnclippedEnd(final SAMRecord rec) { * Run all validations of the mate's CIGAR. These include validation that the CIGAR makes sense independent of * placement, plus validation that CIGAR + placement yields all bases with M operator within the range of the reference. 
* - * @param rec the SAM record - * @param cigar The cigar containing the alignment information - * @param referenceIndex The reference index + * @param rec the SAM record + * @param cigar The cigar containing the alignment information + * @param referenceIndex The reference index * @param alignmentBlocks The alignment blocks (parsed from the cigar) - * @param recordNumber For error reporting. -1 if not known. - * @param cigarTypeName For error reporting. "Read CIGAR" or "Mate Cigar" + * @param recordNumber For error reporting. -1 if not known. + * @param cigarTypeName For error reporting. "Read CIGAR" or "Mate Cigar" * @return List of errors, or null if no errors. */ @@ -913,16 +923,15 @@ public static int getMateUnclippedEnd(final SAMRecord rec) { if (referenceIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { SAMFileHeader samHeader = rec.getHeader(); if (null == samHeader) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.MISSING_HEADER, cigarTypeName + " A non-null SAMHeader is required to validate cigar elements for: ", rec.getReadName(), recordNumber)); - } - else { + } else { final SAMSequenceRecord sequence = samHeader.getSequence(referenceIndex); final int referenceSequenceLength = sequence.getSequenceLength(); for (final AlignmentBlock alignmentBlock : alignmentBlocks) { if (alignmentBlock.getReferenceStart() + alignmentBlock.getLength() - 1 > referenceSequenceLength) { - if (ret == null) ret = new ArrayList(); + if (ret == null) ret = new ArrayList<>(); ret.add(new SAMValidationError(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE, cigarTypeName + " M operator maps off end of reference", rec.getReadName(), recordNumber)); break; @@ -937,7 +946,7 @@ public static int getMateUnclippedEnd(final SAMRecord rec) { * Run all validations of the mate's CIGAR. 
These include validation that the CIGAR makes sense independent of * placement, plus validation that CIGAR + placement yields all bases with M operator within the range of the reference. * - * @param rec the SAM record + * @param rec the SAM record * @param recordNumber For error reporting. -1 if not known. * @return List of errors, or null if no errors. */ @@ -951,7 +960,7 @@ public static int getMateUnclippedEnd(final SAMRecord rec) { } } else { if (getMateCigarString(rec) != null) { - ret = new ArrayList(); + ret = new ArrayList<>(); if (!rec.getReadPairedFlag()) { // If the read is not paired, and the Mate Cigar String (MC Attribute) exists, that is a validation error ret.add(new SAMValidationError(SAMValidationError.Type.MATE_CIGAR_STRING_INVALID_PRESENCE, @@ -981,11 +990,11 @@ public static boolean hasMateCigar(SAMRecord rec) { } /** - * Returns a string that is the the read group ID and read name separated by a colon. This is meant to cannonically + * Returns a string that is the the read group ID and read name separated by a colon. This is meant to canonically * identify a given record within a set of records. * - * @param record - * @return + * @param record SAMRecord for which "canonical" read name is requested + * @return The record's readgroup-id (if non-null) and the read name, separated by a colon, ':' */ public static String getCanonicalRecordName(final SAMRecord record) { String name = record.getStringAttribute(ReservedTagConstants.READ_GROUP_ID); @@ -999,7 +1008,7 @@ public static String getCanonicalRecordName(final SAMRecord record) { * or the given record's start position is greater than its mate's start position, zero is automatically returned. * NB: This method assumes that the record's mate is not contained within the given record's alignment. * - * @param rec + * @param rec SAMRecord that needs clipping due to overlapping pairs. 
* @return the number of bases at the end of the read that need to be clipped such that there would be no overlapping bases with its mate. * Read bases include only those from insertion, match, or mismatch Cigar operators. */ @@ -1010,7 +1019,8 @@ public static int getNumOverlappingAlignedBasesToClip(final SAMRecord rec) { // Only clip records that are left-most in genomic order and overlapping. if (rec.getMateAlignmentStart() < rec.getAlignmentStart()) return 0; // right-most, so ignore. - else if (rec.getMateAlignmentStart() == rec.getAlignmentStart() && rec.getFirstOfPairFlag()) return 0; // same start, so pick the first end + else if (rec.getMateAlignmentStart() == rec.getAlignmentStart() && rec.getFirstOfPairFlag()) + return 0; // same start, so pick the first end // Find the number of read bases after the given mate's alignment start. int numBasesToClip = 0; @@ -1023,12 +1033,11 @@ public static int getNumOverlappingAlignedBasesToClip(final SAMRecord rec) { if (refStartPos <= refPos + refBasesLength - 1) { // add to clipped bases if (operator == CigarOperator.MATCH_OR_MISMATCH) { // M if (refStartPos < refPos) numBasesToClip += refBasesLength; // use all of the bases - else numBasesToClip += (refPos + refBasesLength) - refStartPos; // since the mate's alignment start can be in the middle of a cigar element - } - else if (operator == CigarOperator.SOFT_CLIP || operator == CigarOperator.HARD_CLIP || operator == CigarOperator.PADDING || operator == CigarOperator.SKIPPED_REGION) { + else + numBasesToClip += (refPos + refBasesLength) - refStartPos; // since the mate's alignment start can be in the middle of a cigar element + } else if (operator == CigarOperator.SOFT_CLIP || operator == CigarOperator.HARD_CLIP || operator == CigarOperator.PADDING || operator == CigarOperator.SKIPPED_REGION) { // ignore - } - else { // ID + } else { // ID numBasesToClip += operator.consumesReadBases() ? 
el.getLength() : 0; // clip all the bases in the read from this operator } } @@ -1041,14 +1050,14 @@ else if (operator == CigarOperator.SOFT_CLIP || operator == CigarOperator.HARD_C } /** - * Returns a (possibly new) record that has been clipped if isa mapped paired and has overlapping bases with its mate. + * Returns a (possibly new) record that has been clipped if the input is mapped, paired, and has overlapping bases with its mate. * See {@link #getNumOverlappingAlignedBasesToClip(SAMRecord)} for how the number of overlapping bases is computed. * NB: this does not properly consider a cigar like: 100M20S10H. * NB: This method assumes that the record's mate is not contained within the given record's alignment. * - * @param record the record from which to clip bases. + * @param record the record from which to clip bases. * @param noSideEffects if true a modified clone of the original record is returned, otherwise we modify the record directly. - * @return + * @return a (possibly new) record that has been clipped */ public static SAMRecord clipOverlappingAlignedBases(final SAMRecord record, final boolean noSideEffects) { return clipOverlappingAlignedBases(record, getNumOverlappingAlignedBasesToClip(record), noSideEffects); @@ -1060,18 +1069,20 @@ public static SAMRecord clipOverlappingAlignedBases(final SAMRecord record, fina * NB: this does not properly consider a cigar like: 100M20S10H. * NB: This method assumes that the record's mate is not contained within the given record's alignment. * - * @param record the record from which to clip bases. + * @param record the record from which to clip bases. * @param numOverlappingBasesToClip the number of bases to clip at the end of the read. - * @param noSideEffects if true a modified clone of the original record is returned, otherwise we modify the record directly. + * @param noSideEffects if true a modified clone of the original record is returned, otherwise we modify the record directly.
+ * @return Returns a (possibly new) SAMRecord with the given number of bases soft-clipped */ public static SAMRecord clipOverlappingAlignedBases(final SAMRecord record, final int numOverlappingBasesToClip, final boolean noSideEffects) { // NB: ignores how to handle supplemental records when present for both ends by just using the mate information in the record. - if (numOverlappingBasesToClip <= 0 || record.getReadUnmappedFlag() || record.getMateUnmappedFlag()) return record; + if (numOverlappingBasesToClip <= 0 || record.getReadUnmappedFlag() || record.getMateUnmappedFlag()) { + return record; + } try { - final SAMRecord rec = noSideEffects ? ((SAMRecord)record.clone()) : record; + final SAMRecord rec = noSideEffects ? ((SAMRecord) record.clone()) : record; // watch out for when the second read overlaps all of the first read if (rec.getMateAlignmentStart() <= rec.getAlignmentStart()) { // make it unmapped @@ -1082,7 +1093,7 @@ public static SAMRecord clipOverlappingAlignedBases(final SAMRecord record, fina // 1-based index of first base in read to clip. int clipFrom = rec.getReadLength() - numOverlappingBasesToClip + 1; // we have to check if the last cigar element is soft-clipping, so we can subtract that from clipFrom - final CigarElement cigarElement = rec.getCigar().getCigarElement(rec.getCigarLength()-1); + final CigarElement cigarElement = rec.getCigar().getCigarElement(rec.getCigarLength() - 1); if (CigarOperator.SOFT_CLIP == cigarElement.getOperator()) clipFrom -= cigarElement.getLength(); // FIXME: does not properly consider a cigar like: 100M20S10H @@ -1108,100 +1119,102 @@ public static boolean isValidUnsignedIntegerAttribute(long value) { * Extract a List of 'other canonical alignments' from a SAM record. Those alignments are stored as a string in the 'SA' tag as defined * in the SAM specification. * The name, sequence and qualities, mate data are copied from the original record. 
+ * * @param record must be non null and must have a non-null associated header. * @return a list of 'other canonical alignments' SAMRecords. The list is empty if the 'SA' attribute is missing. */ public static List getOtherCanonicalAlignments(final SAMRecord record) { - if( record == null ) throw new IllegalArgumentException("record is null"); - if( record.getHeader() == null ) throw new IllegalArgumentException("record.getHeader() is null"); + if (record == null) throw new IllegalArgumentException("record is null"); + if (record.getHeader() == null) throw new IllegalArgumentException("record.getHeader() is null"); /* extract value of SA tag */ - final Object saValue = record.getAttribute( SAMTagUtil.getSingleton().SA ); - if( saValue == null ) return Collections.emptyList(); - if( ! (saValue instanceof String) ) throw new SAMException( - "Expected a String for attribute 'SA' but got " + saValue.getClass() ); + final Object saValue = record.getAttribute(SAMTagUtil.getSingleton().SA); + if (saValue == null) return Collections.emptyList(); + if (!(saValue instanceof String)) throw new SAMException( + "Expected a String for attribute 'SA' but got " + saValue.getClass() + ". Record: " + record); final SAMRecordFactory samReaderFactory = new DefaultSAMRecordFactory(); /* the spec says: "Other canonical alignments in a chimeric alignment, formatted as a * semicolon-delimited list: (rname,pos,strand,CIGAR,mapQ,NM;)+. * Each element in the list represents a part of the chimeric alignment. - * Conventionally, at a supplementary line, the 1rst element points to the primary line. + * Conventionally, at a supplementary line, the 1st element points to the primary line. 
*/ /* break string using semicolon */ - final String semiColonStrs[] = SEMICOLON_PAT.split((String)saValue); + final String semiColonStrs[] = SEMICOLON_PAT.split((String) saValue); /* the result list */ - final List alignments = new ArrayList<>( semiColonStrs.length ); + final List alignments = new ArrayList<>(semiColonStrs.length); /* base SAM flag */ - int record_flag = record.getFlags() ; + int record_flag = record.getFlags(); record_flag &= ~SAMFlag.PROPER_PAIR.flag; record_flag &= ~SAMFlag.SUPPLEMENTARY_ALIGNMENT.flag; record_flag &= ~SAMFlag.READ_REVERSE_STRAND.flag; - - for(int i=0; i< semiColonStrs.length;++i ) { + for (int i = 0; i < semiColonStrs.length; ++i) { final String semiColonStr = semiColonStrs[i]; /* ignore empty string */ - if( semiColonStr.isEmpty() ) continue; + if (semiColonStr.isEmpty()) continue; /* break string using comma */ final String commaStrs[] = COMMA_PAT.split(semiColonStr); - if( commaStrs.length != 6 ) throw new SAMException("Bad 'SA' attribute in " + semiColonStr); + if (commaStrs.length != 6) + throw new SAMException("Bad 'SA' attribute in " + semiColonStr + ". 
Record: " + record); /* create the new record */ - final SAMRecord otherRec = samReaderFactory.createSAMRecord( record.getHeader() ); + final SAMRecord otherRec = samReaderFactory.createSAMRecord(record.getHeader()); /* copy fields from the original record */ - otherRec.setReadName( record.getReadName() ); - otherRec.setReadBases( record.getReadBases() ); - otherRec.setBaseQualities( record.getBaseQualities() ); - if( record.getReadPairedFlag() && !record.getMateUnmappedFlag()) { - otherRec.setMateReferenceIndex( record.getMateReferenceIndex() ); - otherRec.setMateAlignmentStart( record.getMateAlignmentStart() ); + otherRec.setReadName(record.getReadName()); + otherRec.setReadBases(record.getReadBases()); + otherRec.setBaseQualities(record.getBaseQualities()); + if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) { + otherRec.setMateReferenceIndex(record.getMateReferenceIndex()); + otherRec.setMateAlignmentStart(record.getMateAlignmentStart()); } /* get reference sequence */ - final int tid = record.getHeader().getSequenceIndex( commaStrs[0] ); - if( tid == -1 ) throw new SAMException("Unknown contig in " + semiColonStr); - otherRec.setReferenceIndex( tid ); + final int tid = record.getHeader().getSequenceIndex(commaStrs[0]); + if (tid == -1) + throw new SAMException("Unknown contig in " + semiColonStr + ". Record: " + record); + otherRec.setReferenceIndex(tid); /* fill POS */ final int alignStart; try { alignStart = Integer.parseInt(commaStrs[1]); - } catch( final NumberFormatException err ) { - throw new SAMException("bad POS in "+semiColonStr, err); + } catch (final NumberFormatException err) { + throw new SAMException("bad POS in " + semiColonStr + ". 
Record: " + record, err); } - otherRec.setAlignmentStart( alignStart ); + otherRec.setAlignmentStart(alignStart); /* set TLEN */ - if( record.getReadPairedFlag() && - !record.getMateUnmappedFlag() && - record.getMateReferenceIndex() == tid ) { - otherRec.setInferredInsertSize( record.getMateAlignmentStart() - alignStart ); + if (record.getReadPairedFlag() && + !record.getMateUnmappedFlag() && + record.getMateReferenceIndex() == tid) { + otherRec.setInferredInsertSize(record.getMateAlignmentStart() - alignStart); } /* set FLAG */ - int other_flag = record_flag; - other_flag |= (commaStrs[2].equals("+") ? 0 : SAMFlag.READ_REVERSE_STRAND.flag) ; + int other_flag = record_flag; + other_flag |= (commaStrs[2].equals("+") ? 0 : SAMFlag.READ_REVERSE_STRAND.flag); /* spec: Conventionally, at a supplementary line, the 1st element points to the primary line */ - if( !( record.getSupplementaryAlignmentFlag() && i==0 ) ) { - other_flag |= SAMFlag.SUPPLEMENTARY_ALIGNMENT.flag; - } - otherRec.setFlags(other_flag); + if (!(record.getSupplementaryAlignmentFlag() && i == 0)) { + other_flag |= SAMFlag.SUPPLEMENTARY_ALIGNMENT.flag; + } + otherRec.setFlags(other_flag); /* set CIGAR */ - otherRec.setCigar( TextCigarCodec.decode( commaStrs[3] ) ); + otherRec.setCigar(TextCigarCodec.decode(commaStrs[3])); /* set MAPQ */ try { - otherRec.setMappingQuality( Integer.parseInt(commaStrs[4]) ); + otherRec.setMappingQuality(Integer.parseInt(commaStrs[4])); } catch (final NumberFormatException err) { - throw new SAMException("bad MAPQ in "+semiColonStr, err); + throw new SAMException("bad MAPQ in " + semiColonStr + ". Record: " + record, err); } /* fill NM */ @@ -1210,16 +1223,16 @@ public static boolean isValidUnsignedIntegerAttribute(long value) { otherRec.setAttribute(SAMTagUtil.getSingleton().NM, Integer.parseInt(commaStrs[5])); } } catch (final NumberFormatException err) { - throw new SAMException("bad NM in "+semiColonStr, err); + throw new SAMException("bad NM in " + semiColonStr + ". 
Record: " + record, err); } /* if strand is not the same: reverse-complement */ - if( otherRec.getReadNegativeStrandFlag() != record.getReadNegativeStrandFlag() ) { - SAMRecordUtil.reverseComplement(otherRec); + if (otherRec.getReadNegativeStrandFlag() != record.getReadNegativeStrandFlag()) { + otherRec.reverseComplement(true); } /* add the alignment */ - alignments.add( otherRec ); + alignments.add(otherRec); } return alignments; } diff --git a/src/main/java/htsjdk/samtools/SAMValidationError.java b/src/main/java/htsjdk/samtools/SAMValidationError.java index d560b119e..edd49c13c 100644 --- a/src/main/java/htsjdk/samtools/SAMValidationError.java +++ b/src/main/java/htsjdk/samtools/SAMValidationError.java @@ -171,6 +171,9 @@ HEADER_RECORD_MISSING_REQUIRED_TAG, + /** Header tag contains illegal value */ + HEADER_TAG_NON_CONFORMING_VALUE, + /** Date string is not ISO-8601 */ INVALID_DATE_STRING(Severity.WARNING), @@ -205,7 +208,22 @@ MISMATCH_MATE_CIGAR_STRING, /** There is a Cigar String (stored in the MC Tag) for a read whose mate is NOT mapped. 
*/ - MATE_CIGAR_STRING_INVALID_PRESENCE; + MATE_CIGAR_STRING_INVALID_PRESENCE, + + /** The mate reference of the unpaired read should be "*" */ + INVALID_UNPAIRED_MATE_REFERENCE, + + /** The unaligned mate read start position should be 0 */ + INVALID_UNALIGNED_MATE_START, + + /** Mismatch between the number of bases covered by the CIGAR and sequence */ + MISMATCH_CIGAR_SEQ_LENGTH, + + /** Mismatch between the sequence and quality length */ + MISMATCH_SEQ_QUAL_LENGTH, + + /** Mismatch between file and sequence dictionaries */ + MISMATCH_FILE_SEQ_DICT; public final Severity severity; diff --git a/src/main/java/htsjdk/samtools/SamFileHeaderMerger.java b/src/main/java/htsjdk/samtools/SamFileHeaderMerger.java index b3f588caa..d3cf16ada 100644 --- a/src/main/java/htsjdk/samtools/SamFileHeaderMerger.java +++ b/src/main/java/htsjdk/samtools/SamFileHeaderMerger.java @@ -98,6 +98,7 @@ //HeaderRecordFactory that creates SAMReadGroupRecord instances. private static final HeaderRecordFactory READ_GROUP_RECORD_FACTORY = new HeaderRecordFactory() { + @Override public SAMReadGroupRecord createRecord(final String id, final SAMReadGroupRecord srcReadGroupRecord) { return new SAMReadGroupRecord(id, srcReadGroupRecord); } @@ -105,6 +106,7 @@ public SAMReadGroupRecord createRecord(final String id, final SAMReadGroupRecord //HeaderRecordFactory that creates SAMProgramRecord instances. 
private static final HeaderRecordFactory PROGRAM_RECORD_FACTORY = new HeaderRecordFactory() { + @Override public SAMProgramRecord createRecord(final String id, final SAMProgramRecord srcProgramRecord) { return new SAMProgramRecord(id, srcProgramRecord); } @@ -112,6 +114,7 @@ public SAMProgramRecord createRecord(final String id, final SAMProgramRecord src //comparator used to sort lists of program group and read group records private static final Comparator RECORD_ID_COMPARATOR = new Comparator() { + @Override public int compare(final AbstractSAMHeaderRecord o1, final AbstractSAMHeaderRecord o2) { return o1.getId().compareTo(o2.getId()); } diff --git a/src/main/java/htsjdk/samtools/SamFileValidator.java b/src/main/java/htsjdk/samtools/SamFileValidator.java index e40bfe94f..3e316a235 100644 --- a/src/main/java/htsjdk/samtools/SamFileValidator.java +++ b/src/main/java/htsjdk/samtools/SamFileValidator.java @@ -88,6 +88,7 @@ private Histogram errorsByType; private PairEndInfoMap pairEndInfoByName; private ReferenceSequenceFileWalker refFileWalker; + private SAMSequenceDictionary samSequenceDictionary; private boolean verbose; private int maxVerboseOutput; private SAMSortOrderChecker orderChecker; @@ -96,6 +97,8 @@ private boolean bisulfiteSequenced; private IndexValidationStringency indexValidationStringency; private boolean sequenceDictionaryEmptyAndNoWarningEmitted; + private int numWarnings; + private int numErrors; private final int maxTempFiles; @@ -111,6 +114,8 @@ public SamFileValidator(final PrintWriter out, final int maxTempFiles) { this.ignoreWarnings = false; this.bisulfiteSequenced = false; this.sequenceDictionaryEmptyAndNoWarningEmitted = false; + this.numWarnings = 0; + this.numErrors = 0; } Histogram getErrorsByType() { @@ -150,7 +155,7 @@ public boolean validateSamFileSummary(final SamReader samReader, final Reference for (final Histogram.Bin bin : errorsByType.values()) { errorsAndWarningsByType.increment(bin.getId().getHistogramString(), 
bin.getValue()); } - final MetricsFile metricsFile = new MetricsFile(); + final MetricsFile metricsFile = new MetricsFile<>(); errorsByType.setBinLabel("Error Type"); errorsByType.setValueLabel("Count"); metricsFile.setHistogram(errorsAndWarningsByType); @@ -176,7 +181,7 @@ public boolean validateSamFileVerbose(final SamReader samReader, final Reference } catch (MaxOutputExceededException e) { out.println("Maximum output of [" + maxVerboseOutput + "] errors reached."); } - boolean result = errorsByType.isEmpty(); + final boolean result = errorsByType.isEmpty(); cleanup(); return result; } @@ -245,13 +250,13 @@ private void validateUnmatchedPairs() { // For the coordinate-sorted map, need to detect mate pairs in which the mateReferenceIndex on one end // does not match the readReference index on the other end, so the pairs weren't united and validated. inMemoryPairMap = new InMemoryPairEndInfoMap(); - CloseableIterator> it = ((CoordinateSortedPairEndInfoMap) pairEndInfoByName).iterator(); + final CloseableIterator> it = pairEndInfoByName.iterator(); while (it.hasNext()) { - Map.Entry entry = it.next(); - PairEndInfo pei = inMemoryPairMap.remove(entry.getValue().readReferenceIndex, entry.getKey()); + final Map.Entry entry = it.next(); + final PairEndInfo pei = inMemoryPairMap.remove(entry.getValue().readReferenceIndex, entry.getKey()); if (pei != null) { // Found a mismatch btw read.mateReferenceIndex and mate.readReferenceIndex - List errors = pei.validateMates(entry.getValue(), entry.getKey()); + final List errors = pei.validateMates(entry.getValue(), entry.getKey()); for (final SAMValidationError error : errors) { addError(error); } @@ -301,8 +306,7 @@ private void validateSamRecordsAndQualityFormat(final Iterable samRec if (cigarIsValid) { try { validateNmTag(record, recordNumber); - } - catch (SAMException e) { + } catch (SAMException e) { if (hasValidSortOrder) { // If a CRAM file has an invalid sort order, the ReferenceFileWalker will throw a // SAMException 
due to an out of order request when retrieving reference bases during NM @@ -402,10 +406,7 @@ private void validateSecondaryBaseCalls(final SAMRecord record, final long recor } private boolean validateCigar(final SAMRecord record, final long recordNumber) { - if (record.getReadUnmappedFlag()) { - return true; - } - return validateCigar(record, recordNumber, true); + return record.getReadUnmappedFlag() || validateCigar(record, recordNumber, true); } private boolean validateMateCigar(final SAMRecord record, final long recordNumber) { @@ -455,6 +456,7 @@ private void init(final ReferenceSequenceFile reference, final SAMFileHeader hea } if (reference != null) { this.refFileWalker = new ReferenceSequenceFileWalker(reference); + this.samSequenceDictionary = reference.getSequenceDictionary(); } } @@ -522,6 +524,12 @@ private void validateHeader(final SAMFileHeader fileHeader) { } if (fileHeader.getSequenceDictionary().isEmpty()) { sequenceDictionaryEmptyAndNoWarningEmitted = true; + } else { + if (samSequenceDictionary != null) { + if (!fileHeader.getSequenceDictionary().isSameDictionary(samSequenceDictionary)) { + addError(new SAMValidationError(Type.MISMATCH_FILE_SEQ_DICT, "Mismatch between file and sequence dictionary", null)); + } + } } if (fileHeader.getReadGroups().isEmpty()) { addError(new SAMValidationError(Type.MISSING_READ_GROUP, "Read groups is empty", null)); @@ -537,7 +545,7 @@ private void validateHeader(final SAMFileHeader fileHeader) { } final List rgs = fileHeader.getReadGroups(); - final Set readGroupIDs = new HashSet(); + final Set readGroupIDs = new HashSet<>(); for (final SAMReadGroupRecord record : rgs) { final String readGroupID = record.getReadGroupId(); @@ -554,12 +562,12 @@ private void validateHeader(final SAMFileHeader fileHeader) { "A platform (PL) attribute was not found for read group ", readGroupID)); } - else { + else { // NB: cannot be null, so not catching a NPE try { SAMReadGroupRecord.PlatformValue.valueOf(platformValue.toUpperCase()); 
} catch (IllegalArgumentException e) { - addError(new SAMValidationError(Type.INVALID_PLATFORM_VALUE, + addError(new SAMValidationError(Type.INVALID_PLATFORM_VALUE, "The platform (PL) attribute (" + platformValue + ") + was not one of the valid values for read group ", readGroupID)); } @@ -567,11 +575,41 @@ private void validateHeader(final SAMFileHeader fileHeader) { } } + /** + * Number of warnings during SAM file validation + * + * @return number of warnings + */ + public int getNumWarnings() { + return this.numWarnings; + } + + /** + * Number of errors during SAM file validation + * + * @return number of errors + */ + public int getNumErrors() { + return this.numErrors; + } + private void addError(final SAMValidationError error) { // Just ignore an error if it's of a type we're not interested in if (this.errorsToIgnore.contains(error.getType())) return; - if (this.ignoreWarnings && error.getType().severity == SAMValidationError.Severity.WARNING) return; + switch (error.getType().severity) { + case WARNING: + if ( this.ignoreWarnings ) { + return; + } + this.numWarnings++; + break; + case ERROR: + this.numErrors++; + break; + default: + throw new SAMException("Unknown SAM validation error severity: " + error.getType().severity); + } this.errorsByType.increment(error.getType()); if (verbose) { @@ -659,11 +697,10 @@ public PairEndInfo(final SAMRecord record, final long recordNumber) { this.firstOfPairFlag = record.getFirstOfPairFlag(); } - private PairEndInfo(int readAlignmentStart, int readReferenceIndex, boolean readNegStrandFlag, boolean readUnmappedFlag, - String readCigarString, - int mateAlignmentStart, int mateReferenceIndex, boolean mateNegStrandFlag, boolean mateUnmappedFlag, - String mateCigarString, - boolean firstOfPairFlag, long recordNumber) { + private PairEndInfo(final int readAlignmentStart, final int readReferenceIndex, final boolean readNegStrandFlag, final boolean readUnmappedFlag, + final String readCigarString, + final int mateAlignmentStart, 
final int mateReferenceIndex, final boolean mateNegStrandFlag, final boolean mateUnmappedFlag, + final String mateCigarString, final boolean firstOfPairFlag, final long recordNumber) { this.readAlignmentStart = readAlignmentStart; this.readReferenceIndex = readReferenceIndex; this.readNegStrandFlag = readNegStrandFlag; @@ -679,7 +716,7 @@ private PairEndInfo(int readAlignmentStart, int readReferenceIndex, boolean read } public List validateMates(final PairEndInfo mate, final String readName) { - final List errors = new ArrayList(); + final List errors = new ArrayList<>(); validateMateFields(this, mate, readName, errors); validateMateFields(mate, this, readName, errors); // Validations that should not be repeated on both ends @@ -750,21 +787,25 @@ private void validateMateFields(final PairEndInfo end1, final PairEndInfo end2, PairEndInfo remove(int mateReferenceIndex, String key); + @Override CloseableIterator> iterator(); } private class CoordinateSortedPairEndInfoMap implements PairEndInfoMap { private final CoordinateSortedPairInfoMap onDiskMap = - new CoordinateSortedPairInfoMap(maxTempFiles, new Codec()); + new CoordinateSortedPairInfoMap<>(maxTempFiles, new Codec()); + @Override public void put(int mateReferenceIndex, String key, PairEndInfo value) { onDiskMap.put(mateReferenceIndex, key, value); } + @Override public PairEndInfo remove(int mateReferenceIndex, String key) { return onDiskMap.remove(mateReferenceIndex, key); } + @Override public CloseableIterator> iterator() { return onDiskMap.iterator(); } @@ -773,14 +814,17 @@ public PairEndInfo remove(int mateReferenceIndex, String key) { private DataInputStream in; private DataOutputStream out; + @Override public void setOutputStream(final OutputStream os) { this.out = new DataOutputStream(os); } + @Override public void setInputStream(final InputStream is) { this.in = new DataInputStream(is); } + @Override public void encode(final String key, final PairEndInfo record) { try { out.writeUTF(key); @@ -802,6 
+846,7 @@ public void encode(final String key, final PairEndInfo record) { } } + @Override public Map.Entry decode() { try { final String key = in.readUTF(); @@ -836,33 +881,40 @@ public void encode(final String key, final PairEndInfo record) { } private static class InMemoryPairEndInfoMap implements PairEndInfoMap { - private final Map map = new HashMap(); + private final Map map = new HashMap<>(); + @Override public void put(int mateReferenceIndex, String key, PairEndInfo value) { if (mateReferenceIndex != value.mateReferenceIndex) throw new IllegalArgumentException("mateReferenceIndex does not agree with PairEndInfo"); map.put(key, value); } + @Override public PairEndInfo remove(int mateReferenceIndex, String key) { return map.remove(key); } + @Override public CloseableIterator> iterator() { final Iterator> it = map.entrySet().iterator(); return new CloseableIterator>() { + @Override public void close() { // do nothing } + @Override public boolean hasNext() { return it.hasNext(); } + @Override public Map.Entry next() { return it.next(); } + @Override public void remove() { it.remove(); } diff --git a/src/main/java/htsjdk/samtools/SamInputResource.java b/src/main/java/htsjdk/samtools/SamInputResource.java index f25d97bb6..a039e5aa3 100644 --- a/src/main/java/htsjdk/samtools/SamInputResource.java +++ b/src/main/java/htsjdk/samtools/SamInputResource.java @@ -29,6 +29,7 @@ import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.seekablestream.SeekableStreamFactory; import htsjdk.samtools.sra.SRAAccession; +import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Lazy; import htsjdk.samtools.util.RuntimeIOException; @@ -39,9 +40,12 @@ import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; +import java.nio.channels.SeekableByteChannel; import java.nio.file.FileSystemNotFoundException; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.function.Function; +import 
java.util.function.Supplier; /** * Describes a SAM-like resource, including its data (where the records are), and optionally an index. @@ -89,7 +93,15 @@ public String toString() { public static SamInputResource of(final File file) { return new SamInputResource(new FileInputResource(file)); } /** Creates a {@link SamInputResource} reading from the provided resource, with no index. */ - public static SamInputResource of(final Path path) { return new SamInputResource(new PathInputResource(path)); } + public static SamInputResource of(final Path path) { + return new SamInputResource(new PathInputResource(path)); + } + + /** Creates a {@link SamInputResource} reading from the provided resource, with no index, + * and with a wrapper to apply to the SeekableByteChannel for custom prefetching/buffering. */ + public static SamInputResource of(final Path path, Function wrapper) { + return new SamInputResource(new PathInputResource(path, wrapper)); + } /** Creates a {@link SamInputResource} reading from the provided resource, with no index. */ public static SamInputResource of(final InputStream inputStream) { return new SamInputResource(new InputStreamInputResource(inputStream)); } @@ -125,6 +137,12 @@ public SamInputResource index(final Path path) { return this; } + /** Updates the index to point at the provided resource, with the provided wrapper, then returns itself. */ + public SamInputResource index(final Path path, Function wrapper) { + this.index = new PathInputResource(path, wrapper); + return this; + } + /** Updates the index to point at the provided resource, then returns itself. 
*/ public SamInputResource index(final InputStream inputStream) { this.index = new InputStreamInputResource(inputStream); @@ -213,9 +231,9 @@ public String toString() { class FileInputResource extends InputResource { final File fileResource; - final Lazy lazySeekableStream = new Lazy(new Lazy.LazyInitializer() { + final Lazy lazySeekableStream = new Lazy<>(new Supplier() { @Override - public SeekableStream make() { + public SeekableStream get() { try { return new SeekableFileStream(fileResource); } catch (final FileNotFoundException e) { @@ -268,11 +286,12 @@ public SRAAccession asSRAAccession() { class PathInputResource extends InputResource { final Path pathResource; - final Lazy lazySeekableStream = new Lazy(new Lazy.LazyInitializer() { + final Function wrapper; + final Lazy lazySeekableStream = new Lazy<>(new Supplier() { @Override - public SeekableStream make() { + public SeekableStream get() { try { - return new SeekablePathStream(pathResource); + return new SeekablePathStream(pathResource, wrapper); } catch (final IOException e) { throw new RuntimeIOException(e); } @@ -281,8 +300,14 @@ public SeekableStream make() { PathInputResource(final Path pathResource) { + this(pathResource, Function.identity()); + } + + // wrapper applies to the SeekableByteChannel for custom prefetching/buffering. 
+ PathInputResource(final Path pathResource, Function wrapper) { super(Type.PATH); this.pathResource = pathResource; + this.wrapper = wrapper; } @Override @@ -327,9 +352,9 @@ public SRAAccession asSRAAccession() { class UrlInputResource extends InputResource { final URL urlResource; - final Lazy lazySeekableStream = new Lazy(new Lazy.LazyInitializer() { + final Lazy lazySeekableStream = new Lazy<>(new Supplier() { @Override - public SeekableStream make() { + public SeekableStream get() { try { return SeekableStreamFactory.getInstance().getStreamFor(urlResource); } catch (final IOException ioe) { throw new RuntimeIOException(ioe); } } @@ -348,8 +373,8 @@ public File asFile() { @Override public Path asPath() { try { - return Paths.get(urlResource.toURI()); - } catch (URISyntaxException | IllegalArgumentException | + return IOUtil.getPath(urlResource.toExternalForm()); + } catch (IOException | IllegalArgumentException | FileSystemNotFoundException | SecurityException e) { return null; } diff --git a/src/main/java/htsjdk/samtools/SamPairUtil.java b/src/main/java/htsjdk/samtools/SamPairUtil.java index ee1707bd5..4849850ec 100644 --- a/src/main/java/htsjdk/samtools/SamPairUtil.java +++ b/src/main/java/htsjdk/samtools/SamPairUtil.java @@ -424,6 +424,7 @@ public SetMateInfoIterator(final Iterator iterator, final boolean set */ public long getNumMateCigarsAdded() { return this.numMateCigarsAdded; } + @Override public boolean hasNext() { return (!records.isEmpty() || super.hasNext()); } @@ -495,12 +496,14 @@ private void advance() { } } + @Override public SAMRecord next() { advance(); if (records.isEmpty()) throw new IllegalStateException("Unexpectedly found an empty record list"); return this.records.poll(); } + @Override public SAMRecord peek() { advance(); if (records.isEmpty()) throw new IllegalStateException("Unexpectedly found an empty record list"); diff --git a/src/main/java/htsjdk/samtools/SamReader.java b/src/main/java/htsjdk/samtools/SamReader.java index 
2f1b2f9dd..08f93ec17 100644 --- a/src/main/java/htsjdk/samtools/SamReader.java +++ b/src/main/java/htsjdk/samtools/SamReader.java @@ -164,6 +164,7 @@ public String toString() { * Only a single open iterator on a SAM or BAM file may be extant at any one time. If you want to start * a second iteration, the first one must be closed first. */ + @Override public SAMRecordIterator iterator(); /** @@ -381,7 +382,11 @@ public PrimitiveSamReaderToSamReaderAdapter(final PrimitiveSamReader p, final Sa this.resource = resource; } - PrimitiveSamReader underlyingReader() { + /** + * Access the underlying {@link PrimitiveSamReader} used by this adapter. + * @return the {@link PrimitiveSamReader} used by this adapter. + */ + public PrimitiveSamReader underlyingReader() { return p; } @@ -554,6 +559,7 @@ public AssertingIterator(final CloseableIterator iterator) { wrappedIterator = iterator; } + @Override public SAMRecordIterator assertSorted(final SAMFileHeader.SortOrder sortOrder) { if (sortOrder == null || sortOrder == SAMFileHeader.SortOrder.unsorted) { @@ -565,6 +571,7 @@ public SAMRecordIterator assertSorted(final SAMFileHeader.SortOrder sortOrder) { return this; } + @Override public SAMRecord next() { final SAMRecord result = wrappedIterator.next(); if (comparator != null) { @@ -587,10 +594,13 @@ public SAMRecord next() { return result; } + @Override public void close() { wrappedIterator.close(); } + @Override public boolean hasNext() { return wrappedIterator.hasNext(); } + @Override public void remove() { wrappedIterator.remove(); } } diff --git a/src/main/java/htsjdk/samtools/SamReaderFactory.java b/src/main/java/htsjdk/samtools/SamReaderFactory.java index 8769f4879..3d6a80fa2 100644 --- a/src/main/java/htsjdk/samtools/SamReaderFactory.java +++ b/src/main/java/htsjdk/samtools/SamReaderFactory.java @@ -29,13 +29,16 @@ import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.sra.SRAAccession; import htsjdk.samtools.util.*; +import 
htsjdk.samtools.util.zip.InflaterFactory; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.nio.channels.SeekableByteChannel; import java.nio.file.Path; import java.util.Collections; import java.util.EnumSet; +import java.util.function.Function; import java.util.zip.GZIPInputStream; /** @@ -74,13 +77,31 @@ public abstract class SamReaderFactory { private static ValidationStringency defaultValidationStringency = ValidationStringency.DEFAULT_STRINGENCY; - + abstract public SamReader open(final File file); + /** + * Open the specified path (without using any wrappers). + * + * @param path the SAM or BAM file to open. + */ public SamReader open(final Path path) { - final SamInputResource r = SamInputResource.of(path); + return open(path, null, null); + } + + /** + * Open the specified path, using the specified wrappers for prefetching/caching. + * + * @param path the SAM or BAM file to open + * @param dataWrapper the wrapper for the data (or null for none) + * @param indexWrapper the wrapper for the index (or null for none) + */ + public SamReader open(final Path path, + Function dataWrapper, + Function indexWrapper) { + final SamInputResource r = SamInputResource.of(path, dataWrapper); final Path indexMaybe = SamFiles.findIndex(path); - if (indexMaybe != null) r.index(indexMaybe); + if (indexMaybe != null) r.index(indexMaybe, indexWrapper); return open(r); } @@ -93,6 +114,13 @@ public SamReader open(final Path path) { /** Set this factory's {@link htsjdk.samtools.SAMRecordFactory} to the provided one, then returns itself. */ abstract public SamReaderFactory samRecordFactory(final SAMRecordFactory samRecordFactory); + /** + * Set this factory's {@link htsjdk.samtools.util.zip.InflaterFactory} to the provided one, then returns itself. + * Note: The inflaterFactory provided here is only used for BAM decompression implemented with {@link BAMFileReader}, + * it is not used for CRAM or other formats like a gzipped SAM file. 
+ */ + abstract public SamReaderFactory inflaterFactory(final InflaterFactory inflaterFactory); + /** Enables the provided {@link Option}s, then returns itself. */ abstract public SamReaderFactory enable(final Option... options); @@ -118,18 +146,20 @@ public SamReader open(final Path path) { abstract public SamReaderFactory validationStringency(final ValidationStringency validationStringency); /** Set whether readers created by this factory will use asynchronous IO. - * If this methods is not called, this flag will default to the value of {@link Defaults#USE_ASYNC_IO_FOR_SAMTOOLS}. + * If this methods is not called, this flag will default to the value of {@link Defaults#USE_ASYNC_IO_READ_FOR_SAMTOOLS}. * Note that this option may not be applicable to all readers returned from this factory. * Returns the factory itself. */ abstract public SamReaderFactory setUseAsyncIo(final boolean asynchronousIO); private static SamReaderFactoryImpl DEFAULT = - new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, DefaultSAMRecordFactory.getInstance()); + new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, + DefaultSAMRecordFactory.getInstance(), BlockGunzipper.getDefaultInflaterFactory()); public static void setDefaultValidationStringency(final ValidationStringency defaultValidationStringency) { SamReaderFactory.defaultValidationStringency = defaultValidationStringency; // The default may have changed, so reset the default SamReader - DEFAULT = new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, DefaultSAMRecordFactory.getInstance()); + DEFAULT = new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, + DefaultSAMRecordFactory.getInstance(), BlockGunzipper.getDefaultInflaterFactory()); } /** Creates a copy of the default {@link SamReaderFactory}. 
*/ @@ -138,11 +168,12 @@ public static SamReaderFactory makeDefault() { } /** - * Creates an "empty" factory with no enabled {@link Option}s, {@link ValidationStringency#DEFAULT_STRINGENCY}, and - * {@link htsjdk.samtools.DefaultSAMRecordFactory}. + * Creates an "empty" factory with no enabled {@link Option}s, {@link ValidationStringency#DEFAULT_STRINGENCY}, + * no path wrapper, and {@link htsjdk.samtools.DefaultSAMRecordFactory}. */ public static SamReaderFactory make() { - return new SamReaderFactoryImpl(EnumSet.noneOf(Option.class), ValidationStringency.DEFAULT_STRINGENCY, DefaultSAMRecordFactory.getInstance()); + return new SamReaderFactoryImpl(EnumSet.noneOf(Option.class), ValidationStringency.DEFAULT_STRINGENCY, + DefaultSAMRecordFactory.getInstance(), BlockGunzipper.getDefaultInflaterFactory()); } private static class SamReaderFactoryImpl extends SamReaderFactory { @@ -153,12 +184,14 @@ public static SamReaderFactory make() { private SAMRecordFactory samRecordFactory; private CustomReaderFactory customReaderFactory; private CRAMReferenceSource referenceSource; + private InflaterFactory inflaterFactory; - private SamReaderFactoryImpl(final EnumSet

  • emit nothing for a read base matching corresponding reference base.
  • + *
  • emit a {@link Substitution} read feature for each ACTGN-ACTGN mismatch.
  • + *
  • emit {@link ReadBase} for a non-ACTGN mismatch. The side effect is the quality score stored twice.
  • + *

    + * IMPORTANT: reference and read bases are always compared for match/mismatch in upper case due to BAM limitations. + * + * @param alignmentStart CRAM record alignment start + * @param features a list of read features to add to + * @param fromPosInRead a zero based position in the read to start with + * @param alignmentStartOffset offset into the reference array + * @param nofReadBases how many read bases to process + * @param bases the read bases array + * @param qualityScore the quality score array + */ + void addMismatchReadFeatures(final int alignmentStart, final List features, final int fromPosInRead, final int alignmentStartOffset, final int nofReadBases, final byte[] bases, final byte[] qualityScore) { - int oneBasedPositionInRead; - final boolean noQS = (qualityScore.length == 0); + int oneBasedPositionInRead = fromPosInRead + 1; + int refIndex = alignmentStart + alignmentStartOffset - 1; - int i; - boolean qualityAdded; byte refBase; - for (i = 0; i < nofReadBases; i++) { - oneBasedPositionInRead = i + fromPosInRead + 1; - final int referenceCoordinates = cramRecord.alignmentStart + i + alignmentStartOffset - 1; - qualityAdded = false; - if (referenceCoordinates >= refBases.length) refBase = 'N'; - else refBase = refBases[referenceCoordinates]; - refBase = Utils.normalizeBase(refBase); - - if (bases[i + fromPosInRead] != refBase) { - final Substitution substitution = new Substitution(); - substitution.setPosition(oneBasedPositionInRead); - substitution.setBase(bases[i + fromPosInRead]); - substitution.setReferenceBase(refBase); - - features.add(substitution); - - if (noQS) continue; - } - - if (noQS) continue; - - if (refSNPs != null) { - final byte snpOrNot = refSNPs[referenceCoordinates]; - if (snpOrNot != 0) { - final byte score = (byte) (QS_asciiOffset + qualityScore[i + fromPosInRead]); - features.add(new BaseQualityScore(oneBasedPositionInRead, score)); - qualityAdded = true; - landedRefMaskScores++; + for (int i = 0; i < nofReadBases; i++, 
oneBasedPositionInRead++, refIndex++) { + if (refIndex >= refBases.length) refBase = 'N'; + else refBase = refBases[refIndex]; + + final byte readBase = bases[i + fromPosInRead]; + + if (readBase != refBase) { + final boolean isSubstitution = SequenceUtil.isUpperACGTN(readBase) && SequenceUtil.isUpperACGTN(refBase); + if (isSubstitution) { + features.add(new Substitution(oneBasedPositionInRead, readBase, refBase)); + } else { + final byte score = qualityScore[i + fromPosInRead]; + features.add(new ReadBase(oneBasedPositionInRead, readBase, score)); } } - - if (qualityAdded) landedTotalScores++; } } - public long getLandedRefMaskScores() { - return landedRefMaskScores; - } - - public long getLandedTotalScores() { - return landedTotalScores; - } - public byte[] getRefBases() { return refBases; } @@ -350,14 +314,6 @@ public void setRefBases(final byte[] refBases) { this.refBases = refBases; } - public byte[] getRefSNPs() { - return refSNPs; - } - - public void setRefSNPs(final byte[] refSNPs) { - this.refSNPs = refSNPs; - } - public Map getReadGroupMap() { return readGroupMap; } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/ByteArrayLenEncoding.java b/src/main/java/htsjdk/samtools/cram/encoding/ByteArrayLenEncoding.java index 0c76a5b6e..0c4557793 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/ByteArrayLenEncoding.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/ByteArrayLenEncoding.java @@ -60,6 +60,7 @@ public static EncodingParams toParam(final EncodingParams lenParams, return new EncodingParams(ID, byteArrayOutputStream.toByteArray()); } + @Override public byte[] toByteArray() { final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); try { @@ -78,6 +79,7 @@ public static EncodingParams toParam(final EncodingParams lenParams, return byteArrayOutputStream.toByteArray(); } + @Override public void fromByteArray(final byte[] data) { final ByteBuffer buffer = ByteBuffer.wrap(data); diff --git 
a/src/main/java/htsjdk/samtools/cram/encoding/ByteArrayStopEncoding.java b/src/main/java/htsjdk/samtools/cram/encoding/ByteArrayStopEncoding.java index c46d96754..c62334d6a 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/ByteArrayStopEncoding.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/ByteArrayStopEncoding.java @@ -56,6 +56,7 @@ public static EncodingParams toParam(final byte stopByte, final int externalId) return new EncodingParams(ID, e.toByteArray()); } + @Override public byte[] toByteArray() { final ByteBuffer buf = ByteBuffer.allocate(1024); buf.order(ByteOrder.LITTLE_ENDIAN); @@ -69,6 +70,7 @@ public static EncodingParams toParam(final byte stopByte, final int externalId) return array; } + @Override public void fromByteArray(final byte[] data) { final ByteBuffer buf = ByteBuffer.wrap(data); buf.order(ByteOrder.LITTLE_ENDIAN); diff --git a/src/main/java/htsjdk/samtools/cram/encoding/ExternalByteArrayEncoding.java b/src/main/java/htsjdk/samtools/cram/encoding/ExternalByteArrayEncoding.java index 2fc707c5f..107a484e1 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/ExternalByteArrayEncoding.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/ExternalByteArrayEncoding.java @@ -38,10 +38,12 @@ public static EncodingParams toParam(final int contentId) { return new EncodingParams(encodingId, e.toByteArray()); } + @Override public byte[] toByteArray() { return ITF8.writeUnsignedITF8(contentId); } + @Override public void fromByteArray(final byte[] data) { contentId = ITF8.readUnsignedITF8(data); } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/ExternalByteEncoding.java b/src/main/java/htsjdk/samtools/cram/encoding/ExternalByteEncoding.java index 0fed72059..75a63ccd6 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/ExternalByteEncoding.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/ExternalByteEncoding.java @@ -38,10 +38,12 @@ public static EncodingParams toParam(final int contentId) { return new 
EncodingParams(encodingId, externalByteEncoding.toByteArray()); } + @Override public byte[] toByteArray() { return ITF8.writeUnsignedITF8(contentId); } + @Override public void fromByteArray(final byte[] data) { contentId = ITF8.readUnsignedITF8(data); } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/ExternalIntegerEncoding.java b/src/main/java/htsjdk/samtools/cram/encoding/ExternalIntegerEncoding.java index a7c573668..1f0ecba69 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/ExternalIntegerEncoding.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/ExternalIntegerEncoding.java @@ -38,10 +38,12 @@ public static EncodingParams toParam(final int contentId) { return new EncodingParams(encodingId, externalIntegerEncoding.toByteArray()); } + @Override public byte[] toByteArray() { return ITF8.writeUnsignedITF8(contentId); } + @Override public void fromByteArray(final byte[] data) { contentId = ITF8.readUnsignedITF8(data); } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/ExternalLongEncoding.java b/src/main/java/htsjdk/samtools/cram/encoding/ExternalLongEncoding.java index 402cea888..b3ba54ef6 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/ExternalLongEncoding.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/ExternalLongEncoding.java @@ -38,10 +38,12 @@ public static EncodingParams toParam(final int contentId) { return new EncodingParams(encodingId, externalLongEncoding.toByteArray()); } + @Override public byte[] toByteArray() { return ITF8.writeUnsignedITF8(contentId); } + @Override public void fromByteArray(final byte[] data) { contentId = ITF8.readUnsignedITF8(data); } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/GolombRiceIntegerCodec.java b/src/main/java/htsjdk/samtools/cram/encoding/GolombRiceIntegerCodec.java index e5962a152..579f28b77 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/GolombRiceIntegerCodec.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/GolombRiceIntegerCodec.java @@ -38,6 
+38,7 @@ public GolombRiceIntegerCodec(final int offset, final int log2m) { mask = ~(~0 << log2m); } + @Override public final Integer read(final BitInputStream bitInputStream) throws IOException { int unary = 0; diff --git a/src/main/java/htsjdk/samtools/cram/encoding/huffman/HuffmanTree.java b/src/main/java/htsjdk/samtools/cram/encoding/huffman/HuffmanTree.java index 43500c4d3..bd4316d23 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/huffman/HuffmanTree.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/huffman/HuffmanTree.java @@ -24,6 +24,7 @@ frequency = freq; } + @Override public int compareTo(@SuppressWarnings("NullableProblems") final HuffmanTree tree) { return frequency - tree.frequency; } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/BaseQualityScore.java b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/BaseQualityScore.java index 41a69d27f..07ee30502 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/BaseQualityScore.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/BaseQualityScore.java @@ -44,6 +44,7 @@ public int getPosition() { return position; } + @Override public void setPosition(final int position) { this.position = position; } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/HardClip.java b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/HardClip.java index 3c3c7ad04..0e5678bb4 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/HardClip.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/HardClip.java @@ -41,10 +41,12 @@ public byte getOperator() { return operator; } + @Override public int getPosition() { return position; } + @Override public void setPosition(final int position) { this.position = position; } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/InsertBase.java b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/InsertBase.java index 597041337..d4a611e8d 100644 
--- a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/InsertBase.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/InsertBase.java @@ -42,10 +42,12 @@ public byte getOperator() { return operator; } + @Override public int getPosition() { return position; } + @Override public void setPosition(final int position) { this.position = position; } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/Insertion.java b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/Insertion.java index e0182c312..2055ba0fd 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/Insertion.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/Insertion.java @@ -42,10 +42,12 @@ public byte getOperator() { return operator; } + @Override public int getPosition() { return position; } + @Override public void setPosition(final int position) { this.position = position; } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/Padding.java b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/Padding.java index 85e90fdf0..f9a201f2d 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/Padding.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/Padding.java @@ -42,10 +42,12 @@ public byte getOperator() { return operator; } + @Override public int getPosition() { return position; } + @Override public void setPosition(final int position) { this.position = position; } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/ReadBase.java b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/ReadBase.java index 73ae20818..f56d6775a 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/ReadBase.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/ReadBase.java @@ -46,6 +46,7 @@ public int getPosition() { return position; } + @Override public void setPosition(final int position) { this.position = position; } diff --git 
a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/RefSkip.java b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/RefSkip.java index 1b99f0969..e9e5ae37e 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/RefSkip.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/RefSkip.java @@ -42,10 +42,12 @@ public byte getOperator() { return operator; } + @Override public int getPosition() { return position; } + @Override public void setPosition(final int position) { this.position = position; } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/SoftClip.java b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/SoftClip.java index b142595dd..7eaac6727 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/SoftClip.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/SoftClip.java @@ -51,10 +51,12 @@ public byte getOperator() { return operator; } + @Override public int getPosition() { return position; } + @Override public void setPosition(final int position) { this.position = position; } diff --git a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/Substitution.java b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/Substitution.java index b2ed5de62..1747c4474 100644 --- a/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/Substitution.java +++ b/src/main/java/htsjdk/samtools/cram/encoding/readfeatures/Substitution.java @@ -22,6 +22,8 @@ /** * A substitution event captured in read coordinates. It is characterized by position in read, read base and reference base. * The class is also responsible for converting combinations of read base and reference base into a byte value (code). + * + * Both reference and read bases must be ACGTN only. 
*/ public class Substitution implements Serializable, ReadFeature { public static final int NO_CODE = -1; @@ -31,11 +33,11 @@ */ private int position; /** - * The read base (ACGTN) + * The read base, allowed values are ACGTN. */ private byte base = -1; /** - * The reference sequence base matching the position of this substitution. + * The reference sequence base matching the position of this substitution, allowed values are ACGTN. */ private byte referenceBase = -1; /** @@ -43,6 +45,15 @@ */ private byte code = NO_CODE; + public Substitution() { + } + + public Substitution(int position, byte base, byte referenceBase) { + this.position = position; + this.base = base; + this.referenceBase = referenceBase; + } + public byte getCode() { return code; } @@ -58,10 +69,12 @@ public byte getOperator() { return operator; } + @Override public int getPosition() { return position; } + @Override public void setPosition(final int position) { this.position = position; } diff --git a/src/main/java/htsjdk/samtools/cram/io/CountingInputStream.java b/src/main/java/htsjdk/samtools/cram/io/CountingInputStream.java index b5e564206..41cb22aef 100644 --- a/src/main/java/htsjdk/samtools/cram/io/CountingInputStream.java +++ b/src/main/java/htsjdk/samtools/cram/io/CountingInputStream.java @@ -37,42 +37,50 @@ public int read() throws IOException { return delegate.read(); } + @Override public int read(@SuppressWarnings("NullableProblems") final byte[] b) throws IOException { final int read = delegate.read(b); count += read; return read; } + @Override public int read(@SuppressWarnings("NullableProblems") final byte[] b, final int off, final int length) throws IOException { final int read = delegate.read(b, off, length); count += read; return read; } + @Override public long skip(final long n) throws IOException { final long skipped = delegate.skip(n); count += skipped; return skipped; } + @Override public int available() throws IOException { return delegate.available(); } + @Override public void 
close() throws IOException { if (delegate != null) delegate.close(); } + @Override public void mark(final int readLimit) { delegate.mark(readLimit); } + @Override public void reset() throws IOException { delegate.reset(); count = 0; } + @Override public boolean markSupported() { return delegate.markSupported(); } diff --git a/src/main/java/htsjdk/samtools/cram/io/DefaultBitInputStream.java b/src/main/java/htsjdk/samtools/cram/io/DefaultBitInputStream.java index 519cf9da3..fef9e2b08 100644 --- a/src/main/java/htsjdk/samtools/cram/io/DefaultBitInputStream.java +++ b/src/main/java/htsjdk/samtools/cram/io/DefaultBitInputStream.java @@ -41,6 +41,7 @@ public DefaultBitInputStream(final InputStream in) { this.throwEOF = true; } + @Override public final boolean readBit() throws IOException { if (--nofBufferedBits >= 0) return ((byteBuffer >>> nofBufferedBits) & 1) == 1; @@ -55,6 +56,7 @@ public final boolean readBit() throws IOException { return ((byteBuffer >>> 7) & 1) == 1; } + @Override public final int readBits(int n) throws IOException { if (n == 0) return 0; @@ -77,6 +79,7 @@ private static int rightBits(final int n, final int x) { return x & ((1 << n) - 1); } + @Override public final long readLongBits(int n) throws IOException { if (n > 64) throw new RuntimeException("More then 64 bits are requested in one read from bit stream."); @@ -108,6 +111,7 @@ public final long readLongBits(int n) throws IOException { return x | (byteBuffer >>> nofBufferedBits); } + @Override public void reset() { nofBufferedBits = 0; byteBuffer = 0; diff --git a/src/main/java/htsjdk/samtools/cram/io/DefaultBitOutputStream.java b/src/main/java/htsjdk/samtools/cram/io/DefaultBitOutputStream.java index 2d702ee16..95d6789a8 100644 --- a/src/main/java/htsjdk/samtools/cram/io/DefaultBitOutputStream.java +++ b/src/main/java/htsjdk/samtools/cram/io/DefaultBitOutputStream.java @@ -53,6 +53,7 @@ public String toString() { + Integer.toBinaryString(bufferByte).substring(0, bufferedNumberOfBits); } + 
@Override public void write(final long bitContainer, final int nofBits) throws IOException { if (nofBits == 0) return; @@ -95,6 +96,7 @@ void write_int_LSB_0(final int value, final int nofBitsToWrite) throws IOExcepti } } + @Override public void write(final int bitContainer, final int nofBits) throws IOException { write_int_LSB_0(bitContainer, nofBits); } @@ -109,6 +111,7 @@ private void writeByte(final int value) throws IOException { } } + @Override public void write(byte bitContainer, final int nofBits) throws IOException { if (nofBits < 0 || nofBits > 8) throw new IOException("Expecting 0 to 8 bits."); @@ -145,6 +148,7 @@ public void write(final boolean bit) throws IOException { write(bit ? (byte) 1 : (byte) 0, 1); } + @Override public void write(final boolean bit, final long repeat) throws IOException { for (long i = 0; i < repeat; i++) write(bit); diff --git a/src/main/java/htsjdk/samtools/cram/ref/ReferenceSource.java b/src/main/java/htsjdk/samtools/cram/ref/ReferenceSource.java index e73fb4155..3edcf5dd3 100644 --- a/src/main/java/htsjdk/samtools/cram/ref/ReferenceSource.java +++ b/src/main/java/htsjdk/samtools/cram/ref/ReferenceSource.java @@ -20,6 +20,7 @@ import htsjdk.samtools.Defaults; import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.SAMUtils; import htsjdk.samtools.cram.build.Utils; import htsjdk.samtools.cram.io.InputStreamUtils; import htsjdk.samtools.reference.ReferenceSequence; @@ -27,6 +28,7 @@ import htsjdk.samtools.reference.ReferenceSequenceFileFactory; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.SequenceUtil; +import htsjdk.samtools.util.StringUtil; import java.io.File; import java.io.IOException; @@ -124,15 +126,19 @@ public void clearCache() { return null; } - // Upper case and normalize (-> ACGTN) in-place, and add to the cache + // Upper case (in-place), and add to the cache private byte[] addToCache(final String sequenceName, final byte[] bases) { + // Normalize to 
upper case only. We can't use the cram normalization utility Utils.normalizeBases, since + // we don't want to normalize ambiguity codes, we can't use SamUtils.normalizeBases, since we don't want + // to normalize no-call ('.') bases. for (int i = 0; i < bases.length; i++) { - bases[i] = Utils.normalizeBase(bases[i]); + bases[i] = StringUtil.toUpperCase(bases[i]); } cacheW.put(sequenceName, new WeakReference(bases)); return bases; } + @Override public synchronized byte[] getReferenceBases(final SAMSequenceRecord record, final boolean tryNameVariants) { { // check cache by sequence name: diff --git a/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java b/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java index 8a5afd38a..0c9596a0b 100644 --- a/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java +++ b/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java @@ -58,12 +58,10 @@ public BasicFastqWriter(final PrintStream writer) { @Override public void write(final FastqRecord rec) { - writer.print(FastqConstants.SEQUENCE_HEADER); - writer.println(rec.getReadHeader()); - writer.println(rec.getReadString()); - writer.print(FastqConstants.QUALITY_HEADER); - writer.println(rec.getBaseQualityHeader() == null ? 
"" : rec.getBaseQualityHeader()); - writer.println(rec.getBaseQualityString()); + // encode without creating a String + FastqEncoder.write(writer, rec); + // and print a new line + writer.println(); if (writer.checkError()) { throw new SAMException("Error in writing fastq file " + path); } diff --git a/src/main/java/htsjdk/samtools/fastq/FastqConstants.java b/src/main/java/htsjdk/samtools/fastq/FastqConstants.java index f5d4150ea..4e9b95e5b 100644 --- a/src/main/java/htsjdk/samtools/fastq/FastqConstants.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqConstants.java @@ -29,7 +29,9 @@ public class FastqConstants { public static final String SEQUENCE_HEADER = "@" ; public static final String QUALITY_HEADER = "+" ; - + public static final String FIRST_OF_PAIR = "/1"; + public static final String SECOND_OF_PAIR = "/2"; + public enum FastqExtensions { FASTQ(".fastq"), FASTQ_GZ(".fastq.gz"), diff --git a/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java b/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java new file mode 100644 index 000000000..fdbd02dcc --- /dev/null +++ b/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java @@ -0,0 +1,113 @@ +/* + * The MIT License + * + * Copyright (c) 2016 Daniel Gomez-Sanchez + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.fastq; + +import htsjdk.samtools.SAMException; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.util.SequenceUtil; + +import java.io.IOException; + +/** + * Codec for encoding records into FASTQ format. + * + * @author Daniel Gomez-Sanchez (magicDGS) + */ +public final class FastqEncoder { + + // cannot be instantiated because it is an utility class + private FastqEncoder() {} + + /** + * Encodes a FastqRecord in the String FASTQ format. + */ + public static String encode(final FastqRecord record) { + // reserve some memory based on the read length + int capacity = record.getReadLength() * 2 + 5; + // reserve some memory based on the read name + if (record.getReadName() != null) { + capacity += record.getReadName().length(); + } + return write(new StringBuilder(capacity), record).toString(); + } + + /** + * Writes a FastqRecord into the Appendable output. + * @throws SAMException if any I/O error occurs. + */ + public static Appendable write(final Appendable out,final FastqRecord record) { + final String readName = record.getReadName(); + final String readString = record.getReadString(); + final String qualHeader = record.getBaseQualityHeader(); + final String qualityString = record.getBaseQualityString(); + try { + return out.append(FastqConstants.SEQUENCE_HEADER) + .append(readName == null ? "" : readName).append('\n') + .append(readString == null ? 
"" : readString).append('\n') + .append(FastqConstants.QUALITY_HEADER) + .append(qualHeader == null ? "" : qualHeader).append('\n') + .append(qualityString == null ? "" : qualityString); + } catch (IOException e) { + throw new SAMException(e); + } + } + + /** + * Encodes a SAMRecord in the String FASTQ format. + * @see #encode(FastqRecord) + * @see #asSAMRecord(FastqRecord, SAMFileHeader) + */ + public static String encode(final SAMRecord record) { + return encode(asFastqRecord(record)); + } + + /** + * Converts a {@link SAMRecord} into a {@link FastqRecord}. + */ + public static FastqRecord asFastqRecord(final SAMRecord record) { + String readName = record.getReadName(); + if(record.getReadPairedFlag() && (record.getFirstOfPairFlag() || record.getSecondOfPairFlag())) { + readName += (record.getFirstOfPairFlag()) ? FastqConstants.FIRST_OF_PAIR : FastqConstants.SECOND_OF_PAIR; + } + return new FastqRecord(readName, record.getReadString(), null, record.getBaseQualityString()); + } + + /** + * Converts a {@link FastqRecord} into a simple unmapped {@link SAMRecord}. 
+ */ + public static SAMRecord asSAMRecord(final FastqRecord record, final SAMFileHeader header) { + // construct the SAMRecord and set the unmapped flag + final SAMRecord samRecord = new SAMRecord(header); + samRecord.setReadUnmappedFlag(true); + // get the read name from the FastqRecord correctly formatted + final String readName = SequenceUtil.getSamReadNameFromFastqHeader(record.getReadName()); + // set the basic information from the FastqRecord + samRecord.setReadName(readName); + samRecord.setReadBases(record.getReadBases()); + samRecord.setBaseQualities(record.getBaseQualities()); + return samRecord; + } + +} diff --git a/src/main/java/htsjdk/samtools/fastq/FastqReader.java b/src/main/java/htsjdk/samtools/fastq/FastqReader.java index 8086dfaee..c5d52f8dc 100755 --- a/src/main/java/htsjdk/samtools/fastq/FastqReader.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqReader.java @@ -41,6 +41,22 @@ * directly. It is provided so that this class can be used in Java for-each loop. */ public class FastqReader implements Iterator, Iterable, Closeable { + /** Enum of the types of lines we see in Fastq. */ + protected enum LineType { + SequenceHeader("Sequence Header"), + SequenceLine("Sequence Line"), + QualityHeader("Quality Header"), + QualityLine("Quality Line"); + + private String printable; + + LineType(String printable) { + this.printable = printable; + } + + @Override public String toString() { return this.printable; } + } + final private File fastqFile; final private BufferedReader reader; private FastqRecord nextRecord; @@ -58,10 +74,7 @@ public FastqReader(final File file) { * @param skipBlankLines should we skip blank lines ? 
*/ public FastqReader(final File file, final boolean skipBlankLines) { - this.skipBlankLines=skipBlankLines; - fastqFile = file; - reader = IOUtil.openFileForBufferedReading(fastqFile); - nextRecord = readNextRecord(); + this(file, IOUtil.openFileForBufferedReading(file), skipBlankLines); } public FastqReader(final BufferedReader reader) { @@ -77,8 +90,8 @@ public FastqReader(final BufferedReader reader) { public FastqReader(final File file, final BufferedReader reader,boolean skipBlankLines) { this.fastqFile = file; this.reader = reader; - this.nextRecord = readNextRecord(); this.skipBlankLines = skipBlankLines; + this.nextRecord = readNextRecord(); } public FastqReader(final File file, final BufferedReader reader) { @@ -87,7 +100,6 @@ public FastqReader(final File file, final BufferedReader reader) { private FastqRecord readNextRecord() { try { - // Read sequence header final String seqHeader = readLineConditionallySkippingBlanks(); if (seqHeader == null) return null ; @@ -95,23 +107,23 @@ private FastqRecord readNextRecord() { throw new SAMException(error("Missing sequence header")); } if (!seqHeader.startsWith(FastqConstants.SEQUENCE_HEADER)) { - throw new SAMException(error("Sequence header must start with "+ FastqConstants.SEQUENCE_HEADER+": "+seqHeader)); + throw new SAMException(error("Sequence header must start with " + FastqConstants.SEQUENCE_HEADER + ": " + seqHeader)); } // Read sequence line final String seqLine = readLineConditionallySkippingBlanks(); - checkLine(seqLine,"sequence line"); + checkLine(seqLine, LineType.SequenceLine); // Read quality header final String qualHeader = readLineConditionallySkippingBlanks(); - checkLine(qualHeader,"quality header"); + checkLine(qualHeader, LineType.QualityHeader); if (!qualHeader.startsWith(FastqConstants.QUALITY_HEADER)) { - throw new SAMException(error("Quality header must start with "+ FastqConstants.QUALITY_HEADER+": "+qualHeader)); + throw new SAMException(error("Quality header must start with " + 
FastqConstants.QUALITY_HEADER + ": "+ qualHeader)); } // Read quality line final String qualLine = readLineConditionallySkippingBlanks(); - checkLine(qualLine,"quality line"); + checkLine(qualLine, LineType.QualityLine); // Check sequence and quality lines are same length if (seqLine.length() != qualLine.length()) { @@ -124,12 +136,14 @@ private FastqRecord readNextRecord() { return frec ; } catch (IOException e) { - throw new SAMException(String.format("Error reading fastq '%s'", getAbsolutePath()), e); + throw new SAMException(error(e.getMessage()), e); } } + @Override public boolean hasNext() { return nextRecord != null; } + @Override public FastqRecord next() { if (!hasNext()) { throw new NoSuchElementException("next() called when !hasNext()"); @@ -139,6 +153,7 @@ public FastqRecord next() { return rec; } + @Override public void remove() { throw new UnsupportedOperationException("Unsupported operation"); } /** @@ -146,6 +161,7 @@ public FastqRecord next() { * start iteration from the beginning of the file. Developers should probably not call iterator() * directly. It is provided so that this class can be used in Java for-each loop. */ + @Override public Iterator iterator() { return this; } public int getLineNumber() { return line ; } @@ -161,21 +177,23 @@ public void close() { try { reader.close(); } catch (IOException e) { - throw new SAMException("IO problem in fastq file "+getAbsolutePath(), e); + throw new SAMException(error(e.getMessage()), e); } } - private void checkLine(final String line, final String kind) { + /** Checks that the line is neither null (representing EOF) or empty (blank line in file). 
*/ + protected void checkLine(final String line, final LineType kind) { if (line == null) { - throw new SAMException(error("File is too short - missing "+kind+" line")); + throw new SAMException(error("File is too short - missing " + kind)); } if (StringUtil.isBlank(line)) { - throw new SAMException(error("Missing "+kind)); + throw new SAMException(error("Missing " + kind)); } } - private String error(final String msg) { - return msg + " at line "+line+" in fastq "+getAbsolutePath(); + /** Generates an error message with line number information. */ + protected String error(final String msg) { + return msg + " at line " + line + " in fastq " + getAbsolutePath(); } private String getAbsolutePath() { @@ -194,6 +212,6 @@ private String readLineConditionallySkippingBlanks() throws IOException { @Override public String toString() { - return "FastqReader["+(this.fastqFile == null?"":this.fastqFile)+ " Line:"+getLineNumber()+"]"; + return "FastqReader[" + (this.fastqFile == null ? "" : this.fastqFile) + " Line:" + getLineNumber() + "]"; } } diff --git a/src/main/java/htsjdk/samtools/fastq/FastqRecord.java b/src/main/java/htsjdk/samtools/fastq/FastqRecord.java index b1d3f7507..9fbcd3912 100755 --- a/src/main/java/htsjdk/samtools/fastq/FastqRecord.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqRecord.java @@ -23,62 +23,169 @@ */ package htsjdk.samtools.fastq; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMUtils; +import htsjdk.samtools.util.StringUtil; + import java.io.Serializable; /** - * Represents a fastq record, fairly literally, i.e. without any conversion. 
+ * Simple representation of a FASTQ record, without any conversion */ public class FastqRecord implements Serializable { private static final long serialVersionUID = 1L; - private final String seqHeaderPrefix; - private final String seqLine; - private final String qualHeaderPrefix; - private final String qualLine; - - public FastqRecord(final String seqHeaderPrefix, final String seqLine, final String qualHeaderPrefix, final String qualLine) { - if (seqHeaderPrefix != null && !seqHeaderPrefix.isEmpty()) this.seqHeaderPrefix = seqHeaderPrefix; - else this.seqHeaderPrefix = null; - if (qualHeaderPrefix != null && !qualHeaderPrefix.isEmpty()) this.qualHeaderPrefix = qualHeaderPrefix; - else this.qualHeaderPrefix = null; - this.seqLine = seqLine ; - this.qualLine = qualLine ; - } - - /** copy constructor */ + private final String readName; + private final String readString; + private final String qualityHeader; + private final String baseQualityString; + + /** + * Default constructor + * + * @param readName the read name (without {@link FastqConstants#SEQUENCE_HEADER}) + * @param readBases the read sequence bases + * @param qualityHeader the quality header (without {@link FastqConstants#SEQUENCE_HEADER}) + * @param baseQualities the base quality scores + */ + public FastqRecord(final String readName, final String readBases, final String qualityHeader, final String baseQualities) { + if (readName != null && !readName.isEmpty()) { + this.readName = readName; + } else { + this.readName = null; + } + if (qualityHeader != null && !qualityHeader.isEmpty()) { + this.qualityHeader = qualityHeader; + } else { + this.qualityHeader = null; + } + this.readString = readBases; + this.baseQualityString = baseQualities; + } + + /** + * Constructor for byte[] arrays + * + * @param readName the read name (without {@link FastqConstants#SEQUENCE_HEADER}) + * @param readBases the read sequence bases as ASCII bytes ACGTN=. 
+ * @param qualityHeader the quality header (without {@link FastqConstants#SEQUENCE_HEADER}) + * @param baseQualities the base qualities as binary PHRED scores (not ASCII) + */ + public FastqRecord(final String readName, final byte[] readBases, final String qualityHeader, final byte[] baseQualities) { + this(readName, StringUtil.bytesToString(readBases), qualityHeader, SAMUtils.phredToFastq(baseQualities)); + } + + /** + * Copy constructor + * + * @param other record to copy + */ public FastqRecord(final FastqRecord other) { - if( other == null ) throw new IllegalArgumentException("new FastqRecord(null)"); - this.seqHeaderPrefix = other.seqHeaderPrefix; - this.seqLine = other.seqLine; - this.qualHeaderPrefix = other.qualHeaderPrefix; - this.qualLine = other.qualLine; - } - - /** @return the read name */ - public String getReadHeader() { return seqHeaderPrefix; } - /** @return the read DNA sequence */ - public String getReadString() { return seqLine; } - /** @return the quality header */ - public String getBaseQualityHeader() { return qualHeaderPrefix; } - /** @return the quality string */ - public String getBaseQualityString() { return qualLine; } - /** shortcut to getReadString().length() */ - public int length() { return this.seqLine==null?0:this.seqLine.length();} - + if (other == null) { + throw new IllegalArgumentException("new FastqRecord(null)"); + } + this.readName = other.readName; + this.readString = other.readString; + this.qualityHeader = other.qualityHeader; + this.baseQualityString = other.baseQualityString; + } + + /** + * @return the read name + * @deprecated since 02/2017. Use {@link #getReadName()} instead + */ + @Deprecated + public String getReadHeader() { + return getReadName(); + } + + /** + * Get the read name + * + * @return the read name + */ + public String getReadName() { + return readName; + } + + /** + * Get the DNA sequence + * + * @return read sequence as a string of ACGTN=. 
+ */ + public String getReadString() { + return readString; + } + + /** + * Get the DNA sequence. + * + * @return read sequence as ASCII bytes ACGTN=; {@link SAMRecord#NULL_SEQUENCE} if no bases are present. + */ + public byte[] getReadBases() { + return (readString == null) ? SAMRecord.NULL_SEQUENCE : StringUtil.stringToBytes(readString); + } + + /** + * Get the base qualities encoded as a FASTQ string + * + * @return the quality string + */ + public String getBaseQualityString() { + return baseQualityString; + } + + /** + * Get the base qualities as binary PHRED scores (not ASCII) + * + * @return the base quality; {@link SAMRecord#NULL_QUALS} if no bases are present. + */ + public byte[] getBaseQualities() { + return (baseQualityString == null) ? SAMRecord.NULL_QUALS : SAMUtils.fastqToPhred(baseQualityString); + } + + /** + * Get the read length + * + * @return number of bases in the read + */ + public int getReadLength() { + return (readString == null) ? 0 : readString.length(); + } + + /** + * Get the base quality header + * + * @return the base quality header + */ + public String getBaseQualityHeader() { + return qualityHeader; + } + + /** + * shortcut to getReadString().length() + * + * @deprecated since 02/2017. Use {@link #getReadLength()} instead + */ + @Deprecated + public int length() { + return getReadLength(); + } + @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result - + ((qualHeaderPrefix == null) ? 0 : qualHeaderPrefix.hashCode()); + + ((qualityHeader == null) ? 0 : qualityHeader.hashCode()); result = prime * result - + ((qualLine == null) ? 0 : qualLine.hashCode()); + + ((baseQualityString == null) ? 0 : baseQualityString.hashCode()); result = prime * result - + ((seqHeaderPrefix == null) ? 0 : seqHeaderPrefix.hashCode()); - result = prime * result + ((seqLine == null) ? 0 : seqLine.hashCode()); + + ((readName == null) ? 0 : readName.hashCode()); + result = prime * result + ((readString == null) ? 
0 : readString.hashCode()); return result; } - + @Override public boolean equals(Object obj) { if (this == obj) @@ -88,37 +195,45 @@ public boolean equals(Object obj) { if (getClass() != obj.getClass()) return false; FastqRecord other = (FastqRecord) obj; - if (seqLine == null) { - if (other.seqLine != null) + if (readString == null) { + if (other.readString != null) return false; - } else if (!seqLine.equals(other.seqLine)) + } else if (!readString.equals(other.readString)) return false; - if (qualHeaderPrefix == null) { - if (other.qualHeaderPrefix != null) + if (qualityHeader == null) { + if (other.qualityHeader != null) return false; - } else if (!qualHeaderPrefix.equals(other.qualHeaderPrefix)) + } else if (!qualityHeader.equals(other.qualityHeader)) return false; - if (qualLine == null) { - if (other.qualLine != null) + if (baseQualityString == null) { + if (other.baseQualityString != null) return false; - } else if (!qualLine.equals(other.qualLine)) + } else if (!baseQualityString.equals(other.baseQualityString)) return false; - if (seqHeaderPrefix == null) { - if (other.seqHeaderPrefix != null) + if (readName == null) { + if (other.readName != null) return false; - } else if (!seqHeaderPrefix.equals(other.seqHeaderPrefix)) + } else if (!readName.equals(other.readName)) return false; - + return true; } - + + /** + * Returns the record as the String FASTQ format. + * @see FastqEncoder#encode(FastqRecord) + */ + public String toFastQString() { + return FastqEncoder.encode(this); + } + + /** + * Returns {@link #toFastQString()} + */ @Override public String toString() { - return new StringBuilder(). - append(FastqConstants.SEQUENCE_HEADER).append(this.seqHeaderPrefix==null?"":this.seqHeaderPrefix).append('\n'). - append(this.seqLine==null?"":this.seqLine).append('\n'). - append(FastqConstants.QUALITY_HEADER).append(this.qualHeaderPrefix==null?"":this.qualHeaderPrefix).append('\n'). - append(this.qualLine==null?"":this.qualLine). 
- toString(); - } + // TODO: this should be change in the future for a simpler and more informative form such as + // TODO: return String.format("%s: %s bp", readName, getReadLength()); + return toFastQString(); + } } diff --git a/src/main/java/htsjdk/samtools/fastq/FastqWriter.java b/src/main/java/htsjdk/samtools/fastq/FastqWriter.java index e37aec57d..3b2a1b688 100644 --- a/src/main/java/htsjdk/samtools/fastq/FastqWriter.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqWriter.java @@ -9,5 +9,6 @@ */ public interface FastqWriter extends Closeable { void write(final FastqRecord rec); + @Override void close(); } diff --git a/src/main/java/htsjdk/samtools/filter/AggregateFilter.java b/src/main/java/htsjdk/samtools/filter/AggregateFilter.java index f396c593f..62b804b79 100644 --- a/src/main/java/htsjdk/samtools/filter/AggregateFilter.java +++ b/src/main/java/htsjdk/samtools/filter/AggregateFilter.java @@ -51,6 +51,7 @@ public AggregateFilter(final List filters) { * @param record the SAMRecord to evaluate * @return true if the SAMRecord matches at least one filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord record) { for (final SamRecordFilter filter : filters) { if (filter.filterOut(record)) { @@ -68,6 +69,7 @@ public boolean filterOut(final SAMRecord record) { * * @return true if the SAMRecords matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { for (final SamRecordFilter filter : filters) { if (filter.filterOut(first, second)) { diff --git a/src/main/java/htsjdk/samtools/filter/AlignedFilter.java b/src/main/java/htsjdk/samtools/filter/AlignedFilter.java index c70453d00..cebdc0b95 100644 --- a/src/main/java/htsjdk/samtools/filter/AlignedFilter.java +++ b/src/main/java/htsjdk/samtools/filter/AlignedFilter.java @@ -45,6 +45,7 @@ public AlignedFilter(final boolean includeAligned) { * * @return true if the SAMRecord matches the filter, otherwise false */ + @Override 
public boolean filterOut(final SAMRecord record) { if (includeAligned) { if (!record.getReadUnmappedFlag()) { @@ -68,6 +69,7 @@ public boolean filterOut(final SAMRecord record) { * * @return true if the SAMRecords matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { if (includeAligned) { diff --git a/src/main/java/htsjdk/samtools/filter/DuplicateReadFilter.java b/src/main/java/htsjdk/samtools/filter/DuplicateReadFilter.java index c79b3ccfd..2fe773f11 100644 --- a/src/main/java/htsjdk/samtools/filter/DuplicateReadFilter.java +++ b/src/main/java/htsjdk/samtools/filter/DuplicateReadFilter.java @@ -34,6 +34,7 @@ * @param record the SAMRecord to evaluate * @return true if the SAMRecord matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord record) { return record.getDuplicateReadFlag(); } @@ -46,6 +47,7 @@ public boolean filterOut(final SAMRecord record) { * * @return true if the SAMRecords matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { throw new UnsupportedOperationException("Paired DuplicateReadFilter filter not implemented!"); } diff --git a/src/main/java/htsjdk/samtools/filter/FailsVendorReadQualityFilter.java b/src/main/java/htsjdk/samtools/filter/FailsVendorReadQualityFilter.java index 7c6825cba..661286df3 100644 --- a/src/main/java/htsjdk/samtools/filter/FailsVendorReadQualityFilter.java +++ b/src/main/java/htsjdk/samtools/filter/FailsVendorReadQualityFilter.java @@ -38,6 +38,7 @@ * @param record the SAMRecord to evaluate * @return true if the SAMRecord matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord record) { return record.getReadFailsVendorQualityCheckFlag(); } @@ -50,6 +51,7 @@ public boolean filterOut(final SAMRecord record) { * * @return true if the SAMRecords matches the filter, otherwise false */ + @Override public boolean 
filterOut(final SAMRecord first, final SAMRecord second) { // if either fails, exclude them both return (first.getReadFailsVendorQualityCheckFlag() || second.getReadFailsVendorQualityCheckFlag()); diff --git a/src/main/java/htsjdk/samtools/filter/FilteringIterator.java b/src/main/java/htsjdk/samtools/filter/FilteringIterator.java index 3ce9f96ce..4cdaebe89 100644 --- a/src/main/java/htsjdk/samtools/filter/FilteringIterator.java +++ b/src/main/java/htsjdk/samtools/filter/FilteringIterator.java @@ -36,7 +36,7 @@ * * @author Kathleen Tibbetts * - * use {@link FilteringSamIterator} instead + * @deprecated use {@link FilteringSamIterator} instead */ @Deprecated /** use {@link FilteringSamIterator} instead **/ diff --git a/src/main/java/htsjdk/samtools/filter/FilteringSamIterator.java b/src/main/java/htsjdk/samtools/filter/FilteringSamIterator.java index 7ac1c0aaa..a70156ad6 100644 --- a/src/main/java/htsjdk/samtools/filter/FilteringSamIterator.java +++ b/src/main/java/htsjdk/samtools/filter/FilteringSamIterator.java @@ -87,6 +87,7 @@ public FilteringSamIterator(final Iterator iterator, final SamRecordF * * @return true if the iteration has more elements. Otherwise returns false. 
*/ + @Override public boolean hasNext() { return next != null; } @@ -98,6 +99,7 @@ public boolean hasNext() { * @throws java.util.NoSuchElementException * */ + @Override public SAMRecord next() { if (next == null) { throw new NoSuchElementException("Iterator has no more elements."); @@ -112,10 +114,12 @@ public SAMRecord next() { * * @throws UnsupportedOperationException */ + @Override public void remove() { throw new UnsupportedOperationException("Remove() not supported by FilteringSamIterator"); } + @Override public void close() { CloserUtil.close(iterator); } diff --git a/src/main/java/htsjdk/samtools/filter/IntervalFilter.java b/src/main/java/htsjdk/samtools/filter/IntervalFilter.java index ff3620ae9..ef5c98a3f 100644 --- a/src/main/java/htsjdk/samtools/filter/IntervalFilter.java +++ b/src/main/java/htsjdk/samtools/filter/IntervalFilter.java @@ -65,6 +65,7 @@ public IntervalFilter(final List intervals, final SAMFileHeader samHea * @param record the SAMRecord to evaluate * @return true if the SAMRecord matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord record) { while (currentInterval != null && (currentSequenceIndex < record.getReferenceIndex() || @@ -93,6 +94,7 @@ private void advanceInterval() { * * @return true if the SAMRecords matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { // This can never be implemented because if the bam is coordinate sorted, // which it has to be for this filter, it will never get both the first and second reads together diff --git a/src/main/java/htsjdk/samtools/filter/IntervalKeepPairFilter.java b/src/main/java/htsjdk/samtools/filter/IntervalKeepPairFilter.java index 5a7961bbb..c4e01aae2 100644 --- a/src/main/java/htsjdk/samtools/filter/IntervalKeepPairFilter.java +++ b/src/main/java/htsjdk/samtools/filter/IntervalKeepPairFilter.java @@ -65,6 +65,7 @@ public IntervalKeepPairFilter(final List intervals) { * @param 
record the SAMRecord to evaluate * @return true if the SAMRecord matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord record) { if (record.isSecondaryOrSupplementary()) { return true; @@ -102,6 +103,7 @@ private boolean hasOverlaps(final String refSequence, final int start, final int * * @return true if both SAMRecords do not overlap the interval list */ + @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { return filterOut(first) && filterOut(second); } diff --git a/src/main/java/htsjdk/samtools/filter/NotPrimaryAlignmentFilter.java b/src/main/java/htsjdk/samtools/filter/NotPrimaryAlignmentFilter.java index 0f2364c92..cda45e045 100644 --- a/src/main/java/htsjdk/samtools/filter/NotPrimaryAlignmentFilter.java +++ b/src/main/java/htsjdk/samtools/filter/NotPrimaryAlignmentFilter.java @@ -35,6 +35,7 @@ * @param record the SAMRecord to evaluate * @return true if the SAMRecord matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord record) { return record.getNotPrimaryAlignmentFlag(); } @@ -47,6 +48,7 @@ public boolean filterOut(final SAMRecord record) { * * @return true if the SAMRecords matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { // if either fails, exclude them both return (first.getNotPrimaryAlignmentFlag() || second.getNotPrimaryAlignmentFlag()); diff --git a/src/main/java/htsjdk/samtools/filter/ReadNameFilter.java b/src/main/java/htsjdk/samtools/filter/ReadNameFilter.java index e4b2a20d9..94a4397a8 100644 --- a/src/main/java/htsjdk/samtools/filter/ReadNameFilter.java +++ b/src/main/java/htsjdk/samtools/filter/ReadNameFilter.java @@ -79,6 +79,7 @@ public ReadNameFilter(final Set readNameFilterSet, final boolean include * * @return true if the SAMRecord matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord record) { if (includeReads) { if 
(readNameFilterSet.contains(record.getReadName())) { @@ -101,6 +102,7 @@ public boolean filterOut(final SAMRecord record) { * * @return true if the pair of records matches filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { if (includeReads) { if (readNameFilterSet.contains(first.getReadName()) && diff --git a/src/main/java/htsjdk/samtools/filter/SecondaryAlignmentFilter.java b/src/main/java/htsjdk/samtools/filter/SecondaryAlignmentFilter.java index d91212d40..22741ae0d 100644 --- a/src/main/java/htsjdk/samtools/filter/SecondaryAlignmentFilter.java +++ b/src/main/java/htsjdk/samtools/filter/SecondaryAlignmentFilter.java @@ -9,11 +9,13 @@ /** * Returns true if the read is marked as secondary. */ + @Override public boolean filterOut(final SAMRecord record) { return record.getNotPrimaryAlignmentFlag(); } /** * Returns true if either read is marked as secondary. */ + @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { return first.getNotPrimaryAlignmentFlag() || second.getNotPrimaryAlignmentFlag(); } diff --git a/src/main/java/htsjdk/samtools/filter/SecondaryOrSupplementaryFilter.java b/src/main/java/htsjdk/samtools/filter/SecondaryOrSupplementaryFilter.java index ae57fd9d3..b7d21d157 100644 --- a/src/main/java/htsjdk/samtools/filter/SecondaryOrSupplementaryFilter.java +++ b/src/main/java/htsjdk/samtools/filter/SecondaryOrSupplementaryFilter.java @@ -13,6 +13,7 @@ * @param record the SAMRecord to evaluate * @return true if the SAMRecord matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord record) { return record.isSecondaryOrSupplementary(); } @@ -25,6 +26,7 @@ public boolean filterOut(final SAMRecord record) { * * @return true if the SAMRecords matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { // if either fails, exclude them both return 
first.isSecondaryOrSupplementary() || second.isSecondaryOrSupplementary(); diff --git a/src/main/java/htsjdk/samtools/filter/SolexaNoiseFilter.java b/src/main/java/htsjdk/samtools/filter/SolexaNoiseFilter.java index bfb31d6d4..ce169ef83 100644 --- a/src/main/java/htsjdk/samtools/filter/SolexaNoiseFilter.java +++ b/src/main/java/htsjdk/samtools/filter/SolexaNoiseFilter.java @@ -40,6 +40,7 @@ * @param record the SAMRecord to evaluate * @return true if the SAMRecord matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord record) { final byte[] sequence = record.getReadBases(); for (final byte base : sequence) { @@ -59,6 +60,7 @@ public boolean filterOut(final SAMRecord record) { * * @return true if the SAMRecords matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { // only filter out the pair if both first and second reads have all As return (filterOut(first) && filterOut(second)); diff --git a/src/main/java/htsjdk/samtools/filter/TagFilter.java b/src/main/java/htsjdk/samtools/filter/TagFilter.java index 5182e836c..00ca8a46c 100644 --- a/src/main/java/htsjdk/samtools/filter/TagFilter.java +++ b/src/main/java/htsjdk/samtools/filter/TagFilter.java @@ -66,6 +66,7 @@ public TagFilter(String tag, List values) { * @param record the SAMRecord to evaluate * @return true if the SAMRecord matches the filter, otherwise false */ + @Override public boolean filterOut(SAMRecord record) { return values.contains(record.getAttribute(tag)); } @@ -78,6 +79,7 @@ public boolean filterOut(SAMRecord record) { * * @return true if the SAMRecords matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { // both first and second must have the tag in order for it to be filtered out return values.contains(first.getAttribute(tag)) && values.contains(second.getAttribute(tag)); diff --git 
a/src/main/java/htsjdk/samtools/filter/WholeReadClippedFilter.java b/src/main/java/htsjdk/samtools/filter/WholeReadClippedFilter.java index 2a1566ce0..6df3c4454 100644 --- a/src/main/java/htsjdk/samtools/filter/WholeReadClippedFilter.java +++ b/src/main/java/htsjdk/samtools/filter/WholeReadClippedFilter.java @@ -57,6 +57,7 @@ public boolean filterOut(final SAMRecord record) { * * @return true if the SAMRecords matches the filter, otherwise false */ + @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { // if either fails, exclude them both return (filterOut(first) || filterOut(second)); diff --git a/src/main/java/htsjdk/samtools/metrics/StringHeader.java b/src/main/java/htsjdk/samtools/metrics/StringHeader.java index ced159598..949dd4153 100644 --- a/src/main/java/htsjdk/samtools/metrics/StringHeader.java +++ b/src/main/java/htsjdk/samtools/metrics/StringHeader.java @@ -43,6 +43,7 @@ public StringHeader(String value) { setValue(value); } + @Override public void parse(String in) { value = in.trim(); } public String toString() { return value; } diff --git a/src/main/java/htsjdk/samtools/metrics/VersionHeader.java b/src/main/java/htsjdk/samtools/metrics/VersionHeader.java index ae0845502..82093aaa5 100644 --- a/src/main/java/htsjdk/samtools/metrics/VersionHeader.java +++ b/src/main/java/htsjdk/samtools/metrics/VersionHeader.java @@ -37,6 +37,7 @@ private String versionedItem; private String versionString; + @Override public void parse(String in) { String[] fields = in.split("\t"); this.versionedItem = fields[0]; diff --git a/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java index 86f11fead..736107bb1 100644 --- a/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java @@ -93,29 +93,16 @@ protected static Path findSequenceDictionary(final Path path) { if 
(path == null) { return null; } - // Try and locate the dictionary - Path dictionary = path.toAbsolutePath(); - Path dictionaryExt = path.toAbsolutePath(); - boolean fileTypeSupported = false; - for (final String extension : ReferenceSequenceFileFactory.FASTA_EXTENSIONS) { - String filename = dictionary.getFileName().toString(); - if (filename.endsWith(extension)) { - dictionaryExt = dictionary.resolveSibling(filename + IOUtil - .DICT_FILE_EXTENSION); - String filenameNoExt = filename.substring(0, filename.lastIndexOf(extension)); - dictionary = dictionary.resolveSibling(filenameNoExt+ IOUtil.DICT_FILE_EXTENSION); - fileTypeSupported = true; - break; - } - } - if (!fileTypeSupported) - throw new IllegalArgumentException("File is not a supported reference file type: " + path.toAbsolutePath()); - - if (Files.exists(dictionary)) + // Try and locate the dictionary with the default method + final Path dictionary = ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(path); path.toAbsolutePath(); + if (Files.exists(dictionary)) { return dictionary; + } // try without removing the file extension - if (Files.exists(dictionaryExt)) + final Path dictionaryExt = path.resolveSibling(path.getFileName().toString() + IOUtil.DICT_FILE_EXTENSION); + if (Files.exists(dictionaryExt)) { return dictionaryExt; + } else return null; } @@ -128,6 +115,7 @@ protected Path getPath() { * Returns the list of sequence records associated with the reference sequence if found * otherwise null. 
*/ + @Override public SAMSequenceDictionary getSequenceDictionary() { return this.sequenceDictionary; } @@ -143,14 +131,17 @@ public String toString() { } /** default implementation -- override if index is supported */ + @Override public boolean isIndexed() {return false;} /** default implementation -- override if index is supported */ + @Override public ReferenceSequence getSequence( String contig ) { throw new UnsupportedOperationException(); } /** default implementation -- override if index is supported */ + @Override public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) { throw new UnsupportedOperationException("Index does not appear to exist for " + getAbsolutePath() + ". samtools faidx can be used to create an index"); } diff --git a/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java index 72c0583bb..744d79773 100644 --- a/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java @@ -62,10 +62,12 @@ public FastaSequenceFile(final Path path, final boolean truncateNamesAtWhitespac /** * It's good to call this to free up memory. 
*/ + @Override public void close() { in.close(); } + @Override public ReferenceSequence nextSequence() { this.sequenceIndex += 1; @@ -83,6 +85,7 @@ public ReferenceSequence nextSequence() { return new ReferenceSequence(name, this.sequenceIndex, bases); } + @Override public void reset() { this.sequenceIndex = -1; this.in.close(); diff --git a/src/main/java/htsjdk/samtools/reference/FastaSequenceIndex.java b/src/main/java/htsjdk/samtools/reference/FastaSequenceIndex.java index e314fccbe..3668fe671 100644 --- a/src/main/java/htsjdk/samtools/reference/FastaSequenceIndex.java +++ b/src/main/java/htsjdk/samtools/reference/FastaSequenceIndex.java @@ -31,6 +31,9 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; +import java.nio.file.Files; import java.nio.file.Path; import java.util.Iterator; import java.util.LinkedHashMap; @@ -39,7 +42,7 @@ import java.util.regex.MatchResult; /** - * Reads a fasta index file (.fai), as generated by `samtools faidx`. + * Reads/writes a fasta index file (.fai), as generated by `samtools faidx`. */ public class FastaSequenceIndex implements Iterable { /** @@ -159,6 +162,27 @@ private void parseIndexFile(Path indexFile) { } /** + * Writes this index to the specified path. + * + * @param indexFile index file to output the index in the .fai format + * + * @throws IOException if an IO error occurs. + */ + public void write(final Path indexFile) throws IOException { + try (final PrintStream writer = new PrintStream(Files.newOutputStream(indexFile))) { + sequenceEntries.values().forEach(se -> + writer.println(String.join("\t", + se.getContig(), + String.valueOf(se.getSize()), + String.valueOf(se.getLocation()), + String.valueOf(se.getBasesPerLine()), + String.valueOf(se.getBytesPerLine())) + ) + ); + } + } + + /** * Does the given contig name have a corresponding entry? * @param contigName The contig name for which to search. 
* @return True if contig name is present; false otherwise. @@ -184,6 +208,7 @@ public FastaSequenceIndexEntry getIndexEntry( String contigName ) { * Creates an iterator which can iterate through all entries in a fasta index. * @return iterator over all fasta index entries. */ + @Override public Iterator iterator() { return sequenceEntries.values().iterator(); } diff --git a/src/main/java/htsjdk/samtools/reference/FastaSequenceIndexCreator.java b/src/main/java/htsjdk/samtools/reference/FastaSequenceIndexCreator.java new file mode 100644 index 000000000..ee425ffd5 --- /dev/null +++ b/src/main/java/htsjdk/samtools/reference/FastaSequenceIndexCreator.java @@ -0,0 +1,180 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2017 Daniel Gomez-Sanchez + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package htsjdk.samtools.reference; + +import htsjdk.samtools.SAMException; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.util.IOUtil; +import htsjdk.tribble.readers.AsciiLineReader; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +/** + * Static methods to create an {@link FastaSequenceIndex}. + * + * @author Daniel Gomez-Sanchez (magicDGS) + */ +public final class FastaSequenceIndexCreator { + + // cannot be instantiated because it is an utility class + private FastaSequenceIndexCreator() {} + + /** + * Creates a FASTA .fai index for the provided FASTA. + * + * @param fastaFile the file to build the index from. + * @param overwrite if the .fai index already exists override it if {@code true}; otherwise, throws a {@link SAMException}. + * + * @throws SAMException if the fai file already exists or the file is malformed. + * @throws IOException if an IO error occurs. + */ + public static void create(final Path fastaFile, final boolean overwrite) throws IOException { + // get the index to write the file in + final Path indexFile = ReferenceSequenceFileFactory.getFastaIndexFileName(fastaFile); + if (!overwrite && Files.exists(indexFile)) { + // throw an exception if the file already exists + throw new SAMException("Index file " + indexFile + " already exists for " + fastaFile); + } + // build the index + final FastaSequenceIndex index = buildFromFasta(fastaFile); + index.write(indexFile); + } + + /** + * Builds a FastaSequenceIndex on the fly from a FASTA file. + * + * Note: this also alows to create an index for a compressed file, but does not generate the + * .gzi index required for use it with samtools. + * + * @param fastaFile the FASTA file. + * + * @return a fai index. + * + * @throws SAMException for formatting errors. + * @throws IOException if an IO error occurs. 
+ */ + public static FastaSequenceIndex buildFromFasta(final Path fastaFile) throws IOException { + try(final AsciiLineReader in = new AsciiLineReader(IOUtil.openFileForReading(fastaFile))) { + + // sanity check reference format: + // 1. Non-empty file + // 2. Header name starts with > + String previous = in.readLine(); + if (previous == null) { + throw new SAMException("Cannot index empty file: " + fastaFile); + } else if (previous.charAt(0) != '>') { + throw new SAMException("Wrong sequence header: " + previous); + } + + // initialize the sequence index + int sequenceIndex = -1; + // the location should be kept before iterating over the rest of the lines + long location = in.getPosition(); + + // initialize an empty index and the entry builder to null + final FastaSequenceIndex index = new FastaSequenceIndex(); + FaiEntryBuilder entry = null; + + // read the lines two by two + for (String line = in.readLine(); previous != null; line = in.readLine()) { + // in this case, the previous line contains a header and the current line the first sequence + if (previous.charAt(0) == '>') { + // first entry should be skipped; otherwise it should be added to the index + if (entry != null) index.add(entry.build()); + // creates a new entry (and update sequence index) + entry = new FaiEntryBuilder(sequenceIndex++, previous, line, in.getLineTerminatorLength(), location); + } else if (line != null && line.charAt(0) == '>') { + // update the location, next iteration the sequence will be handled + location = in.getPosition(); + } else if (line != null && !line.isEmpty()) { + // update in case it is not a blank-line + entry.updateWithSequence(line, in.getLineTerminatorLength()); + } + // set the previous to the current line + previous = line; + } + // add the last entry + index.add(entry.build()); + + // and return the index + return index; + } + } + + // utility class for building the FastaSequenceIndexEntry + private static class FaiEntryBuilder { + private final int index; + 
private final String contig; + private final long location; + // the bytes per line is the bases per line plus the length of the end of the line + private final int basesPerLine; + private final int endOfLineLength; + + // the size is updated for each line in the input using updateWithSequence + private long size; + // flag to check if the supposedly last line was already reached + private boolean lessBasesFound; + + private FaiEntryBuilder(final int index, final String header, final String firstSequenceLine, final int endOfLineLength, final long location) { + if (header == null || header.charAt(0) != '>') { + throw new SAMException("Wrong sequence header: " + header); + } else if (firstSequenceLine == null) { + throw new SAMException("Empty sequences could not be indexed"); + } + this.index = index; + // parse the contig name (without the starting '>' and truncating white-spaces) + this.contig = SAMSequenceRecord.truncateSequenceName(header.substring(1).trim()); + this.location = location; + this.basesPerLine = firstSequenceLine.length(); + this.endOfLineLength = endOfLineLength; + this.size = firstSequenceLine.length(); + this.lessBasesFound = false; + } + + private void updateWithSequence(final String sequence, final int endOfLineLength) { + if (this.endOfLineLength != endOfLineLength) { + throw new SAMException(String.format("Different end of line for the same sequence was found.")); + } + if (sequence.length() > basesPerLine) { + throw new SAMException(String.format("Sequence line for {} was longer than the expected length ({}): {}", + contig, basesPerLine, sequence)); + } else if (sequence.length() < basesPerLine) { + if (lessBasesFound) { + throw new SAMException(String.format("Only last line could have less than {} bases for '{}' sequence, but at least two are different. 
Last sequence line: {}", + basesPerLine, contig, sequence)); + } + lessBasesFound = true; + } + // update size + this.size += sequence.length(); + } + + private FastaSequenceIndexEntry build() { + return new FastaSequenceIndexEntry(contig, location, size, basesPerLine, basesPerLine + endOfLineLength, index); + } + } +} diff --git a/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java index 60cc3b1b7..5c318782e 100644 --- a/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java @@ -111,6 +111,7 @@ public IndexedFastaSequenceFile(final Path path) throws FileNotFoundException { this(path, new FastaSequenceIndex((findRequiredFastaIndexFile(path)))); } + @Override public boolean isIndexed() {return true;} private static File findFastaIndex(File fastaFile) { @@ -135,18 +136,14 @@ public static boolean canCreateIndexedFastaReader(final File fastaFile) { } private static Path findFastaIndex(Path fastaFile) { - Path indexFile = getFastaIndexFileName(fastaFile); + Path indexFile = ReferenceSequenceFileFactory.getFastaIndexFileName(fastaFile); if (!Files.exists(indexFile)) return null; return indexFile; } - private static Path getFastaIndexFileName(Path fastaFile) { - return fastaFile.resolveSibling(fastaFile.getFileName() + ".fai"); - } - private static Path findRequiredFastaIndexFile(Path fastaFile) throws FileNotFoundException { Path ret = findFastaIndex(fastaFile); - if (ret == null) throw new FileNotFoundException(getFastaIndexFileName(fastaFile) + " not found."); + if (ret == null) throw new FileNotFoundException(ReferenceSequenceFileFactory.getFastaIndexFileName(fastaFile) + " not found."); return ret; } @@ -190,6 +187,7 @@ protected static void sanityCheckDictionaryAgainstIndex(final String fastaFile, * Retrieves the sequence dictionary for the fasta file. 
* @return sequence dictionary of the fasta. */ + @Override public SAMSequenceDictionary getSequenceDictionary() { return sequenceDictionary; } @@ -199,6 +197,7 @@ public SAMSequenceDictionary getSequenceDictionary() { * @param contig contig whose data should be returned. * @return The full sequence associated with this contig. */ + @Override public ReferenceSequence getSequence( String contig ) { return getSubsequenceAt( contig, 1, (int)index.getIndexEntry(contig).getSize() ); } @@ -210,6 +209,7 @@ public ReferenceSequence getSequence( String contig ) { * @param stop inclusive, 1-based stop of region. * @return The partial reference sequence associated with this range. */ + @Override public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) { if(start > stop + 1) throw new SAMException(String.format("Malformed query; start point %d lies after end point %d",start,stop)); @@ -300,6 +300,7 @@ private static int readFromPosition(final SeekableByteChannel channel, final Byt * Gets the next sequence if available, or null if not present. * @return next sequence if available, or null if not present. */ + @Override public ReferenceSequence nextSequence() { if( !indexIterator.hasNext() ) return null; @@ -309,6 +310,7 @@ public ReferenceSequence nextSequence() { /** * Reset the iterator over the index. 
*/ + @Override public void reset() { indexIterator = index.iterator(); } diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFile.java b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFile.java index e7d3c288c..49f526cbc 100644 --- a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFile.java @@ -86,5 +86,6 @@ */ public String toString(); + @Override public void close() throws IOException; } diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java index 5978072d7..654706819 100644 --- a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java +++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java @@ -24,6 +24,8 @@ package htsjdk.samtools.reference; +import htsjdk.samtools.util.IOUtil; + import java.io.File; import java.io.FileNotFoundException; import java.nio.file.Path; @@ -113,24 +115,61 @@ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, fi * @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader */ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) { - final String name = path.getFileName().toString(); - for (final String ext : FASTA_EXTENSIONS) { - if (name.endsWith(ext)) { - // Using faidx requires truncateNamesAtWhitespace - if (truncateNamesAtWhitespace && preferIndexed && IndexedFastaSequenceFile.canCreateIndexedFastaReader(path)) { - try { - return new IndexedFastaSequenceFile(path); - } - catch (final FileNotFoundException e) { - throw new IllegalStateException("Should never happen, because existence of files has been checked.", e); - } - } - else { - return new FastaSequenceFile(path, 
truncateNamesAtWhitespace); - } + // this should thrown an exception if the fasta file is not supported + getFastaExtension(path); + // Using faidx requires truncateNamesAtWhitespace + if (truncateNamesAtWhitespace && preferIndexed && IndexedFastaSequenceFile.canCreateIndexedFastaReader(path)) { + try { + return new IndexedFastaSequenceFile(path); } + catch (final FileNotFoundException e) { + throw new IllegalStateException("Should never happen, because existence of files has been checked.", e); + } + } else { + return new FastaSequenceFile(path, truncateNamesAtWhitespace); } + } + + /** + * Returns the default dictionary name for a FASTA file. + * + * @param file the reference sequence file on disk. + */ + public static File getDefaultDictionaryForReferenceSequence(final File file) { + return getDefaultDictionaryForReferenceSequence(file.toPath()).toFile(); + } + + /** + * Returns the default dictionary name for a FASTA file. + * + * @param path the reference sequence file path. + */ + public static Path getDefaultDictionaryForReferenceSequence(final Path path) { + final String name = path.getFileName().toString(); + final int extensionIndex = name.length() - getFastaExtension(path).length(); + return path.resolveSibling(name.substring(0, extensionIndex) + IOUtil.DICT_FILE_EXTENSION); + } - throw new IllegalArgumentException("File is not a supported reference file type: " + path.toAbsolutePath()); + /** + * Returns the FASTA extension for the path. + * + * @param path the reference sequence file path. + * + * @throws IllegalArgumentException if the file is not a supported reference file. + */ + public static String getFastaExtension(final Path path) { + final String name = path.getFileName().toString(); + return FASTA_EXTENSIONS.stream().filter(name::endsWith).findFirst() + .orElseGet(() -> {throw new IllegalArgumentException("File is not a supported reference file type: " + path.toAbsolutePath());}); } + + /** + * Returns the index name for a FASTA file. 
+ * + * @param fastaFile the reference sequence file path. + */ + public static Path getFastaIndexFileName(Path fastaFile) { + return fastaFile.resolveSibling(fastaFile.getFileName() + ".fai"); + } + } diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java index d66f0f870..936f14c86 100644 --- a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java +++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java @@ -1,7 +1,7 @@ /* * The MIT License * - * Copyright (c) 2009 The Broad Institute + * Copyright (c) 2009-2016 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -83,7 +83,7 @@ public ReferenceSequence get(final int sequenceIndex) { } referenceSequence = null; - if(referenceSequenceFile.isIndexed()) { + if(referenceSequenceFile.isIndexed() && referenceSequenceFile.getSequenceDictionary() != null) { final SAMSequenceRecord samSequenceRecord = referenceSequenceFile.getSequenceDictionary().getSequence(sequenceIndex); if(samSequenceRecord != null) { referenceSequence = referenceSequenceFile.getSequence(samSequenceRecord.getSequenceName()) ; @@ -105,6 +105,7 @@ public SAMSequenceDictionary getSequenceDictionary() { return referenceSequenceFile.getSequenceDictionary(); } + @Override public void close() throws IOException { referenceSequenceFile.close(); } diff --git a/src/main/java/htsjdk/samtools/seekablestream/ByteArraySeekableStream.java b/src/main/java/htsjdk/samtools/seekablestream/ByteArraySeekableStream.java index 4f8c322c5..bb3b95af0 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/ByteArraySeekableStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/ByteArraySeekableStream.java @@ -1,7 +1,7 @@ /* * The MIT License * - * Copyright (c) 2016 The Broad Institute + * 
Copyright (c) 2015 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -24,13 +24,11 @@ package htsjdk.samtools.seekablestream; -import htsjdk.samtools.seekablestream.SeekableStream; - import java.io.IOException; /** -* Created by vadim on 23/03/2015. -*/ + * Created by vadim on 23/03/2015. + */ public class ByteArraySeekableStream extends SeekableStream { private byte[] bytes; private long position = 0; @@ -51,21 +49,27 @@ public long position() throws IOException { @Override public void seek(long position) throws IOException { - this.position = position; + if (position < 0) { + throw new IllegalArgumentException("Cannot seek to a negative position, position=" + position + "."); + } else { + this.position = position; + } } @Override public int read() throws IOException { - if (position < bytes.length) + if (position < bytes.length) { return 0xFF & bytes[((int) position++)]; - else return -1; + } else { + return -1; + } } @Override public int read(byte[] b, int off, int len) throws IOException { if (b == null) { throw new NullPointerException(); - } else if (off < 0 || len < 0 || len > b.length - off) { + } else if (off < 0 || len < 0 || len + off > b.length) { throw new IndexOutOfBoundsException(); } if (position >= bytes.length) { @@ -85,6 +89,7 @@ public int read(byte[] b, int off, int len) throws IOException { @Override public void close() throws IOException { bytes = null; + position = -1; } @Override diff --git a/src/main/java/htsjdk/samtools/seekablestream/ISeekableStreamFactory.java b/src/main/java/htsjdk/samtools/seekablestream/ISeekableStreamFactory.java index dff28b0a0..71807cf46 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/ISeekableStreamFactory.java +++ b/src/main/java/htsjdk/samtools/seekablestream/ISeekableStreamFactory.java @@ -2,6 +2,8 @@ import java.io.IOException; import java.net.URL; +import 
java.nio.channels.SeekableByteChannel; +import java.util.function.Function; /** * Factory for creating {@link SeekableStream}s based on URLs/paths. @@ -30,4 +32,24 @@ * @return */ public SeekableStream getBufferedStream(SeekableStream stream, int bufferSize); + + /** + * Open a stream from the input path, applying the wrapper to the stream. + * + * The wrapper allows applying operations directly to the byte stream so that things like caching, prefetching, or decryption + * can be done at the raw byte level. + * + * The default implementation throws if wrapper != null, but implementations may support this wrapping operation + * + * @param path a uri like String representing a resource to open + * @param wrapper a wrapper to apply to the stream + * @return a stream opened path + */ + default SeekableStream getStreamFor(String path, Function wrapper) throws IOException { + if(wrapper != null) { + throw new UnsupportedOperationException("This factory doesn't support adding wrappers"); + } else { + return this.getStreamFor(path); + } + } } diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableBufferedStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableBufferedStream.java index 56b4d0c9c..0c89b0166 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableBufferedStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableBufferedStream.java @@ -67,6 +67,7 @@ public SeekableBufferedStream(final SeekableStream stream) { this(stream, DEFAULT_BUFFER_SIZE); } + @Override public long length() { return wrappedStream.length(); } @@ -84,18 +85,21 @@ public long skip(final long skipLength) throws IOException { } } + @Override public void seek(final long position) throws IOException { this.position = position; wrappedStream.seek(position); bufferedStream = new ExtBufferedInputStream(wrappedStream, bufferSize); } + @Override public int read() throws IOException { int b = bufferedStream.read(); position++; return b; } + @Override public 
int read(final byte[] buffer, final int offset, final int length) throws IOException { int nBytesRead = bufferedStream.read(buffer, offset, length); if (nBytesRead > 0) { @@ -112,10 +116,12 @@ public int read(final byte[] buffer, final int offset, final int length) throws return nBytesRead; } + @Override public void close() throws IOException { wrappedStream.close(); } + @Override public boolean eof() throws IOException { return position >= wrappedStream.length(); } diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableFTPStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableFTPStream.java index 0a64a7c09..1723747d5 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableFTPStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableFTPStream.java @@ -39,10 +39,12 @@ public SeekableFTPStream(URL url, UserPasswordInput userPasswordInput) throws IO helper = new SeekableFTPStreamHelper(url, userPasswordInput); } + @Override public void seek(long position) { helper.seek(position); } + @Override public long position() { return helper.position(); } @@ -75,10 +77,12 @@ public int read(byte[] buffer, int offset, int len) throws IOException { } + @Override public void close() throws IOException { helper.close(); } + @Override public int read() throws IOException { return helper.read(); } diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableFileStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableFileStream.java index 38191d769..b790732a9 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableFileStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableFileStream.java @@ -48,18 +48,22 @@ public SeekableFileStream(final File file) throws FileNotFoundException { allInstances.add(this); } + @Override public long length() { return file.length(); } + @Override public boolean eof() throws IOException { return fis.length() == fis.getFilePointer(); } + @Override public void seek(final 
long position) throws IOException { fis.seek(position); } + @Override public long position() throws IOException { return fis.getChannel().position(); } @@ -71,6 +75,7 @@ public long skip(long n) throws IOException { return position() - initPos; } + @Override public int read(final byte[] buffer, final int offset, final int length) throws IOException { if (length < 0) { throw new IndexOutOfBoundsException(); @@ -91,6 +96,7 @@ public int read(final byte[] buffer, final int offset, final int length) throws } + @Override public int read() throws IOException { return fis.read(); } @@ -106,6 +112,7 @@ public String getSource() { } + @Override public void close() throws IOException { allInstances.remove(this); fis.close(); diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableHTTPStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableHTTPStream.java index 4a864b77e..640a14d98 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableHTTPStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableHTTPStream.java @@ -67,10 +67,12 @@ public SeekableHTTPStream(final URL url, Proxy proxy) { } + @Override public long position() { return position; } + @Override public long length() { return contentLength; } @@ -82,14 +84,17 @@ public long skip(long n) throws IOException { return bytesToSkip; } + @Override public boolean eof() throws IOException { return contentLength > 0 && position >= contentLength; } + @Override public void seek(final long position) { this.position = position; } + @Override public int read(byte[] buffer, int offset, int len) throws IOException { if (offset < 0 || len < 0 || (offset + len) > buffer.length) { @@ -168,11 +173,13 @@ public int read(byte[] buffer, int offset, int len) throws IOException { } + @Override public void close() throws IOException { // Nothing to do } + @Override public int read() throws IOException { byte []tmp=new byte[1]; read(tmp,0,1); diff --git 
a/src/main/java/htsjdk/samtools/seekablestream/SeekablePathStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekablePathStream.java index 18a41e7a2..cb88512df 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekablePathStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekablePathStream.java @@ -9,6 +9,7 @@ import java.util.Collection; import java.util.Collections; import java.util.HashSet; +import java.util.function.Function; /** * An implementation of {@link SeekableStream} for {@link Path}. @@ -29,8 +30,16 @@ private final ByteBuffer oneByteBuf = ByteBuffer.allocate(1); public SeekablePathStream(final Path path) throws IOException { + this(path, null); + } + + public SeekablePathStream(final Path path, Function wrapper) throws IOException { this.path = path; - this.sbc = Files.newByteChannel(path); + if (null==wrapper) { + this.sbc = Files.newByteChannel(path); + } else { + this.sbc = wrapper.apply(Files.newByteChannel(path)); + } ALL_INSTANCES.add(this); } diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableStream.java index 673f08c48..45f699043 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableStream.java @@ -35,8 +35,10 @@ public abstract void seek(long position) throws IOException; + @Override public abstract int read(byte[] buffer, int offset, int length) throws IOException; + @Override public abstract void close() throws IOException; public abstract boolean eof() throws IOException; diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java index fe8f42a56..19d40e5e3 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java @@ -23,9 +23,12 @@ */ package 
htsjdk.samtools.seekablestream; +import htsjdk.samtools.util.IOUtil; import java.io.File; import java.io.IOException; import java.net.URL; +import java.nio.channels.SeekableByteChannel; +import java.util.function.Function; /** * Singleton class for getting {@link SeekableStream}s from URL/paths @@ -64,11 +67,27 @@ public static boolean isFilePath(final String path) { private static class DefaultSeekableStreamFactory implements ISeekableStreamFactory { + @Override public SeekableStream getStreamFor(final URL url) throws IOException { return getStreamFor(url.toExternalForm()); } + @Override public SeekableStream getStreamFor(final String path) throws IOException { + return getStreamFor(path, null); + } + + /** + * The wrapper will only be applied to the stream if the stream is treated as a {@link java.nio.file.Path} + * + * This currently means any uri with a scheme that is not http, https, ftp, or file will have the wrapper applied to it + * + * @param path a uri like String representing a resource to open + * @param wrapper a wrapper to apply to the stream allowing direct transformations on the byte stream to be applied + */ + @Override + public SeekableStream getStreamFor(final String path, + Function wrapper) throws IOException { // todo -- add support for SeekableBlockInputStream if (path.startsWith("http:") || path.startsWith("https:")) { @@ -78,15 +97,19 @@ public SeekableStream getStreamFor(final String path) throws IOException { return new SeekableFTPStream(new URL(path)); } else if (path.startsWith("file:")) { return new SeekableFileStream(new File(new URL(path).getPath())); + } else if (IOUtil.hasScheme(path)) { + return new SeekablePathStream(IOUtil.getPath(path), wrapper); } else { return new SeekableFileStream(new File(path)); } } + @Override public SeekableStream getBufferedStream(SeekableStream stream){ return getBufferedStream(stream, SeekableBufferedStream.DEFAULT_BUFFER_SIZE); } + @Override public SeekableStream getBufferedStream(SeekableStream 
stream, int bufferSize){ if (bufferSize == 0) return stream; else return new SeekableBufferedStream(stream, bufferSize); diff --git a/src/main/java/htsjdk/samtools/sra/SRALazyRecord.java b/src/main/java/htsjdk/samtools/sra/SRALazyRecord.java index 4391857e6..c5067116e 100644 --- a/src/main/java/htsjdk/samtools/sra/SRALazyRecord.java +++ b/src/main/java/htsjdk/samtools/sra/SRALazyRecord.java @@ -675,6 +675,7 @@ protected SAMBinaryTagAndValue getBinaryAttributes() { return super.getBinaryAttributes(); } + @Override public boolean isUnsignedArrayAttribute(final String tag) { Short binaryTag = SAMTagUtil.getSingleton().makeBinaryTag(tag); LazyAttribute attr = lazyAttributeTags.get(binaryTag); diff --git a/src/main/java/htsjdk/samtools/util/AbstractAsyncWriter.java b/src/main/java/htsjdk/samtools/util/AbstractAsyncWriter.java index ef1803bce..f69578c07 100644 --- a/src/main/java/htsjdk/samtools/util/AbstractAsyncWriter.java +++ b/src/main/java/htsjdk/samtools/util/AbstractAsyncWriter.java @@ -66,6 +66,7 @@ public void write(final T item) { * Attempts to finish draining the queue and then calls synchronouslyClose() to allow implementation * to do any one time clean up. */ + @Override public void close() { checkAndRethrow(); @@ -110,6 +111,7 @@ private final void checkAndRethrow() { * synchronous writer. 
*/ private class WriterRunnable implements Runnable { + @Override public void run() { try { //The order of the two conditions is important, see https://github.com/samtools/htsjdk/issues/564 diff --git a/src/main/java/htsjdk/samtools/util/AbstractLocusInfo.java b/src/main/java/htsjdk/samtools/util/AbstractLocusInfo.java index 4e020071d..d699dce8f 100644 --- a/src/main/java/htsjdk/samtools/util/AbstractLocusInfo.java +++ b/src/main/java/htsjdk/samtools/util/AbstractLocusInfo.java @@ -83,6 +83,7 @@ public void add(E recordAndOffset) { /** * @return the index of reference sequence */ + @Override public int getSequenceIndex() { return referenceSequence.getSequenceIndex(); } @@ -90,6 +91,7 @@ public int getSequenceIndex() { /** * @return 1-based reference position */ + @Override public int getPosition() { return position; } diff --git a/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java b/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java index 6ff8e835c..e35087405 100644 --- a/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java +++ b/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java @@ -182,6 +182,7 @@ public AbstractLocusIterator(final SamReader samReader, final IntervalList inter * @return iterator over all/all covered locus position in reference according to emitUncoveredLoci * value. */ + @Override public Iterator iterator() { if (samIterator != null) { throw new IllegalStateException("Cannot call iterator() more than once on " + this.getClass().getSimpleName()); @@ -202,6 +203,7 @@ public AbstractLocusIterator(final SamReader samReader, final IntervalList inter /** * Closes inner SamIterator. 
*/ + @Override public void close() { this.samIterator.close(); } @@ -216,6 +218,7 @@ private boolean samHasMore() { * 2) there are AbstractLocusInfos in some stage of accumulation * 3) there are loci in the target mask that have yet to be accumulated (even if there are no reads covering them) */ + @Override public boolean hasNext() { if (this.samIterator == null) { iterator(); @@ -253,6 +256,7 @@ private boolean hasRemainingMaskBases() { * * @return information about next locus position in reference sequence */ + @Override public K next() { // if we don't have any completed entries to return, try and make some! while (complete.isEmpty() && samHasMore()) { @@ -475,6 +479,7 @@ protected SAMSequenceRecord getReferenceSequence(final int referenceSequenceInde return samReader.getFileHeader().getSequence(referenceSequenceIndex); } + @Override public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); } diff --git a/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java b/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java index 28b9d34b3..e76b66683 100644 --- a/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java +++ b/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java @@ -49,16 +49,6 @@ /** * @param record inner SAMRecord * @param offset from the start of the read - * @param length of alignment block - * @param refPos corresponding to read offset reference position - */ - public AbstractRecordAndOffset(final SAMRecord record, final int offset, int length, int refPos) { - this(record, offset); - } - - /** - * @param record inner SAMRecord - * @param offset from the start of the read */ public AbstractRecordAndOffset(final SAMRecord record, final int offset) { this.offset = offset; @@ -94,13 +84,6 @@ public int getLength() { } /** - * @return the position in reference sequence, to which the start of alignment block is aligned. 
- */ - public int getRefPos() { - return -1; - } - - /** * @return read name of inner SAMRecord. */ public String getReadName() { diff --git a/src/main/java/htsjdk/samtools/util/AsciiWriter.java b/src/main/java/htsjdk/samtools/util/AsciiWriter.java index 00c6f7f1f..50b08d844 100644 --- a/src/main/java/htsjdk/samtools/util/AsciiWriter.java +++ b/src/main/java/htsjdk/samtools/util/AsciiWriter.java @@ -50,6 +50,7 @@ public AsciiWriter(final OutputStream os) { /** * flushes and closes underlying OutputStream. */ + @Override public void close() throws IOException { flush(); os.close(); @@ -58,6 +59,7 @@ public void close() throws IOException { /** * flushes underlying OutputStream */ + @Override public void flush() throws IOException { os.write(buffer, 0, numBytes); numBytes = 0; @@ -67,6 +69,7 @@ public void flush() throws IOException { /** * All other Writer methods vector through this, so this is the only one that must be overridden. */ + @Override public void write(final char[] chars, int offset, int length) throws IOException { while (length > 0) { final int charsToConvert = Math.min(length, buffer.length - numBytes); diff --git a/src/main/java/htsjdk/samtools/util/AsyncBlockCompressedInputStream.java b/src/main/java/htsjdk/samtools/util/AsyncBlockCompressedInputStream.java new file mode 100644 index 000000000..4f71ef581 --- /dev/null +++ b/src/main/java/htsjdk/samtools/util/AsyncBlockCompressedInputStream.java @@ -0,0 +1,234 @@ +/* + * The MIT License + * + * Copyright (c) 2016 Daniel Cameron + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.util; + + +import htsjdk.samtools.Defaults; +import htsjdk.samtools.seekablestream.SeekableStream; +import htsjdk.samtools.util.zip.InflaterFactory; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.concurrent.Semaphore; +import java.util.concurrent.ThreadFactory; + +/** + * Asynchronous read-ahead implementation of {@link htsjdk.samtools.util.BlockCompressedInputStream}. + * + * Note that this implementation is not synchronized. If multiple threads access an instance concurrently, it must be synchronized externally. 
+ */ +public class AsyncBlockCompressedInputStream extends BlockCompressedInputStream { + private static final int READ_AHEAD_BUFFERS = (int)Math.ceil(Defaults.NON_ZERO_BUFFER_SIZE / BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE); + private static final Executor threadpool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors(),new ThreadFactory() { + @Override + public Thread newThread(Runnable r) { + Thread t = Executors.defaultThreadFactory().newThread(r); + t.setDaemon(true); + return t; + } + }); + /** + * Next blocks (in stream order) that have already been decompressed. + */ + private final BlockingQueue mResult = new ArrayBlockingQueue<>(READ_AHEAD_BUFFERS); + /** + * Buffers used to decompress previous blocks that are no longer in use. + * These buffers are reused if possible. + * Note that no blocking occurs on this buffer and a blocking queue is used purely + * because it is a base library synchronized queue implementation + * (and Collections.synchronizedQueue() does not exist). + */ + private final BlockingQueue freeBuffers = new ArrayBlockingQueue<>(READ_AHEAD_BUFFERS); + /** + * Indicates whether a read-ahead task has been scheduled to run. Only one read-ahead task + * per stream can be scheduled at any one time. + */ + private final Semaphore running = new Semaphore(1); + /** + * Indicates whether any scheduled task should abort processing and terminate + * as soon as possible since the result will be discarded anyway. 
+ */ + private volatile boolean mAbort = false; + + public AsyncBlockCompressedInputStream(final InputStream stream) { + super(stream, true); + } + + public AsyncBlockCompressedInputStream(final InputStream stream, InflaterFactory inflaterFactory) { + super(stream, true, inflaterFactory); + } + + public AsyncBlockCompressedInputStream(final File file) + throws IOException { + super(file); + } + + public AsyncBlockCompressedInputStream(final File file, InflaterFactory inflaterFactory) + throws IOException { + super(file, inflaterFactory); + } + + public AsyncBlockCompressedInputStream(final URL url) { + super(url); + } + + public AsyncBlockCompressedInputStream(final URL url, InflaterFactory inflaterFactory) { + super(url, inflaterFactory); + } + + public AsyncBlockCompressedInputStream(final SeekableStream strm) { + super(strm); + } + + public AsyncBlockCompressedInputStream(final SeekableStream strm, InflaterFactory inflaterFactory) { + super(strm, inflaterFactory); + } + + @Override + protected DecompressedBlock nextBlock(byte[] bufferAvailableForReuse) { + if (bufferAvailableForReuse != null) { + freeBuffers.offer(bufferAvailableForReuse); + } + return nextBlockSync(); + } + + @Override + protected void prepareForSeek() { + flushReadAhead(); + super.prepareForSeek(); + } + + @Override + public void close() throws IOException { + // Suppress interrupts while we close. + final boolean isInterrupted = Thread.interrupted(); + mAbort = true; + try { + flushReadAhead(); + super.close(); + } finally { + if (isInterrupted) Thread.currentThread().interrupt(); + } + } + /** + * Foreground thread blocking operation that aborts all read-ahead tasks + * and flushes all read-ahead results. 
+ */ + private void flushReadAhead() { + final boolean abortStatus = mAbort; + mAbort = true; + try { + // block until the thread pool operation has completed + running.acquire(); + } catch (InterruptedException e) { + throw new RuntimeException("Interrupted waiting for decompression thread", e); + } + // flush any read-ahead results + mResult.clear(); + mAbort = abortStatus; + running.release(); + } + /** + * Ensures that a read-ahead task for this stream exists in the thread pool. + */ + private void ensureReadAhead() { + if (running.tryAcquire()) { + tryQueueTask(); + } + } + /** + * Try to queue another read-ahead buffer + * This method should only be invoked by the owner of the running semaphore + */ + private void tryQueueTask() { + if (mAbort) { + // Potential deadlock between getNextBlock() and flushReadAhead() here + // This requires seek()/close() and another method to be called + // at the same time. Since the parent class is not thread-safe + // this is an acceptable behavior. + running.release(); + return; + } + if (mResult.remainingCapacity() == 0) { + // read-ahead has already filled the results buffer + running.release(); + if (mResult.remainingCapacity() > 0) { + // race condition this second check fixes: + // - worker thread context switch after checking remaining capacity is zero + // - foreground thread calls getNextBlock() repeatedly until blocking + // - worker thread switches back in and releases mutex + // = foreground blocking on mResult.take(), mutex free, no worker + // -> try to take back mutex and start worker + // if that fails, the someone else took the lock and would + // have started the background worker. 
(except if flushReadAhead() + // took the lock with getNextBlock() still blocking: not thread-safe + // so we don't care) + ensureReadAhead(); + return; + } else { + return; + } + } + // we are able to perform a read-ahead operation + // ownership of the running mutex is now with the threadpool task + threadpool.execute(new AsyncBlockCompressedInputStreamRunnable()); + } + /** + * Foreground thread blocking operation that retrieves the next read-ahead buffer. + * Lazy initiation of read-ahead is performed if required. + * @return next decompressed block in input stream + */ + private DecompressedBlock nextBlockSync() { + ensureReadAhead(); + DecompressedBlock nextBlock; + try { + nextBlock = mResult.take(); + } catch (InterruptedException e) { + return new DecompressedBlock(0, 0, e); + } + ensureReadAhead(); + return nextBlock; + } + private class AsyncBlockCompressedInputStreamRunnable implements Runnable { + /** + * Thread pool operation that fills the read-ahead queue + */ + @Override + public void run() { + final DecompressedBlock decompressed = processNextBlock(freeBuffers.poll()); + if (!mResult.offer(decompressed)) { + // offer should never block since we never queue a task when the results buffer is full + running.release(); // safety release to ensure foreground close() does not block indefinitely + throw new IllegalStateException("Decompression buffer full"); + } + tryQueueTask(); + } + } +} diff --git a/src/main/java/htsjdk/samtools/util/BinaryCodec.java b/src/main/java/htsjdk/samtools/util/BinaryCodec.java index 8933ee35d..fdef93196 100644 --- a/src/main/java/htsjdk/samtools/util/BinaryCodec.java +++ b/src/main/java/htsjdk/samtools/util/BinaryCodec.java @@ -587,6 +587,7 @@ public long readUInt() { /** * Close the appropriate stream */ + @Override public void close() { try { if (this.isWriting) { diff --git a/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java b/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java index 
b0ac0018e..622ca67ac 100755 --- a/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java +++ b/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java @@ -30,22 +30,25 @@ import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.seekablestream.SeekableHTTPStream; import htsjdk.samtools.seekablestream.SeekableStream; +import htsjdk.samtools.util.zip.InflaterFactory; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.RandomAccessFile; +import java.io.*; import java.net.URL; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.util.Arrays; -/* +/** * Utility class for reading BGZF block compressed files. The caller can treat this file like any other InputStream. * It probably is not necessary to wrap this stream in a buffering stream, because there is internal buffering. * The advantage of BGZF over conventional GZip format is that BGZF allows for seeking without having to read the - * entire file up to the location being sought. Note that seeking is only possible if the ctor(File) is used. + * entire file up to the location being sought. Note that seeking is only possible if the input stream is seekable. + * + * Note that this implementation is not synchronized. If multiple threads access an instance concurrently, it must be synchronized externally. * * c.f. 
http://samtools.sourceforge.net/SAM1.pdf for details of BGZF format */ @@ -54,30 +57,52 @@ public final static String INCORRECT_HEADER_SIZE_MSG = "Incorrect header size for file: "; public final static String UNEXPECTED_BLOCK_LENGTH_MSG = "Unexpected compressed block length: "; public final static String PREMATURE_END_MSG = "Premature end of file: "; - public final static String CANNOT_SEEK_STREAM_MSG = "Cannot seek on stream based file "; + public final static String CANNOT_SEEK_STREAM_MSG = "Cannot seek a position for a non-file stream"; + public final static String CANNOT_SEEK_CLOSED_STREAM_MSG = "Cannot seek a position for a closed stream"; public final static String INVALID_FILE_PTR_MSG = "Invalid file pointer: "; private InputStream mStream = null; + private boolean mIsClosed = false; private SeekableStream mFile = null; private byte[] mFileBuffer = null; - private byte[] mCurrentBlock = null; + private DecompressedBlock mCurrentBlock = null; private int mCurrentOffset = 0; - private long mBlockAddress = 0; - private int mLastBlockLength = 0; - private final BlockGunzipper blockGunzipper = new BlockGunzipper(); - + private long mStreamOffset = 0; + private final BlockGunzipper blockGunzipper; /** * Note that seek() is not supported if this ctor is used. + * @param stream source of bytes */ public BlockCompressedInputStream(final InputStream stream) { - this(stream, true); + this(stream, true, BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * Note that seek() is not supported if this ctor is used. + * @param stream source of bytes + * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper} + */ + public BlockCompressedInputStream(final InputStream stream, final InflaterFactory inflaterFactory) { + this(stream, true, inflaterFactory); } /** * Note that seek() is not supported if this ctor is used. 
+ * @param stream source of bytes + * @param allowBuffering if true, allow buffering */ public BlockCompressedInputStream(final InputStream stream, final boolean allowBuffering) { + this(stream, allowBuffering, BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * Note that seek() is not supported if this ctor is used. + * @param stream source of bytes + * @param allowBuffering if true, allow buffering + * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper} + */ + public BlockCompressedInputStream(final InputStream stream, final boolean allowBuffering, final InflaterFactory inflaterFactory) { if (allowBuffering) { mStream = IOUtil.toBufferedStream(stream); } @@ -86,30 +111,68 @@ public BlockCompressedInputStream(final InputStream stream, final boolean allowB } mFile = null; + blockGunzipper = new BlockGunzipper(inflaterFactory); } /** * Use this ctor if you wish to call seek() + * @param file source of bytes + * @throws IOException */ public BlockCompressedInputStream(final File file) throws IOException { + this(file, BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * Use this ctor if you wish to call seek() + * @param file source of bytes + * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper} + * @throws IOException + */ + public BlockCompressedInputStream(final File file, final InflaterFactory inflaterFactory) throws IOException { mFile = new SeekableFileStream(file); mStream = null; - + blockGunzipper = new BlockGunzipper(inflaterFactory); } + /** + * @param url source of bytes + */ public BlockCompressedInputStream(final URL url) { + this(url, BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * @param url source of bytes + * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper} + */ + public BlockCompressedInputStream(final URL url, final InflaterFactory inflaterFactory) { mFile = new SeekableBufferedStream(new SeekableHTTPStream(url)); mStream = null; + 
blockGunzipper = new BlockGunzipper(inflaterFactory); } /** * For providing some arbitrary data source. No additional buffering is * provided, so if the underlying source is not buffered, wrap it in a * SeekableBufferedStream before passing to this ctor. + * @param strm source of bytes */ public BlockCompressedInputStream(final SeekableStream strm) { + this(strm, BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * For providing some arbitrary data source. No additional buffering is + * provided, so if the underlying source is not buffered, wrap it in a + * SeekableBufferedStream before passing to this ctor. + * @param strm source of bytes + * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper} + */ + public BlockCompressedInputStream(final SeekableStream strm, final InflaterFactory inflaterFactory) { mFile = strm; mStream = null; + blockGunzipper = new BlockGunzipper(inflaterFactory); } /** @@ -127,14 +190,15 @@ public void setCheckCrcs(final boolean check) { * Note that although the next caller can read this many bytes without blocking, the available() method call itself * may block in order to fill an internal buffer if it has been exhausted. */ + @Override public int available() throws IOException { - if (mCurrentBlock == null || mCurrentOffset == mCurrentBlock.length) { + if (mCurrentBlock == null || mCurrentOffset == mCurrentBlock.mBlock.length) { readBlock(); } if (mCurrentBlock == null) { return 0; } - return mCurrentBlock.length - mCurrentOffset; + return mCurrentBlock.mBlock.length - mCurrentOffset; } /** @@ -142,12 +206,13 @@ public int available() throws IOException { * false otherwise. 
*/ public boolean endOfBlock() { - return (mCurrentBlock != null && mCurrentOffset == mCurrentBlock.length); + return (mCurrentBlock != null && mCurrentOffset == mCurrentBlock.mBlock.length); } /** * Closes the underlying InputStream or RandomAccessFile */ + @Override public void close() throws IOException { if (mFile != null) { mFile.close(); @@ -159,6 +224,9 @@ public void close() throws IOException { // Encourage garbage collection mFileBuffer = null; mCurrentBlock = null; + + // Mark as closed + mIsClosed = true; } /** @@ -168,8 +236,9 @@ public void close() throws IOException { * @return the next byte of data, or -1 if the end of the stream is reached. */ + @Override public int read() throws IOException { - return (available() > 0) ? (mCurrentBlock[mCurrentOffset++] & 0xFF) : -1; + return (available() > 0) ? (mCurrentBlock.mBlock[mCurrentOffset++] & 0xFF) : -1; } /** @@ -183,6 +252,7 @@ public int read() throws IOException { * @return the total number of bytes read into the buffer, or -1 is there is no more data because the end of * the stream has been reached. 
*/ + @Override public int read(final byte[] buffer) throws IOException { return read(buffer, 0, buffer.length); } @@ -199,48 +269,47 @@ public int read(final byte[] buffer) throws IOException { * character, or null if the end of the stream has been reached * * @exception IOException If an I/O error occurs - * @ */ public String readLine() throws IOException { - int available = available(); + int available = available(); if (available == 0) { return null; } if(null == buf){ // lazy initialisation - buf = new ByteArrayOutputStream(8192); + buf = new ByteArrayOutputStream(8192); } buf.reset(); - boolean done = false; - boolean foundCr = false; // \r found flag + boolean done = false; + boolean foundCr = false; // \r found flag while (!done) { - int linetmpPos = mCurrentOffset; - int bCnt = 0; - while((available-- > 0)){ - final byte c = mCurrentBlock[linetmpPos++]; - if(c == eol){ // found \n - done = true; - break; - } else if(foundCr){ // previous char was \r - --linetmpPos; // current char is not \n so put it back - done = true; - break; - } else if(c == eolCr){ // found \r - foundCr = true; - continue; // no ++bCnt - } - ++bCnt; - } - if(mCurrentOffset < linetmpPos){ - buf.write(mCurrentBlock, mCurrentOffset, bCnt); - mCurrentOffset = linetmpPos; - } - available = available(); - if(available == 0){ - // EOF - done = true; - } - } - return buf.toString(); + int linetmpPos = mCurrentOffset; + int bCnt = 0; + while((available-- > 0)){ + final byte c = mCurrentBlock.mBlock[linetmpPos++]; + if(c == eol){ // found \n + done = true; + break; + } else if(foundCr){ // previous char was \r + --linetmpPos; // current char is not \n so put it back + done = true; + break; + } else if(c == eolCr){ // found \r + foundCr = true; + continue; // no ++bCnt + } + ++bCnt; + } + if(mCurrentOffset < linetmpPos) { + buf.write(mCurrentBlock.mBlock, mCurrentOffset, bCnt); + mCurrentOffset = linetmpPos; + } + available = available(); + if(available == 0) { + // EOF + done = true; + } + } + 
return buf.toString(); } /** @@ -255,6 +324,7 @@ public String readLine() throws IOException { * @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of * the stream has been reached. */ + @Override public int read(final byte[] buffer, int offset, int length) throws IOException { final int originalLength = length; while (length > 0) { @@ -267,7 +337,7 @@ public int read(final byte[] buffer, int offset, int length) throws IOException break; } final int copyLength = Math.min(length, available); - System.arraycopy(mCurrentBlock, mCurrentOffset, buffer, offset, copyLength); + System.arraycopy(mCurrentBlock.mBlock, mCurrentOffset, buffer, offset, copyLength); mCurrentOffset += copyLength; offset += copyLength; length -= copyLength; @@ -279,40 +349,57 @@ public int read(final byte[] buffer, int offset, int length) throws IOException * Seek to the given position in the file. Note that pos is a special virtual file pointer, * not an actual byte offset. * - * @param pos virtual file pointer + * @param pos virtual file pointer position + * @throws IOException if stream is closed or not a file based stream */ public void seek(final long pos) throws IOException { + // Must be before the mFile == null check because mFile == null for closed files and streams + if (mIsClosed) { + throw new IOException(CANNOT_SEEK_CLOSED_STREAM_MSG); + } + + // Cannot seek on streams that are not file based if (mFile == null) { throw new IOException(CANNOT_SEEK_STREAM_MSG); } + // Decode virtual file pointer - // Upper 48 bits is the byte offset into the compressed stream of a block. - // Lower 16 bits is the byte offset into the uncompressed stream inside the block. + // Upper 48 bits is the byte offset into the compressed stream of a + // block. + // Lower 16 bits is the byte offset into the uncompressed stream inside + // the block. 
final long compressedOffset = BlockCompressedFilePointerUtil.getBlockAddress(pos); final int uncompressedOffset = BlockCompressedFilePointerUtil.getBlockOffset(pos); final int available; - if (mBlockAddress == compressedOffset && mCurrentBlock != null) { - available = mCurrentBlock.length; + if (mCurrentBlock != null && mCurrentBlock.mBlockAddress == compressedOffset) { + available = mCurrentBlock.mBlock.length; } else { + prepareForSeek(); mFile.seek(compressedOffset); - mBlockAddress = compressedOffset; - mLastBlockLength = 0; - readBlock(); + mStreamOffset = compressedOffset; + mCurrentBlock = nextBlock(getBufferForReuse(mCurrentBlock)); + mCurrentOffset = 0; available = available(); } - if (uncompressedOffset > available || - (uncompressedOffset == available && !eof())) { + if (uncompressedOffset > available || (uncompressedOffset == available && !eof())) { throw new IOException(INVALID_FILE_PTR_MSG + pos + " for " + mFile.getSource()); } mCurrentOffset = uncompressedOffset; } + + /** + * Performs cleanup required before seek is called on the underlying stream + */ + protected void prepareForSeek() { + } private boolean eof() throws IOException { if (mFile.eof()) { return true; } // If the last remaining block is the size of the EMPTY_GZIP_BLOCK, this is the same as being at EOF. - return (mFile.length() - (mBlockAddress + mLastBlockLength) == BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length); + return (mFile.length() - (mCurrentBlock.mBlockAddress + + mCurrentBlock.mBlockCompressedSize) == BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length); } /** @@ -321,12 +408,17 @@ private boolean eof() throws IOException { * the two. 
*/ public long getFilePointer() { - if (mCurrentOffset == mCurrentBlock.length) { - // If current offset is at the end of the current block, file pointer should point + if (mCurrentBlock == null) { + // Haven't read anything yet = at start of stream + return BlockCompressedFilePointerUtil.makeFilePointer(0, 0); + } + if (mCurrentOffset > 0 && mCurrentOffset == mCurrentBlock.mBlock.length) { + // If current offset is at the end of the current block, file + // pointer should point // to the beginning of the next block. - return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress + mLastBlockLength, 0); + return BlockCompressedFilePointerUtil.makeFilePointer(mCurrentBlock.mBlockAddress + mCurrentBlock.mBlockCompressedSize, 0); } - return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress, mCurrentOffset); + return BlockCompressedFilePointerUtil.makeFilePointer(mCurrentBlock.mBlockAddress, mCurrentOffset); } @Override @@ -363,49 +455,100 @@ private static boolean isValidBlockHeader(final byte[] buffer) { } private void readBlock() throws IOException { - + mCurrentBlock = nextBlock(getBufferForReuse(mCurrentBlock)); + mCurrentOffset = 0; + checkAndRethrowDecompressionException(); + } + /** + * Reads and decompresses the next block + * @param bufferAvailableForReuse decompression buffer available for reuse + * @return next block in the decompressed stream + */ + protected DecompressedBlock nextBlock(byte[] bufferAvailableForReuse) { + return processNextBlock(bufferAvailableForReuse); + } + /** + * Rethrows an exception encountered during decompression + * @throws IOException + */ + private void checkAndRethrowDecompressionException() throws IOException { + if (mCurrentBlock.mException != null) { + if (mCurrentBlock.mException instanceof IOException) { + throw (IOException) mCurrentBlock.mException; + } else if (mCurrentBlock.mException instanceof RuntimeException) { + throw (RuntimeException) mCurrentBlock.mException; + } else { + throw new 
RuntimeException(mCurrentBlock.mException); + } + } + } + + /** + * Attempt to reuse the buffer of the given block + * @param block owning block + * @return decompression buffer to reuse, or null if no buffer is available + */ + private byte[] getBufferForReuse(DecompressedBlock block) { + if (block == null) return null; + return block.mBlock; + } + + /** + * Decompress the next block from the input stream. When using asynchronous + * IO, this will be called by the background thread. + * @param bufferAvailableForReuse buffer in which to place decompressed block. A null or + * incorrectly sized buffer will result in the buffer being ignored and + * a new buffer allocated for decompression. + * @return next block in input stream + */ + protected DecompressedBlock processNextBlock(byte[] bufferAvailableForReuse) { if (mFileBuffer == null) { mFileBuffer = new byte[BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE]; } - int count = readBytes(mFileBuffer, 0, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH); - if (count == 0) { - // Handle case where there is no empty gzip block at end.
- mCurrentOffset = 0; - mBlockAddress += mLastBlockLength; - mCurrentBlock = new byte[0]; - return; - } - if (count != BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH) { - throw new IOException(INCORRECT_HEADER_SIZE_MSG + mFile.getSource()); - } - final int blockLength = unpackInt16(mFileBuffer, BlockCompressedStreamConstants.BLOCK_LENGTH_OFFSET) + 1; - if (blockLength < BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH || blockLength > mFileBuffer.length) { - throw new IOException(UNEXPECTED_BLOCK_LENGTH_MSG + blockLength + " for " + mFile.getSource()); - } - final int remaining = blockLength - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH; - count = readBytes(mFileBuffer, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH, remaining); - if (count != remaining) { - throw new FileTruncatedException(PREMATURE_END_MSG + mFile.getSource()); + long blockAddress = mStreamOffset; + try { + final int headerByteCount = readBytes(mFileBuffer, 0, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH); + mStreamOffset += headerByteCount; + if (headerByteCount == 0) { + // Handle case where there is no empty gzip block at end. 
+ return new DecompressedBlock(blockAddress, new byte[0], 0); + } + if (headerByteCount != BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH) { + return new DecompressedBlock(blockAddress, headerByteCount, new IOException(INCORRECT_HEADER_SIZE_MSG + mFile.getSource())); + } + final int blockLength = unpackInt16(mFileBuffer, BlockCompressedStreamConstants.BLOCK_LENGTH_OFFSET) + 1; + if (blockLength < BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH || blockLength > mFileBuffer.length) { + return new DecompressedBlock(blockAddress, blockLength, + new IOException(UNEXPECTED_BLOCK_LENGTH_MSG + blockLength + " for " + mFile.getSource())); + } + final int remaining = blockLength - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH; + final int dataByteCount = readBytes(mFileBuffer, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH, + remaining); + mStreamOffset += dataByteCount; + if (dataByteCount != remaining) { + return new DecompressedBlock(blockAddress, blockLength, + new FileTruncatedException(PREMATURE_END_MSG + mFile.getSource())); + } + final byte[] decompressed = inflateBlock(mFileBuffer, blockLength, bufferAvailableForReuse); + return new DecompressedBlock(blockAddress, decompressed, blockLength); + } catch (IOException e) { + return new DecompressedBlock(blockAddress, 0, e); } - inflateBlock(mFileBuffer, blockLength); - mCurrentOffset = 0; - mBlockAddress += mLastBlockLength; - mLastBlockLength = blockLength; } - private void inflateBlock(final byte[] compressedBlock, final int compressedLength) throws IOException { - final int uncompressedLength = unpackInt32(compressedBlock, compressedLength-4); - byte[] buffer = mCurrentBlock; - mCurrentBlock = null; - if (buffer == null || buffer.length != uncompressedLength) { - try { - buffer = new byte[uncompressedLength]; - } catch (final NegativeArraySizeException e) { - throw new RuntimeIOException(mFile.getSource() + " has invalid uncompressedLength: " + uncompressedLength, e); - } + private byte[] 
inflateBlock(final byte[] compressedBlock, final int compressedLength, + final byte[] bufferAvailableForReuse) throws IOException { + final int uncompressedLength = unpackInt32(compressedBlock, compressedLength - 4); + if (uncompressedLength < 0) { + throw new RuntimeIOException(mFile.getSource() + " has invalid uncompressedLength: " + uncompressedLength); + } + byte[] buffer = bufferAvailableForReuse; + if (buffer == null || uncompressedLength != buffer.length) { + // can't reuse the buffer since the size is incorrect + buffer = new byte[uncompressedLength]; } blockGunzipper.unzipBlock(buffer, compressedBlock, compressedLength); - mCurrentBlock = buffer; + return buffer; } private int readBytes(final byte[] buffer, final int offset, final int length) throws IOException { @@ -456,41 +599,98 @@ private int unpackInt32(final byte[] buffer, final int offset) { public enum FileTermination {HAS_TERMINATOR_BLOCK, HAS_HEALTHY_LAST_BLOCK, DEFECTIVE} + /** + * + * @param file the file to check + * @return status of the last compressed block + * @throws IOException + */ public static FileTermination checkTermination(final File file) throws IOException { - final long fileSize = file.length(); + return checkTermination(file == null ? 
null : file.toPath()); + } + + /** + * + * @param path to the file to check + * @return status of the last compressed block + * @throws IOException + */ + public static FileTermination checkTermination(final Path path) throws IOException { + try( final SeekableByteChannel channel = Files.newByteChannel(path, StandardOpenOption.READ) ){ + return checkTermination(channel); + } + } + + /** + * check the status of the final bgzipped block for the given bgzipped resource + * + * @param channel an open channel to read from, + * the channel will remain open and the initial position will be restored when the operation completes + * this makes no guarantee about the state of the channel if an exception is thrown during reading + * + * @return the status of the last compressed block + * @throws IOException + */ + public static FileTermination checkTermination(SeekableByteChannel channel) throws IOException { + final long fileSize = channel.size(); if (fileSize < BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length) { return FileTermination.DEFECTIVE; } - final RandomAccessFile raFile = new RandomAccessFile(file, "r"); + final long initialPosition = channel.position(); + boolean exceptionThrown = false; try { - raFile.seek(fileSize - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length); - byte[] buf = new byte[BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length]; - raFile.readFully(buf); - if (Arrays.equals(buf, BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK)) { + channel.position(fileSize - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length); + + //Check if the end of the file is an empty gzip block which is used as the terminator for a bgzipped file + final ByteBuffer lastBlockBuffer = ByteBuffer.allocate(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length); + readFully(channel, lastBlockBuffer); + if (Arrays.equals(lastBlockBuffer.array(), BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK)) { return FileTermination.HAS_TERMINATOR_BLOCK; } - final int bufsize
= (int)Math.min(fileSize, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE); - buf = new byte[bufsize]; - raFile.seek(fileSize - bufsize); - raFile.read(buf); - for (int i = buf.length - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length; - i >= 0; --i) { + + //if the last block isn't an empty gzip block, check to see if it is a healthy compressed block or if it's corrupted + final int bufsize = (int) Math.min(fileSize, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE); + final byte[] bufferArray = new byte[bufsize]; + channel.position(fileSize - bufsize); + readFully(channel, ByteBuffer.wrap(bufferArray)); + for (int i = bufferArray.length - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length; + i >= 0; --i) { if (!preambleEqual(BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE, - buf, i, BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length)) { + bufferArray, i, BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length)) { continue; } - final ByteBuffer byteBuffer = ByteBuffer.wrap(buf, i + BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length, 4); + final ByteBuffer byteBuffer = ByteBuffer.wrap(bufferArray, + i + BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length, + 4); byteBuffer.order(ByteOrder.LITTLE_ENDIAN); - final int totalBlockSizeMinusOne = byteBuffer.getShort() & 0xFFFF; - if (buf.length - i == totalBlockSizeMinusOne + 1) { + final int totalBlockSizeMinusOne = byteBuffer.getShort() & 0xFFFF; + if (bufferArray.length - i == totalBlockSizeMinusOne + 1) { return FileTermination.HAS_HEALTHY_LAST_BLOCK; } else { return FileTermination.DEFECTIVE; } } return FileTermination.DEFECTIVE; + } catch (final Throwable e) { + exceptionThrown = true; + throw e; } finally { - raFile.close(); + //if an exception was thrown we don't want to reset the position because that would be likely to throw again + //and suppress the initial exception + if(!exceptionThrown) { + channel.position(initialPosition); + } + } + } + + /** + * read as 
many bytes as dst's capacity into dst or throw if that's not possible + * @throws EOFException if channel has fewer bytes available than dst's capacity + */ + static void readFully(SeekableByteChannel channel, ByteBuffer dst) throws IOException { + final int bytesRead = channel.read(dst); + if (bytesRead < dst.capacity()){ + throw new EOFException(); } } @@ -508,6 +708,38 @@ private static boolean preambleEqual(final byte[] preamble, final byte[] buf, fi } return true; } -} + protected static class DecompressedBlock { + /** + * Decompressed block + */ + private final byte[] mBlock; + /** + * Compressed size of block (the uncompressed size can be found using + * mBlock.length) + */ + private final int mBlockCompressedSize; + /** + * Stream offset of start of block + */ + private final long mBlockAddress; + /** + * Exception thrown (if any) when attempting to decompress block + */ + private final Exception mException; + + public DecompressedBlock(long blockAddress, byte[] block, int compressedSize) { + mBlock = block; + mBlockAddress = blockAddress; + mBlockCompressedSize = compressedSize; + mException = null; + } + public DecompressedBlock(long blockAddress, int compressedSize, Exception exception) { + mBlock = new byte[0]; + mBlockAddress = blockAddress; + mBlockCompressedSize = compressedSize; + mException = exception; + } + } +} diff --git a/src/main/java/htsjdk/samtools/util/BlockCompressedOutputStream.java b/src/main/java/htsjdk/samtools/util/BlockCompressedOutputStream.java index 408282f1f..a1fc6c80a 100644 --- a/src/main/java/htsjdk/samtools/util/BlockCompressedOutputStream.java +++ b/src/main/java/htsjdk/samtools/util/BlockCompressedOutputStream.java @@ -28,6 +28,7 @@ import java.io.File; import java.io.IOException; import java.io.OutputStream; +import java.nio.file.Files; import java.util.zip.CRC32; import java.util.zip.Deflater; @@ -282,7 +283,7 @@ public void close() throws IOException { codec.writeBytes(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK); 
codec.close(); // Can't re-open something that is not a regular file, e.g. a named pipe or an output stream - if (this.file == null || !this.file.isFile()) return; + if (this.file == null || !this.file.isFile() || !Files.isRegularFile(this.file.toPath())) return; if (BlockCompressedInputStream.checkTermination(this.file) != BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK) { throw new IOException("Terminator block not found after closing BGZF file " + this.file); @@ -296,6 +297,7 @@ public void close() throws IOException { * @param bite * @throws IOException */ + @Override public void write(final int bite) throws IOException { singleByteArray[0] = (byte)bite; write(singleByteArray); diff --git a/src/main/java/htsjdk/samtools/util/BlockGunzipper.java b/src/main/java/htsjdk/samtools/util/BlockGunzipper.java index 18e9285d5..bf763ba08 100644 --- a/src/main/java/htsjdk/samtools/util/BlockGunzipper.java +++ b/src/main/java/htsjdk/samtools/util/BlockGunzipper.java @@ -24,6 +24,7 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMFormatException; +import htsjdk.samtools.util.zip.InflaterFactory; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -43,10 +44,42 @@ * @author alecw@broadinstitute.org */ public class BlockGunzipper { - private final Inflater inflater = new Inflater(true); // GZIP mode + private static InflaterFactory defaultInflaterFactory = new InflaterFactory(); + private final Inflater inflater; private final CRC32 crc32 = new CRC32(); private boolean checkCrcs = false; + /** + * Create a BlockGunzipper using the default inflaterFactory + */ + BlockGunzipper() { + inflater = defaultInflaterFactory.makeInflater(true); // GZIP mode + } + + /** + * Create a BlockGunzipper using the provided inflaterFactory + * @param inflaterFactory + */ + BlockGunzipper(InflaterFactory inflaterFactory) { + inflater = inflaterFactory.makeInflater(true); // GZIP mode + } + + /** + * Sets the default {@link InflaterFactory} that will be used for all 
instances unless specified otherwise in the constructor. + * If this method is not called the default is a factory that will create the JDK {@link Inflater}. + * @param inflaterFactory non-null default factory. + */ + public static void setDefaultInflaterFactory(final InflaterFactory inflaterFactory) { + if (inflaterFactory == null) { + throw new IllegalArgumentException("null inflaterFactory"); + } + defaultInflaterFactory = inflaterFactory; + } + + public static InflaterFactory getDefaultInflaterFactory() { + return defaultInflaterFactory; + } + /** Allows the caller to decide whether or not to check CRCs on when uncompressing blocks. */ public void setCheckCrcs(final boolean check) { this.checkCrcs = check; diff --git a/src/main/java/htsjdk/samtools/util/BufferedLineReader.java b/src/main/java/htsjdk/samtools/util/BufferedLineReader.java index de1115dc4..18a4d05c0 100644 --- a/src/main/java/htsjdk/samtools/util/BufferedLineReader.java +++ b/src/main/java/htsjdk/samtools/util/BufferedLineReader.java @@ -59,6 +59,7 @@ public BufferedLineReader(final InputStream is, final int bufferSize) { * * @return the line read, or null if EOF has been reached. */ + @Override public String readLine() { ++lineNumber; try { @@ -78,6 +79,7 @@ public String readLine() { /** * @return 1-based number of line most recently read */ + @Override public int getLineNumber() { return lineNumber; } @@ -87,6 +89,7 @@ public int getLineNumber() { * * @return If not eof, the next character that would be read. If eof, -1. 
*/ + @Override public int peek() { if (peekedLine == null) { try { @@ -104,6 +107,7 @@ public int peek() { return peekedLine.charAt(0); } + @Override public void close() { peekedLine = null; try { diff --git a/src/main/java/htsjdk/samtools/util/CloseableIterator.java b/src/main/java/htsjdk/samtools/util/CloseableIterator.java index d26443e0c..fa657be22 100755 --- a/src/main/java/htsjdk/samtools/util/CloseableIterator.java +++ b/src/main/java/htsjdk/samtools/util/CloseableIterator.java @@ -45,6 +45,7 @@ */ public interface CloseableIterator extends Iterator, Closeable { /** Should be implemented to close/release any underlying resources. */ + @Override void close(); /** Consumes the contents of the iterator and returns it as a List. */ diff --git a/src/main/java/htsjdk/samtools/util/CustomGzipOutputStream.java b/src/main/java/htsjdk/samtools/util/CustomGzipOutputStream.java index cb3652ed5..16f99501c 100644 --- a/src/main/java/htsjdk/samtools/util/CustomGzipOutputStream.java +++ b/src/main/java/htsjdk/samtools/util/CustomGzipOutputStream.java @@ -11,13 +11,13 @@ * @author Tim Fennell */ public class CustomGzipOutputStream extends GZIPOutputStream { - CustomGzipOutputStream(final OutputStream outputStream, final int bufferSize, final int compressionLevel) throws + public CustomGzipOutputStream(final OutputStream outputStream, final int bufferSize, final int compressionLevel) throws IOException { super(outputStream, bufferSize); this.def.setLevel(compressionLevel); } - CustomGzipOutputStream(final OutputStream outputStream, final int compressionLevel) throws IOException { + public CustomGzipOutputStream(final OutputStream outputStream, final int compressionLevel) throws IOException { super(outputStream); this.def.setLevel(compressionLevel); } diff --git a/src/main/java/htsjdk/samtools/util/DateParser.java b/src/main/java/htsjdk/samtools/util/DateParser.java index 02a960986..f2d9481c7 100644 --- a/src/main/java/htsjdk/samtools/util/DateParser.java +++ 
b/src/main/java/htsjdk/samtools/util/DateParser.java @@ -277,47 +277,6 @@ public static String getIsoDate(Date date) { .append("Z").toString(); } - public static void test(String isodate) { - System.out.println("----------------------------------"); - try { - Date date = parse(isodate); - System.out.println(">> "+isodate); - System.out.println(">> "+date.toString()+" ["+date.getTime()+"]"); - System.out.println(">> "+getIsoDate(date)); - } catch (InvalidDateException ex) { - System.err.println(isodate+" is invalid"); - System.err.println(ex.getMessage()); - } - System.out.println("----------------------------------"); - } - - public static void test(Date date) { - String isodate = null; - System.out.println("----------------------------------"); - try { - System.out.println(">> "+date.toString()+" ["+date.getTime()+"]"); - isodate = getIsoDate(date); - System.out.println(">> "+isodate); - date = parse(isodate); - System.out.println(">> "+date.toString()+" ["+date.getTime()+"]"); - } catch (InvalidDateException ex) { - System.err.println(isodate+" is invalid"); - System.err.println(ex.getMessage()); - } - System.out.println("----------------------------------"); - } - - public static void main(String args[]) { - test("1997-07-16T19:20:30.45-02:00"); - test("1997-07-16T19:20:30+01:00"); - test("1997-07-16T19:20:30+01:00"); - test("1997-07-16T19:20"); - test("1997-07-16"); - test("1997-07"); - test("1997"); - test(new Date()); - } - public static class InvalidDateException extends SAMException { public InvalidDateException() { } diff --git a/src/main/java/htsjdk/samtools/util/DelegatingIterator.java b/src/main/java/htsjdk/samtools/util/DelegatingIterator.java index 054352bac..9d5174a93 100644 --- a/src/main/java/htsjdk/samtools/util/DelegatingIterator.java +++ b/src/main/java/htsjdk/samtools/util/DelegatingIterator.java @@ -15,20 +15,24 @@ public DelegatingIterator(final Iterator iterator) { this.iterator = iterator; } + @Override public void close() { if (iterator 
instanceof CloseableIterator) { ((CloseableIterator) this.iterator).close(); } } + @Override public boolean hasNext() { return this.iterator.hasNext(); } + @Override public T next() { return this.iterator.next(); } + @Override public void remove() { this.iterator.remove(); } diff --git a/src/main/java/htsjdk/samtools/util/DiskBackedQueue.java b/src/main/java/htsjdk/samtools/util/DiskBackedQueue.java index bbf38188b..22fca1138 100644 --- a/src/main/java/htsjdk/samtools/util/DiskBackedQueue.java +++ b/src/main/java/htsjdk/samtools/util/DiskBackedQueue.java @@ -130,6 +130,7 @@ public boolean headRecordIsFromDisk() { * @return true (if add successful) * @throws IllegalStateException if the queue cannot be added to */ + @Override public boolean add(final E record) throws IllegalStateException { if (!canAdd) throw new IllegalStateException("Cannot add to DiskBackedQueue whose canAdd() method returns false"); @@ -192,6 +193,7 @@ public E peek() { /** * Return the total number of elements in the queue, both in memory and on disk */ + @Override public int size() { return (this.headRecord == null) ? 0 : (1 + this.ramRecords.size() + this.numRecordsOnDisk); } @@ -238,6 +240,7 @@ public void clear() { * * @throws Throwable */ + @Override protected void finalize() throws Throwable { this.closeIOResources(); super.finalize(); // NB: intellij wanted me to do this. Need I? 
I'm not extending anything diff --git a/src/main/java/htsjdk/samtools/util/EdgingRecordAndOffset.java b/src/main/java/htsjdk/samtools/util/EdgingRecordAndOffset.java index b83a169e6..df282b00f 100644 --- a/src/main/java/htsjdk/samtools/util/EdgingRecordAndOffset.java +++ b/src/main/java/htsjdk/samtools/util/EdgingRecordAndOffset.java @@ -56,6 +56,8 @@ private EdgingRecordAndOffset(SAMRecord record, int offset) { public abstract byte getBaseQuality(int position); + public abstract int getRefPos(); + public static EdgingRecordAndOffset createBeginRecord(SAMRecord record, int offset, int length, int refPos) { return new StartEdgingRecordAndOffset(record, offset, length, refPos); } @@ -103,6 +105,7 @@ protected StartEdgingRecordAndOffset(SAMRecord record, int offset, int length, i * @param position in the reference * @return base quality of a read base, corresponding to a given reference position */ + @Override public byte getBaseQuality(int position) { int rOffset = getRelativeOffset(position); byte[] baseQualities = record.getBaseQualities(); @@ -174,6 +177,7 @@ private int getRelativeOffset(int position) { * @param position in the reference * @return base quality of a read base, corresponding to a given reference position */ + @Override public byte getBaseQuality(int position) { return start.getBaseQuality(position); } diff --git a/src/main/java/htsjdk/samtools/util/FastLineReader.java b/src/main/java/htsjdk/samtools/util/FastLineReader.java index d802fad22..95d620267 100644 --- a/src/main/java/htsjdk/samtools/util/FastLineReader.java +++ b/src/main/java/htsjdk/samtools/util/FastLineReader.java @@ -79,6 +79,7 @@ public boolean skipNewlines() { return sawEoln; } + @Override public void close() { CloserUtil.close(in); in = null; diff --git a/src/main/java/htsjdk/samtools/util/FileAppendStreamLRUCache.java b/src/main/java/htsjdk/samtools/util/FileAppendStreamLRUCache.java index bc8bc01cd..500b93182 100644 --- 
a/src/main/java/htsjdk/samtools/util/FileAppendStreamLRUCache.java +++ b/src/main/java/htsjdk/samtools/util/FileAppendStreamLRUCache.java @@ -47,6 +47,7 @@ public FileAppendStreamLRUCache(final int cacheSize) { } private static class Functor implements ResourceLimitedMapFunctor { + @Override public OutputStream makeValue(final File file) { try { return IOUtil.maybeBufferOutputStream(new FileOutputStream(file, true)); @@ -65,6 +66,7 @@ public OutputStream makeValue(final File file) { } } + @Override public void finalizeValue(final File file, final OutputStream out) { try { out.flush(); diff --git a/src/main/java/htsjdk/samtools/util/IOUtil.java b/src/main/java/htsjdk/samtools/util/IOUtil.java index 07ae9006a..8e06c039d 100644 --- a/src/main/java/htsjdk/samtools/util/IOUtil.java +++ b/src/main/java/htsjdk/samtools/util/IOUtil.java @@ -48,14 +48,20 @@ import java.io.Reader; import java.io.Writer; import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.nio.charset.Charset; +import java.nio.file.FileSystemNotFoundException; +import java.nio.file.FileSystems; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Scanner; @@ -80,8 +86,11 @@ public static final long TWO_GBS = 2 * ONE_GB; public static final long FIVE_GBS = 5 * ONE_GB; + public static final String VCF_FILE_EXTENSION = ".vcf"; + public static final String BCF_FILE_EXTENSION = ".bcf"; + public static final String COMPRESSED_VCF_FILE_EXTENSION = ".vcf.gz"; /** Possible extensions for VCF files and related formats. 
*/ - public static final String[] VCF_EXTENSIONS = new String[] {".vcf", ".vcf.gz", ".bcf"}; + public static final String[] VCF_EXTENSIONS = {VCF_FILE_EXTENSION, COMPRESSED_VCF_FILE_EXTENSION, BCF_FILE_EXTENSION}; public static final String INTERVAL_LIST_FILE_EXTENSION = IntervalList.INTERVAL_LIST_FILE_EXTENSION; @@ -360,7 +369,7 @@ public static void assertFilesAreReadable(final List files) { * and if it is a file then not a directory and is readable. If any * condition is false then a runtime exception is thrown. * - * @param files the list of files to check for readability + * @param inputs the list of files to check for readability */ public static void assertInputsAreValid(final List inputs) { for (final String input : inputs) assertInputIsValid(input); @@ -459,7 +468,7 @@ else if (!dir.canRead()) { public static void assertFilesEqual(final File f1, final File f2) { try { if (f1.length() != f2.length()) { - throw new SAMException("Files " + f1 + " and " + f2 + " are different lengths."); + throw new SAMException("File " + f1 + " is " + f1.length() + " bytes but file " + f2 + " is " + f2.length() + " bytes."); } final FileInputStream s1 = new FileInputStream(f1); final FileInputStream s2 = new FileInputStream(f2); @@ -690,6 +699,7 @@ public static void copyFile(final File input, final File output) { public static File[] getFilesMatchingRegexp(final File directory, final Pattern regexp) { return directory.listFiles( new FilenameFilter() { + @Override public boolean accept(final File dir, final String name) { return regexp.matcher(name).matches(); } @@ -771,9 +781,14 @@ public static File createTempDir(final String prefix, final String suffix) { /** Checks that a file exists and is readable, and then returns a buffered reader for it. 
*/ public static BufferedReader openFileForBufferedReading(final File file) { - return new BufferedReader(new InputStreamReader(openFileForReading(file)), Defaults.NON_ZERO_BUFFER_SIZE); + return openFileForBufferedReading(file.toPath()); } + /** Checks that a path exists and is readable, and then returns a buffered reader for it. */ + public static BufferedReader openFileForBufferedReading(final Path path) { + return new BufferedReader(new InputStreamReader(openFileForReading(path)), Defaults.NON_ZERO_BUFFER_SIZE); + } + /** Takes a string and replaces any characters that are not safe for filenames with an underscore */ public static String makeFileNameSafe(final String str) { return str.trim().replaceAll("[\\s!\"#$%&'()*/:;<=>?@\\[\\]\\\\^`{|}~]", "_"); @@ -943,4 +958,42 @@ public static String slurp(final InputStream is, final Charset charSet) { return output; } + + /** + * Check if the given URI has a scheme. + * + * @param uriString the URI to check + * @return true if the given URI has a scheme, false if + * not, or if the URI is malformed. + */ + public static boolean hasScheme(String uriString) { + try { + return new URI(uriString).getScheme() != null; + } catch (URISyntaxException e) { + return false; + } + } + + /** + * Converts the given URI to a {@link Path} object. If the filesystem cannot be found in the usual way, then attempt + * to load the filesystem provider using the thread context classloader. This is needed when the filesystem + * provider is loaded using a URL classloader (e.g. in spark-submit). + * + * @param uriString the URI to convert + * @return the resulting {@code Path} + * @throws IOException an I/O error occurs creating the file system + */ + public static Path getPath(String uriString) throws IOException { + URI uri = URI.create(uriString); + try { + // if the URI has no scheme, then treat as a local file, otherwise use the scheme to determine the filesystem to use + return uri.getScheme() == null ? 
Paths.get(uriString) : Paths.get(uri); + } catch (FileSystemNotFoundException e) { + ClassLoader cl = Thread.currentThread().getContextClassLoader(); + if (cl == null) { + throw e; + } + return FileSystems.newFileSystem(uri, new HashMap<>(), cl).provider().getPath(uri); + } + } } diff --git a/src/main/java/htsjdk/samtools/util/Interval.java b/src/main/java/htsjdk/samtools/util/Interval.java index 779bb25c9..51e91270a 100644 --- a/src/main/java/htsjdk/samtools/util/Interval.java +++ b/src/main/java/htsjdk/samtools/util/Interval.java @@ -141,6 +141,7 @@ public static long countBases(final Collection intervals) { * Sort based on sequence.compareTo, then start pos, then end pos * with null objects coming lexically last */ + @Override public int compareTo(final Interval that) { if (that == null) return -1; // nulls last diff --git a/src/main/java/htsjdk/samtools/util/IntervalList.java b/src/main/java/htsjdk/samtools/util/IntervalList.java index 76cb5084c..26403c512 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalList.java +++ b/src/main/java/htsjdk/samtools/util/IntervalList.java @@ -34,6 +34,7 @@ import java.io.File; import java.io.IOException; import java.io.Serializable; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -84,6 +85,7 @@ public IntervalList(final SAMSequenceDictionary dict) { public SAMFileHeader getHeader() { return header; } /** Returns an iterator over the intervals. */ + @Override public Iterator iterator() { return this.intervals.iterator(); } /** Adds an interval to the list of intervals. 
*/ @@ -355,11 +357,9 @@ static Interval merge(final SortedSet intervals, final boolean concate end = Math.max(end, i.getEnd()); } - if (concatenateNames) { - if (names.isEmpty()) name = null; - else name = StringUtil.join("|", names); - } - else { name = names.iterator().next(); } + if (names.isEmpty()) name = null; + else if (concatenateNames) name = StringUtil.join("|", names); + else name = names.iterator().next(); return new Interval(chrom, start, end, neg, name); } @@ -396,12 +396,21 @@ public static IntervalList copyOf(final IntervalList list){ * @return an IntervalList object that contains the headers and intervals from the file */ public static IntervalList fromFile(final File file) { - final BufferedReader reader= IOUtil.openFileForBufferedReading(file); + return fromPath(file.toPath()); + } + + /** + * Parses an interval list from a path. + * @param path the path containing the intervals + * @return an IntervalList object that contains the headers and intervals from the path + */ + public static IntervalList fromPath(final Path path) { + final BufferedReader reader = IOUtil.openFileForBufferedReading(path); + final IntervalList list = fromReader(reader); try { reader.close(); } catch (final IOException e) { - throw new SAMException(String.format("Failed to close file %s after reading",file)); + throw new SAMException(String.format("Failed to close file %s after reading", path.toUri().toString())); } return list; @@ -730,6 +739,64 @@ public static IntervalList difference(final Collection lists1, fin subtract(lists2, lists1)); } + /** + * A utility function for finding the intervals in the first list that have at least 1bp overlap with any interval + * in the second list. + * + * @param lhs the first collection of IntervalLists + * @param rhs the second collection of IntervalLists + * @return an IntervalList comprising of all intervals in the first IntervalList that have at least 1bp overlap with + * any interval in the second. 
+ */ + public static IntervalList overlaps(final IntervalList lhs, final IntervalList rhs) { + return overlaps(Collections.singletonList(lhs), Collections.singletonList(rhs)); + } + + /** + * A utility function for finding the intervals in the first list that have at least 1bp overlap with any interval + * in the second list. + * + * @param lists1 the first collection of IntervalLists + * @param lists2 the second collection of IntervalLists + * @return an IntervalList comprising of all intervals in the first collection of lists that have at least 1bp + * overlap with any interval in the second lists. + */ + public static IntervalList overlaps(final Collection lists1, final Collection lists2) { + if(lists1.isEmpty()){ + throw new SAMException("Cannot call overlaps with the first collection having empty list of IntervalLists."); + } + + final SAMFileHeader header = lists1.iterator().next().getHeader().clone(); + header.setSortOrder(SAMFileHeader.SortOrder.unsorted); + + // Create an overlap detector on list2 + final IntervalList overlapIntervals = new IntervalList(header); + for (final IntervalList list : lists2) { + SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), + list.getHeader().getSequenceDictionary()); + overlapIntervals.addall(list.getIntervals()); + } + final OverlapDetector detector = new OverlapDetector<>(0, 0); + final int dummy = -1; // NB: since we don't actually use the returned objects, we can use a dummy value + for (final Interval interval : overlapIntervals.sorted().uniqued()) { + detector.addLhs(dummy, interval); + } + + // Go through each input interval in lists1 and see if overlaps any interval in lists2 + final IntervalList merged = new IntervalList(header); + for (final IntervalList list : lists1) { + SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), + list.getHeader().getSequenceDictionary()); + for (final Interval interval : list.getIntervals()) { + if (detector.overlapsAny(interval)) 
{ + merged.add(interval); + } + } + } + + return merged; + } + @Override public boolean equals(final Object o) { if (this == o) return true; @@ -762,6 +829,7 @@ public int hashCode() { this.header = header; } + @Override public int compare(final Interval lhs, final Interval rhs) { final int lhsIndex = this.header.getSequenceIndex(lhs.getContig()); final int rhsIndex = this.header.getSequenceIndex(rhs.getContig()); diff --git a/src/main/java/htsjdk/samtools/util/IntervalListReferenceSequenceMask.java b/src/main/java/htsjdk/samtools/util/IntervalListReferenceSequenceMask.java index 1ddd164c6..08c2dd5e1 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalListReferenceSequenceMask.java +++ b/src/main/java/htsjdk/samtools/util/IntervalListReferenceSequenceMask.java @@ -66,6 +66,7 @@ public IntervalListReferenceSequenceMask(final IntervalList intervalList) { * * @return true if the mask is set for the given sequence and position */ + @Override public boolean get(final int sequenceIndex, final int position) { ensureSequenceLoaded(sequenceIndex); return currentBitSet.get(position); @@ -76,6 +77,7 @@ public boolean get(final int sequenceIndex, final int position) { * * @return the next pos on the given sequence >= position that is set, or -1 if there are no more set positions */ + @Override public int nextPosition(final int sequenceIndex, final int position) { ensureSequenceLoaded(sequenceIndex); // nextSetBit returns the first set bit on or after the starting index, therefore position+1 @@ -108,6 +110,7 @@ private void ensureSequenceLoaded(final int sequenceIndex) { /** * @return Largest sequence index for which there are set bits. 
*/ + @Override public int getMaxSequenceIndex() { return lastSequenceIndex; } @@ -115,6 +118,7 @@ public int getMaxSequenceIndex() { /** * @return the largest position on the last sequence index */ + @Override public int getMaxPosition() { return lastPosition; } diff --git a/src/main/java/htsjdk/samtools/util/IntervalTree.java b/src/main/java/htsjdk/samtools/util/IntervalTree.java index 49c3017e1..3efc4dfbb 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalTree.java +++ b/src/main/java/htsjdk/samtools/util/IntervalTree.java @@ -340,6 +340,7 @@ public int getIndex( final int start, final int end ) * Return an iterator over the entire tree. * @return An iterator. */ + @Override public Iterator> iterator() { return new FwdIterator(min()); @@ -482,7 +483,7 @@ public int getEnd() public int getLength() { - return mEnd - mStart; + return mEnd - mStart + 1 ; } public int getRelationship( final Node interval ) @@ -492,14 +493,14 @@ public int getRelationship( final Node interval ) result = HAS_LESSER_PART; if ( mEnd > interval.getEnd() ) result |= HAS_GREATER_PART; - if ( mStart < interval.getEnd() && interval.getStart() < mEnd ) + if ( mStart <= interval.getEnd() && interval.getStart() <= mEnd ) result |= HAS_OVERLAPPING_PART; return result; } public boolean isAdjacent( final Node interval ) { - return mStart == interval.getEnd() || mEnd == interval.getStart(); + return mStart == interval.getEnd() + 1 || mEnd + 1 == interval.getStart(); } public V1 getValue() @@ -1069,11 +1070,13 @@ public FwdIterator( final Node node ) mNext = node; } + @Override public boolean hasNext() { return mNext != null; } + @Override public Node next() { if ( mNext == null ) @@ -1092,6 +1095,7 @@ public boolean hasNext() return mLast; } + @Override public void remove() { if ( mLast == null ) @@ -1115,11 +1119,13 @@ public RevIterator( final Node node ) mNext = node; } + @Override public boolean hasNext() { return mNext != null; } + @Override public Node next() { if ( mNext == null ) @@ 
-1135,6 +1141,7 @@ public boolean hasNext() return mLast; } + @Override public void remove() { if ( mLast == null ) @@ -1160,11 +1167,13 @@ public OverlapIterator( final int start, final int end ) mEnd = end; } + @Override public boolean hasNext() { return mNext != null; } + @Override public Node next() { if ( mNext == null ) @@ -1182,6 +1191,7 @@ public boolean hasNext() return mLast; } + @Override public void remove() { if ( mLast == null ) @@ -1207,16 +1217,19 @@ public ValuesIterator( final Iterator> itr ) mItr = itr; } + @Override public boolean hasNext() { return mItr.hasNext(); } + @Override public V1 next() { return mItr.next().getValue(); } + @Override public void remove() { mItr.remove(); diff --git a/src/main/java/htsjdk/samtools/util/IntervalTreeMap.java b/src/main/java/htsjdk/samtools/util/IntervalTreeMap.java index ffeae9439..ebec2f484 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalTreeMap.java +++ b/src/main/java/htsjdk/samtools/util/IntervalTreeMap.java @@ -60,10 +60,12 @@ public IntervalTreeMap(final Map map) { } } + @Override public void clear() { mSequenceMap.clear(); } + @Override public boolean containsKey(final Object object) { if (!(object instanceof Interval)) { return false; @@ -79,6 +81,7 @@ public boolean containsKey(final Interval key) { return (tree.find(key.getStart(), key.getEnd()) != null); } + @Override public Set> entrySet() { return mEntrySet; } @@ -95,6 +98,7 @@ public int hashCode() { return mSequenceMap.hashCode(); } + @Override public T get(final Object object) { if (!(object instanceof Interval)) { return null; @@ -114,6 +118,7 @@ public T get(final Interval key) { return node.getValue(); } + @Override public boolean isEmpty() { for (final IntervalTree tree : mSequenceMap.values()) { if (tree.size() > 0) { @@ -123,6 +128,7 @@ public boolean isEmpty() { return true; } + @Override public T put(final Interval key, final T value) { IntervalTree tree = mSequenceMap.get(key.getContig()); if (tree == null) { @@ -132,6 
+138,7 @@ public T put(final Interval key, final T value) { return tree.put(key.getStart(), key.getEnd(), value); } + @Override public T remove(final Object object) { if (!(object instanceof Interval)) { return null; @@ -147,6 +154,7 @@ public T remove(final Interval key) { return tree.remove(key.getStart(), key.getEnd()); } + @Override public int size() { // Note: We should think about caching the size to avoid having to recompute it. int size = 0; @@ -157,16 +165,16 @@ public int size() { } /** * Test overlapping interval - * @param key the interval + * @param key the Locatable * @return true if it contains an object overlapping the interval */ - public boolean containsOverlapping(final Interval key) { + public boolean containsOverlapping(final Locatable key) { final IntervalTree tree = mSequenceMap.get(key.getContig()); return tree!=null && tree.overlappers(key.getStart(), key.getEnd()).hasNext(); } - public Collection getOverlapping(final Interval key) { + public Collection getOverlapping(final Locatable key) { final List result = new ArrayList(); final IntervalTree tree = mSequenceMap.get(key.getContig()); if (tree != null) { @@ -179,10 +187,10 @@ public boolean containsOverlapping(final Interval key) { } /** * Test if this contains an object that is contained by 'key' - * @param key the interval + * @param key the Locatable * @return true if it contains an object is contained by 'key' */ - public boolean containsContained(final Interval key) { + public boolean containsContained(final Locatable key) { final IntervalTree tree = mSequenceMap.get(key.getContig()); if(tree==null) return false; final Iterator> iterator = tree.overlappers(key.getStart(), key.getEnd()); @@ -196,7 +204,7 @@ public boolean containsContained(final Interval key) { } - public Collection getContained(final Interval key) { + public Collection getContained(final Locatable key) { final List result = new ArrayList(); final IntervalTree tree = mSequenceMap.get(key.getContig()); if (tree != 
null) { @@ -214,6 +222,7 @@ public boolean containsContained(final Interval key) { private class EntrySet extends AbstractSet> { + @Override public void clear() { IntervalTreeMap.this.clear(); } @@ -225,14 +234,17 @@ public boolean contains(final Map.Entry entry) { return entry.getValue().equals(IntervalTreeMap.this.get(entry.getKey())); } + @Override public boolean isEmpty() { return IntervalTreeMap.this.isEmpty(); } + @Override public Iterator> iterator() { return new EntryIterator(); } + @Override @SuppressWarnings("unchecked") public boolean remove(final Object object) { // Note: Could not figure out how to eliminate the unchecked cast. @@ -251,6 +263,7 @@ public boolean remove(final Map.Entry entry) { } } + @Override public int size() { return IntervalTreeMap.this.size(); } @@ -268,10 +281,12 @@ public int size() { advanceSequence(); } + @Override public boolean hasNext() { return (mTreeIterator != null && mTreeIterator.hasNext()); } + @Override public Map.Entry next() { if (!hasNext()) { throw new NoSuchElementException("Iterator exhausted"); @@ -286,6 +301,7 @@ public boolean hasNext() { return new MapEntry(key, value); } + @Override public void remove() { if (mTreeIterator == null) { throw new IllegalStateException("Iterator.next() has not been called"); @@ -315,14 +331,17 @@ private void advanceSequence() { mValue = value; } + @Override public Interval getKey() { return mKey; } + @Override public T getValue() { return mValue; } + @Override public T setValue(final T value) { mValue = value; return IntervalTreeMap.this.put(mKey, mValue); diff --git a/src/main/java/htsjdk/samtools/util/Iso8601Date.java b/src/main/java/htsjdk/samtools/util/Iso8601Date.java index 912886724..e173bd385 100644 --- a/src/main/java/htsjdk/samtools/util/Iso8601Date.java +++ b/src/main/java/htsjdk/samtools/util/Iso8601Date.java @@ -36,6 +36,7 @@ */ public class Iso8601Date extends Date { private static final ThreadLocal iso8601DateFormatter = new ThreadLocal() { + @Override protected 
synchronized DateFormat initialValue() { return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"); } diff --git a/src/main/java/htsjdk/samtools/util/Lazy.java b/src/main/java/htsjdk/samtools/util/Lazy.java index 13726b878..fca53a6a2 100644 --- a/src/main/java/htsjdk/samtools/util/Lazy.java +++ b/src/main/java/htsjdk/samtools/util/Lazy.java @@ -1,5 +1,7 @@ package htsjdk.samtools.util; +import java.util.function.Supplier; + /** * Simple utility for building an on-demand (lazy) object-initializer. * @@ -9,29 +11,36 @@ * @author mccowan */ public class Lazy { - private final LazyInitializer initializer; + private final Supplier initializer; private boolean isInitialized = false; private T instance; - /** Simple cons */ - public Lazy(final LazyInitializer initializer) { + public Lazy(final Supplier initializer) { this.initializer = initializer; } /** Returns the instance associated with this {@link Lazy}, initializing it if necessary. */ public synchronized T get() { if (!isInitialized) { - this.instance = initializer.make(); + this.instance = initializer.get(); isInitialized = true; } return instance; } - /** Describes how to build the instance of the lazy object. */ + /** Describes how to build the instance of the lazy object. + * @deprecated since 1/2017 use a {@link Supplier} instead + * */ @FunctionalInterface - public interface LazyInitializer { + @Deprecated + public interface LazyInitializer extends Supplier { /** Returns the desired object instance. 
*/ T make(); + + @Override + default T get(){ + return make(); + } } public boolean isInitialized() { diff --git a/src/main/java/htsjdk/samtools/util/LineReader.java b/src/main/java/htsjdk/samtools/util/LineReader.java index 018570083..4a07f15b8 100644 --- a/src/main/java/htsjdk/samtools/util/LineReader.java +++ b/src/main/java/htsjdk/samtools/util/LineReader.java @@ -47,5 +47,6 @@ */ int peek(); + @Override public void close(); } diff --git a/src/main/java/htsjdk/samtools/util/LocusComparator.java b/src/main/java/htsjdk/samtools/util/LocusComparator.java index e0f04d922..efbe09f26 100644 --- a/src/main/java/htsjdk/samtools/util/LocusComparator.java +++ b/src/main/java/htsjdk/samtools/util/LocusComparator.java @@ -34,6 +34,7 @@ public class LocusComparator implements Comparator, Serializable { private static final long serialVersionUID = 1L; + @Override public int compare(T thing1, T thing2) { int refCompare = thing1.getSequenceIndex() - thing2.getSequenceIndex(); return refCompare == 0 ? thing1.getPosition() - thing2.getPosition() : refCompare; diff --git a/src/main/java/htsjdk/samtools/util/LocusImpl.java b/src/main/java/htsjdk/samtools/util/LocusImpl.java index 862907854..5986a6c94 100644 --- a/src/main/java/htsjdk/samtools/util/LocusImpl.java +++ b/src/main/java/htsjdk/samtools/util/LocusImpl.java @@ -36,6 +36,7 @@ public LocusImpl(int sequenceIndex, int position) { this.sequenceIndex = sequenceIndex; } + @Override public int getSequenceIndex() { return sequenceIndex; } @@ -43,6 +44,7 @@ public int getSequenceIndex() { /** * @return 1-based position */ + @Override public int getPosition() { return position; } diff --git a/src/main/java/htsjdk/samtools/util/Log.java b/src/main/java/htsjdk/samtools/util/Log.java index acbd3c425..dfe758a39 100644 --- a/src/main/java/htsjdk/samtools/util/Log.java +++ b/src/main/java/htsjdk/samtools/util/Log.java @@ -41,13 +41,13 @@ */ public final class Log { /** Enumeration for setting log levels. 
*/ - public static enum LogLevel { ERROR, WARNING, INFO, DEBUG } + public enum LogLevel { ERROR, WARNING, INFO, DEBUG } private static LogLevel globalLogLevel = LogLevel.INFO; + private static PrintStream out = System.err; private final Class clazz; private final String className; - private final PrintStream out = System.err; /** * Private constructor @@ -67,10 +67,41 @@ public static Log getInstance(final Class clazz) { return new Log(clazz); } + /** + * Set the log level. + * + * @param logLevel The log level enumeration + */ public static void setGlobalLogLevel(final LogLevel logLevel) { globalLogLevel = logLevel; } + /** + * Get the log level. + * + * @return The enumeration for setting log levels. + */ + public static LogLevel getGlobalLogLevel() { + return globalLogLevel; + } + + /** + * Set the {@link PrintStream} for writing. + * + * @param stream {@link PrintStream} to write to. + */ + public static void setGlobalPrintStream(final PrintStream stream) { out = stream; } + + /** + * Get the {@link PrintStream} for writing. + * + * @return {@link PrintStream} to write to. + */ + public static PrintStream getGlobalPrintStream() { + return out; + } + + /** Returns true if the specified log level is enabled otherwise false. 
*/ public static final boolean isEnabled(final LogLevel level) { return level.ordinal() <= globalLogLevel.ordinal(); diff --git a/src/main/java/htsjdk/samtools/util/Md5CalculatingInputStream.java b/src/main/java/htsjdk/samtools/util/Md5CalculatingInputStream.java index e0e7cd520..47ea9ff3b 100755 --- a/src/main/java/htsjdk/samtools/util/Md5CalculatingInputStream.java +++ b/src/main/java/htsjdk/samtools/util/Md5CalculatingInputStream.java @@ -65,12 +65,14 @@ public Md5CalculatingInputStream(InputStream is, File digestFile) { } } + @Override public int read() throws IOException { int result = is.read(); if (result != -1) md5.update((byte)result); return result; } + @Override public int read(byte[] b) throws IOException { int result = is.read(b); if (result != -1) md5.update(b, 0, result); @@ -78,6 +80,7 @@ public int read(byte[] b) throws IOException { } + @Override public int read(byte[] b, int off, int len) throws IOException { int result = is.read(b, off, len); if (result != -1) md5.update(b, off, result); @@ -104,6 +107,7 @@ private String makeHash() { } } + @Override public void close() throws IOException { is.close(); makeHash(); @@ -116,18 +120,23 @@ public void close() throws IOException { } // Methods not supported or overridden because they would not result in a valid hash + @Override public boolean markSupported() { return false; } + @Override public void mark(int readlimit) { throw new UnsupportedOperationException("mark() is not supported by the MD5CalculatingInputStream"); } + @Override public void reset() throws IOException { throw new UnsupportedOperationException("reset() is not supported by the MD5CalculatingInputStream"); } + @Override public long skip(long n) throws IOException { throw new UnsupportedOperationException("skip() is not supported by the MD5CalculatingInputStream"); } // Methods delegated to the wrapped InputStream + @Override public int available() throws IOException { return is.available(); } } diff --git 
a/src/main/java/htsjdk/samtools/util/Md5CalculatingOutputStream.java b/src/main/java/htsjdk/samtools/util/Md5CalculatingOutputStream.java index 3c5a492c7..8b4c643a3 100755 --- a/src/main/java/htsjdk/samtools/util/Md5CalculatingOutputStream.java +++ b/src/main/java/htsjdk/samtools/util/Md5CalculatingOutputStream.java @@ -31,6 +31,8 @@ import java.io.IOException; import java.io.OutputStream; import java.math.BigInteger; +import java.nio.file.Files; +import java.nio.file.Path; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -43,14 +45,14 @@ private final OutputStream os; private final MessageDigest md5; - private final File digestFile; + private final Path digestFile; private String hash; /** * Constructor that takes in the OutputStream that we are wrapping * and creates the MD5 MessageDigest */ - public Md5CalculatingOutputStream(OutputStream os, File digestFile) { + public Md5CalculatingOutputStream(OutputStream os, Path digestFile) { super(); this.hash = null; this.os = os; @@ -65,17 +67,24 @@ public Md5CalculatingOutputStream(OutputStream os, File digestFile) { } } + public Md5CalculatingOutputStream(OutputStream os, File digestFile) { + this(os, digestFile == null ? 
(Path) null : digestFile.toPath()); + } + + @Override public void write(int b) throws IOException { md5.update((byte)b); os.write(b); } + @Override public void write(byte[] b) throws IOException { md5.update(b); os.write(b); } + @Override public void write(byte[] b, int off, int len) throws IOException { md5.update(b, off, len); os.write(b, off, len); @@ -102,18 +111,20 @@ private String makeHash() { } } + @Override public void close() throws IOException { os.close(); makeHash(); if(digestFile != null) { - BufferedWriter writer = new BufferedWriter(new FileWriter(digestFile)); + BufferedWriter writer = Files.newBufferedWriter(digestFile); writer.write(hash); writer.close(); } } // Pass-through method + @Override public void flush() throws IOException { os.flush(); } } diff --git a/src/main/java/htsjdk/samtools/util/PeekIterator.java b/src/main/java/htsjdk/samtools/util/PeekIterator.java index 9f16a5143..3a43ba54b 100644 --- a/src/main/java/htsjdk/samtools/util/PeekIterator.java +++ b/src/main/java/htsjdk/samtools/util/PeekIterator.java @@ -41,6 +41,7 @@ public PeekIterator(final Iterator underlyingIterator) { * @return true if the iteration has more elements. (In other words, returns true if next would return an element * rather than throwing an exception.) */ + @Override public boolean hasNext() { return peekedElement != null || underlyingIterator.hasNext(); } @@ -49,6 +50,7 @@ public boolean hasNext() { * @return the next element in the iteration. Calling this method repeatedly until the hasNext() method returns * false will return each element in the underlying collection exactly once. 
*/ + @Override public T next() { if (peekedElement != null) { final T ret = peekedElement; @@ -72,6 +74,7 @@ public T peek() { /** * Unsupported */ + @Override public void remove() { throw new UnsupportedOperationException(); } diff --git a/src/main/java/htsjdk/samtools/util/PeekableIterator.java b/src/main/java/htsjdk/samtools/util/PeekableIterator.java index 1587dd299..3df4c42ca 100644 --- a/src/main/java/htsjdk/samtools/util/PeekableIterator.java +++ b/src/main/java/htsjdk/samtools/util/PeekableIterator.java @@ -39,16 +39,19 @@ public PeekableIterator(Iterator iterator) { } /** Closes the underlying iterator. */ + @Override public void close() { CloserUtil.close(iterator); } /** True if there are more items, in which case both next() and peek() will return a value. */ + @Override public boolean hasNext() { return this.nextObject != null; } /** Returns the next object and advances the iterator. */ + @Override public Object next() { Object retval = this.nextObject; advance(); @@ -73,6 +76,7 @@ private void advance(){ } /** Unsupported Operation. 
*/ + @Override public void remove() { throw new UnsupportedOperationException("Not supported: remove"); } diff --git a/src/main/java/htsjdk/samtools/util/PositionalOutputStream.java b/src/main/java/htsjdk/samtools/util/PositionalOutputStream.java index ef28be610..a4643db42 100644 --- a/src/main/java/htsjdk/samtools/util/PositionalOutputStream.java +++ b/src/main/java/htsjdk/samtools/util/PositionalOutputStream.java @@ -41,20 +41,24 @@ public PositionalOutputStream(final OutputStream out) { this.out = out; } + @Override public final void write(final byte[] bytes) throws IOException { write(bytes, 0, bytes.length); } + @Override public final void write(final byte[] bytes, final int startIndex, final int numBytes) throws IOException { position += numBytes; out.write(bytes, startIndex, numBytes); } + @Override public final void write(final int c) throws IOException { position++; out.write(c); } + @Override public final long getPosition() { return position; } @Override diff --git a/src/main/java/htsjdk/samtools/util/QualityEncodingDetector.java b/src/main/java/htsjdk/samtools/util/QualityEncodingDetector.java index b0a965ca1..0147daa35 100644 --- a/src/main/java/htsjdk/samtools/util/QualityEncodingDetector.java +++ b/src/main/java/htsjdk/samtools/util/QualityEncodingDetector.java @@ -270,6 +270,7 @@ public boolean isDeterminationAmbiguous() { } } + @Override public boolean hasNext() { // If this returns true, the head of the queue will have a next element while (!queue.isEmpty()) { @@ -281,6 +282,7 @@ public boolean hasNext() { return false; } + @Override public FastqRecord next() { if (!hasNext()) throw new NoSuchElementException(); final Iterator i = queue.poll(); @@ -289,6 +291,7 @@ public FastqRecord next() { return result; } + @Override public void remove() { throw new UnsupportedOperationException(); } diff --git a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java b/src/main/java/htsjdk/samtools/util/SamLocusIterator.java index 7a60756d6..dc6745f5d 100644 
--- a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java +++ b/src/main/java/htsjdk/samtools/util/SamLocusIterator.java @@ -268,13 +268,13 @@ public void addInserted(final SAMRecord read, int firstPosition) { public List getInsertedInRecord() { return (insertedInRecord == null) ? Collections.emptyList() : Collections.unmodifiableList(insertedInRecord); } - - /** - * @return the number of records overlapping the position, with deletions included if they are being tracked. + + /** + * @return the number of records overlapping the position, with deletions included if they are being tracked. */ @Override - public int size() { - return super.size() + ((deletedInRecord == null) ? 0 : deletedInRecord.size()); + public int size() { + return super.size() + ((deletedInRecord == null) ? 0 : deletedInRecord.size()); } @@ -284,7 +284,7 @@ public int size() { */ @Override public boolean isEmpty() { - return getRecordAndPositions().isEmpty() && + return getRecordAndOffsets().isEmpty() && (deletedInRecord == null || deletedInRecord.isEmpty()) && (insertedInRecord == null || insertedInRecord.isEmpty()); } diff --git a/src/main/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java b/src/main/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java index 5d173a5a4..5dd7589d7 100644 --- a/src/main/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java +++ b/src/main/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java @@ -107,6 +107,7 @@ private StopAfterFilteringIterator(Iterator iterator, SamRecordFilter * * @return true if the iteration has more elements. Otherwise returns false. 
*/ + @Override public boolean hasNext() { return next != null; } @@ -117,6 +118,7 @@ public boolean hasNext() { * @return the next element in the iteration * @throws java.util.NoSuchElementException */ + @Override public SAMRecord next() { if (next == null) { throw new NoSuchElementException("Iterator has no more elements."); @@ -131,10 +133,12 @@ public SAMRecord next() { * * @throws UnsupportedOperationException */ + @Override public void remove() { throw new UnsupportedOperationException("Remove() not supported by FilteringSamIterator"); } + @Override public void close() { CloserUtil.close(iterator); } diff --git a/src/main/java/htsjdk/samtools/util/SequenceUtil.java b/src/main/java/htsjdk/samtools/util/SequenceUtil.java index 3108cee0d..8e399c17f 100644 --- a/src/main/java/htsjdk/samtools/util/SequenceUtil.java +++ b/src/main/java/htsjdk/samtools/util/SequenceUtil.java @@ -32,6 +32,7 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.SAMTag; +import htsjdk.samtools.fastq.FastqConstants; import java.io.File; import java.math.BigInteger; @@ -49,6 +50,25 @@ public static final byte[] VALID_BASES_UPPER = new byte[]{A, C, G, T}; public static final byte[] VALID_BASES_LOWER = new byte[]{a, c, g, t}; + private static final byte[] ACGTN_BASES = new byte[]{A, C, G, T, N}; + private static final String IUPAC_CODES_STRING = ".aAbBcCdDgGhHkKmMnNrRsStTvVwWyY"; + /** + * A set of bases supported by BAM in reads, see http://samtools.github.io/hts-specs/SAMv1.pdf chapter 4.2 on 'seq' field. + * Effectively these are upper cased IUPAC codes with equals sign ('=') and without dot ('.'). + */ + private static final byte[] BAM_READ_BASE_SET = "=ABCDGHKMNRSTVWY".getBytes(); + + /** + * A lookup table to find a corresponding BAM read base. 
+ */ + private static final byte[] bamReadBaseLookup = new byte[127]; + static { + Arrays.fill(bamReadBaseLookup, N); + for (final byte base: BAM_READ_BASE_SET) { + bamReadBaseLookup[base] = base; + bamReadBaseLookup[base + 32] = base; + } + } private static final byte A_MASK = 1; private static final byte C_MASK = 2; @@ -56,13 +76,13 @@ private static final byte T_MASK = 8; private static final byte[] bases = new byte[127]; - + private static final byte NON_IUPAC_CODE = 0; /* * Definition of IUPAC codes: * http://www.bioinformatics.org/sms2/iupac.html */ static { - Arrays.fill(bases, (byte) 0); + Arrays.fill(bases, NON_IUPAC_CODE); bases[A] = A_MASK; bases[C] = C_MASK; bases[G] = G_MASK; @@ -141,7 +161,24 @@ private static boolean isValidBase(final byte b, final byte[] validBases) { return false; } - /** Calculates the fraction of bases that are G/C in the sequence. */ + /** + * Check if the given base is one of upper case ACGTN */ + public static boolean isUpperACGTN(final byte base) { + return isValidBase(base, ACGTN_BASES); + } + + + /** Returns all IUPAC codes as a string */ + public static String getIUPACCodesString() { + return IUPAC_CODES_STRING; + } + + /** Checks if the given base is a IUPAC code */ + public static boolean isIUPAC(final byte base) { + return bases[base] != NON_IUPAC_CODE; + } + + /** Calculates the fraction of bases that are G/C in the sequence */ public static double calculateGc(final byte[] bases) { int gcs = 0; for (int i = 0; i < bases.length; ++i) { @@ -152,6 +189,18 @@ public static double calculateGc(final byte[] bases) { return gcs / (double) bases.length; } + /** Check if the given base belongs to BAM read base set '=ABCDGHKMNRSTVWY' */ + public static boolean isBamReadBase(final byte base) { + return isValidBase(base, BAM_READ_BASE_SET); + } + + /** Update and return the given array of bases by upper casing and then replacing all non-BAM read bases with N */ + public static byte[] toBamReadBasesInPlace(final byte[] bases) { + 
for (int i = 0; i < bases.length; i++) + bases[i] = bamReadBaseLookup[bases[i]]; + return bases; + } + /** * default signature that forces the lists to be the same size * @@ -620,32 +669,7 @@ public static byte complement(final byte b) { } } - /** Reverses and complements the bases in place. */ - public static void reverseComplement(final byte[] bases) { - final int lastIndex = bases.length - 1; - - int i, j; - for (i = 0, j = lastIndex; i < j; ++i, --j) { - final byte tmp = complement(bases[i]); - bases[i] = complement(bases[j]); - bases[j] = tmp; - } - if (bases.length % 2 == 1) { - bases[i] = complement(bases[i]); - } - } - - /** Reverses the quals in place. */ - public static void reverseQualities(final byte[] quals) { - final int lastIndex = quals.length - 1; - int i, j; - for (i = 0, j = lastIndex; i < j; ++i, --j) { - final byte tmp = quals[i]; - quals[i] = quals[j]; - quals[j] = tmp; - } - } /** * Returns true if the bases are equal OR if the mismatch can be accounted for by @@ -836,6 +860,16 @@ else if (cigElOp.consumesReferenceBases()) { return ret; } + /** Reverses and complements the bases in place. */ + public static void reverseComplement(final byte[] bases) { + reverseComplement(bases, 0, bases.length); + } + + /** Reverses the quals in place. 
*/ + public static void reverseQualities(final byte[] quals) { + reverse(quals, 0, quals.length); + } + public static void reverse(final byte[] array, final int offset, final int len) { final int lastIndex = len - 1; @@ -1020,7 +1054,7 @@ public static String getSamReadNameFromFastqHeader(final String fastqHeader) { // NOTE: the while loop isn't necessarily the most efficient way to handle this but we don't // expect this to ever happen more than once, just trapping pathological cases - while ((readName.endsWith("/1") || readName.endsWith("/2"))) { + while ((readName.endsWith(FastqConstants.FIRST_OF_PAIR) || readName.endsWith(FastqConstants.SECOND_OF_PAIR))) { // If this is an unpaired run we want to make sure that "/1" isn't tacked on the end of the read name, // as this can cause problems down the road (ex. in Picard's MergeBamAlignment). readName = readName.substring(0, readName.length() - 2); diff --git a/src/main/java/htsjdk/samtools/util/SortingCollection.java b/src/main/java/htsjdk/samtools/util/SortingCollection.java index 6babd4e35..69ce2556b 100644 --- a/src/main/java/htsjdk/samtools/util/SortingCollection.java +++ b/src/main/java/htsjdk/samtools/util/SortingCollection.java @@ -259,6 +259,7 @@ private File newTempFile() throws IOException { * Prepare to iterate through the records in order. This method may be called more than once, * but add() may not be called after this method has been called. 
*/ + @Override public CloseableIterator iterator() { if (this.cleanedUp) { throw new IllegalStateException("Cannot call iterator() after cleanup() was called."); @@ -354,14 +355,17 @@ public void cleanup() { SortingCollection.this.comparator); } + @Override public void close() { // nothing to do } + @Override public boolean hasNext() { return this.iterationIndex < SortingCollection.this.numRecordsInRam; } + @Override public T next() { if (!hasNext()) { throw new NoSuchElementException(); @@ -372,6 +376,7 @@ public T next() { return ret; } + @Override public void remove() { throw new UnsupportedOperationException(); } @@ -409,10 +414,12 @@ public void remove() { } } + @Override public boolean hasNext() { return !this.queue.isEmpty(); } + @Override public T next() { if (!hasNext()) { throw new NoSuchElementException(); @@ -430,10 +437,12 @@ public T next() { return ret; } + @Override public void remove() { throw new UnsupportedOperationException(); } + @Override public void close() { while (!this.queue.isEmpty()) { final PeekFileRecordIterator it = this.queue.pollFirst(); @@ -464,10 +473,12 @@ public void close() { } } + @Override public boolean hasNext() { return this.currentRecord != null; } + @Override public T next() { if (!hasNext()) { throw new NoSuchElementException(); @@ -477,6 +488,7 @@ public T next() { return ret; } + @Override public void remove() { throw new UnsupportedOperationException(); } @@ -485,6 +497,7 @@ private void advance() { this.currentRecord = this.codec.decode(); } + @Override public void close() { CloserUtil.close(this.is); } @@ -505,6 +518,7 @@ public void close() { class PeekFileRecordIteratorComparator implements Comparator, Serializable { private static final long serialVersionUID = 1L; + @Override public int compare(final PeekFileRecordIterator lhs, final PeekFileRecordIterator rhs) { final int result = comparator.compare(lhs.peek(), rhs.peek()); if (result == 0) return lhs.n - rhs.n; diff --git 
a/src/main/java/htsjdk/samtools/util/SortingLongCollection.java b/src/main/java/htsjdk/samtools/util/SortingLongCollection.java index 4cf0c367f..e75c3362e 100644 --- a/src/main/java/htsjdk/samtools/util/SortingLongCollection.java +++ b/src/main/java/htsjdk/samtools/util/SortingLongCollection.java @@ -336,6 +336,7 @@ void close() { private static class PeekFileValueIteratorComparator implements Comparator, Serializable { private static final long serialVersionUID = 1L; + @Override public int compare(final PeekFileValueIterator it1, final PeekFileValueIterator it2) { if (it1.peek() < it2.peek()) { return -1; diff --git a/src/main/java/htsjdk/samtools/util/StringLineReader.java b/src/main/java/htsjdk/samtools/util/StringLineReader.java index ed383a2f2..cca3d9531 100644 --- a/src/main/java/htsjdk/samtools/util/StringLineReader.java +++ b/src/main/java/htsjdk/samtools/util/StringLineReader.java @@ -46,6 +46,7 @@ public StringLineReader(final String s) { /** * Read a line and remove the line terminator */ + @Override public String readLine() { return readLine(false); } @@ -77,6 +78,7 @@ private String readLine(final boolean includeTerminators) { /** * @return 1-based number of line most recently read */ + @Override public int getLineNumber() { return lineNumber; } @@ -86,6 +88,7 @@ public int getLineNumber() { * * @return If not eof, the next character that would be read. If eof, -1. 
*/ + @Override public int peek() { if (curPos == theString.length()) { return -1; @@ -93,6 +96,7 @@ public int peek() { return theString.charAt(curPos); } + @Override public void close() { curPos = theString.length(); } diff --git a/src/main/java/htsjdk/samtools/util/StringUtil.java b/src/main/java/htsjdk/samtools/util/StringUtil.java index 90492533e..a885ba2db 100644 --- a/src/main/java/htsjdk/samtools/util/StringUtil.java +++ b/src/main/java/htsjdk/samtools/util/StringUtil.java @@ -312,6 +312,9 @@ public static String bytesToString(final byte[] buffer, final int offset, final } return byteBuffer; */ + if (s == null) { + return null; + } final byte[] byteBuffer = new byte[s.length()]; s.getBytes(0, byteBuffer.length, byteBuffer, 0); return byteBuffer; @@ -319,6 +322,9 @@ public static String bytesToString(final byte[] buffer, final int offset, final @SuppressWarnings("deprecation") public static byte[] stringToBytes(final String s, final int offset, final int length) { + if (s == null) { + return null; + } final byte[] byteBuffer = new byte[length]; s.getBytes(offset, offset + length, byteBuffer, 0); return byteBuffer; diff --git a/src/main/java/htsjdk/samtools/util/WholeGenomeReferenceSequenceMask.java b/src/main/java/htsjdk/samtools/util/WholeGenomeReferenceSequenceMask.java index 1263285a8..b9ef975a8 100644 --- a/src/main/java/htsjdk/samtools/util/WholeGenomeReferenceSequenceMask.java +++ b/src/main/java/htsjdk/samtools/util/WholeGenomeReferenceSequenceMask.java @@ -41,6 +41,7 @@ public WholeGenomeReferenceSequenceMask(final SAMFileHeader header) { /** * @return true if the mask is set for the given sequence and position */ + @Override public boolean get(final int sequenceIndex, final int position) { if (sequenceIndex < 0) { throw new IllegalArgumentException("Negative sequence index " + sequenceIndex); @@ -55,6 +56,7 @@ public boolean get(final int sequenceIndex, final int position) { /** * @return the next pos on the given sequence >= position that is set, or 
-1 if there are no more set positions */ + @Override public int nextPosition(final int sequenceIndex, final int position) { if (get(sequenceIndex, position + 1)) { return position + 1; @@ -66,6 +68,7 @@ public int nextPosition(final int sequenceIndex, final int position) { /** * @return Largest sequence index for which there are set bits. */ + @Override public int getMaxSequenceIndex() { return header.getSequenceDictionary().size() - 1; } @@ -73,6 +76,7 @@ public int getMaxSequenceIndex() { /** * @return the largest position on the last sequence index */ + @Override public int getMaxPosition() { SAMSequenceRecord lastSequenceRecord = header.getSequence(getMaxSequenceIndex()); return lastSequenceRecord.getSequenceLength(); diff --git a/src/main/java/htsjdk/samtools/util/zip/DeflaterFactory.java b/src/main/java/htsjdk/samtools/util/zip/DeflaterFactory.java index c15e7c663..fb3ac9a18 100644 --- a/src/main/java/htsjdk/samtools/util/zip/DeflaterFactory.java +++ b/src/main/java/htsjdk/samtools/util/zip/DeflaterFactory.java @@ -40,9 +40,9 @@ public DeflaterFactory() { * Returns a deflater object that will be used when writing BAM files. * Subclasses may override to provide their own deflater implementation. 
* @param compressionLevel the compression level (0-9) - * @param nowrap if true then use GZIP compatible compression + * @param gzipCompatible if true then use GZIP compatible compression */ - public Deflater makeDeflater(final int compressionLevel, final boolean nowrap) { - return new Deflater(compressionLevel, nowrap); + public Deflater makeDeflater(final int compressionLevel, final boolean gzipCompatible) { + return new Deflater(compressionLevel, gzipCompatible); } } diff --git a/src/main/java/htsjdk/samtools/util/zip/InflaterFactory.java b/src/main/java/htsjdk/samtools/util/zip/InflaterFactory.java new file mode 100644 index 000000000..c03dc9a0f --- /dev/null +++ b/src/main/java/htsjdk/samtools/util/zip/InflaterFactory.java @@ -0,0 +1,49 @@ +/* + * The MIT License + * + * Copyright (c) 2013 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package htsjdk.samtools.util.zip; + +import htsjdk.samtools.util.BlockGunzipper; +import java.util.zip.Inflater; + +/** + * Factory for {@link Inflater} objects used by {@link BlockGunzipper}. + * This class may be extended to provide alternative inflaters (e.g., for improved performance). + * The default implementation returns a JDK {@link Inflater} + */ +public class InflaterFactory { + + public InflaterFactory() { + //Note: made explicit constructor to make searching for references easier + } + + /** + * Returns an inflater object that will be used when reading DEFLATE compressed files. + * Subclasses may override to provide their own inflater implementation. + * The default implementation returns a JDK {@link Inflater} + * @param gzipCompatible if true then use GZIP compatible compression + */ + public Inflater makeInflater(final boolean gzipCompatible) { + return new Inflater(gzipCompatible); + } +} diff --git a/src/main/java/htsjdk/tribble/AbstractFeatureReader.java b/src/main/java/htsjdk/tribble/AbstractFeatureReader.java index 80d9a6c62..d65783fee 100644 --- a/src/main/java/htsjdk/tribble/AbstractFeatureReader.java +++ b/src/main/java/htsjdk/tribble/AbstractFeatureReader.java @@ -25,11 +25,13 @@ import java.io.File; import java.io.IOException; import java.net.URI; +import java.nio.channels.SeekableByteChannel; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.Set; +import java.util.function.Function; /** * jrobinso @@ -43,6 +45,11 @@ // the path to underlying data source String path; + // a wrapper to apply to the raw stream of the Feature file to allow features like prefetching and caching to be injected + final Function wrapper; + // a wrapper to apply to the raw stream of the index file + final Function indexWrapper; + // the query source, codec, and header // protected final QuerySource querySource; protected final FeatureCodec codec; @@ -60,38 +67,51 @@ } /** - * {@link 
#getFeatureReader(String, String, FeatureCodec, boolean)} with {@code null} for indexResource + * {@link #getFeatureReader(String, String, FeatureCodec, boolean, Function, Function)} with {@code null} for indexResource, wrapper, and indexWrapper * @throws TribbleException */ public static AbstractFeatureReader getFeatureReader(final String featureResource, final FeatureCodec codec, final boolean requireIndex) throws TribbleException { - return getFeatureReader(featureResource, null, codec, requireIndex); + return getFeatureReader(featureResource, null, codec, requireIndex, null, null); + } + + + /** + * {@link #getFeatureReader(String, String, FeatureCodec, boolean, Function, Function)} with {@code null} for wrapper, and indexWrapper + * @throws TribbleException + */ + public static AbstractFeatureReader getFeatureReader(final String featureResource, String indexResource, final FeatureCodec codec, final boolean requireIndex) throws TribbleException { + return getFeatureReader(featureResource, indexResource, codec, requireIndex, null, null); } /** * * @param featureResource the feature file to create from * @param indexResource the index for the feature file. 
If null, will auto-generate (if necessary) - * @param codec + * @param codec the codec to use to decode the individual features * @param requireIndex whether an index is required for this file - * @return + * @param wrapper a wrapper to apply to the byte stream from the featureResource allowing injecting features + * like caching and prefetching of the stream, may be null, will only be applied if featureResource + * is a uri representing a {@link java.nio.file.Path} + * @param indexWrapper a wrapper to apply to the byte stream from the indexResource, may be null, will only be + * applied if indexResource is a uri representing a {@link java.nio.file.Path} + * * @throws TribbleException */ - public static AbstractFeatureReader getFeatureReader(final String featureResource, String indexResource, final FeatureCodec codec, final boolean requireIndex) throws TribbleException { - + public static AbstractFeatureReader getFeatureReader(final String featureResource, String indexResource, final FeatureCodec codec, final boolean requireIndex, Function wrapper, Function indexWrapper) throws TribbleException { try { // Test for tabix index if (methods.isTabix(featureResource, indexResource)) { if ( ! 
(codec instanceof AsciiFeatureCodec) ) throw new TribbleException("Tabix indexed files only work with ASCII codecs, but received non-Ascii codec " + codec.getClass().getSimpleName()); - return new TabixFeatureReader(featureResource, indexResource, (AsciiFeatureCodec) codec); + return new TabixFeatureReader<>(featureResource, indexResource, (AsciiFeatureCodec) codec, wrapper, indexWrapper); } // Not tabix => tribble index file (might be gzipped, but not block gzipped) else { - return new TribbleIndexedFeatureReader(featureResource, indexResource, codec, requireIndex); + return new TribbleIndexedFeatureReader<>(featureResource, indexResource, codec, requireIndex, wrapper, indexWrapper); } - } catch (IOException e) { + } catch (final IOException e) { throw new TribbleException.MalformedFeatureFile("Unable to create BasicFeatureReader using feature file ", featureResource, e); - } catch (TribbleException e) { + } catch (final TribbleException e) { e.setSource(featureResource); throw e; } @@ -108,16 +128,24 @@ */ public static AbstractFeatureReader getFeatureReader(final String featureResource, final FeatureCodec codec, final Index index) throws TribbleException { try { - return new TribbleIndexedFeatureReader(featureResource, codec, index); - } catch (IOException e) { + return new TribbleIndexedFeatureReader<>(featureResource, codec, index); + } catch (final IOException e) { throw new TribbleException.MalformedFeatureFile("Unable to create AbstractFeatureReader using feature file ", featureResource, e); } } protected AbstractFeatureReader(final String path, final FeatureCodec codec) { + this(path, codec, null, null); + } + + protected AbstractFeatureReader(final String path, final FeatureCodec codec, + final Function wrapper, + final Function indexWrapper) { this.path = path; this.codec = codec; + this.wrapper = wrapper; + this.indexWrapper = indexWrapper; } /** @@ -169,25 +197,30 @@ public static boolean hasBlockCompressedExtension (final URI uri) { * * @return the 
header object we've read-in */ + @Override public Object getHeader() { return header.getHeaderValue(); } static class EmptyIterator implements CloseableTribbleIterator { - public Iterator iterator() { return this; } - public boolean hasNext() { return false; } - public T next() { return null; } - public void remove() { } + @Override public Iterator iterator() { return this; } + @Override public boolean hasNext() { return false; } + @Override public T next() { return null; } + @Override public void remove() { } @Override public void close() { } } + public static boolean isTabix(String resourcePath, String indexPath) throws IOException { + if(indexPath == null){ + indexPath = ParsingUtils.appendToPath(resourcePath, TabixUtils.STANDARD_INDEX_EXTENSION); + } + return hasBlockCompressedExtension(resourcePath) && ParsingUtils.resourceExists(indexPath); + } + public static class ComponentMethods{ public boolean isTabix(String resourcePath, String indexPath) throws IOException{ - if(indexPath == null){ - indexPath = ParsingUtils.appendToPath(resourcePath, TabixUtils.STANDARD_INDEX_EXTENSION); - } - return hasBlockCompressedExtension(resourcePath) && ParsingUtils.resourceExists(indexPath); + return AbstractFeatureReader.isTabix(resourcePath, indexPath); } } } diff --git a/src/main/java/htsjdk/tribble/Feature.java b/src/main/java/htsjdk/tribble/Feature.java index 941790f34..9ed852b14 100644 --- a/src/main/java/htsjdk/tribble/Feature.java +++ b/src/main/java/htsjdk/tribble/Feature.java @@ -27,13 +27,14 @@ import htsjdk.samtools.util.Locatable; /** - * Represents a locus on a reference sequence. All Features are expected to return 1-based closed-ended intervals. + * Marker interface for Locatables with Tribble support. A Feature represents a record in a tribble-supported file format. + * As {@link Locatable}, represents a locus on a reference sequence and is expected to return 1-based closed-ended intervals. 
*/ public interface Feature extends Locatable { /** * Return the features reference sequence name, e.g chromosome or contig - * @deprecated use getContig() instead + * @deprecated on 03/2015. Use getContig() instead. */ @Deprecated default public String getChr() { diff --git a/src/main/java/htsjdk/tribble/FeatureCodec.java b/src/main/java/htsjdk/tribble/FeatureCodec.java index f14191a67..7fe6a2a16 100644 --- a/src/main/java/htsjdk/tribble/FeatureCodec.java +++ b/src/main/java/htsjdk/tribble/FeatureCodec.java @@ -125,7 +125,7 @@ * Define the tabix format for the feature, used for indexing. Default implementation throws an exception. * * Note that only {@link AsciiFeatureCodec} could read tabix files as defined in - * {@link AbstractFeatureReader#getFeatureReader(String, String, FeatureCodec, boolean)} + * {@link AbstractFeatureReader#getFeatureReader(String, String, FeatureCodec, boolean, java.util.function.Function, java.util.function.Function)} * * @return the format to use with tabix * @throws TribbleException if the format is not defined diff --git a/src/main/java/htsjdk/tribble/FeatureReader.java b/src/main/java/htsjdk/tribble/FeatureReader.java index 3471393b8..c7773a27e 100644 --- a/src/main/java/htsjdk/tribble/FeatureReader.java +++ b/src/main/java/htsjdk/tribble/FeatureReader.java @@ -32,6 +32,7 @@ public CloseableTribbleIterator iterator() throws IOException; + @Override public void close() throws IOException; public List getSequenceNames(); diff --git a/src/main/java/htsjdk/tribble/SimpleFeature.java b/src/main/java/htsjdk/tribble/SimpleFeature.java index ddc62fa10..0365dc594 100644 --- a/src/main/java/htsjdk/tribble/SimpleFeature.java +++ b/src/main/java/htsjdk/tribble/SimpleFeature.java @@ -39,14 +39,17 @@ public SimpleFeature(final String contig, final int start, final int end) { this.end = end; } + @Override public String getContig() { return contig; } + @Override public int getStart() { return start; } + @Override public int getEnd() { return end; } 
diff --git a/src/main/java/htsjdk/tribble/TabixFeatureReader.java b/src/main/java/htsjdk/tribble/TabixFeatureReader.java index 5d90295de..e72243325 100644 --- a/src/main/java/htsjdk/tribble/TabixFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TabixFeatureReader.java @@ -30,9 +30,11 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.channels.SeekableByteChannel; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.function.Function; /** * @author Jim Robinson @@ -50,10 +52,7 @@ * @throws IOException */ public TabixFeatureReader(final String featureFile, final AsciiFeatureCodec codec) throws IOException { - super(featureFile, codec); - tabixReader = new TabixReader(featureFile); - sequenceNames = new ArrayList(tabixReader.getChromosomes()); - readHeader(); + this(featureFile, null, codec, null, null); } /** @@ -64,9 +63,25 @@ public TabixFeatureReader(final String featureFile, final AsciiFeatureCodec code * @throws IOException */ public TabixFeatureReader(final String featureFile, final String indexFile, final AsciiFeatureCodec codec) throws IOException { - super(featureFile, codec); - tabixReader = new TabixReader(featureFile, indexFile); - sequenceNames = new ArrayList(tabixReader.getChromosomes()); + this(featureFile, indexFile, codec, null, null); + } + + /** + * + * @param featureFile path to a feature file. Can be a local file, http url, or ftp url + * @param indexFile path to the index file. 
+ * @param wrapper a wrapper to apply to the byte stream from the featureResource allowing injecting features + * like caching and prefetching of the stream, may be null, will only be applied if featureFile + * is a uri representing a {@link java.nio.file.Path} + * @param indexWrapper a wrapper to apply to the byte stream from the indexResource, may be null, will only be + * applied if indexFile is a uri representing a {@link java.nio.file.Path} + */ + public TabixFeatureReader(final String featureFile, final String indexFile, final AsciiFeatureCodec codec, + final Function wrapper, + final Function indexWrapper) throws IOException { + super(featureFile, codec, wrapper, indexWrapper); + tabixReader = new TabixReader(featureFile, indexFile, wrapper, indexWrapper); + sequenceNames = new ArrayList<>(tabixReader.getChromosomes()); readHeader(); } @@ -80,7 +95,7 @@ public TabixFeatureReader(final String featureFile, final String indexFile, fina private void readHeader() throws IOException { SOURCE source = null; try { - source = codec.makeSourceFromStream(new PositionalBufferedStream(new BlockCompressedInputStream(ParsingUtils.openInputStream(path)))); + source = codec.makeSourceFromStream(new PositionalBufferedStream(new BlockCompressedInputStream(ParsingUtils.openInputStream(path, wrapper)))); header = codec.readHeader(source); } catch (Exception e) { throw new TribbleException.MalformedFeatureFile("Unable to parse header with error: " + e.getMessage(), path, e); @@ -97,6 +112,7 @@ public boolean hasIndex(){ } + @Override public List getSequenceNames() { return sequenceNames; } @@ -110,6 +126,7 @@ public boolean hasIndex(){ * @return * @throws IOException */ + @Override public CloseableTribbleIterator query(final String chr, final int start, final int end) throws IOException { final List mp = getSequenceNames(); if (mp == null) throw new TribbleException.TabixReaderFailure("Unable to find sequence named " + chr + @@ -121,13 +138,15 @@ public boolean hasIndex(){ return 
new FeatureIterator(lineReader, start - 1, end); } + @Override public CloseableTribbleIterator iterator() throws IOException { - final InputStream is = new BlockCompressedInputStream(ParsingUtils.openInputStream(path)); + final InputStream is = new BlockCompressedInputStream(ParsingUtils.openInputStream(path, wrapper)); final PositionalBufferedStream stream = new PositionalBufferedStream(is); final LineReader reader = new SynchronousLineReader(stream); return new FeatureIterator(reader, 0, Integer.MAX_VALUE); } + @Override public void close() throws IOException { tabixReader.close(); } @@ -184,10 +203,12 @@ protected void readNextRecord() throws IOException { } + @Override public boolean hasNext() { return currentRecord != null; } + @Override public T next() { T ret = currentRecord; try { @@ -200,14 +221,17 @@ public T next() { } + @Override public void remove() { throw new UnsupportedOperationException("Remove is not supported in Iterators"); } + @Override public void close() { lineReader.close(); } + @Override public Iterator iterator() { return this; } diff --git a/src/main/java/htsjdk/tribble/Tribble.java b/src/main/java/htsjdk/tribble/Tribble.java index 468f55d77..f2c07a248 100644 --- a/src/main/java/htsjdk/tribble/Tribble.java +++ b/src/main/java/htsjdk/tribble/Tribble.java @@ -27,6 +27,7 @@ import htsjdk.tribble.util.TabixUtils; import java.io.File; +import java.nio.file.Path; /** * Common, tribble wide constants and static functions @@ -37,9 +38,9 @@ private Tribble() { } // can't be instantiated public final static String STANDARD_INDEX_EXTENSION = ".idx"; /** - * Return the name of the index file for the provided vcf {@code filename} + * Return the name of the index file for the provided {@code filename} * Does not actually create an index - * @param filename name of the vcf file + * @param filename name of the file * @return non-null String representing the index filename */ public static String indexFile(final String filename) { @@ -47,9 +48,9 @@ public 
static String indexFile(final String filename) { } /** - * Return the File of the index file for the provided vcf {@code file} + * Return the File of the index file for the provided {@code file} * Does not actually create an index - * @param file the vcf file + * @param file the file * @return a non-null File representing the index */ public static File indexFile(final File file) { @@ -57,9 +58,19 @@ public static File indexFile(final File file) { } /** - * Return the name of the tabix index file for the provided vcf {@code filename} + * Return the name of the index file for the provided {@code path} * Does not actually create an index - * @param filename name of the vcf file + * @param path the path + * @return Path representing the index filename + */ + public static Path indexPath(final Path path) { + return path.getFileSystem().getPath(indexFile(path.toAbsolutePath().toString())); + } + + /** + * Return the name of the tabix index file for the provided {@code filename} + * Does not actually create an index + * @param filename name of the file * @return non-null String representing the index filename */ public static String tabixIndexFile(final String filename) { @@ -67,9 +78,9 @@ public static String tabixIndexFile(final String filename) { } /** - * Return the File of the tabix index file for the provided vcf {@code file} + * Return the File of the tabix index file for the provided {@code file} * Does not actually create an index - * @param file the vcf file + * @param file the file * @return a non-null File representing the index */ public static File tabixIndexFile(final File file) { @@ -77,9 +88,19 @@ public static File tabixIndexFile(final File file) { } /** - * Return the name of the index file for the provided vcf {@code filename} and {@code extension} + * Return the name of the tabix index file for the provided {@code path} + * Does not actually create an index + * @param path the path + * @return Path representing the index filename + */ + public static 
Path tabixIndexPath(final Path path) { + return path.getFileSystem().getPath(tabixIndexFile(path.toAbsolutePath().toString())); + } + + /** + * Return the name of the index file for the provided {@code filename} and {@code extension} * Does not actually create an index - * @param filename name of the vcf file + * @param filename name of the file * @param extension the extension to use for the index * @return non-null String representing the index filename */ @@ -88,9 +109,9 @@ private static String indexFile(final String filename, final String extension) { } /** - * Return the File of the index file for the provided vcf {@code file} and {@code extension} + * Return the File of the index file for the provided {@code file} and {@code extension} * Does not actually create an index - * @param file the vcf file + * @param file the file * @param extension the extension to use for the index * @return a non-null File representing the index */ diff --git a/src/main/java/htsjdk/tribble/TribbleException.java b/src/main/java/htsjdk/tribble/TribbleException.java index 86202ebfb..18f1f81f8 100644 --- a/src/main/java/htsjdk/tribble/TribbleException.java +++ b/src/main/java/htsjdk/tribble/TribbleException.java @@ -54,6 +54,7 @@ public void setSource(String source) { * override the default message with ours, which attaches the source file in question * @return a string with our internal error, along with the causitive source file (or other input source) */ + @Override public String getMessage() { String ret = super.getMessage(); if ( source != null ) diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index 514782d1e..7c39faa04 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -33,15 +33,15 @@ import htsjdk.tribble.util.ParsingUtils; import java.io.BufferedInputStream; -import java.io.File; import 
java.io.IOException; import java.io.InputStream; import java.net.URI; -import java.net.URISyntaxException; import java.net.URLEncoder; +import java.nio.channels.SeekableByteChannel; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.function.Function; import java.util.zip.GZIPInputStream; /** @@ -80,8 +80,13 @@ * @throws IOException */ public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec codec, final boolean requireIndex) throws IOException { + this(featurePath, codec, requireIndex, null, null); + } - super(featurePath, codec); + public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec codec, final boolean requireIndex, + Function wrapper, + Function indexWrapper) throws IOException { + super(featurePath, codec, wrapper, indexWrapper); if (requireIndex) { this.loadIndex(); @@ -104,9 +109,23 @@ public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec< * @throws IOException */ public TribbleIndexedFeatureReader(final String featureFile, final String indexFile, final FeatureCodec codec, final boolean requireIndex) throws IOException { - this(featureFile, codec, false); // required to read the header + this(featureFile, indexFile, codec, requireIndex, null, null); + } + + /** + * @param featureFile - path to the feature file, can be a local file path, http url, or ftp url, or any other + * uri supported by a {@link java.nio.file.Path} plugin + * @param indexFile - path to the index file + * @param codec - codec to decode the features + * @param requireIndex - true if the reader will be queries for specific ranges. 
An index (idx) file must exist + * @throws IOException + */ + public TribbleIndexedFeatureReader(final String featureFile, final String indexFile, final FeatureCodec codec, final boolean requireIndex, + Function wrapper, + Function indexWrapper) throws IOException { + this(featureFile, codec, false, wrapper, indexWrapper); // required to read the header if (indexFile != null && ParsingUtils.resourceExists(indexFile)) { - index = IndexFactory.loadIndex(indexFile); + index = IndexFactory.loadIndex(indexFile, indexWrapper); this.needCheckForIndex = false; } else { if (requireIndex) { @@ -118,6 +137,8 @@ public TribbleIndexedFeatureReader(final String featureFile, final String indexF } } + + /** * @param featureFile - path to the feature file, can be a local file path, http url, or ftp url * @param codec - codec to decode the features @@ -139,12 +160,12 @@ public TribbleIndexedFeatureReader(final String featureFile, final FeatureCodec< private void loadIndex() throws IOException{ String indexFile = Tribble.indexFile(this.path); if (ParsingUtils.resourceExists(indexFile)) { - index = IndexFactory.loadIndex(indexFile); + index = IndexFactory.loadIndex(indexFile, indexWrapper); } else { // See if the index itself is gzipped indexFile = ParsingUtils.appendToPath(indexFile, ".gz"); if (ParsingUtils.resourceExists(indexFile)) { - index = IndexFactory.loadIndex(indexFile); + index = IndexFactory.loadIndex(indexFile, indexWrapper); } } this.needCheckForIndex = false; @@ -164,11 +185,11 @@ private SeekableStream getSeekableStream() throws IOException { final SeekableStream result; if (reuseStreamInQuery()) { // if the stream points to an underlying file, only create the underlying seekable stream once - if (seekableStream == null) seekableStream = SeekableStreamFactory.getInstance().getStreamFor(path); + if (seekableStream == null) seekableStream = SeekableStreamFactory.getInstance().getStreamFor(path, wrapper); result = seekableStream; } else { // we are not reusing the 
stream, so make a fresh copy each time we request it - result = SeekableStreamFactory.getInstance().getStreamFor(path); + result = SeekableStreamFactory.getInstance().getStreamFor(path, wrapper); } return result; @@ -183,6 +204,7 @@ private boolean reuseStreamInQuery() { return pathIsRegularFile; } + @Override public void close() throws IOException { // close the seekable stream if that's necessary if (seekableStream != null) seekableStream.close(); @@ -193,6 +215,7 @@ public void close() throws IOException { * * @return list of strings of the contig names */ + @Override public List getSequenceNames() { return !this.hasIndex() ? new ArrayList() : new ArrayList(index.getSequenceNames()); } @@ -218,7 +241,7 @@ private void readHeader() throws IOException { InputStream is = null; PositionalBufferedStream pbs = null; try { - is = ParsingUtils.openInputStream(path); + is = ParsingUtils.openInputStream(path, wrapper); if (hasBlockCompressedExtension(new URI(URLEncoder.encode(path, "UTF-8")))) { // TODO -- warning I don't think this can work, the buffered input stream screws up position is = new GZIPInputStream(new BufferedInputStream(is)); @@ -252,6 +275,7 @@ private void readHeader() throws IOException { * @return an iterator of records in this interval * @throws IOException */ + @Override public CloseableTribbleIterator query(final String chr, final int start, final int end) throws IOException { if (!this.hasIndex()) { @@ -271,36 +295,12 @@ private void readHeader() throws IOException { * @return Return an iterator to iterate over the entire file * @throws IOException */ + @Override public CloseableTribbleIterator iterator() throws IOException { return new WFIterator(); } /** - * @deprecated use {@link #hasBlockCompressedExtension(String)} instead - */ - //Visible for testing - @Deprecated - static boolean isGZIPPath(final String path) { - if (path.toLowerCase().endsWith(".gz")) { - return true; - } - else { - String uriPath = null; - try { - URI uri = new URI(path); - 
if (uri != null) { - uriPath = uri.getPath(); - return uriPath != null && uriPath.toLowerCase().endsWith(".gz"); - } - return false; - } - catch (URISyntaxException e) { - return false; - } - } - } - - /** * Class to iterator over an entire file. */ class WFIterator implements CloseableTribbleIterator { @@ -313,7 +313,7 @@ static boolean isGZIPPath(final String path) { * @throws IOException */ public WFIterator() throws IOException { - final InputStream inputStream = ParsingUtils.openInputStream(path); + final InputStream inputStream = ParsingUtils.openInputStream(path, wrapper); final PositionalBufferedStream pbs; if (hasBlockCompressedExtension(path)) { @@ -324,9 +324,9 @@ public WFIterator() throws IOException { } else { pbs = new PositionalBufferedStream(inputStream, 512000); } - /** + /* * The header was already read from the original source in the constructor; don't read it again, since some codecs keep state - * about its initializagtion. Instead, skip that part of the stream. + * about its initialization. Instead, skip that part of the stream. 
*/ pbs.skip(header.getHeaderEnd()); source = codec.makeSourceFromStream(pbs); @@ -424,10 +424,12 @@ public QueryIterator(final String chr, final int start, final int end, final Lis } + @Override public boolean hasNext() { return currentRecord != null; } + @Override public T next() { final T ret = currentRecord; try { @@ -511,11 +513,13 @@ private void readNextRecord() throws IOException { } + @Override public void remove() { throw new UnsupportedOperationException("Remove is not supported."); } + @Override public void close() { // Note that this depends on BlockStreamWrapper not actually closing the underlying stream codec.close(source); @@ -529,6 +533,7 @@ public void close() { } } + @Override public Iterator iterator() { return this; } diff --git a/src/main/java/htsjdk/tribble/bed/FullBEDFeature.java b/src/main/java/htsjdk/tribble/bed/FullBEDFeature.java index eab568837..975777dc2 100644 --- a/src/main/java/htsjdk/tribble/bed/FullBEDFeature.java +++ b/src/main/java/htsjdk/tribble/bed/FullBEDFeature.java @@ -39,6 +39,7 @@ public FullBEDFeature(String chr, int start, int end) { } + @Override public java.util.List getExons() { return exons; } diff --git a/src/main/java/htsjdk/tribble/bed/SimpleBEDFeature.java b/src/main/java/htsjdk/tribble/bed/SimpleBEDFeature.java index 77a030fa9..4a6416867 100644 --- a/src/main/java/htsjdk/tribble/bed/SimpleBEDFeature.java +++ b/src/main/java/htsjdk/tribble/bed/SimpleBEDFeature.java @@ -56,14 +56,17 @@ public String getContig() { return chr; } + @Override public int getStart() { return start; } + @Override public int getEnd() { return end; } + @Override public Strand getStrand() { return strand; } @@ -84,6 +87,7 @@ public void setEnd(int end) { this.end = end; } + @Override public String getType() { return type; } @@ -92,6 +96,7 @@ public void setType(String type) { this.type = type; } + @Override public Color getColor() { return color; } @@ -100,6 +105,7 @@ public void setColor(Color color) { this.color = color; } + @Override 
public String getDescription() { return description; } @@ -108,6 +114,7 @@ public void setDescription(String description) { this.description = description; } + @Override public String getName() { return name; } @@ -116,6 +123,7 @@ public void setName(String name) { this.name = name; } + @Override public float getScore() { return score; } @@ -124,6 +132,7 @@ public void setScore(float score) { this.score = score; } + @Override public String getLink() { return link; } @@ -134,6 +143,7 @@ public void setLink(String link) { final static List emptyExonList = new ArrayList(); + @Override public java.util.List getExons() { return emptyExonList; } diff --git a/src/main/java/htsjdk/tribble/example/CountRecords.java b/src/main/java/htsjdk/tribble/example/CountRecords.java index 230c1bf3d..3bb8e4160 100644 --- a/src/main/java/htsjdk/tribble/example/CountRecords.java +++ b/src/main/java/htsjdk/tribble/example/CountRecords.java @@ -29,7 +29,6 @@ import htsjdk.tribble.FeatureCodec; import htsjdk.tribble.Tribble; import htsjdk.tribble.bed.BEDCodec; -import htsjdk.tribble.gelitext.GeliTextCodec; import htsjdk.tribble.index.Index; import htsjdk.tribble.index.IndexFactory; import htsjdk.tribble.index.linear.LinearIndex; @@ -193,8 +192,6 @@ public static FeatureCodec getFeatureCodec(File featureFile) { // return new VCFCodec(); if (featureFile.getName().endsWith(".bed") || featureFile.getName().endsWith(".BED") ) return new BEDCodec(); - if (featureFile.getName().endsWith(".geli.calls") || featureFile.getName().endsWith(".geli") ) - return new GeliTextCodec(); throw new IllegalArgumentException("Unable to determine correct file type based on the file name, for file -> " + featureFile); } } diff --git a/src/main/java/htsjdk/tribble/gelitext/DiploidGenotype.java b/src/main/java/htsjdk/tribble/gelitext/DiploidGenotype.java deleted file mode 100644 index f53343270..000000000 --- a/src/main/java/htsjdk/tribble/gelitext/DiploidGenotype.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * The MIT 
License - * - * Copyright (c) 2013 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.tribble.gelitext; - - -/** - * Class DiploidGenotype - * - * Enum describing all possible combinations of diploid genotype variations; - * AA, AC, etc. 
- * - * @author aaron - */ -@Deprecated -public enum DiploidGenotype { - AA, AC, AG, AT, CC, CG, CT, GG, GT, TT; - - public static DiploidGenotype toDiploidGenotype(String genotype) { - if (genotype.length() != 2) - throw new DiploidGenotypeException("Genotype string for conversion should be of length 2, we were passed = " + genotype); - genotype = genotype.toUpperCase(); - for (DiploidGenotype g: DiploidGenotype.values()) - if (g.toString().equals(genotype)) return g; - throw new DiploidGenotypeException("Unable to find genotype matching " + genotype); - } - - public boolean isHet() { - return toString().toCharArray()[0] != toString().toCharArray()[1]; - } - - public boolean containsBase(char base) { - return (toString().charAt(0) == base || toString().charAt(1) == base); - } -} - -@Deprecated -class DiploidGenotypeException extends RuntimeException { - DiploidGenotypeException(String s) { - super(s); - } - - DiploidGenotypeException(String s, Throwable throwable) { - super(s, throwable); - } -} \ No newline at end of file diff --git a/src/main/java/htsjdk/tribble/gelitext/GeliTextCodec.java b/src/main/java/htsjdk/tribble/gelitext/GeliTextCodec.java deleted file mode 100644 index 394b5dc78..000000000 --- a/src/main/java/htsjdk/tribble/gelitext/GeliTextCodec.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2013 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.tribble.gelitext; - -import htsjdk.samtools.util.CollectionUtil; -import htsjdk.tribble.AsciiFeatureCodec; -import htsjdk.tribble.Feature; -import htsjdk.tribble.exception.CodecLineParsingException; -import htsjdk.tribble.readers.LineIterator; - -import java.util.Arrays; - - -/** - *

    - * A codec for parsing geli text files, which is the text version of the geli binary format. - *

    - *

    - * GELI text has the following tab-seperated fields: - * contig the contig (string) - * position the position on the contig (long) - * refBase the reference base (char) - * depthOfCoverage the depth of coverage at this position (int) - * maximumMappingQual the maximum mapping quality of a read at this position (int) - * genotype the called genotype (string) - * LODBestToReference the LOD score of the best to the reference (double) - * LODBestToNext the LOD score of the best to the next best genotype (double) - * likelihoods the array of all genotype likelihoods, in ordinal ordering (array of 10 doubles, in ordinal order) - * - * @author aaron - * @deprecated This is deprecated and unsupported. - */ -@Deprecated -public class GeliTextCodec extends AsciiFeatureCodec { - public GeliTextCodec() { - super(GeliTextFeature.class); - } - - public Feature decodeLoc(final String line) { - return decode(line); - } - - @Override - public GeliTextFeature decode(final String line) { - // clean out header lines and comments - if (line.startsWith("#") || line.startsWith("@")) - return null; - - // parse into tokens - final String[] parts = line.trim().split("\\s+"); - return decode(parts); - } - - @Override - public boolean canDecode(String path){ - return path.toLowerCase().endsWith(".geli.calls") || path.toLowerCase().endsWith(".geli"); - } - - @Override - public Object readActualHeader(LineIterator reader) { - return null; - } - - public GeliTextFeature decode(final String[] tokens) { - try { - // check that we got the correct number of tokens in the split - if (tokens.length != 18) - throw new CodecLineParsingException("Invalid GeliTextFeature row found -- incorrect element count. 
Expected 18, got " + tokens.length + " line = " + CollectionUtil.join(Arrays.asList(tokens), " ")); - - // UPPER case and sort - final char[] x = tokens[5].toUpperCase().toCharArray(); - Arrays.sort(x); - final String bestGenotype = new String(x); - - final double[] genotypeLikelihoods = new double[10]; - for (int pieceIndex = 8, offset = 0; pieceIndex < 18; pieceIndex++, offset++) { - genotypeLikelihoods[offset] = Double.valueOf(tokens[pieceIndex]); - } - return new GeliTextFeature(tokens[0], - Long.valueOf(tokens[1]), - Character.toUpperCase(tokens[2].charAt(0)), - Integer.valueOf(tokens[3]), - Integer.valueOf(tokens[4]), - DiploidGenotype.toDiploidGenotype(bestGenotype), - Double.valueOf(tokens[6]), - Double.valueOf(tokens[7]), - genotypeLikelihoods); - } catch (CodecLineParsingException e) { - e.printStackTrace(); - throw new RuntimeException("Unable to parse line " + CollectionUtil.join(Arrays.asList(tokens), " "), e); - } catch (NumberFormatException e) { - e.printStackTrace(); - throw new RuntimeException("Unable to parse line " + CollectionUtil.join(Arrays.asList(tokens), " "), e); - } - } -} diff --git a/src/main/java/htsjdk/tribble/gelitext/GeliTextFeature.java b/src/main/java/htsjdk/tribble/gelitext/GeliTextFeature.java deleted file mode 100644 index baad1caab..000000000 --- a/src/main/java/htsjdk/tribble/gelitext/GeliTextFeature.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2013 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall 
be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.tribble.gelitext; - -import htsjdk.tribble.Feature; - -import java.util.Arrays; - - -/** - *

    - * Class GeliTextFeature - *

    - * This is a feature for the Geli text object, which is the text version of the Geli binary genotyping format. - * - * @author aaron - * @deprecated this is deprecated and no longer supported - */ -@Deprecated -public class GeliTextFeature implements Feature { - - private final String contig; // the contig name - private final long position; // the position on the contig - private final char refBase; // the reference base - private final int depthOfCoverage; // the depth of coverage at this position - private final int maximumMappingQual; // the maximum mapping quality of a read at this position - private final DiploidGenotype genotype; // the called genotype - private final double LODBestToReference; // the LOD score of the best to the reference - private final double LODBestToNext; // the LOD score of the best to the next best genotype - private final double likelihoods[]; // the array of all genotype likelihoods, in ordinal order - - /** - * Create a geli text feature, given: - * - * @param contig the contig - * @param position the position on the contig - * @param refBase the reference base - * @param depthOfCoverage the depth of coverage at this position - * @param maximumMappingQual the maximum mapping quality of a read at this position - * @param genotype the called genotype - * @param LODBestToReference the LOD score of the best to the reference - * @param LODBestToNext the LOD score of the best to the next best genotype - * @param likelihoods the array of all genotype likelihoods, in ordinal ordering - */ - public GeliTextFeature(String contig, - long position, - char refBase, - int depthOfCoverage, - int maximumMappingQual, - DiploidGenotype genotype, - double LODBestToReference, - double LODBestToNext, - double[] likelihoods) { - this.contig = contig; - this.position = position; - this.refBase = refBase; - this.depthOfCoverage = depthOfCoverage; - this.maximumMappingQual = maximumMappingQual; - this.genotype = genotype; - this.LODBestToReference = 
LODBestToReference; - this.LODBestToNext = LODBestToNext; - this.likelihoods = likelihoods; - } - - @Override - public String getContig() { - return this.contig; - } - - /** Return the start position in 1-based coordinates (first base is 1) */ - public int getStart() { - return (int) this.position; - } - - /** - * Return the end position following 1-based fully closed conventions. The length of a feature is - * end - start + 1; - */ - public int getEnd() { - return (int) this.position; - } - - public char getRefBase() { - return refBase; - } - - public int getDepthOfCoverage() { - return depthOfCoverage; - } - - public int getMaximumMappingQual() { - return maximumMappingQual; - } - - public DiploidGenotype getGenotype() { - return genotype; - } - - public double getLODBestToNext() { - return LODBestToNext; - } - - public double getLODBestToReference() { - return LODBestToReference; - } - - public double[] getLikelihoods() { - return likelihoods; - } - - private static double Epsilon = 0.0001; - public boolean equals(Object o) { - if (!(o instanceof GeliTextFeature)) return false; - GeliTextFeature other = (GeliTextFeature)o; - if (!Arrays.equals(likelihoods,other.likelihoods)) return false; - if (!contig.equals(other.contig)) return false; - if (!(position == other.position)) return false; - if (!(refBase == other.refBase)) return false; - if (!(depthOfCoverage == other.depthOfCoverage)) return false; - if (!(maximumMappingQual == other.maximumMappingQual)) return false; - if (!(genotype == other.genotype)) return false; - if (!(Math.abs(LODBestToReference - other.LODBestToReference) < Epsilon)) return false; - if (!(Math.abs(LODBestToNext - other.LODBestToNext) < Epsilon)) return false; - return true; - } - -} diff --git a/src/main/java/htsjdk/tribble/index/AbstractIndex.java b/src/main/java/htsjdk/tribble/index/AbstractIndex.java index 47e31ccef..ac90e5d2b 100644 --- a/src/main/java/htsjdk/tribble/index/AbstractIndex.java +++ 
b/src/main/java/htsjdk/tribble/index/AbstractIndex.java @@ -18,6 +18,9 @@ package htsjdk.tribble.index; +import htsjdk.samtools.util.IOUtil; +import htsjdk.samtools.util.Log; +import htsjdk.samtools.util.RuntimeIOException; import htsjdk.tribble.Tribble; import htsjdk.tribble.TribbleException; import htsjdk.tribble.util.LittleEndianInputStream; @@ -25,8 +28,9 @@ import java.io.BufferedOutputStream; import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Collections; import java.util.LinkedHashMap; @@ -67,11 +71,12 @@ private final static long NO_TS = -1L; protected int version; // Our version value - protected File indexedFile = null; // The file we've created this index for + protected Path indexedPath = null; // The file we've created this index for protected long indexedFileSize = NO_FILE_SIZE; // The size of the indexed file protected long indexedFileTS = NO_TS; // The timestamp protected String indexedFileMD5 = NO_MD5; // The MD5 value, generally not filled in (expensive to calc) protected int flags; + protected final Log logger = Log.getInstance(this.getClass()); public boolean hasFileSize() { return indexedFileSize != NO_FILE_SIZE; @@ -101,6 +106,7 @@ public boolean hasMD5() { * @param obj * @return true if this and obj are 'effectively' equivalent data structures. 
*/ + @Override public boolean equalsIgnoreProperties(final Object obj) { if (this == obj) return true; if (!(obj instanceof AbstractIndex)) { @@ -115,8 +121,8 @@ public boolean equalsIgnoreProperties(final Object obj) { return false; } - if (indexedFile != other.indexedFile && (indexedFile == null || !indexedFile.equals(other.indexedFile))) { - System.err.printf("equals indexedFile: this %s != other %s%n", indexedFile, other.indexedFile); + if (indexedPath != other.indexedPath && (indexedPath == null || !indexedPath.equals(other.indexedPath))) { + System.err.printf("equals indexedPath: this %s != other %s%n", indexedPath, other.indexedPath); return false; } @@ -158,18 +164,27 @@ public AbstractIndex() { * @param featureFile the feature file to create an index from */ public AbstractIndex(final String featureFile) { - this(new File(featureFile)); + this(); + try { + this.indexedPath = IOUtil.getPath(featureFile).toAbsolutePath(); + } catch (IOException e) { + throw new IllegalArgumentException("IO error: " + e.getMessage(), e); + } } public AbstractIndex(final File featureFile) { + this(featureFile.toPath()); + } + + public AbstractIndex(final Path featurePath) { this(); - this.indexedFile = featureFile; + this.indexedPath = featurePath.toAbsolutePath(); } public AbstractIndex(final AbstractIndex parent) { this(); this.version = parent.version; - this.indexedFile = parent.indexedFile; + this.indexedPath = parent.indexedPath; this.indexedFileSize = parent.indexedFileSize; this.indexedFileTS = parent.indexedFileTS; this.indexedFileMD5 = parent.indexedFileMD5; @@ -194,12 +209,23 @@ protected void validateIndexHeader(final int indexType, final LittleEndianInputS * * @return true if we're up to date, false otherwise */ + @Override public boolean isCurrentVersion() { return version == VERSION; } + /** + * Gets the indexed file. + * @throws UnsupportedOperationException if the path cannot be represented as a file. + * @deprecated on 03/2017. 
Use {@link #getIndexedPath()} instead. + */ + @Deprecated public File getIndexedFile() { - return indexedFile; + return getIndexedPath().toFile(); + } + + public Path getIndexedPath() { + return indexedPath; } public long getIndexedFileSize() { @@ -226,15 +252,20 @@ public void setMD5(final String md5) { this.indexedFileMD5 = md5; } + @Override public boolean containsChromosome(final String chr) { return chrIndices.containsKey(chr); } public void finalizeIndex() { - // these two functions must be called now because the file may be being written during on the fly indexing - if (indexedFile != null) { - this.indexedFileSize = indexedFile.length(); - this.indexedFileTS = indexedFile.lastModified(); + try { + // these two functions must be called now because the file may be being written during on the fly indexing + if (indexedPath != null) { + this.indexedFileSize = Files.size(indexedPath); + this.indexedFileTS = Files.getLastModifiedTime(indexedPath).toMillis(); + } + } catch (IOException e) { + throw new RuntimeIOException(e); } } @@ -248,7 +279,7 @@ private void writeHeader(final LittleEndianOutputStream dos) throws IOException dos.writeInt(MAGIC_NUMBER); dos.writeInt(getType()); dos.writeInt(version); - dos.writeString(indexedFile.getAbsolutePath()); + dos.writeString(indexedPath.toUri().toString()); dos.writeLong(indexedFileSize); dos.writeLong(indexedFileTS); dos.writeString(indexedFileMD5); @@ -271,7 +302,7 @@ private void writeHeader(final LittleEndianOutputStream dos) throws IOException private void readHeader(final LittleEndianInputStream dis) throws IOException { version = dis.readInt(); - indexedFile = new File(dis.readString()); + indexedPath = IOUtil.getPath(dis.readString()); indexedFileSize = dis.readLong(); indexedFileTS = dis.readLong(); indexedFileMD5 = dis.readString(); @@ -306,10 +337,12 @@ private void readSequenceDictionary(final LittleEndianInputStream dis) throws IO } } + @Override public List getSequenceNames() { return new 
ArrayList(chrIndices.keySet()); } + @Override public List getBlocks(final String chr, final int start, final int end) { return getChrIndex(chr).getBlocks(start, end); } @@ -332,6 +365,7 @@ private final ChrIndex getChrIndex(final String chr) { } } + @Override public void write(final LittleEndianOutputStream stream) throws IOException { writeHeader(stream); @@ -343,18 +377,21 @@ public void write(final LittleEndianOutputStream stream) throws IOException { } @Override - public void write(final File idxFile) throws IOException { - try(final LittleEndianOutputStream idxStream = new LittleEndianOutputStream(new BufferedOutputStream(new FileOutputStream(idxFile)))) { + public void write(final Path idxPath) throws IOException { + try(final LittleEndianOutputStream idxStream = new LittleEndianOutputStream(new BufferedOutputStream(Files.newOutputStream(idxPath)))) { write(idxStream); } } @Override - public void writeBasedOnFeatureFile(final File featureFile) throws IOException { - if (!featureFile.isFile()) return; - write(Tribble.indexFile(featureFile)); + public void writeBasedOnFeaturePath(final Path featurePath) throws IOException { + if (!Files.isRegularFile(featurePath)) { + throw new IOException("Cannot write based on a non-regular file: " + featurePath.toUri()); + } + write(Tribble.indexPath(featurePath)); } + public void read(final LittleEndianInputStream dis) throws IOException { try { readHeader(dis); @@ -380,7 +417,7 @@ public void read(final LittleEndianInputStream dis) throws IOException { } protected void printIndexInfo() { - System.out.println(String.format("Index for %s with %d indices", indexedFile, chrIndices.size())); + System.out.println(String.format("Index for %s with %d indices", indexedPath, chrIndices.size())); final BlockStats stats = getBlockStats(true); System.out.println(String.format(" total blocks %d", stats.total)); System.out.println(String.format(" total empty blocks %d", stats.empty)); @@ -418,10 +455,12 @@ protected String statsSummary() 
{ return String.format("%12d blocks (%12d empty (%.2f%%))", stats.total, stats.empty, (100.0 * stats.empty) / stats.total); } + @Override public void addProperty(final String key, final String value) { properties.put(key, value); } + @Override public void addProperties(final Map properties) { this.properties.putAll(properties); } @@ -431,6 +470,7 @@ public void addProperties(final Map properties) { * * @return the mapping of values as an unmodifiable map */ + @Override public Map getProperties() { return Collections.unmodifiableMap(properties); } diff --git a/src/main/java/htsjdk/tribble/index/DynamicIndexCreator.java b/src/main/java/htsjdk/tribble/index/DynamicIndexCreator.java index 52153a51f..17274ace6 100644 --- a/src/main/java/htsjdk/tribble/index/DynamicIndexCreator.java +++ b/src/main/java/htsjdk/tribble/index/DynamicIndexCreator.java @@ -31,6 +31,7 @@ import htsjdk.tribble.util.MathUtils; import java.io.File; +import java.nio.file.Path; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; @@ -56,15 +57,18 @@ MathUtils.RunningStat stats = new MathUtils.RunningStat(); long basesSeen = 0; Feature lastFeature = null; - File inputFile; - public DynamicIndexCreator(final File inputFile, final IndexFactory.IndexBalanceApproach iba) { + public DynamicIndexCreator(final Path inputPath, final IndexFactory.IndexBalanceApproach iba) { this.iba = iba; // get a list of index creators - this.inputFile = inputFile; - creators = getIndexCreators(inputFile,iba); + creators = getIndexCreators(inputPath, iba); + } + + public DynamicIndexCreator(final File inputFile, final IndexFactory.IndexBalanceApproach iba) { + this(inputFile.toPath(), iba); } + @Override public Index finalizeIndex(final long finalFilePosition) { // finalize all of the indexes // return the score of the indexes we've generated @@ -89,19 +93,19 @@ public Index finalizeIndex(final long finalFilePosition) { /** * create a list of index creators (initialized) representing the common 
index types we'd suspect they'd like to use - * @param inputFile the input file to use to create the indexes + * @param inputPath the input path to use to create the indexes * @return a map of index type to the best index for that balancing approach */ - private Map getIndexCreators(final File inputFile, final IndexFactory.IndexBalanceApproach iba) { + private Map getIndexCreators(final Path inputPath, final IndexFactory.IndexBalanceApproach iba) { final Map creators = new HashMap(); if (iba == IndexFactory.IndexBalanceApproach.FOR_SIZE) { // add a linear index with the default bin size - final LinearIndexCreator linearNormal = new LinearIndexCreator(inputFile, LinearIndexCreator.DEFAULT_BIN_WIDTH); + final LinearIndexCreator linearNormal = new LinearIndexCreator(inputPath, LinearIndexCreator.DEFAULT_BIN_WIDTH); creators.put(IndexFactory.IndexType.LINEAR,linearNormal); // create a tree index with the default size - final IntervalIndexCreator treeNormal = new IntervalIndexCreator(inputFile, IntervalIndexCreator.DEFAULT_FEATURE_COUNT); + final IntervalIndexCreator treeNormal = new IntervalIndexCreator(inputPath, IntervalIndexCreator.DEFAULT_FEATURE_COUNT); creators.put(IndexFactory.IndexType.INTERVAL_TREE,treeNormal); } @@ -110,12 +114,12 @@ public Index finalizeIndex(final long finalFilePosition) { if (iba == IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME) { // create a linear index with a small bin size final LinearIndexCreator linearSmallBin = - new LinearIndexCreator(inputFile, Math.max(200, LinearIndexCreator.DEFAULT_BIN_WIDTH / 4)); + new LinearIndexCreator(inputPath, Math.max(200, LinearIndexCreator.DEFAULT_BIN_WIDTH / 4)); creators.put(IndexFactory.IndexType.LINEAR,linearSmallBin); // create a tree index with a small index size final IntervalIndexCreator treeSmallBin = - new IntervalIndexCreator(inputFile, Math.max(20, IntervalIndexCreator.DEFAULT_FEATURE_COUNT / 8)); + new IntervalIndexCreator(inputPath, Math.max(20, 
IntervalIndexCreator.DEFAULT_FEATURE_COUNT / 8)); creators.put(IndexFactory.IndexType.INTERVAL_TREE,treeSmallBin); } @@ -123,6 +127,7 @@ public Index finalizeIndex(final long finalFilePosition) { } + @Override public void addFeature(final Feature f, final long filePosition) { // protected static Map createIndex(FileBasedFeatureIterator iterator, Map creators, IndexBalanceApproach iba) { // feed each feature to the indexes we've created diff --git a/src/main/java/htsjdk/tribble/index/Index.java b/src/main/java/htsjdk/tribble/index/Index.java index ca6cc60d3..51982c6d2 100644 --- a/src/main/java/htsjdk/tribble/index/Index.java +++ b/src/main/java/htsjdk/tribble/index/Index.java @@ -27,6 +27,7 @@ import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.List; import java.util.Map; @@ -72,17 +73,43 @@ /** * Writes the index into a file. * + * Default implementation delegates to {@link #write(Path)} + * * @param idxFile Where to write the index. * @throws IOException if the index is unable to write to the specified file */ - public void write(final File idxFile) throws IOException; + public default void write(final File idxFile) throws IOException { + write(idxFile.toPath()); + } + + /** + * Writes the index into a path. + * + * @param indexPath Where to write the index. + * @throws IOException if the index is unable to write to the specified path. + */ + public void write(final Path indexPath) throws IOException; /** * Write an appropriately named and located Index file based on the name and location of the featureFile. - * If featureFile is not a normal file, the index will silently not be written. + * + * Default implementation delegates to {@link #writeBasedOnFeaturePath(Path)} + * * @param featureFile + * @throws IOException if featureFile is not a normal file. 
+ */ + public default void writeBasedOnFeatureFile(File featureFile) throws IOException { + writeBasedOnFeaturePath(featureFile.toPath()); + } + + /** + * Write an appropriately named and located Index file based on the name and location of the featureFile. + * If featureFile is not a normal file, the index will silently not be written. + * + * @param featurePath + * @throws IOException if featureFile is not a normal file. */ - public void writeBasedOnFeatureFile(File featureFile) throws IOException; + public void writeBasedOnFeaturePath(Path featurePath) throws IOException; /** * @return get the list of properties for this index. Returns null if no properties. diff --git a/src/main/java/htsjdk/tribble/index/IndexFactory.java b/src/main/java/htsjdk/tribble/index/IndexFactory.java index 4e23e934d..928236620 100644 --- a/src/main/java/htsjdk/tribble/index/IndexFactory.java +++ b/src/main/java/htsjdk/tribble/index/IndexFactory.java @@ -51,9 +51,11 @@ import java.io.IOException; import java.io.InputStream; import java.lang.reflect.Constructor; +import java.nio.channels.SeekableByteChannel; import java.util.HashMap; import java.util.Iterator; import java.util.Map; +import java.util.function.Function; import java.util.zip.GZIPInputStream; /** @@ -90,9 +92,7 @@ public int getDefaultBinSize() { public IndexCreator getIndexCreator() { try { return indexCreatorClass.newInstance(); - } catch ( final InstantiationException e ) { - throw new TribbleException("Couldn't make index creator in " + this, e); - } catch ( final IllegalAccessException e ) { + } catch ( final InstantiationException | IllegalAccessException e ) { throw new TribbleException("Couldn't make index creator in " + this, e); } } @@ -162,11 +162,25 @@ public static IndexType getIndexType(final BufferedInputStream is) { * @param indexFile from which to load the index */ public static Index loadIndex(final String indexFile) { + return loadIndex(indexFile, null); + } + + /** + * Load in index from the specified 
file. The type of index (LinearIndex or IntervalTreeIndex) is determined + * at run time by reading the type flag in the file. + * + * @param indexFile from which to load the index + * @param indexWrapper a wrapper to apply to the raw byte stream of the index file, only applied to uri's loaded as + * {@link java.nio.file.Path} + */ + public static Index loadIndex(final String indexFile, Function indexWrapper) { // Must be buffered, because getIndexType uses mark and reset - try (BufferedInputStream bufferedInputStream = new BufferedInputStream(indexFileInputStream(indexFile), Defaults.NON_ZERO_BUFFER_SIZE)){ + try (BufferedInputStream bufferedInputStream = new BufferedInputStream(indexFileInputStream(indexFile, indexWrapper), Defaults.NON_ZERO_BUFFER_SIZE)) { final Class indexClass = IndexType.getIndexType(bufferedInputStream).getIndexType(); final Constructor ctor = indexClass.getConstructor(InputStream.class); return ctor.newInstance(bufferedInputStream); + } catch (final TribbleException ex) { + throw ex; } catch (final IOException ex) { throw new TribbleException.UnableToReadIndexFile("Unable to read index file", indexFile, ex); } catch (final Exception ex) { @@ -174,8 +188,8 @@ public static Index loadIndex(final String indexFile) { } } - private static InputStream indexFileInputStream(final String indexFile) throws IOException { - final InputStream inputStreamInitial = ParsingUtils.openInputStream(indexFile); + private static InputStream indexFileInputStream(final String indexFile, Function indexWrapper) throws IOException { + final InputStream inputStreamInitial = ParsingUtils.openInputStream(indexFile, indexWrapper); if (indexFile.endsWith(".gz")) { return new GZIPInputStream(inputStreamInitial); } @@ -461,10 +475,12 @@ private PositionalBufferedStream initStream(final File inputFile, final long ski } } + @Override public boolean hasNext() { return nextFeature != null; } + @Override public Feature next() { final Feature ret = nextFeature; readNextFeature(); 
@@ -474,6 +490,7 @@ public Feature next() { /** * @throws UnsupportedOperationException */ + @Override public void remove() { throw new UnsupportedOperationException("We cannot remove"); } diff --git a/src/main/java/htsjdk/tribble/index/interval/Interval.java b/src/main/java/htsjdk/tribble/index/interval/Interval.java index 9d4787774..6c0e648ee 100644 --- a/src/main/java/htsjdk/tribble/index/interval/Interval.java +++ b/src/main/java/htsjdk/tribble/index/interval/Interval.java @@ -76,6 +76,7 @@ public int hashCode() { } + @Override public int compareTo(Object o) { Interval other = (Interval) o; if (this.start < other.start) diff --git a/src/main/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java b/src/main/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java index e826edaa7..58e2f87ee 100644 --- a/src/main/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java +++ b/src/main/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java @@ -25,6 +25,7 @@ import htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex; import java.io.File; +import java.nio.file.Path; import java.util.ArrayList; import java.util.LinkedList; @@ -51,17 +52,26 @@ private final ArrayList intervals = new ArrayList(); - File inputFile; + Path inputPath; - public IntervalIndexCreator(final File inputFile, final int featuresPerInterval) { - this.inputFile = inputFile; + public IntervalIndexCreator(final Path inputPath, final int featuresPerInterval) { + this.inputPath = inputPath; this.featuresPerInterval = featuresPerInterval; } + public IntervalIndexCreator(final File inputFile, final int featuresPerInterval) { + this(inputFile.toPath(), featuresPerInterval); + } + public IntervalIndexCreator(final File inputFile) { - this(inputFile, DEFAULT_FEATURE_COUNT); + this(inputFile.toPath()); + } + + public IntervalIndexCreator(final Path inputPath) { + this(inputPath, DEFAULT_FEATURE_COUNT); } + @Override public void addFeature(final Feature feature, final long filePosition) 
{ // if we don't have a chrIndex yet, or if the last one was for the previous contig, create a new one if (chrList.isEmpty() || !chrList.getLast().getName().equals(feature.getContig())) { @@ -105,8 +115,9 @@ private void addIntervalsToLastChr(final long currentPos) { * @param finalFilePosition the final file position, for indexes that have to close out with the final position * @return a Tree Index */ + @Override public Index finalizeIndex(final long finalFilePosition) { - final IntervalTreeIndex featureIndex = new IntervalTreeIndex(inputFile.getAbsolutePath()); + final IntervalTreeIndex featureIndex = new IntervalTreeIndex(inputPath); // dump the remaining bins to the index addIntervalsToLastChr(finalFilePosition); featureIndex.setChrIndex(chrList); diff --git a/src/main/java/htsjdk/tribble/index/interval/IntervalTreeIndex.java b/src/main/java/htsjdk/tribble/index/interval/IntervalTreeIndex.java index 055888ecc..c4b2865dc 100644 --- a/src/main/java/htsjdk/tribble/index/interval/IntervalTreeIndex.java +++ b/src/main/java/htsjdk/tribble/index/interval/IntervalTreeIndex.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -57,6 +58,15 @@ public IntervalTreeIndex(final InputStream inputStream) throws IOException { * * @param featureFile File which we are indexing */ + public IntervalTreeIndex(final Path featureFile) { + super(featureFile); + } + + /** + * Prepare to build an index. 
+ * + * @param featureFile File which we are indexing + */ public IntervalTreeIndex(final String featureFile) { super(featureFile); } @@ -119,6 +129,7 @@ public ChrIndex(final String name) { tree = new IntervalTree(); } + @Override public String getName() { return name; } @@ -127,11 +138,13 @@ public void insert(final Interval iv) { tree.insert(iv); } + @Override public List getBlocks() { return null; } + @Override public List getBlocks(final int start, final int end) { // Get intervals and build blocks list @@ -148,6 +161,7 @@ public void insert(final Interval iv) { // Sort blocks by start position Arrays.sort(blocks, new Comparator() { + @Override public int compare(final Block b1, final Block b2) { // this is a little cryptic because the normal method (b1.getStartPosition() - b2.getStartPosition()) wraps in int space and we incorrectly sort the blocks in extreme cases return b1.getStartPosition() - b2.getStartPosition() < 1 ? -1 : (b1.getStartPosition() - b2.getStartPosition() > 1 ? 1 : 0); @@ -175,6 +189,7 @@ public void printTree() { System.out.println(tree.toString()); } + @Override public void write(final LittleEndianOutputStream dos) throws IOException { dos.writeString(name); @@ -190,6 +205,7 @@ public void write(final LittleEndianOutputStream dos) throws IOException { } + @Override public void read(final LittleEndianInputStream dis) throws IOException { tree = new IntervalTree(); diff --git a/src/main/java/htsjdk/tribble/index/linear/LinearIndex.java b/src/main/java/htsjdk/tribble/index/linear/LinearIndex.java index 4f4d9100e..3d7905af1 100644 --- a/src/main/java/htsjdk/tribble/index/linear/LinearIndex.java +++ b/src/main/java/htsjdk/tribble/index/linear/LinearIndex.java @@ -28,6 +28,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -71,12 +72,21 @@ * @param indices * @param featureFile */ - public 
LinearIndex(final List indices, final File featureFile) { - super(featureFile.getAbsolutePath()); + public LinearIndex(final List indices, final Path featureFile) { + super(featureFile); for (final ChrIndex index : indices) chrIndices.put(index.getName(), index); } + /** + * Initialize using the specified {@code indices} + * @param indices + * @param featureFile + */ + public LinearIndex(final List indices, final File featureFile) { + this(indices, featureFile.toPath()); + } + private LinearIndex(final LinearIndex parent, final List indices) { super(parent); for (final ChrIndex index : indices) @@ -92,6 +102,14 @@ public LinearIndex(final String featureFile) { } /** + * Initialize with default parameters + * @param featurePath Path for which this is an index + */ + public LinearIndex(final Path featurePath) { + super(featurePath); + } + + /** * Load from file. * @param inputStream This method assumes that the input stream is already buffered as appropriate. */ @@ -101,6 +119,7 @@ public LinearIndex(final InputStream inputStream) throws IOException { read(dis); } + @Override public boolean isCurrentVersion() { if (!super.isCurrentVersion()) return false; @@ -117,6 +136,7 @@ protected int getType() { return INDEX_TYPE; } + @Override public List getSequenceNames() { return (chrIndices == null ? 
Collections.EMPTY_LIST : Collections.unmodifiableList(new ArrayList(chrIndices.keySet()))); @@ -173,6 +193,7 @@ public ChrIndex() { this.nFeatures = 0; } + @Override public String getName() { return name; } @@ -186,10 +207,12 @@ public int getNBlocks() { return blocks.size(); } + @Override public List getBlocks() { return blocks; } + @Override public List getBlocks(final int start, final int end) { if (blocks.isEmpty()) { return Collections.emptyList(); @@ -231,6 +254,7 @@ public void incrementFeatureCount() { this.nFeatures++; } + @Override public void write(final LittleEndianOutputStream dos) throws IOException { // Chr name, binSize, # bins, longest feature @@ -253,6 +277,7 @@ public void write(final LittleEndianOutputStream dos) throws IOException { dos.writeLong(pos + size); } + @Override public void read(final LittleEndianInputStream dis) throws IOException { name = dis.readString(); binWidth = dis.readInt(); diff --git a/src/main/java/htsjdk/tribble/index/linear/LinearIndexCreator.java b/src/main/java/htsjdk/tribble/index/linear/LinearIndexCreator.java index 1158fdfd3..9109705d2 100644 --- a/src/main/java/htsjdk/tribble/index/linear/LinearIndexCreator.java +++ b/src/main/java/htsjdk/tribble/index/linear/LinearIndexCreator.java @@ -29,6 +29,7 @@ import htsjdk.tribble.index.TribbleIndexCreator; import java.io.File; +import java.nio.file.Path; import java.util.ArrayList; import java.util.LinkedList; @@ -43,20 +44,28 @@ private int binWidth = DEFAULT_BIN_WIDTH; // the input file - private final File inputFile; + private final Path inputFile; private final LinkedList chrList = new LinkedList(); private int longestFeature= 0; private final ArrayList blocks = new ArrayList(); - public LinearIndexCreator(final File inputFile, final int binSize) { - this.inputFile = inputFile; + public LinearIndexCreator(final Path inputPath, final int binSize) { + this.inputFile = inputPath; binWidth = binSize; } + public LinearIndexCreator(final File inputFile, final int binSize) { 
+ this(inputFile.toPath(), binSize); + } + public LinearIndexCreator(final File inputFile) { - this(inputFile, DEFAULT_BIN_WIDTH); + this(inputFile.toPath()); + } + + public LinearIndexCreator(final Path inputPath) { + this(inputPath, DEFAULT_BIN_WIDTH); } /** @@ -64,6 +73,7 @@ public LinearIndexCreator(final File inputFile) { * @param feature the feature, from which we use the contig, start, and stop * @param filePosition the position of the file at the BEGINNING of the current feature */ + @Override public void addFeature(final Feature feature, final long filePosition) { // fi we don't have a chrIndex yet, or if the last one was for the previous contig, create a new one if (chrList.isEmpty() || !chrList.getLast().getName().equals(feature.getContig())) { @@ -97,6 +107,7 @@ public void addFeature(final Feature feature, final long filePosition) { * @param finalFilePosition the final file position, for indexes that have to close out with the final position * @return an Index object */ + @Override public Index finalizeIndex(final long finalFilePosition) { if (finalFilePosition == 0) throw new IllegalArgumentException("finalFilePosition != 0, -> " + finalFilePosition); diff --git a/src/main/java/htsjdk/tribble/index/tabix/TabixIndex.java b/src/main/java/htsjdk/tribble/index/tabix/TabixIndex.java index 044cefe61..d7cc31cef 100644 --- a/src/main/java/htsjdk/tribble/index/tabix/TabixIndex.java +++ b/src/main/java/htsjdk/tribble/index/tabix/TabixIndex.java @@ -30,7 +30,10 @@ import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.BlockCompressedOutputStream; import htsjdk.samtools.util.CloserUtil; +import htsjdk.samtools.util.IOUtil; +import htsjdk.samtools.util.Log; import htsjdk.samtools.util.StringUtil; +import htsjdk.tribble.Tribble; import htsjdk.tribble.TribbleException; import htsjdk.tribble.index.Block; import htsjdk.tribble.index.Index; @@ -44,6 +47,8 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; 
+import java.nio.file.Files; +import java.nio.file.Path; import java.util.*; /** @@ -95,6 +100,13 @@ public TabixIndex(final File tabixFile) throws IOException { this(new BlockCompressedInputStream(tabixFile), true); } + /** + * Convenient ctor that opens the path, wraps with with BGZF reader, and closes after reading index. + */ + public TabixIndex(final Path tabixPath) throws IOException { + this(new BlockCompressedInputStream(Files.newInputStream(tabixPath)), true); + } + private TabixIndex(final InputStream inputStream, final boolean closeInputStream) throws IOException { final LittleEndianInputStream dis = new LittleEndianInputStream(inputStream); if (dis.readInt() != MAGIC_NUMBER) { @@ -199,24 +211,27 @@ public TabixFormat getFormatSpec() { /** * Writes the index with BGZF. * - * @param tabixFile Where to write the index. + * @param tabixPath Where to write the index. */ @Override - public void write(final File tabixFile) throws IOException { - try(final LittleEndianOutputStream los = new LittleEndianOutputStream(new BlockCompressedOutputStream(tabixFile))) { + public void write(final Path tabixPath) throws IOException { + try(final LittleEndianOutputStream los = new LittleEndianOutputStream(new BlockCompressedOutputStream(Files.newOutputStream(tabixPath), null))) { write(los); } } /** - * Writes to a file with appropriate name and directory based on feature file. + * Writes to a path with appropriate name and directory based on feature path. * - * @param featureFile File being indexed. + * @param featurePath Path being indexed. + * @throws IOException if featureFile is not a normal file. 
*/ @Override - public void writeBasedOnFeatureFile(final File featureFile) throws IOException { - if (!featureFile.isFile()) return; - write(new File(featureFile.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION)); + public void writeBasedOnFeaturePath(final Path featurePath) throws IOException { + if (!Files.isRegularFile(featurePath)) { + throw new IOException("Cannot write based on a non-regular file: " + featurePath.toUri()); + } + write(Tribble.tabixIndexPath(featurePath)); } /** diff --git a/src/main/java/htsjdk/tribble/readers/AsciiLineReader.java b/src/main/java/htsjdk/tribble/readers/AsciiLineReader.java index 8f06205a7..39c7d409c 100644 --- a/src/main/java/htsjdk/tribble/readers/AsciiLineReader.java +++ b/src/main/java/htsjdk/tribble/readers/AsciiLineReader.java @@ -40,8 +40,9 @@ private static final byte LINEFEED = (byte) ('\n' & 0xff); private static final byte CARRIAGE_RETURN = (byte) ('\r' & 0xff); - PositionalBufferedStream is; - char[] lineBuffer; + private final PositionalBufferedStream is; + private char[] lineBuffer; + private int lineTerminatorLength = -1; public AsciiLineReader(final InputStream is){ this(new PositionalBufferedStream(is)); @@ -57,6 +58,7 @@ public AsciiLineReader(final PositionalBufferedStream is) { /** * @return The position of the InputStream */ + @Override public long getPosition(){ if(is == null){ throw new TribbleException("getPosition() called but no default stream was provided to the class on creation"); @@ -64,6 +66,16 @@ public long getPosition(){ return is.getPosition(); } + /** Returns the length of the line terminator read after the last read line. Returns either: + * -1 if no line has been read + * 0 after the last line if the last line in the file had no CR or LF line ending + * 1 if the line ended with CR or LF + * 2 if the line ended with CR and LF + */ + public int getLineTerminatorLength() { + return this.lineTerminatorLength; + } + /** * Read a line of text. 
A line is considered to be terminated by any one * of a line feed ('\n'), a carriage return ('\r'), or a carriage return @@ -82,6 +94,7 @@ public final String readLine(final PositionalBufferedStream stream) throws IOExc if (b == -1) { // eof reached. Return the last line, or null if this is a new line if (linePosition > 0) { + this.lineTerminatorLength = 0; return new String(lineBuffer, 0, linePosition); } else { return null; @@ -92,6 +105,10 @@ public final String readLine(final PositionalBufferedStream stream) throws IOExc if (c == LINEFEED || c == CARRIAGE_RETURN) { if (c == CARRIAGE_RETURN && stream.peek() == LINEFEED) { stream.read(); // <= skip the trailing \n in case of \r\n termination + this.lineTerminatorLength = 2; + } + else { + this.lineTerminatorLength = 1; } return new String(lineBuffer, 0, linePosition); @@ -115,6 +132,7 @@ public final String readLine(final PositionalBufferedStream stream) throws IOExc * * @return */ + @Override public final String readLine() throws IOException{ if ( is == null ){ throw new TribbleException("readLine() called without an explicit stream argument but no default stream was provided to the class on creation"); diff --git a/src/main/java/htsjdk/tribble/readers/LineReader.java b/src/main/java/htsjdk/tribble/readers/LineReader.java index 969b6b511..2782afc96 100644 --- a/src/main/java/htsjdk/tribble/readers/LineReader.java +++ b/src/main/java/htsjdk/tribble/readers/LineReader.java @@ -39,5 +39,6 @@ public String readLine() throws IOException; + @Override public void close(); } diff --git a/src/main/java/htsjdk/tribble/readers/LineReaderUtil.java b/src/main/java/htsjdk/tribble/readers/LineReaderUtil.java deleted file mode 100644 index 83a0545e5..000000000 --- a/src/main/java/htsjdk/tribble/readers/LineReaderUtil.java +++ /dev/null @@ -1,56 +0,0 @@ -package htsjdk.tribble.readers; - -import java.io.InputStream; -import java.io.StringReader; - -/** - * A collection of factories for generating {@link LineReader}s. 
- * - * @Deprecated use {@link SynchronousLineReader} directly. - * @author mccowan - */ -@Deprecated -public class LineReaderUtil { - @Deprecated - public enum LineReaderOption { - ASYNCHRONOUS, //Note: the asynchronous option has no effect - this class does not provide asynchronous reading anymore - SYNCHRONOUS - } - - /** - * Creates a line reader from the given stream. - * @Deprecated use new SynchronousLineReader(stream); - */ - @Deprecated - public static LineReader fromBufferedStream(final InputStream stream) { - return new SynchronousLineReader(stream); - } - - /** - * Creates a line reader from the given string reader. - * @Deprecated use new SynchronousLineReader(stringReader); - */ - @Deprecated - public static LineReader fromStringReader(final StringReader stringReader) { - return new SynchronousLineReader(stringReader); - } - - /** - * Creates a line reader from the given string reader. - * @Deprecated Asynchronous mode is not going to be supported. Use new SynchronousLineReader(stringReader); - */ - @Deprecated - public static LineReader fromStringReader(final StringReader stringReader, final Object ignored) { - return new SynchronousLineReader(stringReader); - } - - /** - * Convenience factory for composing a LineReader from an InputStream. - * @Deprecated Asynchronous mode is not going to be supported. 
Use new SynchronousLineReader(bufferedStream); - */ - @Deprecated - public static LineReader fromBufferedStream(final InputStream bufferedStream, final Object ignored) { - return new SynchronousLineReader(bufferedStream); - } - -} diff --git a/src/main/java/htsjdk/tribble/readers/LongLineBufferedReader.java b/src/main/java/htsjdk/tribble/readers/LongLineBufferedReader.java index 5ca8e8d13..dbb659343 100644 --- a/src/main/java/htsjdk/tribble/readers/LongLineBufferedReader.java +++ b/src/main/java/htsjdk/tribble/readers/LongLineBufferedReader.java @@ -153,6 +153,7 @@ private void fill() throws IOException { * end of the stream has been reached * @throws IOException If an I/O error occurs */ + @Override public int read() throws IOException { synchronized (lock) { ensureOpen(); @@ -250,6 +251,7 @@ private int read1(char[] cbuf, int off, int len) throws IOException { * stream has been reached * @throws IOException If an I/O error occurs */ + @Override public int read(char cbuf[], int off, int len) throws IOException { synchronized (lock) { ensureOpen(); @@ -362,6 +364,7 @@ public String readLine() throws IOException { * @throws IllegalArgumentException If n is negative. * @throws IOException If an I/O error occurs */ + @Override public long skip(long n) throws IOException { if (n < 0L) { throw new IllegalArgumentException("skip value is negative"); @@ -401,6 +404,7 @@ public long skip(long n) throws IOException { * * @throws IOException If an I/O error occurs */ + @Override public boolean ready() throws IOException { synchronized (lock) { ensureOpen(); @@ -429,6 +433,7 @@ public boolean ready() throws IOException { /** * Tells whether this stream supports the mark() operation, which it does. 
*/ + @Override public boolean markSupported() { return true; } @@ -448,6 +453,7 @@ public boolean markSupported() { * @throws IllegalArgumentException If readAheadLimit is < 0 * @throws IOException If an I/O error occurs */ + @Override public void mark(int readAheadLimit) throws IOException { if (readAheadLimit < 0) { throw new IllegalArgumentException("Read-ahead limit < 0"); @@ -466,6 +472,7 @@ public void mark(int readAheadLimit) throws IOException { * @throws IOException If the stream has never been marked, * or if the mark has been invalidated */ + @Override public void reset() throws IOException { synchronized (lock) { ensureOpen(); @@ -478,6 +485,7 @@ public void reset() throws IOException { } } + @Override public void close() throws IOException { synchronized (lock) { if (in == null) diff --git a/src/main/java/htsjdk/tribble/readers/PositionalBufferedStream.java b/src/main/java/htsjdk/tribble/readers/PositionalBufferedStream.java index ac642df98..4d7ae05eb 100644 --- a/src/main/java/htsjdk/tribble/readers/PositionalBufferedStream.java +++ b/src/main/java/htsjdk/tribble/readers/PositionalBufferedStream.java @@ -48,6 +48,7 @@ public PositionalBufferedStream(final InputStream is, final int bufferSize) { nextChar = nChars = 0; } + @Override public final long getPosition() { return position; } @@ -129,6 +130,7 @@ private final int fill() throws IOException { return nChars; } + @Override public final long skip(final long nBytes) throws IOException { long remainingToSkip = nBytes; @@ -156,6 +158,7 @@ public final long skip(final long nBytes) throws IOException { return actuallySkipped; } + @Override public final void close() { try { is.close(); diff --git a/src/main/java/htsjdk/tribble/readers/TabixIteratorLineReader.java b/src/main/java/htsjdk/tribble/readers/TabixIteratorLineReader.java index 49b6f0cfd..2a04725e7 100644 --- a/src/main/java/htsjdk/tribble/readers/TabixIteratorLineReader.java +++ b/src/main/java/htsjdk/tribble/readers/TabixIteratorLineReader.java 
@@ -40,6 +40,7 @@ public TabixIteratorLineReader(TabixReader.Iterator iterator) { this.iterator = iterator; } + @Override public String readLine() { try { return iterator != null ? iterator.next() : null; @@ -48,6 +49,7 @@ public String readLine() { } } + @Override public void close() { // Ignore - } diff --git a/src/main/java/htsjdk/tribble/readers/TabixReader.java b/src/main/java/htsjdk/tribble/readers/TabixReader.java index 8867d076b..244fcd5d2 100644 --- a/src/main/java/htsjdk/tribble/readers/TabixReader.java +++ b/src/main/java/htsjdk/tribble/readers/TabixReader.java @@ -34,19 +34,22 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.SeekableByteChannel; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Set; +import java.util.function.Function; /** * @author Heng Li */ public class TabixReader { - private String mFn; - private String mIdxFn; - private BlockCompressedInputStream mFp; + private final String mFilePath; + private final String mIndexPath; + private final Function mIndexWrapper; + private final BlockCompressedInputStream mFp; private int mPreset; private int mSc; @@ -75,6 +78,7 @@ public TPair64(final TPair64 p) { v = p.v; } + @Override public int compareTo(final TPair64 p) { return u == p.u ? 0 : ((u < p.u) ^ (u < 0) ^ (p.u < 0)) ? -1 : 1; // unsigned 64-bit comparison } @@ -96,40 +100,64 @@ private static boolean less64(final long u, final long v) { // unsigned 64-bit c } /** - * @param fn File name of the data file + * @param filePath path to the data file/uri + */ + public TabixReader(final String filePath) throws IOException { + this(filePath, null, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(filePath))); + } + + /** + * @param filePath path to the of the data file/uri + * @param indexPath Full path to the index file. 
Auto-generated if null */ - public TabixReader(final String fn) throws IOException { - this(fn, null, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(fn))); + public TabixReader(final String filePath, final String indexPath) throws IOException { + this(filePath, indexPath, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(filePath))); } /** - * @param fn File name of the data file - * @param idxFn Full path to the index file. Auto-generated if null + * @param filePath path to the data file/uri + * @param indexPath Full path to the index file. Auto-generated if null + * @param wrapper a wrapper to apply to the raw byte stream of the data file if is a uri representing a {@link java.nio.file.Path} + * @param indexWrapper a wrapper to apply to the raw byte stream of the index file if it is a uri representing a {@link java.nio.file.Path} + */ + public TabixReader(final String filePath, final String indexPath, + final Function wrapper, + final Function indexWrapper) throws IOException { + this(filePath, indexPath, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(filePath, wrapper)), indexWrapper); + } + + + /** + * @param filePath Path to the data file (used for error messages only) + * @param stream Seekable stream from which the data is read */ - public TabixReader(final String fn, final String idxFn) throws IOException { - this(fn, idxFn, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(fn))); + public TabixReader(final String filePath, SeekableStream stream) throws IOException { + this(filePath, null, stream); } /** - * @param fn File name of the data file (used for error messages only) + * @param filePath Path to the data file (used for error messages only) + * @param indexPath Full path to the index file. 
Auto-generated if null * @param stream Seekable stream from which the data is read */ - public TabixReader(final String fn, SeekableStream stream) throws IOException { - this(fn, null, stream); + public TabixReader(final String filePath, final String indexPath, SeekableStream stream) throws IOException { + this(filePath, indexPath, stream, null); } /** - * @param fn File name of the data file (used for error messages only) - * @param idxFn Full path to the index file. Auto-generated if null + * @param filePath Path to the data file (used for error messages only) + * @param indexPath Full path to the index file. Auto-generated if null + * @param indexWrapper a wrapper to apply to the raw byte stream of the index file if it is a uri representing a {@link java.nio.file.Path} * @param stream Seekable stream from which the data is read */ - public TabixReader(final String fn, final String idxFn, SeekableStream stream) throws IOException { - mFn = fn; + public TabixReader(final String filePath, final String indexPath, SeekableStream stream, Function indexWrapper) throws IOException { + mFilePath = filePath; mFp = new BlockCompressedInputStream(stream); - if(idxFn == null){ - mIdxFn = ParsingUtils.appendToPath(fn, TabixUtils.STANDARD_INDEX_EXTENSION); + mIndexWrapper = indexWrapper; + if(indexPath == null){ + mIndexPath = ParsingUtils.appendToPath(filePath, TabixUtils.STANDARD_INDEX_EXTENSION); } else { - mIdxFn = idxFn; + mIndexPath = indexPath; } readIndex(); } @@ -137,7 +165,7 @@ public TabixReader(final String fn, final String idxFn, SeekableStream stream) t /** return the source (filename/URL) of that reader */ public String getSource() { - return this.mFn; + return this.mFilePath; } private static int reg2bins(final int beg, final int _end, final int[] list) { @@ -239,7 +267,7 @@ private void readIndex(SeekableStream fp) throws IOException { */ private void readIndex() throws IOException { ISeekableStreamFactory ssf = SeekableStreamFactory.getInstance(); - 
readIndex(ssf.getBufferedStream(ssf.getStreamFor(mIdxFn), 128000)); + readIndex(ssf.getBufferedStream(ssf.getStreamFor(mIndexPath, mIndexWrapper), 128000)); } /** diff --git a/src/main/java/htsjdk/tribble/util/HTTPHelper.java b/src/main/java/htsjdk/tribble/util/HTTPHelper.java index 90e622859..cdd6b277e 100644 --- a/src/main/java/htsjdk/tribble/util/HTTPHelper.java +++ b/src/main/java/htsjdk/tribble/util/HTTPHelper.java @@ -57,6 +57,7 @@ public static synchronized void setProxy(Proxy p) { proxy = p; } + @Override public URL getUrl() { return url; } @@ -65,6 +66,7 @@ public URL getUrl() { * @return content length of the resource * @throws IOException */ + @Override public long getContentLength() throws IOException { HttpURLConnection con = null; @@ -84,6 +86,7 @@ public long getContentLength() throws IOException { } + @Override public InputStream openInputStream() throws IOException { HttpURLConnection connection = openConnection(); @@ -98,7 +101,11 @@ public InputStream openInputStream() throws IOException { * @param end end of range ni bytes * @return * @throws IOException + * + * @deprecated since 12/10/14 Will be removed in a future release, as is somewhat fragile + * and not used. 
*/ + @Override @Deprecated public InputStream openInputStreamForRange(long start, long end) throws IOException { @@ -118,6 +125,7 @@ private HttpURLConnection openConnection() throws IOException { return connection; } + @Override public boolean exists() throws IOException { HttpURLConnection con = null; try { diff --git a/src/main/java/htsjdk/tribble/util/LittleEndianOutputStream.java b/src/main/java/htsjdk/tribble/util/LittleEndianOutputStream.java index 9bec07188..eab2f8785 100644 --- a/src/main/java/htsjdk/tribble/util/LittleEndianOutputStream.java +++ b/src/main/java/htsjdk/tribble/util/LittleEndianOutputStream.java @@ -25,11 +25,13 @@ public LittleEndianOutputStream(OutputStream out) { super(out); } + @Override public void write(int b) throws IOException { out.write(b); written++; } + @Override public void write(byte[] data, int offset, int length) throws IOException { out.write(data, offset, length); diff --git a/src/main/java/htsjdk/tribble/util/ParsingUtils.java b/src/main/java/htsjdk/tribble/util/ParsingUtils.java index 2a88ed735..70c3a3d1d 100644 --- a/src/main/java/htsjdk/tribble/util/ParsingUtils.java +++ b/src/main/java/htsjdk/tribble/util/ParsingUtils.java @@ -23,6 +23,8 @@ */ package htsjdk.tribble.util; +import htsjdk.samtools.seekablestream.SeekablePathStream; +import htsjdk.samtools.util.IOUtil; import java.awt.Color; import java.io.File; import java.io.FileInputStream; @@ -31,6 +33,8 @@ import java.lang.reflect.Constructor; import java.net.MalformedURLException; import java.net.URL; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -40,6 +44,7 @@ import java.util.List; import java.util.Map; import java.util.WeakHashMap; +import java.util.function.Function; /** * @author jrobinso @@ -76,18 +81,37 @@ } + /** + * @return an input stream from the given path + * @throws IOException + */ public static InputStream openInputStream(String path) 
throws IOException { + return openInputStream(path, null); + } - InputStream inputStream; + /** + * open an input stream from the given path and wrap the raw byte stream with a wrapper if given + * + * the wrapper will only be applied to paths that are not http, https, ftp, or file, i.e. any {@link java.nio.file.Path} + * using a custom filesystem plugin + * @param path a uri like string + * @param wrapper to wrap the input stream in, may be used to implement caching or prefetching, etc + * @return + * @throws IOException + */ + public static InputStream openInputStream(String path, Function wrapper) + throws IOException { + final InputStream inputStream; if (path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:")) { inputStream = getURLHelper(new URL(path)).openInputStream(); + } else if (IOUtil.hasScheme(path)) { + inputStream = new SeekablePathStream(IOUtil.getPath(path), wrapper); } else { File file = new File(path); inputStream = new FileInputStream(file); } - return inputStream; } @@ -400,6 +424,8 @@ public static boolean resourceExists(String resource) throws IOException{ } URLHelper helper = getURLHelper(url); return helper.exists(); + } else if (IOUtil.hasScheme(resource)) { + return Files.exists(IOUtil.getPath(resource)); } else { return (new File(resource)).exists(); } diff --git a/src/main/java/htsjdk/tribble/util/TabixUtils.java b/src/main/java/htsjdk/tribble/util/TabixUtils.java index aa365cd58..5ae9f8afd 100644 --- a/src/main/java/htsjdk/tribble/util/TabixUtils.java +++ b/src/main/java/htsjdk/tribble/util/TabixUtils.java @@ -55,6 +55,7 @@ public TPair64(final TPair64 p) { v = p.v; } + @Override public int compareTo(final TPair64 p) { return u == p.u ? 0 : ((u < p.u) ^ (u < 0) ^ (p.u < 0)) ? 
-1 : 1; // unsigned 64-bit comparison } diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java index a9457643f..4926c80fe 100644 --- a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java +++ b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java @@ -25,6 +25,7 @@ package htsjdk.variant.bcf2; +import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.BinaryFeatureCodec; import htsjdk.tribble.Feature; import htsjdk.tribble.FeatureCodecHeader; @@ -44,10 +45,8 @@ import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLineType; -import java.io.ByteArrayInputStream; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; +import java.io.*; +import java.nio.file.Files; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -207,21 +206,11 @@ public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream @Override public boolean canDecode( final String path ) { - FileInputStream fis = null; - try { - fis = new FileInputStream(path); + try (InputStream fis = Files.newInputStream(IOUtil.getPath(path)) ){ final BCFVersion version = BCFVersion.readBCFVersion(fis); return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION; - } catch ( FileNotFoundException e ) { - return false; - } catch ( IOException e ) { + } catch ( final IOException e ) { return false; - } finally { - try { - if ( fis != null ) fis.close(); - } catch ( IOException e ) { - // do nothing - } } } diff --git a/src/main/java/htsjdk/variant/variantcontext/Allele.java b/src/main/java/htsjdk/variant/variantcontext/Allele.java index 44fc6aaa7..71aa20126 100644 --- a/src/main/java/htsjdk/variant/variantcontext/Allele.java +++ b/src/main/java/htsjdk/variant/variantcontext/Allele.java @@ -523,6 +523,7 @@ public static Allele getMatchingAllele(final Collection allAlleles, fina return null; // couldn't find anything } + @Override public int 
compareTo(final Allele other) { if ( isReference() && other.isNonReference() ) return -1; diff --git a/src/main/java/htsjdk/variant/variantcontext/FastGenotype.java b/src/main/java/htsjdk/variant/variantcontext/FastGenotype.java index 665e67242..495ba4192 100644 --- a/src/main/java/htsjdk/variant/variantcontext/FastGenotype.java +++ b/src/main/java/htsjdk/variant/variantcontext/FastGenotype.java @@ -154,6 +154,7 @@ protected FastGenotype(final String sampleName, // // --------------------------------------------------------------------------------------------------------- + @Override public Map getExtendedAttributes() { return extendedAttributes; } diff --git a/src/main/java/htsjdk/variant/variantcontext/GenotypeLikelihoods.java b/src/main/java/htsjdk/variant/variantcontext/GenotypeLikelihoods.java index ee3e08d47..605f2985f 100644 --- a/src/main/java/htsjdk/variant/variantcontext/GenotypeLikelihoods.java +++ b/src/main/java/htsjdk/variant/variantcontext/GenotypeLikelihoods.java @@ -183,6 +183,10 @@ public String getAsString() { * If you know you're biallelic, use getGQLog10FromLikelihoods directly. * @param genotype - actually a genotype type (no call, hom ref, het, hom var) * @return an unsafe quantity that could be negative. In the bi-allelic case, the GQ resulting from best minus next best (if the type is the best). 
+ * + * @deprecated since 2/5/13 use + * {@link GenotypeLikelihoods#getLog10GQ(Genotype, VariantContext)} or + * {@link GenotypeLikelihoods#getLog10GQ(Genotype, List)} */ @Deprecated public double getLog10GQ(GenotypeType genotype){ @@ -554,6 +558,8 @@ public static synchronized void initializeAnyploidPLIndexToAlleleIndices(final i * * @param PLindex the PL index * @return the allele index pair + * + * @deprecated since 2/5/13 */ @Deprecated public static GenotypeLikelihoodsAllelePair getAllelePairUsingDeprecatedOrdering(final int PLindex) { diff --git a/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java b/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java index b8e13c75b..c4664b08a 100644 --- a/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java +++ b/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java @@ -5,11 +5,7 @@ import org.apache.commons.jexl2.JexlException; import org.apache.commons.jexl2.MapContext; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; +import java.util.*; /** * This is an implementation of a Map of {@link JexlVCMatchExp} to true or false values. @@ -17,49 +13,88 @@ */ class JEXLMap implements Map { + /** + * If a JEXL expression contains values that are not available in the given context, the default behavior is to + * treat that expression as a miss match. + */ + public static final JexlMissingValueTreatment DEFAULT_MISSING_VALUE_TREATMENT = JexlMissingValueTreatment.TREAT_AS_MISMATCH; + // our variant context and/or Genotype private final VariantContext vc; private final Genotype g; - // our context - private JexlContext jContext = null; + private final JexlMissingValueTreatment howToTreatMissingValues; /** * our mapping from {@link JexlVCMatchExp} to {@link Boolean}s, which will be set to {@code NULL} * for previously un-cached {@link JexlVCMatchExp}. 
*/ - private Map jexl; + private final Map jexl; - public JEXLMap(final Collection jexlCollection, final VariantContext vc, final Genotype g) { - initialize(jexlCollection); + // our context + private JexlContext jContext = null; + + /** + * Construct a new JEXLMap which can evaluate expressions against a specific genotype and variant context + * @param jexlCollection collection of expressions to be evaluated + * @param vc VariantContext to evaluate expressions against + * @param g genotype to evaluate expressions against, may be null + * @param howToTreatMissingValues how missing values in vc and g should be treated + */ + public JEXLMap(final Collection jexlCollection, final VariantContext vc, final Genotype g, final JexlMissingValueTreatment howToTreatMissingValues) { + this.jexl = initializeMap(jexlCollection); this.vc = vc; this.g = g; + this.howToTreatMissingValues = howToTreatMissingValues; } + + /** + * Construct a new JEXLMap which can evaluate expressions against a specific genotype and variant context + * @param jexlCollection collection of expressions to be evaluated + * @param vc VariantContext to evaluate expressions against + * @param g genotype to evaluate expressions against, may be null + * + * missing values are treated as false + */ + public JEXLMap(final Collection jexlCollection, final VariantContext vc, final Genotype g) { + this(jexlCollection, vc, g, DEFAULT_MISSING_VALUE_TREATMENT); + } + + /** + * Construct a new JEXLMap which can evaluate expressions against a specific VariantContext + * @param jexlCollection collection of expressions to be evaluated + * @param vc VariantContext to evaluate expressions against + * + * missing values are treated as non matches (false) + */ public JEXLMap(final Collection jexlCollection, final VariantContext vc) { - this(jexlCollection, vc, null); + this(jexlCollection, vc, null, DEFAULT_MISSING_VALUE_TREATMENT); } /** * Note: due to laziness, this accessor actually modifies the instance by possibly 
forcing evaluation of an Jexl expression. * - * @throws IllegalArgumentException when {@code o} is {@code null} or + * @throws IllegalArgumentException when {@code key} is {@code null} or * when any of the JexlVCMatchExp (i.e. keys) contains invalid Jexl expressions. */ - public Boolean get(Object o) { - if (o == null) { + @Override + public Boolean get(Object key) { + if (key == null) { throw new IllegalArgumentException("Query key is null"); } // if we've already determined the value, return it - if (jexl.containsKey(o) && jexl.get(o) != null) { - return jexl.get(o); + final Boolean value = jexl.get(key); + if (jexl.containsKey(key) && value != null) { + return value; } // otherwise cast the expression and try again - final JexlVCMatchExp e = (JexlVCMatchExp) o; - evaluateExpression(e); - return jexl.get(e); + final JexlVCMatchExp exp = (JexlVCMatchExp) key; + final boolean matches = evaluateExpression(exp); + jexl.put(exp, matches); + return matches; } /** @@ -67,8 +102,10 @@ public Boolean get(Object o) { * @param o the key * @return true if we have a value for that key */ + @Override public boolean containsKey(Object o) { return jexl.containsKey(o); } + @Override public Set keySet() { return jexl.keySet(); } @@ -85,11 +122,10 @@ public Boolean get(Object o) { * * @throws IllegalArgumentException when any of the JexlVCMatchExp (i.e. keys) contains invalid Jexl expressions. */ + @Override public Collection values() { for (final JexlVCMatchExp exp : jexl.keySet()) { - if (jexl.get(exp) == null) { - evaluateExpression(exp); - } + jexl.computeIfAbsent(exp, k -> evaluateExpression(exp)); } return jexl.values(); } @@ -97,53 +133,61 @@ public Boolean get(Object o) { /** * @return the number of keys, i.e. {@link JexlVCMatchExp}'s held by this mapping. 
*/ + @Override public int size() { return jexl.size(); } + @Override public boolean isEmpty() { return this.jexl.isEmpty(); } + @Override public Boolean put(JexlVCMatchExp jexlVCMatchExp, Boolean aBoolean) { return jexl.put(jexlVCMatchExp, aBoolean); } + @Override public void putAll(Map map) { jexl.putAll(map); } /** - * Initializes all keys with null values indicating that they have not yet been evaluated. + * Initializes a map and give all keys with null values indicating that they have not yet been evaluated. * The actual value will be computed only when the key is requested via {@link #get(Object)} or {@link #values()}. + * + * @return an initialized map of jexlExpression -> null */ - private void initialize(Collection jexlCollection) { - jexl = new HashMap<>(); + private static Map initializeMap(final Collection jexlCollection) { + final Map jexlMap = new HashMap<>(jexlCollection.size()); for (final JexlVCMatchExp exp: jexlCollection) { - jexl.put(exp, null); + jexlMap.put(exp, null); } + + return jexlMap; } /** * Evaluates a {@link JexlVCMatchExp}'s expression, given the current context (and setup the context if it's {@code null}). * * @param exp the {@link JexlVCMatchExp} to evaluate - * + * @return true if the expression matched the context * @throws IllegalArgumentException when {@code exp} is {@code null}, or * when the Jexl expression in {@code exp} fails to evaluate the JexlContext * constructed with the input VC or genotype. */ - private void evaluateExpression(final JexlVCMatchExp exp) { + private boolean evaluateExpression(final JexlVCMatchExp exp) { // if the context is null, we need to create it to evaluate the JEXL expression if (this.jContext == null) { - createContext(); + jContext = createContext(); } try { + //TODO figure out of this can ever evaluate to null or if that isn't actually possible final Boolean value = (Boolean) exp.exp.evaluate(jContext); - // treat errors as no match - jexl.put(exp, value == null ? 
false : value); + return value == null ? howToTreatMissingValues.getMissingValueOrExplode() : value; } catch (final JexlException.Variable e) { - // if exception happens because variable is undefined (i.e. field in expression is not present), evaluate to FALSE - jexl.put(exp,false); + //this occurs when the jexl expression contained a literal that didn't match anything in the given context + return howToTreatMissingValues.getMissingValueOrExplode(); } catch (final JexlException e) { // todo - might be better if no exception is caught here but let's user decide how to deal with them; note this will propagate to get() and values() throw new IllegalArgumentException(String.format("Invalid JEXL expression detected for %s", exp.name), e); @@ -151,16 +195,17 @@ private void evaluateExpression(final JexlVCMatchExp exp) { } /** - * Create the internal JexlContext, only when required. + * Create a new JexlContext * This code is where new JEXL context variables should get added. + * @return a new jexl context initialized appropriately */ - private void createContext() { + private JexlContext createContext() { if (vc == null) { - jContext = new MapContext(Collections.emptyMap()); + return new MapContext(Collections.emptyMap()); } else if (g == null) { - jContext = new VariantJEXLContext(vc); + return new VariantJEXLContext(vc); } else { - jContext = new GenotypeJEXLContext(vc, g); + return new GenotypeJEXLContext(vc, g); } } @@ -170,21 +215,25 @@ private void createContext() { // this doesn't make much sense to implement, boolean doesn't offer too much variety to deal // with evaluating every key in the internal map. 
+ @Override public boolean containsValue(Object o) { throw new UnsupportedOperationException("containsValue() not supported on a JEXLMap"); } // this doesn't make much sense + @Override public Boolean remove(Object o) { throw new UnsupportedOperationException("remove() not supported on a JEXLMap"); } + @Override public Set> entrySet() { - throw new UnsupportedOperationException("clear() not supported on a JEXLMap"); + throw new UnsupportedOperationException("entrySet() not supported on a JEXLMap"); } // nope + @Override public void clear() { throw new UnsupportedOperationException("clear() not supported on a JEXLMap"); } diff --git a/src/main/java/htsjdk/variant/variantcontext/JexlMissingValueTreatment.java b/src/main/java/htsjdk/variant/variantcontext/JexlMissingValueTreatment.java new file mode 100644 index 000000000..204cc3f2c --- /dev/null +++ b/src/main/java/htsjdk/variant/variantcontext/JexlMissingValueTreatment.java @@ -0,0 +1,39 @@ +package htsjdk.variant.variantcontext; + +import java.util.function.Supplier; + +/** + * How to treat values that appear in a jexl expression but are missing in the context it's applied to + */ +public enum JexlMissingValueTreatment { + /** + * Treat expressions with a missing value as a mismatch and evaluate to false + */ + TREAT_AS_MISMATCH(() -> false), + + /** + * Treat expressions with a missing value as a match and evaluate to true + */ + TREAT_AS_MATCH(() -> true), + + /** + * Treat expressions with a missing value as an error and throw an {@link IllegalArgumentException} + */ + THROW(() -> {throw new IllegalArgumentException("Jexl Expression couldn't be evaluated because there was a missing value.");}); + + private final Supplier resultSupplier; + + JexlMissingValueTreatment(final Supplier resultSupplier){ + this.resultSupplier = resultSupplier; + } + + /** + * get the missing value that corresponds to this option or throw an exception + * @return the value that should be used in case of a missing value + * @throws 
IllegalArgumentException if this should be treated as an error + */ + boolean getMissingValueOrExplode(){ + return resultSupplier.get(); + } + +} diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java index 55825fb4d..6def89ef9 100644 --- a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java +++ b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java @@ -1663,6 +1663,7 @@ public String getContig() { * underlying vcf file, VariantContexts representing the same biological event may have different start positions depending on the * specifics of the vcf file they are derived from */ + @Override public int getStart() { return (int)start; } @@ -1673,6 +1674,7 @@ public int getStart() { * For VariantContexts with a single alternate allele, if that allele is an insertion, the end position will be on the reference base * before the insertion event. If the single alt allele is a deletion, the end will be on the final deleted reference base. */ + @Override public int getEnd() { return (int)stop; } diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java b/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java index 96eaa64e3..6988b4c8e 100644 --- a/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java +++ b/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java @@ -52,15 +52,12 @@ /** Use a {@link Lazy} {@link JexlEngine} instance to avoid class-loading issues. (Applications that access this class are otherwise * forced to build a {@link JexlEngine} instance, which depends on some apache logging libraries that mightn't be packaged.) 
*/ - final public static Lazy engine = new Lazy(new Lazy.LazyInitializer() { - @Override - public JexlEngine make() { - final JexlEngine jexl = new JexlEngine(); - jexl.setSilent(false); // will throw errors now for selects that don't evaluate properly - jexl.setLenient(false); - jexl.setDebug(false); - return jexl; - } + final public static Lazy engine = new Lazy<>(() -> { + final JexlEngine jexl = new JexlEngine(); + jexl.setSilent(false); // will throw errors now for selects that don't evaluate properly + jexl.setLenient(false); + jexl.setDebug(false); + return jexl; }); private final static boolean ASSUME_MISSING_FIELDS_ARE_STRINGS = false; @@ -307,6 +304,7 @@ public static boolean match(VariantContext vc, JexlVCMatchExp exp) { * This the best way to apply JEXL expressions to {@link VariantContext} records. * Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions. * + * Expressions that contain literals not available in the VariantContext or Genotype will be treated as not matching * @param vc variant context * @param exps expressions * @return true if there is a match @@ -324,7 +322,20 @@ public static boolean match(VariantContext vc, JexlVCMatchExp exp) { * @return true if there is a match */ public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) { - return match(vc,g, Collections.singletonList(exp)).get(exp); + return match(vc, g, Collections.singletonList(exp), JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT).get(exp); + } + + /** + * Returns true if {@code exp} match {@code vc}, {@code g}. + * See {@link #match(VariantContext, Genotype, Collection)} for full docs. 
+ * @param vc variant context + * @param g genotype + * @param exp expression + * @param howToTreatMissingValues what to do if the jexl expression contains literals that aren't in the context + * @return true if there is a match + */ + public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp, JexlMissingValueTreatment howToTreatMissingValues) { + return match(vc, g, Collections.singletonList(exp), howToTreatMissingValues).get(exp); } /** @@ -333,13 +344,30 @@ public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) { * This the best way to apply JEXL expressions to {@link VariantContext} records. * Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions. * + * Expressions that contain literals not available in the VariantContext or Genotype will be treated as not matching * @param vc variant context * @param g genotype * @param exps expressions * @return true if there is a match */ public static Map match(VariantContext vc, Genotype g, Collection exps) { - return new JEXLMap(exps,vc,g); + return match(vc, g, exps, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT); + } + + /** + * Matches each {@link JexlVCMatchExp} exp against the data contained in {@code vc}, {@code g}, + * and returns a map from these expressions to {@code true} (if they matched) or {@code false} (if they didn't). + * This the best way to apply JEXL expressions to {@link VariantContext} records. + * Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions. 
+ * + * @param vc variant context + * @param g genotype + * @param exps expressions + * @param howToTreatMissingValues what to do if the jexl expression contains literals that aren't in the context + * @return true if there is a match + */ + public static Map match(VariantContext vc, Genotype g, Collection exps, JexlMissingValueTreatment howToTreatMissingValues) { + return new JEXLMap(exps, vc, g, howToTreatMissingValues); } /** diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantJEXLContext.java b/src/main/java/htsjdk/variant/variantcontext/VariantJEXLContext.java index 34cde3395..012586381 100644 --- a/src/main/java/htsjdk/variant/variantcontext/VariantJEXLContext.java +++ b/src/main/java/htsjdk/variant/variantcontext/VariantJEXLContext.java @@ -76,6 +76,7 @@ public VariantJEXLContext(VariantContext vc) { this.vc = vc; } + @Override public Object get(String name) { Object result = null; if ( attributes.containsKey(name) ) { // dynamic resolution of name -> value via map @@ -89,6 +90,7 @@ public Object get(String name) { return result; } + @Override public boolean has(String name) { return get(name) != null; } @@ -96,6 +98,7 @@ public boolean has(String name) { /** * @throws UnsupportedOperationException */ + @Override public void set(String name, Object value) { throw new UnsupportedOperationException("remove() not supported on a VariantJEXLContext"); } diff --git a/src/main/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java b/src/main/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java index 04609a89b..44362d6c1 100644 --- a/src/main/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java +++ b/src/main/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java @@ -36,7 +36,7 @@ * * @author Yossi Farjoun * - * use {@link FilteringVariantContextIterator} instead + * @deprecated since 2/29/16 use {@link FilteringVariantContextIterator} instead */ @Deprecated diff --git 
a/src/main/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriter.java index 4604316b2..481ab871e 100644 --- a/src/main/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriter.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriter.java @@ -39,10 +39,12 @@ public AsyncVariantContextWriter(final VariantContextWriter out, final int queue @Override protected final String getThreadNamePrefix() { return "VariantContextWriterThread-"; } + @Override public void add(final VariantContext vc) { write(vc); } + @Override public void writeHeader(final VCFHeader header) { this.underlyingWriter.writeHeader(header); } diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriter.java index f9dd458d0..9582e00ab 100644 --- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriter.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriter.java @@ -231,6 +231,7 @@ public FTGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEnc super(header, fieldEncoder); } + @Override public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException { final String fieldValue = g.getFilters(); getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype); diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/IndexingVariantContextWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/IndexingVariantContextWriter.java index 6a77f6b3b..fa3f6ba54 100644 --- a/src/main/java/htsjdk/variant/variantcontext/writer/IndexingVariantContextWriter.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/IndexingVariantContextWriter.java @@ -60,6 +60,8 @@ private IndexingVariantContextWriter(final String name, final File location, fin this.refDict 
= refDict; } + static String DEFAULT_READER_NAME = "Reader Name"; + /** * Create a VariantContextWriter with an associated index using the default index creator * @@ -126,11 +128,13 @@ public String getStreamName() { return name; } + @Override public abstract void writeHeader(VCFHeader header); /** * attempt to close the VCF file */ + @Override public void close() { try { // close the underlying output stream @@ -161,6 +165,7 @@ public SAMSequenceDictionary getRefDict() { * * @param vc the Variant Context object */ + @Override public void add(final VariantContext vc) { // if we are doing on the fly indexing, add the record ***before*** we write any bytes if ( indexer != null ) @@ -175,6 +180,6 @@ public void add(final VariantContext vc) { * @return */ protected static final String writerName(final File location, final OutputStream stream) { - return location == null ? stream.toString() : location.getAbsolutePath(); + return location == null ? stream == null ? DEFAULT_READER_NAME : stream.toString() : location.getAbsolutePath(); } } diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriter.java index 21854827b..edc70c4ff 100644 --- a/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriter.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriter.java @@ -51,6 +51,7 @@ public SortingVariantContextWriter(VariantContextWriter innerWriter, int maxCach this(innerWriter, maxCachingStartDistance, false); // by default, don't own inner } + @Override protected void noteCurrentRecord(VariantContext vc) { super.noteCurrentRecord(vc); // first, check for errors diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java b/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java index 690a7813c..7d9273f97 100644 --- 
a/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java @@ -186,6 +186,7 @@ private synchronized void emitRecords(boolean emitUnsafe) { private static class VariantContextComparator implements Comparator, Serializable { private static final long serialVersionUID = 1L; + @Override public int compare(VCFRecord r1, VCFRecord r2) { return r1.vc.getStart() - r2.vc.getStart(); } diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriter.java index 187ff17c3..843901a20 100644 --- a/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriter.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriter.java @@ -40,6 +40,7 @@ /** * attempt to close the VCF file */ + @Override public void close(); /** diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java b/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java index 56c8b8bf6..ddc0d50d5 100644 --- a/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java @@ -53,7 +53,6 @@ * *

    * Provides methods for creating VariantContextWriters using the Builder pattern. - * Replaces VariantContextWriterFactory. *

    *

    * The caller must choose an output file or an output stream for the VariantContextWriter to write to. @@ -407,8 +406,9 @@ else if (STREAM_TYPES.contains(this.outType)) typeToBuild = OutputType.BCF_STREAM; } + // If we are writing to a file, or a special file type (ex. pipe) where the stream is not yet open. OutputStream outStreamFromFile = this.outStream; - if (FILE_TYPES.contains(this.outType)) { + if (FILE_TYPES.contains(this.outType) || (STREAM_TYPES.contains(this.outType) && this.outStream == null)) { try { outStreamFromFile = IOUtil.maybeBufferOutputStream(new FileOutputStream(outFile), bufferSize); } catch (final FileNotFoundException e) { @@ -446,7 +446,7 @@ else if (STREAM_TYPES.contains(this.outType)) if (options.contains(Options.INDEX_ON_THE_FLY)) throw new IllegalArgumentException("VCF index creation not supported for stream output."); - writer = createVCFWriter(null, outStream); + writer = createVCFWriter(null, outStreamFromFile); break; case BCF_STREAM: if (options.contains(Options.INDEX_ON_THE_FLY)) diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterFactory.java b/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterFactory.java deleted file mode 100644 index e1e00265d..000000000 --- a/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterFactory.java +++ /dev/null @@ -1,282 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or 
substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package htsjdk.variant.variantcontext.writer; - -import htsjdk.samtools.Defaults; -import htsjdk.samtools.SAMSequenceDictionary; -import htsjdk.samtools.util.BlockCompressedOutputStream; -import htsjdk.samtools.util.IOUtil; -import htsjdk.samtools.util.RuntimeIOException; -import htsjdk.tribble.AbstractFeatureReader; -import htsjdk.tribble.index.IndexCreator; -import htsjdk.tribble.index.tabix.TabixFormat; -import htsjdk.tribble.index.tabix.TabixIndexCreator; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.OutputStream; -import java.util.EnumSet; - -/** - * Factory methods to create VariantContext writers - * - * @author depristo - * @since 5/12 - * - * @deprecated Replaced by {@link VariantContextWriterBuilder} - */ -@Deprecated -public class VariantContextWriterFactory { - - public static final EnumSet DEFAULT_OPTIONS = EnumSet.of(Options.INDEX_ON_THE_FLY); - public static final EnumSet NO_OPTIONS = EnumSet.noneOf(Options.class); - - static { - if (Defaults.USE_ASYNC_IO_WRITE_FOR_TRIBBLE) { - DEFAULT_OPTIONS.add(Options.USE_ASYNC_IO); - } - } - - private VariantContextWriterFactory() {} - - public static VariantContextWriter create(final File location, final SAMSequenceDictionary refDict) { - return create(location, openOutputStream(location), refDict, DEFAULT_OPTIONS); - } - - public static VariantContextWriter create(final File location, final SAMSequenceDictionary refDict, 
final EnumSet options) { - return create(location, openOutputStream(location), refDict, options); - } - - /** - * @param output If buffered writing is desired, caller must provide some kind of buffered OutputStream. - */ - public static VariantContextWriter create(final File location, - final OutputStream output, - final SAMSequenceDictionary refDict) { - return create(location, output, refDict, DEFAULT_OPTIONS); - } - - /** - * @param output If buffered writing is desired, caller must provide some kind of buffered OutputStream. - */ - public static VariantContextWriter create(final OutputStream output, - final SAMSequenceDictionary refDict, - final EnumSet options) { - return create(null, output, refDict, options); - } - - /** - * @param location Note that this parameter is used to producing intelligent log messages, and for naming the index, - * but does not control where the file is written - * @param output This is where the BCF is actually written. If buffered writing is desired, caller must provide - * some kind of buffered OutputStream. - */ - public static VariantContextWriter createBcf2(final File location, - final OutputStream output, - final SAMSequenceDictionary refDict, - final EnumSet options) { - return maybeWrapWithAsyncWriter(new BCF2Writer(location, output, refDict, - options.contains(Options.INDEX_ON_THE_FLY), - options.contains(Options.DO_NOT_WRITE_GENOTYPES)), options); - } - - /** - * @param location Note that this parameter is used to producing intelligent log messages, and for naming the index, - * but does not control where the file is written - * @param output This is where the BCF is actually written. If buffered writing is desired, caller must provide - * some kind of buffered OutputStream. 
- */ - public static VariantContextWriter createBcf2(final File location, - final OutputStream output, - final SAMSequenceDictionary refDict, - final IndexCreator indexCreator, - final EnumSet options) { - return maybeWrapWithAsyncWriter(new BCF2Writer(location, output, refDict, indexCreator, - options.contains(Options.INDEX_ON_THE_FLY), - options.contains(Options.DO_NOT_WRITE_GENOTYPES)), options); - } - - /** - * @param location Note that this parameter is used to producing intelligent log messages, and for naming the index, - * but does not control where the file is written - * @param output This is where the VCF is actually written. If buffered writing is desired, caller must provide - * some kind of buffered OutputStream. - */ - public static VariantContextWriter createVcf(final File location, - final OutputStream output, - final SAMSequenceDictionary refDict, - final EnumSet options) { - return maybeWrapWithAsyncWriter(new VCFWriter(location, output, refDict, - options.contains(Options.INDEX_ON_THE_FLY), - options.contains(Options.DO_NOT_WRITE_GENOTYPES), - options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER), - options.contains(Options.WRITE_FULL_FORMAT_FIELD)), options); - } - - /** - * @param location Note that this parameter is used to producing intelligent log messages, and for naming the index, - * but does not control where the file is written - * @param output This is where the VCF is actually written. If buffered writing is desired, caller must provide - * some kind of buffered OutputStream. 
- */ - public static VariantContextWriter createVcf(final File location, - final OutputStream output, - final SAMSequenceDictionary refDict, - final IndexCreator indexCreator, - final EnumSet options) { - return maybeWrapWithAsyncWriter(new VCFWriter(location, output, refDict, indexCreator, - options.contains(Options.INDEX_ON_THE_FLY), - options.contains(Options.DO_NOT_WRITE_GENOTYPES), - options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER), - options.contains(Options.WRITE_FULL_FORMAT_FIELD)), options); - } - - /** - * @param location Note that this parameter is used to producing intelligent log messages, - * but does not control where the file is written - * @param output This is where the VCF is actually written. If buffered writing is desired, caller must provide - * some kind of buffered OutputStream. - */ - public static VariantContextWriter createBlockCompressedVcf(final File location, - final OutputStream output, - final SAMSequenceDictionary refDict, - final EnumSet options) { - final TabixIndexCreator indexCreator; - if (options.contains(Options.INDEX_ON_THE_FLY)) { - indexCreator = new TabixIndexCreator(refDict, TabixFormat.VCF); - } else { - indexCreator = null; - } - return maybeWrapWithAsyncWriter(new VCFWriter(location, BlockCompressedOutputStream.maybeBgzfWrapOutputStream(location, output), - refDict, indexCreator, - options.contains(Options.INDEX_ON_THE_FLY), - options.contains(Options.DO_NOT_WRITE_GENOTYPES), - options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER), - options.contains(Options.WRITE_FULL_FORMAT_FIELD)), options); - } - - /** - * @param location Note that this parameter is used to producing intelligent log messages, - * but does not control where the file is written - * @param output This is where the VCF is actually written. If buffered writing is desired, caller must provide - * some kind of buffered OutputStream. 
- */ - public static VariantContextWriter createBlockCompressedVcf(final File location, - final OutputStream output, - final SAMSequenceDictionary refDict, - final IndexCreator indexCreator, - final EnumSet options) { - return maybeWrapWithAsyncWriter(new VCFWriter(location, BlockCompressedOutputStream.maybeBgzfWrapOutputStream(location, output), - refDict, indexCreator, - options.contains(Options.INDEX_ON_THE_FLY), - options.contains(Options.DO_NOT_WRITE_GENOTYPES), - options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER), - options.contains(Options.WRITE_FULL_FORMAT_FIELD)), options); - } - - public static VariantContextWriter create(final File location, - final OutputStream output, - final SAMSequenceDictionary refDict, - final EnumSet options) { - - if (isBCFOutput(location, options)) { - return createBcf2(location, output, refDict, options); - } else if (isCompressedVcf(location)) { - return createBlockCompressedVcf(location, output, refDict, options); - } else { - return createVcf(location, output, refDict, options); - } - } - - /** - * @param output If buffered writing is desired, caller must provide some kind of buffered OutputStream. 
- */ - public static VariantContextWriter create(final File location, - final OutputStream output, - final SAMSequenceDictionary refDict, - final IndexCreator indexCreator, - final EnumSet options) { - - if (isBCFOutput(location, options)) { - return createBcf2(location, output, refDict, indexCreator, options); - } else if (isCompressedVcf(location)) { - return createBlockCompressedVcf(location, output, refDict, indexCreator, options); - } else { - return createVcf(location, output, refDict, indexCreator, options); - } - } - - private static VariantContextWriter maybeWrapWithAsyncWriter(final VariantContextWriter writer, - final EnumSet options) { - if (options.contains(Options.USE_ASYNC_IO)) { - return new AsyncVariantContextWriter(writer, AsyncVariantContextWriter.DEFAULT_QUEUE_SIZE); - } - else return writer; - } - - /** - * Should we output a BCF file based solely on the name of the file at location? - * - * @param location - * @return - */ - public static boolean isBCFOutput(final File location) { - return isBCFOutput(location, EnumSet.noneOf(Options.class)); - } - - public static boolean isBCFOutput(final File location, final EnumSet options) { - return options.contains(Options.FORCE_BCF) || (location != null && location.getName().contains(".bcf")); - } - - public static boolean isCompressedVcf(final File location) { - if (location == null) - return false; - - return AbstractFeatureReader.hasBlockCompressedExtension(location); - } - - public static VariantContextWriter sortOnTheFly(final VariantContextWriter innerWriter, final int maxCachingStartDistance) { - return sortOnTheFly(innerWriter, maxCachingStartDistance, false); - } - - public static VariantContextWriter sortOnTheFly(final VariantContextWriter innerWriter, final int maxCachingStartDistance, final boolean takeOwnershipOfInner) { - return new SortingVariantContextWriter(innerWriter, maxCachingStartDistance, takeOwnershipOfInner); - } - - /** - * Returns a output stream writing to location, or throws 
an exception if this fails - * @param location - * @return - */ - protected static OutputStream openOutputStream(final File location) { - try { - return IOUtil.maybeBufferOutputStream(new FileOutputStream(location)); - } catch (final FileNotFoundException e) { - throw new RuntimeIOException(location + ": Unable to create VCF writer", e); - } - } -} diff --git a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java index 16857b4e6..8a55a1946 100644 --- a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java +++ b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java @@ -26,6 +26,7 @@ package htsjdk.variant.vcf; import htsjdk.samtools.util.BlockCompressedInputStream; +import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.Feature; import htsjdk.tribble.NameAwareCodec; @@ -45,6 +46,8 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -254,6 +257,7 @@ public Feature decodeLoc(String line) { * @param line the line * @return a VariantContext */ + @Override public VariantContext decode(String line) { return decodeLine(line, true); } @@ -364,6 +368,7 @@ else if ( parts[2].equals(VCFConstants.EMPTY_ID_FIELD) ) * get the name of this codec * @return our set name */ + @Override public String getName() { return name; } @@ -372,6 +377,7 @@ public String getName() { * set the name of this codec * @param name new name */ + @Override public void setName(String name) { this.name = name; } @@ -616,10 +622,11 @@ private static void parseSingleAltAllele(List alleles, String alt, int l public static boolean canDecodeFile(final String potentialInput, final String MAGIC_HEADER_LINE) { try { + Path path = IOUtil.getPath(potentialInput); //isVCFStream closes the stream that's passed in - return 
isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) || - isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) || - isVCFStream(new BlockCompressedInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE); + return isVCFStream(Files.newInputStream(path), MAGIC_HEADER_LINE) || + isVCFStream(new GZIPInputStream(Files.newInputStream(path)), MAGIC_HEADER_LINE) || + isVCFStream(new BlockCompressedInputStream(Files.newInputStream(path)), MAGIC_HEADER_LINE); } catch ( FileNotFoundException e ) { return false; } catch ( IOException e ) { diff --git a/src/main/java/htsjdk/variant/vcf/VCF3Codec.java b/src/main/java/htsjdk/variant/vcf/VCF3Codec.java index 5f4f48ec5..e9ca3abdf 100644 --- a/src/main/java/htsjdk/variant/vcf/VCF3Codec.java +++ b/src/main/java/htsjdk/variant/vcf/VCF3Codec.java @@ -56,6 +56,7 @@ * @param reader the line reader to take header lines from * @return the number of header lines */ + @Override public Object readActualHeader(final LineIterator reader) { final List headerStrings = new ArrayList(); @@ -97,6 +98,7 @@ else if (line.startsWith(VCFHeader.HEADER_INDICATOR)) { * @param filterString the string to parse * @return a set of the filters applied */ + @Override protected List parseFilters(String filterString) { // null for unfiltered diff --git a/src/main/java/htsjdk/variant/vcf/VCFCodec.java b/src/main/java/htsjdk/variant/vcf/VCFCodec.java index 89d68813e..6e5d3b7d2 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFCodec.java +++ b/src/main/java/htsjdk/variant/vcf/VCFCodec.java @@ -125,6 +125,7 @@ else if (line.startsWith(VCFHeader.HEADER_INDICATOR)) { * @param filterString the string to parse * @return a set of the filters applied or null if filters were not applied to the record (e.g. 
as per the missing value in a VCF) */ + @Override protected List parseFilters(final String filterString) { // null for unfiltered if ( filterString.equals(VCFConstants.UNFILTERED) ) diff --git a/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java index 48e0cdf0d..4d8c3447f 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java +++ b/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java @@ -57,6 +57,7 @@ private VCFHeaderLineType type; // access methods + @Override public String getID() { return name; } public String getDescription() { return description; } public VCFHeaderLineType getType() { return type; } @@ -221,6 +222,7 @@ private void validate() { * make a string representation of this header line * @return a string representation */ + @Override protected String toStringEncoding() { Map map = new LinkedHashMap(); map.put("ID", name); diff --git a/src/main/java/htsjdk/variant/vcf/VCFEncoder.java b/src/main/java/htsjdk/variant/vcf/VCFEncoder.java index a90906684..0605b73b9 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFEncoder.java +++ b/src/main/java/htsjdk/variant/vcf/VCFEncoder.java @@ -22,361 +22,362 @@ */ public class VCFEncoder { - /** - * The encoding used for VCF files: ISO-8859-1 - */ - public static final Charset VCF_CHARSET = Charset.forName("ISO-8859-1"); - private static final String QUAL_FORMAT_STRING = "%.2f"; - private static final String QUAL_FORMAT_EXTENSION_TO_TRIM = ".00"; - - private final IntGenotypeFieldAccessors GENOTYPE_FIELD_ACCESSORS = new IntGenotypeFieldAccessors(); - - private VCFHeader header; - - private boolean allowMissingFieldsInHeader = false; - - private boolean outputTrailingFormatFields = false; - - /** - * Prepare a VCFEncoder that will encode records appropriate to the given VCF header, optionally - * allowing missing fields in the header. 
- */ - public VCFEncoder(final VCFHeader header, final boolean allowMissingFieldsInHeader, final boolean outputTrailingFormatFields) { - if (header == null) throw new NullPointerException("The VCF header must not be null."); - this.header = header; - this.allowMissingFieldsInHeader = allowMissingFieldsInHeader; - this.outputTrailingFormatFields = outputTrailingFormatFields; - } - - /** - * Please see the notes in the default constructor - */ - @Deprecated - public void setVCFHeader(final VCFHeader header) { - this.header = header; - } - - /** - * Please see the notes in the default constructor - */ - @Deprecated - public void setAllowMissingFieldsInHeader(final boolean allow) { - this.allowMissingFieldsInHeader = allow; - } - - public String encode(final VariantContext context) { - if (this.header == null) { - throw new NullPointerException("The header field must be set on the VCFEncoder before encoding records."); - } - - final StringBuilder stringBuilder = new StringBuilder(); - - // CHROM - stringBuilder.append(context.getContig()).append(VCFConstants.FIELD_SEPARATOR) - // POS - .append(String.valueOf(context.getStart())).append(VCFConstants.FIELD_SEPARATOR) - // ID - .append(context.getID()).append(VCFConstants.FIELD_SEPARATOR) - // REF - .append(context.getReference().getDisplayString()).append(VCFConstants.FIELD_SEPARATOR); - - // ALT - if ( context.isVariant() ) { - Allele altAllele = context.getAlternateAllele(0); - String alt = altAllele.getDisplayString(); - stringBuilder.append(alt); - - for (int i = 1; i < context.getAlternateAlleles().size(); i++) { - altAllele = context.getAlternateAllele(i); - alt = altAllele.getDisplayString(); - stringBuilder.append(','); - stringBuilder.append(alt); - } - } else { - stringBuilder.append(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD); - } - - stringBuilder.append(VCFConstants.FIELD_SEPARATOR); - - // QUAL - if ( ! 
context.hasLog10PError()) stringBuilder.append(VCFConstants.MISSING_VALUE_v4); - else stringBuilder.append(formatQualValue(context.getPhredScaledQual())); - stringBuilder.append(VCFConstants.FIELD_SEPARATOR) - // FILTER - .append(getFilterString(context)).append(VCFConstants.FIELD_SEPARATOR); - - // INFO - final Map infoFields = new TreeMap(); - for (final Map.Entry field : context.getAttributes().entrySet() ) { - if ( ! this.header.hasInfoLine(field.getKey())) fieldIsMissingFromHeaderError(context, field.getKey(), "INFO"); - - final String outputValue = formatVCFField(field.getValue()); - if (outputValue != null) infoFields.put(field.getKey(), outputValue); - } - writeInfoString(infoFields, stringBuilder); - - // FORMAT - final GenotypesContext gc = context.getGenotypes(); - if (gc.isLazyWithData() && ((LazyGenotypesContext) gc).getUnparsedGenotypeData() instanceof String) { - stringBuilder.append(VCFConstants.FIELD_SEPARATOR); - stringBuilder.append(((LazyGenotypesContext) gc).getUnparsedGenotypeData().toString()); - } else { - final List genotypeAttributeKeys = context.calcVCFGenotypeKeys(this.header); - if ( ! genotypeAttributeKeys.isEmpty()) { - for (final String format : genotypeAttributeKeys) - if ( ! 
this.header.hasFormatLine(format)) - fieldIsMissingFromHeaderError(context, format, "FORMAT"); - - final String genotypeFormatString = ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys); - - stringBuilder.append(VCFConstants.FIELD_SEPARATOR); - stringBuilder.append(genotypeFormatString); - - final Map alleleStrings = buildAlleleStrings(context); - addGenotypeData(context, alleleStrings, genotypeAttributeKeys, stringBuilder); - } - } - - return stringBuilder.toString(); - } - - VCFHeader getVCFHeader() { - return this.header; - } - - boolean getAllowMissingFieldsInHeader() { - return this.allowMissingFieldsInHeader; - } - - private String getFilterString(final VariantContext vc) { - if (vc.isFiltered()) { - for (final String filter : vc.getFilters()) { - if ( ! this.header.hasFilterLine(filter)) fieldIsMissingFromHeaderError(vc, filter, "FILTER"); - } - - return ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())); - } - else if (vc.filtersWereApplied()) return VCFConstants.PASSES_FILTERS_v4; - else return VCFConstants.UNFILTERED; - } - - private String formatQualValue(final double qual) { - String s = String.format(QUAL_FORMAT_STRING, qual); - if ( s.endsWith(QUAL_FORMAT_EXTENSION_TO_TRIM) ) - s = s.substring(0, s.length() - QUAL_FORMAT_EXTENSION_TO_TRIM.length()); - return s; - } - - private void fieldIsMissingFromHeaderError(final VariantContext vc, final String id, final String field) { - if ( ! allowMissingFieldsInHeader) - throw new IllegalStateException("Key " + id + " found in VariantContext field " + field - + " at " + vc.getContig() + ":" + vc.getStart() - + " but this key isn't defined in the VCFHeader. 
We require all VCFs to have" - + " complete VCF headers by default."); - } - - String formatVCFField(final Object val) { - final String result; - if ( val == null ) - result = VCFConstants.MISSING_VALUE_v4; - else if ( val instanceof Double ) - result = formatVCFDouble((Double) val); - else if ( val instanceof Boolean ) - result = (Boolean)val ? "" : null; // empty string for true, null for false - else if ( val instanceof List ) { - result = formatVCFField(((List)val).toArray()); - } else if ( val.getClass().isArray() ) { - final int length = Array.getLength(val); - if ( length == 0 ) - return formatVCFField(null); - final StringBuilder sb = new StringBuilder(formatVCFField(Array.get(val, 0))); - for ( int i = 1; i < length; i++) { - sb.append(','); - sb.append(formatVCFField(Array.get(val, i))); - } - result = sb.toString(); - } else - result = val.toString(); - - return result; - } - - /** - * Takes a double value and pretty prints it to a String for display - * - * Large doubles => gets %.2f style formatting - * Doubles < 1 / 10 but > 1/100 => get %.3f style formatting - * Double < 1/100 => %.3e formatting - * @param d - * @return - */ - public static String formatVCFDouble(final double d) { - final String format; - if ( d < 1 ) { - if ( d < 0.01 ) { - if ( Math.abs(d) >= 1e-20 ) - format = "%.3e"; - else { - // return a zero format - return "0.00"; - } - } else { - format = "%.3f"; - } - } else { - format = "%.2f"; - } - - return String.format(format, d); - } - - static int countOccurrences(final char c, final String s) { - int count = 0; - for (int i = 0; i < s.length(); i++) { - count += s.charAt(i) == c ? 
1 : 0; - } - return count; - } - - static boolean isMissingValue(final String s) { - // we need to deal with the case that it's a list of missing values - return (countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + countOccurrences(',', s) == s.length()); - } - - /* - * Add the genotype data - */ - public void addGenotypeData(final VariantContext vc, final Map alleleMap, final List genotypeFormatKeys, final StringBuilder builder) { - final int ploidy = vc.getMaxPloidy(2); - - for (final String sample : this.header.getGenotypeSamples()) { - builder.append(VCFConstants.FIELD_SEPARATOR); - - Genotype g = vc.getGenotype(sample); - if (g == null) g = GenotypeBuilder.createMissing(sample, ploidy); - - final List attrs = new ArrayList(genotypeFormatKeys.size()); - for (final String field : genotypeFormatKeys) { - if (field.equals(VCFConstants.GENOTYPE_KEY)) { - if ( ! g.isAvailable()) { - throw new IllegalStateException("GTs cannot be missing for some samples if they are available for others in the record"); - } - - writeAllele(g.getAllele(0), alleleMap, builder); - for (int i = 1; i < g.getPloidy(); i++) { - builder.append(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED); - writeAllele(g.getAllele(i), alleleMap, builder); - } - continue; - - } else { - final String outputValue; - if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY ) ) { - outputValue = g.isFiltered() ? 
g.getFilters() : VCFConstants.PASSES_FILTERS_v4; - } else { - final IntGenotypeFieldAccessors.Accessor accessor = GENOTYPE_FIELD_ACCESSORS.getAccessor(field); - if ( accessor != null ) { - final int[] intValues = accessor.getValues(g); - if ( intValues == null ) - outputValue = VCFConstants.MISSING_VALUE_v4; - else if ( intValues.length == 1 ) // fast path - outputValue = Integer.toString(intValues[0]); - else { - final StringBuilder sb = new StringBuilder(); - sb.append(intValues[0]); - for ( int i = 1; i < intValues.length; i++) { - sb.append(','); - sb.append(intValues[i]); - } - outputValue = sb.toString(); - } - } else { - Object val = g.hasExtendedAttribute(field) ? g.getExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4; - - final VCFFormatHeaderLine metaData = this.header.getFormatHeaderLine(field); - if ( metaData != null ) { - final int numInFormatField = metaData.getCount(vc); - if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) { - // If we have a missing field but multiple values are expected, we need to construct a new string with all fields. - // For example, if Number=2, the string has to be ".,." 
- final StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4); - for ( int i = 1; i < numInFormatField; i++ ) { - sb.append(','); - sb.append(VCFConstants.MISSING_VALUE_v4); - } - val = sb.toString(); - } - } - - // assume that if key is absent, then the given string encoding suffices - outputValue = formatVCFField(val); - } - } - - if ( outputValue != null ) - attrs.add(outputValue); - } - } - - // strip off trailing missing values - if (!outputTrailingFormatFields) { - for (int i = attrs.size() - 1; i >= 0; i--) { - if (isMissingValue(attrs.get(i))) attrs.remove(i); - else break; - } - } - - for (int i = 0; i < attrs.size(); i++) { - if ( i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY)) { - builder.append(VCFConstants.GENOTYPE_FIELD_SEPARATOR); - } - builder.append(attrs.get(i)); - } - } - } - - /* - * Create the info string; assumes that no values are null - */ - private void writeInfoString(final Map infoFields, final StringBuilder builder) { - if ( infoFields.isEmpty() ) { - builder.append(VCFConstants.EMPTY_INFO_FIELD); - return; - } - - boolean isFirst = true; - for (final Map.Entry entry : infoFields.entrySet()) { - if (isFirst) isFirst = false; - else builder.append(VCFConstants.INFO_FIELD_SEPARATOR); - - builder.append(entry.getKey()); - - if ( ! 
entry.getValue().equals("")) { - final VCFInfoHeaderLine metaData = this.header.getInfoHeaderLine(entry.getKey()); - if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) { - builder.append('='); - builder.append(entry.getValue()); - } - } - } - } - - public Map buildAlleleStrings(final VariantContext vc) { - final Map alleleMap = new HashMap(vc.getAlleles().size()+1); - alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup - - final List alleles = vc.getAlleles(); - for ( int i = 0; i < alleles.size(); i++ ) { - alleleMap.put(alleles.get(i), String.valueOf(i)); - } - - return alleleMap; - } - - private void writeAllele(final Allele allele, final Map alleleMap, final StringBuilder builder) { - final String encoding = alleleMap.get(allele); - if ( encoding == null ) - throw new RuntimeException("Allele " + allele + " is not an allele in the variant context"); - builder.append(encoding); - } + /** + * The encoding used for VCF files: ISO-8859-1 + */ + public static final Charset VCF_CHARSET = Charset.forName("ISO-8859-1"); + private static final String QUAL_FORMAT_STRING = "%.2f"; + private static final String QUAL_FORMAT_EXTENSION_TO_TRIM = ".00"; + + private final IntGenotypeFieldAccessors GENOTYPE_FIELD_ACCESSORS = new IntGenotypeFieldAccessors(); + + private VCFHeader header; + + private boolean allowMissingFieldsInHeader = false; + + private boolean outputTrailingFormatFields = false; + + /** + * Prepare a VCFEncoder that will encode records appropriate to the given VCF header, optionally + * allowing missing fields in the header. 
+ */ + public VCFEncoder(final VCFHeader header, final boolean allowMissingFieldsInHeader, final boolean outputTrailingFormatFields) { + if (header == null) throw new NullPointerException("The VCF header must not be null."); + this.header = header; + this.allowMissingFieldsInHeader = allowMissingFieldsInHeader; + this.outputTrailingFormatFields = outputTrailingFormatFields; + } + + /** + * @deprecated since 10/24/13 use the constructor + */ + @Deprecated + public void setVCFHeader(final VCFHeader header) { + this.header = header; + } + + /** + * @deprecated since 10/24/13 use the constructor + */ + @Deprecated + public void setAllowMissingFieldsInHeader(final boolean allow) { + this.allowMissingFieldsInHeader = allow; + } + + public String encode(final VariantContext context) { + if (this.header == null) { + throw new NullPointerException("The header field must be set on the VCFEncoder before encoding records."); + } + + final StringBuilder stringBuilder = new StringBuilder(); + + // CHROM + stringBuilder.append(context.getContig()).append(VCFConstants.FIELD_SEPARATOR) + // POS + .append(String.valueOf(context.getStart())).append(VCFConstants.FIELD_SEPARATOR) + // ID + .append(context.getID()).append(VCFConstants.FIELD_SEPARATOR) + // REF + .append(context.getReference().getDisplayString()).append(VCFConstants.FIELD_SEPARATOR); + + // ALT + if (context.isVariant()) { + Allele altAllele = context.getAlternateAllele(0); + String alt = altAllele.getDisplayString(); + stringBuilder.append(alt); + + for (int i = 1; i < context.getAlternateAlleles().size(); i++) { + altAllele = context.getAlternateAllele(i); + alt = altAllele.getDisplayString(); + stringBuilder.append(','); + stringBuilder.append(alt); + } + } else { + stringBuilder.append(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD); + } + + stringBuilder.append(VCFConstants.FIELD_SEPARATOR); + + // QUAL + if (!context.hasLog10PError()) stringBuilder.append(VCFConstants.MISSING_VALUE_v4); + else 
stringBuilder.append(formatQualValue(context.getPhredScaledQual())); + stringBuilder.append(VCFConstants.FIELD_SEPARATOR) + // FILTER + .append(getFilterString(context)).append(VCFConstants.FIELD_SEPARATOR); + + // INFO + final Map infoFields = new TreeMap<>(); + for (final Map.Entry field : context.getAttributes().entrySet()) { + if (!this.header.hasInfoLine(field.getKey())) + fieldIsMissingFromHeaderError(context, field.getKey(), "INFO"); + + final String outputValue = formatVCFField(field.getValue()); + if (outputValue != null) infoFields.put(field.getKey(), outputValue); + } + writeInfoString(infoFields, stringBuilder); + + // FORMAT + final GenotypesContext gc = context.getGenotypes(); + if (gc.isLazyWithData() && ((LazyGenotypesContext) gc).getUnparsedGenotypeData() instanceof String) { + stringBuilder.append(VCFConstants.FIELD_SEPARATOR); + stringBuilder.append(((LazyGenotypesContext) gc).getUnparsedGenotypeData().toString()); + } else { + final List genotypeAttributeKeys = context.calcVCFGenotypeKeys(this.header); + if (!genotypeAttributeKeys.isEmpty()) { + for (final String format : genotypeAttributeKeys) + if (!this.header.hasFormatLine(format)) + fieldIsMissingFromHeaderError(context, format, "FORMAT"); + + final String genotypeFormatString = ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys); + + stringBuilder.append(VCFConstants.FIELD_SEPARATOR); + stringBuilder.append(genotypeFormatString); + + final Map alleleStrings = buildAlleleStrings(context); + addGenotypeData(context, alleleStrings, genotypeAttributeKeys, stringBuilder); + } + } + + return stringBuilder.toString(); + } + + VCFHeader getVCFHeader() { + return this.header; + } + + boolean getAllowMissingFieldsInHeader() { + return this.allowMissingFieldsInHeader; + } + + private String getFilterString(final VariantContext vc) { + if (vc.isFiltered()) { + for (final String filter : vc.getFilters()) { + if (!this.header.hasFilterLine(filter)) 
fieldIsMissingFromHeaderError(vc, filter, "FILTER"); + } + + return ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())); + } else if (vc.filtersWereApplied()) return VCFConstants.PASSES_FILTERS_v4; + else return VCFConstants.UNFILTERED; + } + + private String formatQualValue(final double qual) { + String s = String.format(QUAL_FORMAT_STRING, qual); + if (s.endsWith(QUAL_FORMAT_EXTENSION_TO_TRIM)) + s = s.substring(0, s.length() - QUAL_FORMAT_EXTENSION_TO_TRIM.length()); + return s; + } + + private void fieldIsMissingFromHeaderError(final VariantContext vc, final String id, final String field) { + if (!allowMissingFieldsInHeader) + throw new IllegalStateException("Key " + id + " found in VariantContext field " + field + + " at " + vc.getContig() + ":" + vc.getStart() + + " but this key isn't defined in the VCFHeader. We require all VCFs to have" + + " complete VCF headers by default."); + } + + String formatVCFField(final Object val) { + final String result; + if (val == null) + result = VCFConstants.MISSING_VALUE_v4; + else if (val instanceof Double) + result = formatVCFDouble((Double) val); + else if (val instanceof Boolean) + result = (Boolean) val ? "" : null; // empty string for true, null for false + else if (val instanceof List) { + result = formatVCFField(((List) val).toArray()); + } else if (val.getClass().isArray()) { + final int length = Array.getLength(val); + if (length == 0) + return formatVCFField(null); + final StringBuilder sb = new StringBuilder(formatVCFField(Array.get(val, 0))); + for (int i = 1; i < length; i++) { + sb.append(','); + sb.append(formatVCFField(Array.get(val, i))); + } + result = sb.toString(); + } else + result = val.toString(); + + return result; + } + + /** + * Takes a double value and pretty prints it to a String for display + *

    + * Large doubles => gets %.2f style formatting + * Doubles < 1 / 10 but > 1/100 => get %.3f style formatting + * Double < 1/100 => %.3e formatting + * + * @param d + * @return + */ + public static String formatVCFDouble(final double d) { + final String format; + if (d < 1) { + if (d < 0.01) { + if (Math.abs(d) >= 1e-20) + format = "%.3e"; + else { + // return a zero format + return "0.00"; + } + } else { + format = "%.3f"; + } + } else { + format = "%.2f"; + } + + return String.format(format, d); + } + + static int countOccurrences(final char c, final String s) { + int count = 0; + for (int i = 0; i < s.length(); i++) { + count += s.charAt(i) == c ? 1 : 0; + } + return count; + } + + static boolean isMissingValue(final String s) { + // we need to deal with the case that it's a list of missing values + return (countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + countOccurrences(',', s) == s.length()); + } + + /* + * Add the genotype data + */ + public void addGenotypeData(final VariantContext vc, final Map alleleMap, final List genotypeFormatKeys, final StringBuilder builder) { + final int ploidy = vc.getMaxPloidy(2); + + for (final String sample : this.header.getGenotypeSamples()) { + builder.append(VCFConstants.FIELD_SEPARATOR); + + Genotype g = vc.getGenotype(sample); + if (g == null) g = GenotypeBuilder.createMissing(sample, ploidy); + + final List attrs = new ArrayList(genotypeFormatKeys.size()); + for (final String field : genotypeFormatKeys) { + if (field.equals(VCFConstants.GENOTYPE_KEY)) { + if (!g.isAvailable()) { + throw new IllegalStateException("GTs cannot be missing for some samples if they are available for others in the record"); + } + + writeAllele(g.getAllele(0), alleleMap, builder); + for (int i = 1; i < g.getPloidy(); i++) { + builder.append(g.isPhased() ? 
VCFConstants.PHASED : VCFConstants.UNPHASED); + writeAllele(g.getAllele(i), alleleMap, builder); + } + continue; + + } else { + final String outputValue; + if (field.equals(VCFConstants.GENOTYPE_FILTER_KEY)) { + outputValue = g.isFiltered() ? g.getFilters() : VCFConstants.PASSES_FILTERS_v4; + } else { + final IntGenotypeFieldAccessors.Accessor accessor = GENOTYPE_FIELD_ACCESSORS.getAccessor(field); + if (accessor != null) { + final int[] intValues = accessor.getValues(g); + if (intValues == null) + outputValue = VCFConstants.MISSING_VALUE_v4; + else if (intValues.length == 1) // fast path + outputValue = Integer.toString(intValues[0]); + else { + final StringBuilder sb = new StringBuilder(); + sb.append(intValues[0]); + for (int i = 1; i < intValues.length; i++) { + sb.append(','); + sb.append(intValues[i]); + } + outputValue = sb.toString(); + } + } else { + Object val = g.hasExtendedAttribute(field) ? g.getExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4; + + final VCFFormatHeaderLine metaData = this.header.getFormatHeaderLine(field); + if (metaData != null) { + final int numInFormatField = metaData.getCount(vc); + if (numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4)) { + // If we have a missing field but multiple values are expected, we need to construct a new string with all fields. + // For example, if Number=2, the string has to be ".,." 
+ final StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4); + for (int i = 1; i < numInFormatField; i++) { + sb.append(','); + sb.append(VCFConstants.MISSING_VALUE_v4); + } + val = sb.toString(); + } + } + + // assume that if key is absent, then the given string encoding suffices + outputValue = formatVCFField(val); + } + } + + if (outputValue != null) + attrs.add(outputValue); + } + } + + // strip off trailing missing values + if (!outputTrailingFormatFields) { + for (int i = attrs.size() - 1; i >= 0; i--) { + if (isMissingValue(attrs.get(i))) attrs.remove(i); + else break; + } + } + + for (int i = 0; i < attrs.size(); i++) { + if (i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY)) { + builder.append(VCFConstants.GENOTYPE_FIELD_SEPARATOR); + } + builder.append(attrs.get(i)); + } + } + } + + /* + * Create the info string; assumes that no values are null + */ + private void writeInfoString(final Map infoFields, final StringBuilder builder) { + if (infoFields.isEmpty()) { + builder.append(VCFConstants.EMPTY_INFO_FIELD); + return; + } + + boolean isFirst = true; + for (final Map.Entry entry : infoFields.entrySet()) { + if (isFirst) isFirst = false; + else builder.append(VCFConstants.INFO_FIELD_SEPARATOR); + + builder.append(entry.getKey()); + + if (!entry.getValue().equals("")) { + final VCFInfoHeaderLine metaData = this.header.getInfoHeaderLine(entry.getKey()); + if (metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0) { + builder.append('='); + builder.append(entry.getValue()); + } + } + } + } + + public Map buildAlleleStrings(final VariantContext vc) { + final Map alleleMap = new HashMap(vc.getAlleles().size() + 1); + alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup + + final List alleles = vc.getAlleles(); + for (int i = 0; i < alleles.size(); i++) { + alleleMap.put(alleles.get(i), String.valueOf(i)); + } + + return alleleMap; + } + + private void 
writeAllele(final Allele allele, final Map alleleMap, final StringBuilder builder) { + final String encoding = alleleMap.get(allele); + if (encoding == null) + throw new RuntimeException("Allele " + allele + " is not an allele in the variant context"); + builder.append(encoding); + } } diff --git a/src/main/java/htsjdk/variant/vcf/VCFFileReader.java b/src/main/java/htsjdk/variant/vcf/VCFFileReader.java index 9024f34fc..d13387cd2 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFFileReader.java +++ b/src/main/java/htsjdk/variant/vcf/VCFFileReader.java @@ -129,14 +129,22 @@ public VCFHeader getFileHeader() { } /** Returns an iterator over all records in this VCF/BCF file. */ - public CloseableIterator iterator() { + @Override + public CloseableIterator iterator() { try { return reader.iterator(); } catch (final IOException ioe) { throw new TribbleException("Could not create an iterator from a feature reader.", ioe); } } - /** Queries for records within the region specified. */ + /** + * Queries for records overlapping the region specified. + * Note that this method requires VCF files with an associated index. If no index exists a TribbleException will be thrown. 
+ * @param chrom the chromosome to query + * @param start query interval start + * @param end query interval end + * @return non-null iterator over VariantContexts + */ public CloseableIterator query(final String chrom, final int start, final int end) { try { return reader.query(chrom, start, end); } catch (final IOException ioe) { @@ -144,7 +152,8 @@ public VCFHeader getFileHeader() { } } - public void close() { + @Override + public void close() { try { this.reader.close(); } catch (final IOException ioe) { throw new TribbleException("Could not close a variant context feature reader.", ioe); diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java index c4c1e3bdf..ce12c4273 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java +++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java @@ -127,6 +127,7 @@ public int hashCode() { return result; } + @Override public int compareTo(Object other) { return toString().compareTo(other.toString()); } diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java index 071d815ca..3ac72b28c 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java +++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java @@ -67,6 +67,7 @@ * @param valueLine the line * @return a mapping of the tags parsed out */ + @Override public Map parseLine(String valueLine, List expectedTagOrder) { // our return map Map ret = new LinkedHashMap(); @@ -145,6 +146,7 @@ class VCF3Parser implements VCFLineParser { + @Override public Map parseLine(String valueLine, List expectedTagOrder) { // our return map Map ret = new LinkedHashMap(); diff --git a/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java index a5da687e6..1c36f9e95 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java +++ 
b/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java @@ -92,6 +92,7 @@ protected void initialize(String name, Map genericFields) { this.genericFields.putAll(genericFields); } + @Override protected String toStringEncoding() { Map map = new LinkedHashMap(); map.put("ID", name); @@ -121,6 +122,7 @@ public int hashCode() { return result; } + @Override public String getID() { return name; } diff --git a/src/main/java/htsjdk/variant/vcf/VCFUtils.java b/src/main/java/htsjdk/variant/vcf/VCFUtils.java index c8eceeab5..72f757105 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFUtils.java +++ b/src/main/java/htsjdk/variant/vcf/VCFUtils.java @@ -32,26 +32,23 @@ import java.io.File; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.TreeMap; public class VCFUtils { public static Set smartMergeHeaders(final Collection headers, final boolean emitWarnings) throws IllegalStateException { // We need to maintain the order of the VCFHeaderLines, otherwise they will be scrambled in the returned Set. // This will cause problems for VCFHeader.getSequenceDictionary and anything else that implicitly relies on the line ordering. 
- final TreeMap map = new TreeMap(); // from KEY.NAME -> line + final LinkedHashMap map = new LinkedHashMap<>(); // from KEY.NAME -> line final HeaderConflictWarner conflictWarner = new HeaderConflictWarner(emitWarnings); // todo -- needs to remove all version headers from sources and add its own VCF version line for ( final VCFHeader source : headers ) { - //System.out.printf("Merging in header %s%n", source); for ( final VCFHeaderLine line : source.getMetaDataInSortedOrder()) { String key = line.getKey(); @@ -102,12 +99,11 @@ } } else { map.put(key, line); - //System.out.printf("Adding header line %s%n", line); } } } // returning a LinkedHashSet so that ordering will be preserved. Ensures the contig lines do not get scrambled. - return new LinkedHashSet(map.values()); + return new LinkedHashSet<>(map.values()); } /** diff --git a/src/test/java/htsjdk/HtsjdkTest.java b/src/test/java/htsjdk/HtsjdkTest.java new file mode 100644 index 000000000..4da626b7e --- /dev/null +++ b/src/test/java/htsjdk/HtsjdkTest.java @@ -0,0 +1,10 @@ +package htsjdk; + +import org.scalatest.testng.TestNGSuite; + +/** + * Base class for all Java tests in HTSJDK. 
+ */ +public class HtsjdkTest extends TestNGSuite { + +} diff --git a/src/test/java/htsjdk/cram/io/ExternalCompressionTest.java b/src/test/java/htsjdk/cram/io/ExternalCompressionTest.java index 09f6e4905..60a65197d 100644 --- a/src/test/java/htsjdk/cram/io/ExternalCompressionTest.java +++ b/src/test/java/htsjdk/cram/io/ExternalCompressionTest.java @@ -1,16 +1,15 @@ -package htsjdk.samtools.cram.io; +package htsjdk.cram.io; -import org.apache.commons.compress.utils.IOUtils; +import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.io.ExternalCompression; import org.testng.Assert; import org.testng.annotations.Test; import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.nio.file.Files; -public class ExternalCompressionTest { +public class ExternalCompressionTest extends HtsjdkTest { public static final File BZIP2_FILE = new File("src/test/resources/htsjdk/samtools/cram/io/bzip2-test.bz2"); public static final byte [] TEST_BYTES = "This is a simple string to test BZip2".getBytes(); diff --git a/src/test/java/htsjdk/samtools/AbstractBAMFileIndexTest.java b/src/test/java/htsjdk/samtools/AbstractBAMFileIndexTest.java index 74c2dd7f2..cf451b86a 100644 --- a/src/test/java/htsjdk/samtools/AbstractBAMFileIndexTest.java +++ b/src/test/java/htsjdk/samtools/AbstractBAMFileIndexTest.java @@ -1,11 +1,12 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.seekablestream.SeekableStream; import org.testng.annotations.Test; import java.io.IOException; -public class AbstractBAMFileIndexTest { +public class AbstractBAMFileIndexTest extends HtsjdkTest { /** * @see https://github.com/samtools/htsjdk/issues/73 @@ -59,4 +60,4 @@ public int read() throws IOException { buffer.readInteger(); buffer.readBytes(new byte[10000]); } -} \ No newline at end of file +} diff --git a/src/test/java/htsjdk/samtools/BAMCigarOverflowTest.java b/src/test/java/htsjdk/samtools/BAMCigarOverflowTest.java index 
dd630f937..8f91c6448 100644 --- a/src/test/java/htsjdk/samtools/BAMCigarOverflowTest.java +++ b/src/test/java/htsjdk/samtools/BAMCigarOverflowTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloserUtil; import org.testng.annotations.Test; import static org.testng.Assert.assertEquals; @@ -10,7 +11,7 @@ * Test the fix of a bug reported by s-andrews in which the use of an arithmetic rather than a logical right shift in BinaryCigarCodec.binaryCigarToCigarElement() * causes an overflow in the CIGAR when reading a BAM file for a read that spans a very large intron. */ -public class BAMCigarOverflowTest { +public class BAMCigarOverflowTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); @Test diff --git a/src/test/java/htsjdk/samtools/BAMFileIndexTest.java b/src/test/java/htsjdk/samtools/BAMFileIndexTest.java index 170bc4726..0271ade37 100755 --- a/src/test/java/htsjdk/samtools/BAMFileIndexTest.java +++ b/src/test/java/htsjdk/samtools/BAMFileIndexTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.StopWatch; @@ -46,7 +47,7 @@ /** * Test BAM file indexing. 
*/ -public class BAMFileIndexTest { +public class BAMFileIndexTest extends HtsjdkTest { private final File BAM_FILE = new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam"); private final boolean mVerbose = false; @@ -78,8 +79,7 @@ public void testSpecificQueries() } @Test(groups = {"slow"}) - public void testRandomQueries() - throws Exception { + public void testRandomQueries() throws Exception { runRandomTest(BAM_FILE, 1000, new Random()); } @@ -181,6 +181,21 @@ public void testQueryAlignmentStart() { CloserUtil.close(reader); } + @DataProvider(name = "queryIntervalsData") + public Object[][] queryIntervalsData(){ + return new Object[][] { + {true, 1}, + {false, 2} + }; + } + @Test(dataProvider = "queryIntervalsData") + public void testQueryIntervals(final boolean contained, final int expected) { + final SamReader reader = SamReaderFactory.makeDefault().enable().open(BAM_FILE); + + final CloseableIterator it = reader.query("chr1", 202661637, 202661812, contained); + Assert.assertEquals(countElements(it), expected); + } + @Test public void testQueryMate() { final SamReader reader = SamReaderFactory.makeDefault().open(BAM_FILE); diff --git a/src/test/java/htsjdk/samtools/BAMFileSpanTest.java b/src/test/java/htsjdk/samtools/BAMFileSpanTest.java new file mode 100644 index 000000000..06d1bc9ab --- /dev/null +++ b/src/test/java/htsjdk/samtools/BAMFileSpanTest.java @@ -0,0 +1,72 @@ +package htsjdk.samtools; + +import java.util.Arrays; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public class BAMFileSpanTest extends HtsjdkTest { + @Test(dataProvider = "testRemoveContentsBeforeProvider") + public void testRemoveContentsBefore(BAMFileSpan originalSpan, BAMFileSpan cutoff, + BAMFileSpan expectedSpan) { + // only start value in cutoff is used + Assert.assertEquals( + ((BAMFileSpan) originalSpan.removeContentsBefore(cutoff)).getChunks(), + 
expectedSpan.getChunks()); + } + + @DataProvider(name = "testRemoveContentsBeforeProvider") + private Object[][] testRemoveContentsBeforeProvider() { + return new Object[][] { + { span(chunk(6,10), chunk(11,15)), null, span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(), span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(6,0)), span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(7,0)), span(chunk(7,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(9,0)), span(chunk(9,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(10,0)), span(chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(11,0)), span(chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(12,0)), span(chunk(12,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(15,0)), span() }, + { span(chunk(6,10), chunk(11,15)), span(chunk(16,0)), span() }, + { span(chunk(6,10), chunk(11,15)), span(chunk(6,10), chunk(7,16)), span(chunk(6, 10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(16,17), chunk(18,19)), span() }, + }; + } + + @Test(dataProvider = "testRemoveContentsAfterProvider") + public void testRemoveContentsAfter(BAMFileSpan originalSpan, BAMFileSpan cutoff, + BAMFileSpan expectedSpan) { + // only end value in cutoff is used + Assert.assertEquals( + ((BAMFileSpan) originalSpan.removeContentsAfter(cutoff)).getChunks(), + expectedSpan.getChunks()); + } + + @DataProvider(name = "testRemoveContentsAfterProvider") + private Object[][] testRemoveContentsAfterProvider() { + return new Object[][] { + { span(chunk(6,10), chunk(11,15)), null, span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(), span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,6)), span() }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,7)), span(chunk(6,7)) }, + { span(chunk(6,10), chunk(11,15)), 
span(chunk(0,9)), span(chunk(6,9)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,10)), span(chunk(6,10)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,11)), span(chunk(6,10)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,12)), span(chunk(6,10), chunk(11,12)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,15)), span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,16)), span(chunk(6,10), chunk(11,15)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,6), chunk(7,10)), span(chunk(6, 10)) }, + { span(chunk(6,10), chunk(11,15)), span(chunk(0,6), chunk(7,16)), span(chunk(6, 10), chunk(11,15)) }, + }; + } + + private BAMFileSpan span(Chunk... chunks) { + return new BAMFileSpan(Arrays.asList(chunks)); + } + + private Chunk chunk(long start, long end) { + return new Chunk(start, end); + } +} diff --git a/src/test/java/htsjdk/samtools/BAMFileWriterTest.java b/src/test/java/htsjdk/samtools/BAMFileWriterTest.java index a8944d0de..3bb46e6f7 100644 --- a/src/test/java/htsjdk/samtools/BAMFileWriterTest.java +++ b/src/test/java/htsjdk/samtools/BAMFileWriterTest.java @@ -23,19 +23,24 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloserUtil; +import htsjdk.samtools.util.SequenceUtil; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.IOException; /** * Test that BAM writing doesn't blow up. For presorted writing, the resulting BAM file is read and contents are * compared with the original SAM file. 
*/ -public class BAMFileWriterTest { +public class BAMFileWriterTest extends HtsjdkTest { private SAMRecordSetBuilder getRecordSetBuilder(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) { final SAMRecordSetBuilder ret = new SAMRecordSetBuilder(sortForMe, sortOrder); @@ -189,4 +194,39 @@ public void testNegativePresorted() throws Exception { testHelper(getRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate), SAMFileHeader.SortOrder.queryname, true); Assert.fail("Exception should be thrown"); } + + + /** + * A test to check that BAM changes read bases according with {@link SequenceUtil#toBamReadBasesInPlace}. + */ + @Test + public void testBAMReadBases() throws IOException { + final SAMFileHeader header = new SAMFileHeader(); + header.addSequence(new SAMSequenceRecord("1", SequenceUtil.getIUPACCodesString().length())); + header.addReadGroup(new SAMReadGroupRecord("rg1")); + + final SAMRecord originalSAMRecord = new SAMRecord(header); + originalSAMRecord.setReadName("test"); + originalSAMRecord.setReferenceIndex(0); + originalSAMRecord.setAlignmentStart(1); + originalSAMRecord.setReadBases(SequenceUtil.getIUPACCodesString().getBytes()); + originalSAMRecord.setCigarString(originalSAMRecord.getReadLength() + "M"); + originalSAMRecord.setBaseQualities(SAMRecord.NULL_QUALS); + + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (final BAMFileWriter writer = new BAMFileWriter(baos, null)) { + writer.setHeader(header); + writer.addAlignment(originalSAMRecord); + } + + + final BAMFileReader reader = new BAMFileReader(new ByteArrayInputStream(baos.toByteArray()), null, true, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory()); + final CloseableIterator iterator = reader.getIterator(); + iterator.hasNext(); + final SAMRecord recordFromBAM = iterator.next(); + + Assert.assertNotEquals(recordFromBAM.getReadBases(), originalSAMRecord.getReadBases()); + Assert.assertEquals(recordFromBAM.getReadBases(), 
SequenceUtil.toBamReadBasesInPlace(originalSAMRecord.getReadBases())); + } + } diff --git a/src/test/java/htsjdk/samtools/BAMIndexWriterTest.java b/src/test/java/htsjdk/samtools/BAMIndexWriterTest.java index 09f92360e..db9ccb957 100644 --- a/src/test/java/htsjdk/samtools/BAMIndexWriterTest.java +++ b/src/test/java/htsjdk/samtools/BAMIndexWriterTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.IOUtil; import org.testng.annotations.DataProvider; @@ -38,7 +39,7 @@ /** * Test BAM file index creation */ -public class BAMIndexWriterTest { +public class BAMIndexWriterTest extends HtsjdkTest { // Two input files for basic test private final String BAM_FILE_LOCATION = "src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam"; private final String BAI_FILE_LOCATION = "src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam.bai"; diff --git a/src/test/java/htsjdk/samtools/BAMIteratorTest.java b/src/test/java/htsjdk/samtools/BAMIteratorTest.java index 5fa9e7dc4..6fa67cd9f 100644 --- a/src/test/java/htsjdk/samtools/BAMIteratorTest.java +++ b/src/test/java/htsjdk/samtools/BAMIteratorTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloserUtil; import org.testng.Assert; @@ -34,7 +35,7 @@ /** * @author alecw@broadinstitute.org */ -public class BAMIteratorTest { +public class BAMIteratorTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); @Test(dataProvider = "dataProvider") diff --git a/src/test/java/htsjdk/samtools/BAMQueryMultipleIntervalsIteratorFilterTest.java b/src/test/java/htsjdk/samtools/BAMQueryMultipleIntervalsIteratorFilterTest.java index 7c0bb1fc2..d25e7ba65 100644 --- a/src/test/java/htsjdk/samtools/BAMQueryMultipleIntervalsIteratorFilterTest.java +++ 
b/src/test/java/htsjdk/samtools/BAMQueryMultipleIntervalsIteratorFilterTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -7,7 +8,7 @@ import java.util.Arrays; import java.util.Random; -public class BAMQueryMultipleIntervalsIteratorFilterTest { +public class BAMQueryMultipleIntervalsIteratorFilterTest extends HtsjdkTest { private final byte[] BASES = {'A', 'C', 'G', 'T'}; private final Random random = new Random(); diff --git a/src/test/java/htsjdk/samtools/BAMRemoteFileTest.java b/src/test/java/htsjdk/samtools/BAMRemoteFileTest.java index 4b686cf04..dccfddcac 100644 --- a/src/test/java/htsjdk/samtools/BAMRemoteFileTest.java +++ b/src/test/java/htsjdk/samtools/BAMRemoteFileTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.TestUtil; import org.testng.annotations.Test; @@ -40,7 +41,7 @@ /** * Test BAM file indexing. 
*/ -public class BAMRemoteFileTest { +public class BAMRemoteFileTest extends HtsjdkTest { private final File BAM_INDEX_FILE = new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam.bai"); private final File BAM_FILE = new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam"); private final String BAM_URL_STRING = TestUtil.BASE_URL_FOR_HTTP_TESTS + "index_test.bam"; diff --git a/src/test/java/htsjdk/samtools/BinTest.java b/src/test/java/htsjdk/samtools/BinTest.java index 271a41101..6009ed37c 100644 --- a/src/test/java/htsjdk/samtools/BinTest.java +++ b/src/test/java/htsjdk/samtools/BinTest.java @@ -24,12 +24,13 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; import java.util.Collections; -public class BinTest { +public class BinTest extends HtsjdkTest { @Test public void testEmptyBin() { // Construct a new empty bin and ensure that the bin list is empty, not null. diff --git a/src/test/java/htsjdk/samtools/CRAMBAIIndexerTest.java b/src/test/java/htsjdk/samtools/CRAMBAIIndexerTest.java index 6f3b95459..ce32e7a8b 100644 --- a/src/test/java/htsjdk/samtools/CRAMBAIIndexerTest.java +++ b/src/test/java/htsjdk/samtools/CRAMBAIIndexerTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.build.ContainerFactory; import htsjdk.samtools.cram.structure.Container; import htsjdk.samtools.cram.structure.CramCompressionRecord; @@ -17,7 +18,7 @@ /** * Created by vadim on 12/01/2016. 
*/ -public class CRAMBAIIndexerTest { +public class CRAMBAIIndexerTest extends HtsjdkTest { private static CramCompressionRecord createRecord(int recordIndex, int seqId, int start) { byte[] bases = "AAAAA".getBytes(); diff --git a/src/test/java/htsjdk/samtools/CRAMCRAIIndexerTest.java b/src/test/java/htsjdk/samtools/CRAMCRAIIndexerTest.java index 11d2f3ce9..c5a9634d4 100644 --- a/src/test/java/htsjdk/samtools/CRAMCRAIIndexerTest.java +++ b/src/test/java/htsjdk/samtools/CRAMCRAIIndexerTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.CRAIEntry; import htsjdk.samtools.cram.build.CramContainerIterator; import htsjdk.samtools.cram.ref.ReferenceSource; @@ -17,12 +18,12 @@ * Companion to CRAMBAIIndexerTest, for testing CRAI indices created on cram * streams; */ -public class CRAMCRAIIndexerTest { +public class CRAMCRAIIndexerTest extends HtsjdkTest { @Test public void testCRAIIndexerFromContainer() throws IOException { final File CRAMFile = new File("src/test/resources/htsjdk/samtools/cram/test2.cram"); - final File refFile = new File("src/test/resources/htsjdk/samtools/cram/test2.fa"); + final File refFile = new File("src/test/resources/htsjdk/samtools/cram/auxf.fa"); ReferenceSource refSource = new ReferenceSource(refFile); CRAMFileReader reader = new CRAMFileReader( CRAMFile, @@ -55,7 +56,7 @@ public void testCRAIIndexerFromContainer() throws IOException { @Test public void testCRAIIndexerFromStream() throws IOException { final File CRAMFile = new File("src/test/resources/htsjdk/samtools/cram/test2.cram"); - final File refFile = new File("src/test/resources/htsjdk/samtools/cram/test2.fa"); + final File refFile = new File("src/test/resources/htsjdk/samtools/cram/auxf.fa"); ReferenceSource refSource = new ReferenceSource(refFile); // get the header to use @@ -180,4 +181,4 @@ private long getIteratorCount(Iterator it) { return count; } -} \ No newline at end of file +} diff --git 
a/src/test/java/htsjdk/samtools/CRAMComplianceTest.java b/src/test/java/htsjdk/samtools/CRAMComplianceTest.java index 81cd2f916..2f6ebd5c3 100644 --- a/src/test/java/htsjdk/samtools/CRAMComplianceTest.java +++ b/src/test/java/htsjdk/samtools/CRAMComplianceTest.java @@ -1,60 +1,119 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.build.CramIO; import htsjdk.samtools.cram.common.CramVersions; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.Log; + +import htsjdk.samtools.util.SequenceUtil; import org.testng.Assert; import org.testng.annotations.BeforeTest; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; +import java.io.*; + +import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.List; /** * Created by vadim on 28/04/2015. 
*/ -public class CRAMComplianceTest { +public class CRAMComplianceTest extends HtsjdkTest { - @DataProvider(name = "test1") - public Object[][] createData1() { - return new Object[][]{ - {"auxf#values"}, + @FunctionalInterface + public interface TriConsumer { + abstract void accept(T1 arg1, T2 arg2, T3 arg3); + } + + // The files in this provider expose a defect in CRAM conversion of one kind or another + // so the tests are executed using partial verification + @DataProvider(name = "partialVerification") + public Object[][] getPartialVerificationData() { + return new Object[][] { + {"auxf#values"}, // unsigned attributes: https://github.com/samtools/htsjdk/issues/499 + {"c1#noseq"}, // unsigned attributes: https://github.com/samtools/htsjdk/issues/499 + {"c1#unknown"}, // unsigned attributes: https://github.com/samtools/htsjdk/issues/499 + {"ce#5b"}, // reads with no read bases: https://github.com/samtools/htsjdk/issues/509 + {"ce#tag_depadded"},// reads with no read bases: https://github.com/samtools/htsjdk/issues/509 + {"ce#tag_padded"}, // reads with no read bases: https://github.com/samtools/htsjdk/issues/509 + {"ce#unmap"}, // unmapped reads with non-zero MAPQ value that is not restored + // https://github.com/samtools/htsjdk/issues/714 + {"xx#triplet"}, // the version 2.1 variant of this file has a bad insertSize, which is + // probably residual detritus from https://github.com/samtools/htsjdk/issues/364 + {"xx#minimal"}, // cigar string "5H0M5H" is restored as "10H" + // https://github.com/samtools/htsjdk/issues/713 + }; + } + + @Test(dataProvider = "partialVerification") + public void partialVerificationTest(String name) throws IOException { + // do compliance test with partial validation to work around known limitations + doComplianceTest(name, this::assertSameRecordsPartial); + } + + // Files that can be subjected to full SAMRecord equality after conversion + @DataProvider(name = "fullVerification") + public Object[][] getFullVerificationData() { + 
return new Object[][] { {"c1#bounds"}, {"c1#clip"}, - {"c1#noseq"}, {"c1#pad1"}, {"c1#pad2"}, {"c1#pad3"}, - {"c1#unknown"}, {"ce#1"}, {"ce#2"}, - {"ce#5b"}, {"ce#5"}, {"ce#large_seq"}, {"ce#supp"}, - {"ce#tag_depadded"}, - {"ce#tag_padded"}, {"ce#unmap1"}, {"ce#unmap2"}, - {"ce#unmap"}, {"xx#blank"}, {"xx#large_aux2"}, {"xx#large_aux"}, - {"xx#minimal"}, {"xx#pair"}, {"xx#rg"}, - {"xx#triplet"}, {"xx#unsorted"}, }; } + @Test(dataProvider = "fullVerification") + public void fullVerificationTest(String name) throws IOException { + doComplianceTest(name, (version, expected, actual) -> Assert.assertEquals(expected, actual)); + } + + // Files that can be subjected to full verification only after read base normalization, because either + // the reference or the reads contain ambiguity codes that are normalized by SequenceUtil.toBamReadBasesInPlace + // during the round-trip process. + @DataProvider(name = "ambiguityCodeVerification") + public Object[][] getAmbiguityCodeVerificationData() { + return new Object[][]{ + {"amb#amb"} + }; + } + + @Test(dataProvider = "ambiguityCodeVerification") + public void ambiguityCodeVerificationTest(String name) throws IOException { + doComplianceTest(name, + (version, expected, actual) -> + { + if (expected.getReadString().equals(actual.getReadString())) { + Assert.assertEquals(expected, actual); + } else { + // tolerate BAM and CRAM conversion of read bases to upper case IUPAC codes by + // creating a deep copy of the expected reads and normalizing (upper case IUPAC) + // the bases; then proceeding with the full compare with the actual + SAMRecord expectedNormalized = actual.deepCopy(); + final byte[] expectedBases = expectedNormalized.getReadBases(); + SequenceUtil.toBamReadBasesInPlace(expectedBases); + Assert.assertEquals(actual, expectedNormalized); + } + } + ); + } @BeforeTest public void beforeTest() { @@ -75,62 +134,60 @@ public TestCase(File root, String name) { } } - @Test(dataProvider = "test1") - public void test(String 
name) throws IOException { + private void doComplianceTest( + final String name, + final TriConsumer assertFunction) throws IOException { TestCase t = new TestCase(new File("src/test/resources/htsjdk/samtools/cram/"), name); - ReferenceSource source = new ReferenceSource(t.refFile); - SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(t.bamFile); - - final SAMRecordIterator samRecordIterator = reader.iterator(); - List samRecords = new ArrayList(); - while (samRecordIterator.hasNext()) - samRecords.add(samRecordIterator.next()); - SAMFileHeader samFileHeader = reader.getFileHeader(); - reader.close(); + // retrieve all records from the original file + List samRecords = getSAMRecordsFromFile(t.bamFile, t.refFile); + SAMFileHeader samFileHeader = getFileHeader(t.bamFile, t.refFile); + // write them to cram stream ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ReferenceSource source = new ReferenceSource(t.refFile); CRAMFileWriter cramFileWriter = new CRAMFileWriter(baos, source, samFileHeader, name); for (SAMRecord samRecord : samRecords) { cramFileWriter.addAlignment(samRecord); } cramFileWriter.close(); - - CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (SeekableStream)null, source, ValidationStringency.SILENT); + // read them back from the stream and compare to original sam via assertSameRecords + CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (SeekableStream) null, source, ValidationStringency.SILENT); SAMRecordIterator cramFileReaderIterator = cramFileReader.getIterator(); for (SAMRecord samRecord : samRecords) { Assert.assertTrue(cramFileReaderIterator.hasNext()); SAMRecord restored = cramFileReaderIterator.next(); Assert.assertNotNull(restored); - assertSameRecords(CramVersions.DEFAULT_CRAM_VERSION.major, samRecord, restored); + assertFunction.accept(CramVersions.DEFAULT_CRAM_VERSION.major, samRecord, 
restored); } Assert.assertFalse(cramFileReaderIterator.hasNext()); //v2.1 test - cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_21), (SeekableStream)null, source, ValidationStringency.SILENT); + cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_21), (SeekableStream) null, source, ValidationStringency.SILENT); cramFileReaderIterator = cramFileReader.getIterator(); for (SAMRecord samRecord : samRecords) { Assert.assertTrue(cramFileReaderIterator.hasNext()); SAMRecord restored = cramFileReaderIterator.next(); Assert.assertNotNull(restored); - assertSameRecords(CramVersions.CRAM_v2_1.major, samRecord, restored); + assertFunction.accept(CramVersions.CRAM_v2_1.major, samRecord, restored); } Assert.assertFalse(cramFileReaderIterator.hasNext()); //v3.0 test - cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_30), (SeekableStream)null, source, ValidationStringency.SILENT); + cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_30), (SeekableStream) null, source, ValidationStringency.SILENT); cramFileReaderIterator = cramFileReader.getIterator(); for (SAMRecord samRecord : samRecords) { Assert.assertTrue(cramFileReaderIterator.hasNext()); SAMRecord restored = cramFileReaderIterator.next(); Assert.assertNotNull(restored); - assertSameRecords(CramVersions.CRAM_v3.major, samRecord, restored); + assertFunction.accept(CramVersions.CRAM_v3.major, samRecord, restored); } Assert.assertFalse(cramFileReaderIterator.hasNext()); } - private void assertSameRecords(int majorVersion, SAMRecord record1, SAMRecord record2) { + private void assertSameRecordsPartial(Integer majorVersion, SAMRecord record1, SAMRecord record2) { + // test a partial set of fields for equality, avoiding known CRAM conversion issues Assert.assertEquals(record2.getFlags(), record1.getFlags()); Assert.assertEquals(record2.getReadName(), record1.getReadName()); Assert.assertEquals(record2.getReferenceName(), record1.getReferenceName()); @@ -139,12 
+196,144 @@ private void assertSameRecords(int majorVersion, SAMRecord record1, SAMRecord re /** * Known issue: CRAM v2.1 doesn't handle reads with missing bases correctly. This * causes '*' bases to arise when reading CRAM. Skipping the base comparison asserts. + * https://github.com/samtools/htsjdk/issues/509 */ if (record1.getReadBases() != SAMRecord.NULL_SEQUENCE || majorVersion >= CramVersions.CRAM_v3.major) { - Assert.assertEquals(record2.getReadBases(), record1.getReadBases()); + // BAM and CRAM convert read bases to upper case IUPAC codes + final byte[] originalBases = record1.getReadBases(); + SequenceUtil.toBamReadBasesInPlace(originalBases); + Assert.assertEquals(record2.getReadBases(), originalBases); } Assert.assertEquals(record2.getBaseQualities(), record1.getBaseQualities()); } + @DataProvider(name = "CRAMSourceFiles") + public Object[][] getCRAMSources() { + final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/cram"); + + return new Object[][] { + // Test cram file created with samtools using a *reference* that contains ambiguity codes + // 'R' and 'M', a single no call '.', and some lower case bases. 
+ {new File(TEST_DATA_DIR, "samtoolsSliceMD5WithAmbiguityCodesTest.cram"), + new File(TEST_DATA_DIR, "ambiguityCodes.fasta")}, + {new File(TEST_DATA_DIR, "NA12878.20.21.1-100.100-SeqsPerSlice.0-unMapped.cram"), + new File(TEST_DATA_DIR, "human_g1k_v37.20.21.1-100.fasta")}, + {new File(TEST_DATA_DIR, "NA12878.20.21.1-100.100-SeqsPerSlice.1-unMapped.cram"), + new File(TEST_DATA_DIR, "human_g1k_v37.20.21.1-100.fasta")}, + {new File(TEST_DATA_DIR, "NA12878.20.21.1-100.100-SeqsPerSlice.500-unMapped.cram"), + new File(TEST_DATA_DIR, "human_g1k_v37.20.21.1-100.fasta")}, + {new File(TEST_DATA_DIR, "test.cram"), new File(TEST_DATA_DIR, "auxf.fa")}, + {new File(TEST_DATA_DIR, "test2.cram"), new File(TEST_DATA_DIR, "auxf.fa")}, + }; + } + + @Test(dataProvider = "CRAMSourceFiles") + public void testCRAMThroughBAMRoundTrip(final File originalCRAMFile, final File referenceFile) throws IOException { + + // retrieve all records from the cram and make defensive deep copies + List originalCRAMRecords = getSAMRecordsFromFile(originalCRAMFile, referenceFile); + List copiedCRAMRecords = new ArrayList<>(); + originalCRAMRecords.forEach(origRec -> copiedCRAMRecords.add(origRec.deepCopy())); + + // write copies of the CRAM records to a BAM, and then read them back in + final File tempBamFile = File.createTempFile("testCRAMToBAMToCRAM", BamFileIoUtils.BAM_FILE_EXTENSION); + tempBamFile.deleteOnExit(); + SAMFileHeader samHeader = getFileHeader(originalCRAMFile, referenceFile); + writeRecordsToFile(copiedCRAMRecords, tempBamFile, referenceFile, samHeader); + List bamRecords = getSAMRecordsFromFile(tempBamFile, referenceFile); + + // compare to originals + int i = 0; + for (SAMRecord rec : bamRecords) { + rec.setIndexingBin(null); + Assert.assertTrue(rec.equals(originalCRAMRecords.get(i++))); + } + Assert.assertEquals(i, originalCRAMRecords.size()); + + // write the BAM records to a CRAM and read them back in + final File tempCRAMFile = File.createTempFile("testCRAMToBAMToCRAM", 
CramIO.CRAM_FILE_EXTENSION); + tempCRAMFile.deleteOnExit(); + writeRecordsToFile(bamRecords, tempCRAMFile, referenceFile, samHeader); + List roundTripCRAMRecords = getSAMRecordsFromFile(tempCRAMFile, referenceFile); + + // compare to originals + i = 0; + for (SAMRecord rec : roundTripCRAMRecords) { + Assert.assertTrue(rec.equals(originalCRAMRecords.get(i++))); + } + Assert.assertEquals(i, originalCRAMRecords.size()); + } + + @Test + public void testBAMThroughCRAMRoundTrip() throws IOException, NoSuchAlgorithmException { + final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/cram"); + + // These files are reduced versions of the CEUTrio.HiSeq.WGS.b37.NA12878.20.21.bam and human_g1k_v37.20.21.fasta + // files used in GATK4 tests. The first 8000 records from chr20 were extracted; from those around 80 placed but + // unmapped reads that contained cigar elements were removed, along with one read who's mate was on chr21. + // Finally all read positions were remapped to the subsetted reference file, which contains only the ~9000 bases + // used by the reduced read set. 
+ final File originalBAMInputFile = new File(TEST_DATA_DIR, "CEUTrio.HiSeq.WGS.b37.NA12878.20.first.8000.bam"); + final File referenceFile = new File(TEST_DATA_DIR, "human_g1k_v37.20.subset.fasta"); + + // retrieve all records from the bam and reset the indexing bins to keep comparisons with + // cram records from failing + List originalBAMRecords = getSAMRecordsFromFile(originalBAMInputFile, referenceFile); + for (int i = 0; i < originalBAMRecords.size(); i++) { + originalBAMRecords.get(i).setIndexingBin(null); + } + + // write the BAM records to a temporary CRAM + final File tempCRAMFile = File.createTempFile("testBAMThroughCRAMRoundTrip", CramIO.CRAM_FILE_EXTENSION); + tempCRAMFile.deleteOnExit(); + SAMFileHeader samHeader = getFileHeader(originalBAMInputFile, referenceFile); + writeRecordsToFile(originalBAMRecords, tempCRAMFile, referenceFile, samHeader); + + // read the CRAM records back in and compare to the original BAM records + List cramRecords = getSAMRecordsFromFile(tempCRAMFile, referenceFile); + Assert.assertEquals(cramRecords.size(), originalBAMRecords.size()); + for (int i = 0; i < originalBAMRecords.size(); i++) { + Assert.assertEquals(originalBAMRecords.get(i), cramRecords.get(i)); + } + } + + private SAMFileHeader getFileHeader(final File sourceFile, final File referenceFile) throws IOException { + try (final SamReader reader = SamReaderFactory.make() + .validationStringency(ValidationStringency.SILENT) + .referenceSequence(referenceFile).open(sourceFile)) { + return reader.getFileHeader(); + } + } + + private List getSAMRecordsFromFile(final File sourceFile, final File referenceFile) throws IOException { + List recs = new ArrayList<>(); + try (SamReader reader = SamReaderFactory.make() + .validationStringency(ValidationStringency.SILENT) + .referenceSequence(referenceFile).open(sourceFile)) + { + for (SAMRecord rec : reader) { + recs.add(rec); + } + } + return recs; + } + + private void writeRecordsToFile ( + final List recs, + final File 
targetFile, + final File referenceFile, + final SAMFileHeader samHeader) { + + // NOTE: even when the input is coord-sorted, using assumePresorted=false will cause some + // tests to fail since it can change the order of some unmapped reads - AFAICT this is allowed + // by the spec since the order is arbitrary for unmapped + try (final SAMFileWriter writer = new SAMFileWriterFactory() + .makeWriter(samHeader, true, targetFile, referenceFile)) { + for (SAMRecord rec : recs) { + writer.addAlignment(rec); + } + } + } + } diff --git a/src/test/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java b/src/test/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java index b26f4b06b..9ab9ed278 100644 --- a/src/test/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java +++ b/src/test/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.reference.InMemoryReferenceSequenceFile; import htsjdk.samtools.seekablestream.SeekableMemoryStream; @@ -23,7 +24,7 @@ import java.util.Collections; import java.util.List; -public class CRAMContainerStreamWriterTest { +public class CRAMContainerStreamWriterTest extends HtsjdkTest { @BeforeClass public void initClass() { diff --git a/src/test/java/htsjdk/samtools/CRAMEdgeCasesTest.java b/src/test/java/htsjdk/samtools/CRAMEdgeCasesTest.java index e77e0e8dc..4fa9b1a59 100644 --- a/src/test/java/htsjdk/samtools/CRAMEdgeCasesTest.java +++ b/src/test/java/htsjdk/samtools/CRAMEdgeCasesTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.reference.InMemoryReferenceSequenceFile; @@ -20,7 +21,7 @@ /** * A collection of CRAM test based on round trip comparison of SAMRecord before and after CRAM compression. 
*/ -public class CRAMEdgeCasesTest { +public class CRAMEdgeCasesTest extends HtsjdkTest { @BeforeTest public void beforeTest() { diff --git a/src/test/java/htsjdk/samtools/CRAMFileBAIIndexTest.java b/src/test/java/htsjdk/samtools/CRAMFileBAIIndexTest.java index eba2b4cb7..32160920b 100644 --- a/src/test/java/htsjdk/samtools/CRAMFileBAIIndexTest.java +++ b/src/test/java/htsjdk/samtools/CRAMFileBAIIndexTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.build.ContainerParser; import htsjdk.samtools.cram.build.CramContainerIterator; import htsjdk.samtools.cram.ref.ReferenceSource; @@ -32,7 +33,7 @@ * The scan* tests check that for every records in the BAM file the query returns the same records from the CRAM file. * Created by Vadim on 14/03/2015. */ -public class CRAMFileBAIIndexTest { +public class CRAMFileBAIIndexTest extends HtsjdkTest { private final File BAM_FILE = new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam"); private File cramFile; private File indexFile; diff --git a/src/test/java/htsjdk/samtools/CRAMFileCRAIIndexTest.java b/src/test/java/htsjdk/samtools/CRAMFileCRAIIndexTest.java index 9084a0fc5..b919c4619 100644 --- a/src/test/java/htsjdk/samtools/CRAMFileCRAIIndexTest.java +++ b/src/test/java/htsjdk/samtools/CRAMFileCRAIIndexTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.build.ContainerParser; import htsjdk.samtools.cram.build.CramContainerIterator; import htsjdk.samtools.cram.ref.ReferenceSource; @@ -29,7 +30,8 @@ * file as the source of the test data. The scan* tests check that for every records in the * CRAM file the query returns the same records from the CRAM file. 
*/ -public class CRAMFileCRAIIndexTest { +@Test(singleThreaded = true) +public class CRAMFileCRAIIndexTest extends HtsjdkTest { private final File BAM_FILE = new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam"); private final int nofReads = 10000 ; diff --git a/src/test/java/htsjdk/samtools/CRAMFileReaderTest.java b/src/test/java/htsjdk/samtools/CRAMFileReaderTest.java index 3fcb3bdc9..da53f170c 100644 --- a/src/test/java/htsjdk/samtools/CRAMFileReaderTest.java +++ b/src/test/java/htsjdk/samtools/CRAMFileReaderTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.reference.InMemoryReferenceSequenceFile; import htsjdk.samtools.seekablestream.SeekableFileStream; @@ -40,7 +41,7 @@ /** * Additional tests for CRAMFileReader are in CRAMFileIndexTest */ -public class CRAMFileReaderTest { +public class CRAMFileReaderTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); private static final File CRAM_WITH_CRAI = new File(TEST_DATA_DIR, "cram_with_crai_index.cram"); @@ -82,7 +83,7 @@ public void testCRAMReader2ReferenceRequired() { @Test(description = "Test CRAMReader 2 input required", expectedExceptions = IllegalArgumentException.class) public void testCRAMReader2_InputRequired() { File file = null; - InputStream bis = null; + InputStream bis = null; new CRAMFileReader(file, bis, createReferenceSource()); } diff --git a/src/test/java/htsjdk/samtools/CRAMFileWriterTest.java b/src/test/java/htsjdk/samtools/CRAMFileWriterTest.java index c49552838..bd3a5ab5b 100644 --- a/src/test/java/htsjdk/samtools/CRAMFileWriterTest.java +++ b/src/test/java/htsjdk/samtools/CRAMFileWriterTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.reference.InMemoryReferenceSequenceFile; import 
htsjdk.samtools.util.Log; @@ -40,7 +41,7 @@ import java.util.Collections; import java.util.List; -public class CRAMFileWriterTest { +public class CRAMFileWriterTest extends HtsjdkTest { @BeforeClass public void initClass() { @@ -248,6 +249,7 @@ public void test_roundtrip_tlen_preserved() throws IOException { SAMRecord record1 = iterator.next(); SAMRecord record2 = records.get(i++); Assert.assertEquals(record1.getInferredInsertSize(), record2.getInferredInsertSize(), record1.getReadName()); + Assert.assertEquals(record1, record2, record1.getReadName()); } Assert.assertEquals(records.size(), i); } diff --git a/src/test/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java b/src/test/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java index b7e3eab0b..b7facb6cb 100644 --- a/src/test/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java +++ b/src/test/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.CRAIIndex; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.reference.InMemoryReferenceSequenceFile; @@ -22,7 +23,7 @@ /** * Created by vadim on 23/03/2015. */ -public class CRAMFileWriterWithIndexTest { +public class CRAMFileWriterWithIndexTest extends HtsjdkTest { private byte[] cramBytes; private byte[] indexBytes; private InMemoryReferenceSequenceFile rsf; diff --git a/src/test/java/htsjdk/samtools/CRAMIndexQueryTest.java b/src/test/java/htsjdk/samtools/CRAMIndexQueryTest.java index df9431071..845433f9f 100644 --- a/src/test/java/htsjdk/samtools/CRAMIndexQueryTest.java +++ b/src/test/java/htsjdk/samtools/CRAMIndexQueryTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.util.CloseableIterator; import org.testng.Assert; @@ -42,7 +43,7 @@ * whatever index format (.bai or .crai converted to .bai) is available for the * target file. 
*/ -public class CRAMIndexQueryTest { +public class CRAMIndexQueryTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/cram"); diff --git a/src/test/java/htsjdk/samtools/CRAMSliceMD5Test.java b/src/test/java/htsjdk/samtools/CRAMSliceMD5Test.java new file mode 100644 index 000000000..40568c426 --- /dev/null +++ b/src/test/java/htsjdk/samtools/CRAMSliceMD5Test.java @@ -0,0 +1,136 @@ +package htsjdk.samtools; + +import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.build.CramIO; +import htsjdk.samtools.cram.ref.CRAMReferenceSource; +import htsjdk.samtools.cram.ref.ReferenceSource; +import htsjdk.samtools.cram.structure.Container; +import htsjdk.samtools.cram.structure.ContainerIO; +import htsjdk.samtools.cram.structure.CramHeader; +import htsjdk.samtools.cram.structure.Slice; +import htsjdk.samtools.reference.InMemoryReferenceSequenceFile; +import htsjdk.samtools.util.SequenceUtil; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.util.Arrays; + +/** + * Created by vadim on 03/07/2017. 
+ */ +public class CRAMSliceMD5Test { + + @Test + public void testSliceMD5() throws IOException { + final CramTestCase test = new CramTestCase(); + + // read the CRAM: + final ByteArrayInputStream bais = new ByteArrayInputStream(test.cramData); + final CramHeader cramHeader = CramIO.readCramHeader(bais); + final Container container = ContainerIO.readContainer(cramHeader.getVersion(), bais); + final Slice slice = container.slices[0]; + Assert.assertEquals(slice.alignmentStart, 1); + Assert.assertEquals(slice.alignmentSpan, test.referenceBases.length); + // check the slice MD5 is the MD5 of upper-cased ref bases: + final byte[] ucRefMD5 = SequenceUtil.calculateMD5(test.refBasesFromUCSource, 0, test.refBasesFromUCSource.length); + Assert.assertEquals(slice.refMD5, ucRefMD5); + + // check the CRAM file reads: + final CRAMFileReader reader = new CRAMFileReader(new ByteArrayInputStream(test.cramData), (File) null, test.referenceSourceUpperCased, ValidationStringency.STRICT); + final SAMRecordIterator iterator = reader.getIterator(); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(iterator.next(), test.record); + } + + @Test(expectedExceptions = CRAMException.class) + public void testExceptionWhileReadingWithWrongReference() throws IOException { + final CramTestCase test = new CramTestCase(); + + // try reading the CRAM file with the incorrect ref source that does not upper case bases: + final CRAMFileReader reader = new CRAMFileReader(new ByteArrayInputStream(test.cramData), (File) null, test.referenceSourceMixedCase, ValidationStringency.STRICT); + final SAMRecordIterator iterator = reader.getIterator(); + // expect an exception here due to slice MD5 mismatch: + iterator.hasNext(); + } + + + /** + * A test case to demonstrate the effect of upper casing of reference bases. + * The class contains some assertions in the constructor to stress out reference bases case expectations. 
+ */ + private static class CramTestCase { + private final byte[] referenceBases; + private final byte[] referenceBasesUpperCased; + private final SAMFileHeader samFileHeader; + /** + * An invalid reference source that does not change bases: + */ + private final CRAMReferenceSource referenceSourceMixedCase; + private final InMemoryReferenceSequenceFile memoryReferenceSequenceFile; + /** + * A valid reference source that uppercases reference bases: + */ + private final ReferenceSource referenceSourceUpperCased; + private final byte[] refBasesFromUCSource; + private final byte[] refBasesFromMixedCaseSource; + private final SAMRecord record; + private final byte[] cramData; + + private CramTestCase() { + referenceBases = SequenceUtil.getIUPACCodesString().getBytes(); + referenceBasesUpperCased = SequenceUtil.upperCase(Arrays.copyOf(referenceBases, referenceBases.length)); + + samFileHeader = new SAMFileHeader(); + samFileHeader.addSequence(new SAMSequenceRecord("1", referenceBases.length)); + samFileHeader.addReadGroup(new SAMReadGroupRecord("rg1")); + + // this source does not change ref bases: + referenceSourceMixedCase = (sequenceRecord, tryNameVariants) -> referenceBases; + + memoryReferenceSequenceFile = new InMemoryReferenceSequenceFile(); + // copy ref bases to avoid the original from upper casing: + memoryReferenceSequenceFile.add("1", Arrays.copyOf(referenceBases, referenceBases.length)); + // this is the correct reference source, it upper cases ref bases: + referenceSourceUpperCased = new ReferenceSource(memoryReferenceSequenceFile); + + refBasesFromUCSource = referenceSourceUpperCased.getReferenceBases(samFileHeader.getSequence(0), true); + // check the ref bases from the source are upper cased indeed: + Assert.assertEquals(refBasesFromUCSource, referenceBasesUpperCased); + // check there is no lower case A: + Assert.assertTrue(!new String(refBasesFromUCSource).contains("a")); + + refBasesFromMixedCaseSource = 
referenceSourceMixedCase.getReferenceBases(samFileHeader.getSequence(0), true); + // check the mixed case source does not change ref base casing: + Assert.assertEquals(refBasesFromMixedCaseSource, referenceBases); + // check the mixed case source contains lower case bases: + Assert.assertTrue(new String(refBasesFromMixedCaseSource).contains("a")); + + final int readLen = referenceBases.length; + final byte[] bases = new byte[readLen]; + Arrays.fill(bases, (byte) 'A'); + final byte[] scores = new byte[readLen]; + Arrays.fill(scores, (byte) '!'); + + record = new SAMRecord(samFileHeader); + record.setReadName("test"); + record.setReferenceIndex(0); + record.setAlignmentStart(1); + record.setCigarString(readLen + "M"); + record.setReadBases(bases); + record.setBaseQualities(scores); + + // write a valid CRAM with a valid reference source: + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (final CRAMFileWriter writer = new CRAMFileWriter(baos, referenceSourceUpperCased, samFileHeader, "test")) { + writer.addAlignment(record); + } + cramData = baos.toByteArray(); + } + } + + +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/ChunkTest.java b/src/test/java/htsjdk/samtools/ChunkTest.java index d2bc157e2..b3a9e0a53 100644 --- a/src/test/java/htsjdk/samtools/ChunkTest.java +++ b/src/test/java/htsjdk/samtools/ChunkTest.java @@ -23,10 +23,11 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; -public class ChunkTest { +public class ChunkTest extends HtsjdkTest { @Test public void testOverlaps() { // Test completely disjoint offsets. 
diff --git a/src/test/java/htsjdk/samtools/CigarCodecTest.java b/src/test/java/htsjdk/samtools/CigarCodecTest.java index 8275a9484..7ccde7d1b 100644 --- a/src/test/java/htsjdk/samtools/CigarCodecTest.java +++ b/src/test/java/htsjdk/samtools/CigarCodecTest.java @@ -23,12 +23,13 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; import java.util.Arrays; -public class CigarCodecTest { +public class CigarCodecTest extends HtsjdkTest { @Test diff --git a/src/test/java/htsjdk/samtools/CigarOperatorUnitTest.java b/src/test/java/htsjdk/samtools/CigarOperatorUnitTest.java new file mode 100644 index 000000000..21c36d64b --- /dev/null +++ b/src/test/java/htsjdk/samtools/CigarOperatorUnitTest.java @@ -0,0 +1,137 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2017 Daniel Gomez-Sanchez + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package htsjdk.samtools; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +/** + * @author Daniel Gomez-Sanchez (magicDGS) + */ +public class CigarOperatorUnitTest extends HtsjdkTest { + + @DataProvider + public Object[][] chars() { + return new Object[][] { + {'M', CigarOperator.M}, + {'I', CigarOperator.I}, + {'D', CigarOperator.D}, + {'N', CigarOperator.N}, + {'S', CigarOperator.S}, + {'H', CigarOperator.H}, + {'P', CigarOperator.P}, + {'=', CigarOperator.EQ}, + {'X', CigarOperator.X} + }; + } + + @Test(dataProvider = "chars") + public void testCharacterToEnum(final char c, final CigarOperator op) throws Exception { + Assert.assertEquals(CigarOperator.characterToEnum(c), op); + } + + @Test(dataProvider = "chars") + public void testEnumToCharacter(final char c, final CigarOperator op) throws Exception { + Assert.assertEquals(CigarOperator.enumToCharacter(op), c); + } + + @DataProvider + public Object[][] illegalChars() { + return new Object[][] { + {'A'}, {'E'}, {'O'}, {'U'} + }; + } + + @Test(dataProvider = "illegalChars", expectedExceptions = IllegalArgumentException.class) + public void testIllegalCharacterToEnum(final char c) throws Exception { + CigarOperator.characterToEnum(c); + } + + @DataProvider + public Object[][] binary() { + return new Object[][] { + {0, CigarOperator.M}, + {1, CigarOperator.I}, + {2, CigarOperator.D}, + {3, CigarOperator.N}, + {4, CigarOperator.S}, + {5, CigarOperator.H}, + {6, CigarOperator.P}, + {7, CigarOperator.EQ}, + {8, CigarOperator.X} + }; + } + + @Test(dataProvider = "binary") + public void testBinaryToEnum(final int bin, final CigarOperator op) throws Exception { + Assert.assertEquals(CigarOperator.binaryToEnum(bin), op); + } + + @Test(dataProvider = "binary") + public void testEnumToBinary(final int bin, final CigarOperator op) throws Exception { + Assert.assertEquals(CigarOperator.enumToBinary(op), bin); + } + + 
@DataProvider + public Object[][] illegalBinary() { + return new Object[][] { + {-1}, {9}, {10} + }; + } + + @Test(dataProvider = "illegalBinary", expectedExceptions = IllegalArgumentException.class) + public void testIllegalBinaryToEnum(final int bin) throws Exception { + CigarOperator.binaryToEnum(bin); + } + + @DataProvider + public Object[][] opStatus() { + return new Object[][] { + // op, isClipping, isIndel, isSkip, isAlignment, isPadding + {CigarOperator.M, false, false, false, true, false}, + {CigarOperator.I, false, true, false, false, false}, + {CigarOperator.D, false, true, false, false, false}, + {CigarOperator.N, false, false, true, false, false}, + {CigarOperator.S, true, false, false, false, false}, + {CigarOperator.H, true, false, false, false, false}, + {CigarOperator.P, false, false, false, false, true}, + {CigarOperator.EQ, false, false, false, true, false}, + {CigarOperator.X, false, false, false, true, false} + }; + } + + @Test(dataProvider = "opStatus") + public void testIsSetOfOperations(final CigarOperator op, final boolean isClipping, + final boolean isIndel,final boolean isSkip, final boolean isAlignment, + final boolean isPadding) throws Exception { + Assert.assertEquals(op.isClipping(), isClipping); + Assert.assertEquals(op.isIndel(), isIndel); + Assert.assertEquals(op.isIndelOrSkippedRegion(), isIndel || isSkip); + Assert.assertEquals(op.isAlignment(), isAlignment); + Assert.assertEquals(op.isPadding(), isPadding); + } +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/CigarTest.java b/src/test/java/htsjdk/samtools/CigarTest.java index acdc22407..c104073a0 100644 --- a/src/test/java/htsjdk/samtools/CigarTest.java +++ b/src/test/java/htsjdk/samtools/CigarTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -33,7 +34,7 @@ /** * @author alecw@broadinstitute.org */ -public class 
CigarTest { +public class CigarTest extends HtsjdkTest { @DataProvider(name = "positiveTestsData") public Object[][] testPositive() { @@ -62,6 +63,9 @@ public void testPositive(final String cigar) { public Object[][] negativeTestsData() { return new Object[][]{ + // CIGAR element with zero length + {"0M", SAMValidationError.Type.INVALID_CIGAR}, + // Cannot have two consecutive insertions (of the same type) {"1M1D1D1M", SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR}, {"1M1I1I1M", SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR}, @@ -79,11 +83,15 @@ public void testPositive(final String cigar) { {"1H1S", SAMValidationError.Type.INVALID_CIGAR}, {"1S1H", SAMValidationError.Type.INVALID_CIGAR}, {"1H1H", SAMValidationError.Type.INVALID_CIGAR}, + + // Hard clipping operator not at start or end of CIGAR + {"1M1H1M", SAMValidationError.Type.INVALID_CIGAR}, + + // Padding operator not valid at end of CIGAR + {"1M1P", SAMValidationError.Type.INVALID_CIGAR}, + // Padding operator not between real operators in CIGAR + {"1S1P1M", SAMValidationError.Type.INVALID_CIGAR} }; -/* - // Zero length for an element not allowed. 
TODO: not sure why this is commented out - {"100M0D10M1D10M", SAMValidationError.Type.INVALID_CIGAR} -*/ } @Test(dataProvider = "negativeTestsData") @@ -115,4 +123,115 @@ public void testMakeCigarFromOperators() { Assert.assertFalse(cigar.isRightClipped()); Assert.assertTrue(cigar.isClipped()); } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testMakeCigarFromNullOperator() { + Cigar.fromCigarOperators(null); + } + + @DataProvider + public Object[][] referenceLengthData() { + return new Object[][] { + // consuming reference + {"1M", 1, 1}, + {"1=", 1, 1}, + {"1X", 1, 1}, + {"1N", 1, 1}, + {"1D", 1, 1}, + + // non-consuming reference + {"1S", 0, 0}, + {"1H", 0, 0}, + + // special case: padding + {"1P", 0, 1} + }; + } + + @Test(dataProvider = "referenceLengthData") + public void testGetReferenceLength(final String textCigar, + final int referenceLength, final int paddedReferenceLenght) throws Exception{ + final Cigar cigar = TextCigarCodec.decode(textCigar); + Assert.assertEquals(cigar.getReferenceLength(), referenceLength); + Assert.assertEquals(cigar.getPaddedReferenceLength(), paddedReferenceLenght); + } + + @DataProvider + public Object[][] readLengthData() { + return new Object[][] { + // consuming read bases + {"1M", 1}, + {"2I", 2}, + {"3S", 3}, + {"4X", 4}, + {"5=", 5}, + + // non-consuming reference + {"1D", 0}, + {"2N", 0}, + {"4H", 0}, + {"4P", 0} + }; + } + + @Test(dataProvider = "readLengthData") + public void testGetReadLength(final String textCigar, final int readLength) throws Exception{ + final Cigar cigar = TextCigarCodec.decode(textCigar); + Assert.assertEquals(cigar.getReadLength(), readLength); + } + + @Test + public void testContainsOperator() { + final Cigar cigar = TextCigarCodec.decode("10M1S"); + Assert.assertTrue(cigar.containsOperator(CigarOperator.M)); + Assert.assertTrue(cigar.containsOperator(CigarOperator.S)); + Assert.assertFalse(cigar.containsOperator(CigarOperator.X)); + } + + @DataProvider + public 
Object[][] firstLastData() { + final CigarElement M_ELEMENT = new CigarElement(1, CigarOperator.M); + final CigarElement S_ELEMENT = new CigarElement(1, CigarOperator.S); + return new Object[][] { + {"*", null, null}, + {"1M", M_ELEMENT, M_ELEMENT}, + {"1M1S", M_ELEMENT, S_ELEMENT}, + {"1S1M", S_ELEMENT, M_ELEMENT}, + {"1S1M1S", S_ELEMENT, S_ELEMENT}, + {"1M1D1M1D1M", M_ELEMENT, M_ELEMENT} + }; + } + + @Test(dataProvider = "firstLastData") + public void testGetFirstOrLastCigarElement(final String textCigar, final CigarElement first, final CigarElement last) { + final Cigar cigar = TextCigarCodec.decode(textCigar); + Assert.assertEquals(cigar.getFirstCigarElement(), first); + Assert.assertEquals(cigar.getLastCigarElement(), last); + } + + @DataProvider + public Object[][] clippedData() { + return new Object[][] { + // no clipped + {"10M", false}, + // wrong place for soft-clip and hard-clip returns false + {"1M1S1M", false}, + {"1M1H1M", false}, + + // clipped + {"1S1M", true}, + {"1M1S", true}, + {"1S1M1S", true}, + {"1H1M", true}, + {"1M1H", true}, + {"1H1M1H", true} + }; + } + + @Test(dataProvider = "clippedData") + public void testIsClipped(final String textCigar, final boolean isClipped) { + // this test is indirectly testing both left and right clipping methods + Assert.assertEquals(TextCigarCodec.decode(textCigar).isClipped(), isClipped); + } + } diff --git a/src/test/java/htsjdk/samtools/DownsamplingIteratorTests.java b/src/test/java/htsjdk/samtools/DownsamplingIteratorTests.java index e84ee2e48..96dff4656 100644 --- a/src/test/java/htsjdk/samtools/DownsamplingIteratorTests.java +++ b/src/test/java/htsjdk/samtools/DownsamplingIteratorTests.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.DownsamplingIteratorFactory.Strategy; import org.testng.Assert; import org.testng.annotations.Test; @@ -14,7 +15,7 @@ * Tests for the downsampling iterator class. 
* @author Tim Fennell */ -public class DownsamplingIteratorTests { +public class DownsamplingIteratorTests extends HtsjdkTest { final int NUM_TEMPLATES = 50000; final EnumMap ACCURACY = new EnumMap(Strategy.class){{ put(Strategy.HighAccuracy, 0.001); diff --git a/src/test/java/htsjdk/samtools/DuplicateScoringStrategyTest.java b/src/test/java/htsjdk/samtools/DuplicateScoringStrategyTest.java new file mode 100644 index 000000000..d86b697a5 --- /dev/null +++ b/src/test/java/htsjdk/samtools/DuplicateScoringStrategyTest.java @@ -0,0 +1,26 @@ +package htsjdk.samtools; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public class DuplicateScoringStrategyTest extends HtsjdkTest { + + @DataProvider + public Object [][] compareData() { + return new Object[][]{ + {SAMFlag.READ_PAIRED.flag, 0, true, DuplicateScoringStrategy.ScoringStrategy.RANDOM, -1}, + {0, SAMFlag.READ_PAIRED.flag, true, DuplicateScoringStrategy.ScoringStrategy.RANDOM, 1}, + }; + } + + @Test(dataProvider = "compareData") + public static void testCompare(final int samFlag1, final int samFlag2, final boolean assumeMateCigar, final DuplicateScoringStrategy.ScoringStrategy strategy, final int expected) { + final SAMRecord rec1 = new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "36M", null, 2); + rec1.setFlags(samFlag1); + final SAMRecord rec2 = new SAMRecordSetBuilder().addFrag("test", 0, 1, true, false, "36M", null, 3); + rec2.setFlags(samFlag2); + Assert.assertEquals(DuplicateScoringStrategy.compare(rec1, rec2, strategy, assumeMateCigar), expected); + } +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/DuplicateSetIteratorTest.java b/src/test/java/htsjdk/samtools/DuplicateSetIteratorTest.java index 595295345..27e167881 100644 --- a/src/test/java/htsjdk/samtools/DuplicateSetIteratorTest.java +++ b/src/test/java/htsjdk/samtools/DuplicateSetIteratorTest.java @@ -1,12 +1,13 @@ package 
htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; import java.util.HashMap; import java.util.Map; -public class DuplicateSetIteratorTest { +public class DuplicateSetIteratorTest extends HtsjdkTest { protected final static int DEFAULT_BASE_QUALITY = 10; private SAMRecordSetBuilder getSAMRecordSetBuilder() { diff --git a/src/test/java/htsjdk/samtools/GenomicIndexUtilTest.java b/src/test/java/htsjdk/samtools/GenomicIndexUtilTest.java index 8f5569c59..0bf322d58 100644 --- a/src/test/java/htsjdk/samtools/GenomicIndexUtilTest.java +++ b/src/test/java/htsjdk/samtools/GenomicIndexUtilTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -7,7 +8,7 @@ /** * Tests for GenomicIndexUtil. */ -public class GenomicIndexUtilTest { +public class GenomicIndexUtilTest extends HtsjdkTest { @Test(dataProvider = "testRegionToBinDataProvider") public void testRegionToBin(final int beg, final int end, final int bin) { @@ -47,4 +48,4 @@ public void testRegionToBin(final int beg, final int end, final int bin) { {1<<26, 1<<26+1, 2} }; } -} \ No newline at end of file +} diff --git a/src/test/java/htsjdk/samtools/MergingSamRecordIteratorGroupCollisionTest.java b/src/test/java/htsjdk/samtools/MergingSamRecordIteratorGroupCollisionTest.java index 18c10c70b..d350b8f38 100644 --- a/src/test/java/htsjdk/samtools/MergingSamRecordIteratorGroupCollisionTest.java +++ b/src/test/java/htsjdk/samtools/MergingSamRecordIteratorGroupCollisionTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloserUtil; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -37,7 +38,7 @@ * * @author Dave Tefft, Andre Mesarovic */ -public class MergingSamRecordIteratorGroupCollisionTest { +public class MergingSamRecordIteratorGroupCollisionTest extends HtsjdkTest 
{ private GroupAdapter padapter = new ProgramGroupAdapter(); private GroupAdapter radapter = new ReadGroupAdapter(); @@ -424,18 +425,22 @@ SamReader newFileReader() { } class ProgramGroupAdapter extends GroupAdapter { + @Override String getGroupId(AbstractSAMHeaderRecord group) { return ((SAMProgramRecord) group).getProgramGroupId(); } + @Override List getGroups(SAMFileHeader header) { return header.getProgramRecords(); } + @Override String getTagName() { return SAMTag.PG.toString(); } + @Override List createGroups(final String[] groupIds) { final List readers = new ArrayList(); for (final String groupId : groupIds) { @@ -457,36 +462,44 @@ String getTagName() { return fileHeaderMerger.getMergedHeader().getProgramRecords(); } + @Override void setAttribute(AbstractSAMHeaderRecord group, String value) { ((SAMProgramRecord) group).setCommandLine(value); } + @Override AbstractSAMHeaderRecord newGroup(String id) { return new SAMProgramRecord(id); } + @Override void setBuilderGroup(SAMRecordSetBuilder builder, AbstractSAMHeaderRecord group) { builder.setProgramRecord((SAMProgramRecord) group); } + @Override boolean equivalent(AbstractSAMHeaderRecord group1, AbstractSAMHeaderRecord group2) { return ((SAMProgramRecord) group1).equivalent((SAMProgramRecord) group2); } } class ReadGroupAdapter extends GroupAdapter { + @Override String getGroupId(AbstractSAMHeaderRecord group) { return ((SAMReadGroupRecord) group).getReadGroupId(); } + @Override List getGroups(SAMFileHeader header) { return header.getReadGroups(); } + @Override String getTagName() { return SAMTag.RG.toString(); } + @Override List createGroups(final String[] groupIds) { final List readers = new ArrayList(); @@ -507,20 +520,24 @@ String getTagName() { return fileHeaderMerger.getMergedHeader().getReadGroups(); } + @Override void setAttribute(AbstractSAMHeaderRecord group, String value) { ((SAMReadGroupRecord) group).setPlatformUnit(value); } + @Override AbstractSAMHeaderRecord newGroup(String id) { 
SAMReadGroupRecord group = new SAMReadGroupRecord(id); group.setAttribute(SAMTag.SM.name(), id); return group; } + @Override void setBuilderGroup(SAMRecordSetBuilder builder, AbstractSAMHeaderRecord group) { builder.setReadGroup((SAMReadGroupRecord) group); } + @Override boolean equivalent(AbstractSAMHeaderRecord group1, AbstractSAMHeaderRecord group2) { return ((SAMReadGroupRecord) group1).equivalent((SAMReadGroupRecord) group2); } diff --git a/src/test/java/htsjdk/samtools/MergingSamRecordIteratorTest.java b/src/test/java/htsjdk/samtools/MergingSamRecordIteratorTest.java index 885321b26..a50c02645 100644 --- a/src/test/java/htsjdk/samtools/MergingSamRecordIteratorTest.java +++ b/src/test/java/htsjdk/samtools/MergingSamRecordIteratorTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.SequenceUtil; import org.testng.Assert; import org.testng.annotations.Test; @@ -38,7 +39,7 @@ * * @author Dave Tefft */ -public class MergingSamRecordIteratorTest { +public class MergingSamRecordIteratorTest extends HtsjdkTest { @Test public void testVanillaCoordinateMultiIterator() throws Exception { diff --git a/src/test/java/htsjdk/samtools/PathInputResourceTest.java b/src/test/java/htsjdk/samtools/PathInputResourceTest.java new file mode 100644 index 000000000..f82b9a627 --- /dev/null +++ b/src/test/java/htsjdk/samtools/PathInputResourceTest.java @@ -0,0 +1,48 @@ +package htsjdk.samtools; + +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.function.Function; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class PathInputResourceTest extends HtsjdkTest { + final String localBam = "src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam"; + + @Test + public void testWrappersAreAccessed() throws Exception { + Path path = Paths.get(localBam); + Path indexPath = 
Paths.get(localBam + ".bai"); + HashMap fired = new HashMap<>(); + Function wrapData = (SeekableByteChannel in) -> { + fired.put("data", true); + return in; + }; + Function wrapIndex = (SeekableByteChannel in) -> { + fired.put("index", true); + return in; + }; + SamInputResource in = SamInputResource.of(path, wrapData); + in.index(indexPath, wrapIndex); + InputResource indexResource = in.indexMaybe(); + Assert.assertNotNull(indexResource); + + Assert.assertFalse(fired.containsKey("data")); + Assert.assertFalse(fired.containsKey("index")); + + indexResource.asUnbufferedSeekableStream(); + + Assert.assertFalse(fired.containsKey("data")); + Assert.assertTrue(fired.containsKey("index")); + + in.data().asUnbufferedSeekableStream(); + + Assert.assertTrue(fired.containsKey("data")); + Assert.assertTrue(fired.containsKey("index")); + } + +} diff --git a/src/test/java/htsjdk/samtools/ProgramRecordChainingTest.java b/src/test/java/htsjdk/samtools/ProgramRecordChainingTest.java index cd470c449..4811148f3 100644 --- a/src/test/java/htsjdk/samtools/ProgramRecordChainingTest.java +++ b/src/test/java/htsjdk/samtools/ProgramRecordChainingTest.java @@ -23,13 +23,14 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; /** * Test for SequenceUtil.chainProgramRecord */ -public class ProgramRecordChainingTest { +public class ProgramRecordChainingTest extends HtsjdkTest { @Test public void testChainProgramRecord() { diff --git a/src/test/java/htsjdk/samtools/SAMBinaryTagAndValueUnitTest.java b/src/test/java/htsjdk/samtools/SAMBinaryTagAndValueUnitTest.java index f5f7a5c01..93a20dc6e 100644 --- a/src/test/java/htsjdk/samtools/SAMBinaryTagAndValueUnitTest.java +++ b/src/test/java/htsjdk/samtools/SAMBinaryTagAndValueUnitTest.java @@ -1,11 +1,12 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.BinaryCodec; import org.testng.Assert; import org.testng.annotations.DataProvider; import 
org.testng.annotations.Test; -public class SAMBinaryTagAndValueUnitTest { +public class SAMBinaryTagAndValueUnitTest extends HtsjdkTest { @DataProvider(name="allowedAttributeTypes") public Object[][] allowedTypes() { diff --git a/src/test/java/htsjdk/samtools/SAMCloneTest.java b/src/test/java/htsjdk/samtools/SAMCloneTest.java index 8fdfb3bde..e05d29d7a 100644 --- a/src/test/java/htsjdk/samtools/SAMCloneTest.java +++ b/src/test/java/htsjdk/samtools/SAMCloneTest.java @@ -23,13 +23,14 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; /** * @author alecw@broadinstitute.org */ -public class SAMCloneTest { +public class SAMCloneTest extends HtsjdkTest { private SAMRecordSetBuilder getSAMReader(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) { final SAMRecordSetBuilder ret = new SAMRecordSetBuilder(sortForMe, sortOrder); ret.addPair("readB", 20, 200, 300); diff --git a/src/test/java/htsjdk/samtools/SAMFileHeaderTest.java b/src/test/java/htsjdk/samtools/SAMFileHeaderTest.java new file mode 100644 index 000000000..0723ed9e4 --- /dev/null +++ b/src/test/java/htsjdk/samtools/SAMFileHeaderTest.java @@ -0,0 +1,64 @@ +/* + * The MIT License + * + * Copyright (c) 2017 Nils Homer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + */ +package htsjdk.samtools; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class SAMFileHeaderTest extends HtsjdkTest { + + @Test + public void testSortOrder() { + final SAMFileHeader header = new SAMFileHeader(); + + header.setSortOrder(SAMFileHeader.SortOrder.coordinate); + Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.coordinate); + Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.coordinate.name()); + + header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, SAMFileHeader.SortOrder.queryname.name()); + Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.queryname); + Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.queryname.name()); + + header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, SAMFileHeader.SortOrder.coordinate); + Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.coordinate); + Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.coordinate.name()); + } + + @Test + public void testGroupOrder() { + final SAMFileHeader header = new SAMFileHeader(); + + header.setGroupOrder(SAMFileHeader.GroupOrder.query); + Assert.assertEquals(header.getGroupOrder(), SAMFileHeader.GroupOrder.query); + Assert.assertEquals(header.getAttribute(SAMFileHeader.GROUP_ORDER_TAG), SAMFileHeader.GroupOrder.query.name()); + + header.setAttribute(SAMFileHeader.GROUP_ORDER_TAG, 
SAMFileHeader.GroupOrder.reference.name()); + Assert.assertEquals(header.getGroupOrder(), SAMFileHeader.GroupOrder.reference); + Assert.assertEquals(header.getAttribute(SAMFileHeader.GROUP_ORDER_TAG), SAMFileHeader.GroupOrder.reference.name()); + + header.setAttribute(SAMFileHeader.GROUP_ORDER_TAG, SAMFileHeader.GroupOrder.query); + Assert.assertEquals(header.getGroupOrder(), SAMFileHeader.GroupOrder.query); + Assert.assertEquals(header.getAttribute(SAMFileHeader.GROUP_ORDER_TAG), SAMFileHeader.GroupOrder.query.name()); + } +} diff --git a/src/test/java/htsjdk/samtools/SAMFileWriterFactoryTest.java b/src/test/java/htsjdk/samtools/SAMFileWriterFactoryTest.java index dc7a6f381..0b8d7b5ac 100644 --- a/src/test/java/htsjdk/samtools/SAMFileWriterFactoryTest.java +++ b/src/test/java/htsjdk/samtools/SAMFileWriterFactoryTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.build.CramIO; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.util.IOUtil; @@ -32,7 +33,7 @@ import java.io.*; -public class SAMFileWriterFactoryTest { +public class SAMFileWriterFactoryTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); @@ -166,7 +167,29 @@ private void createSmallBamToOutputStream(final OutputStream outputStream,boolea fillSmallBam(writer); writer.close(); } - + + @Test(description="check that factory settings are propagated to writer") + public void testFactorySettings() throws Exception { + final SAMFileWriterFactory factory = new SAMFileWriterFactory(); + factory.setCreateIndex(false); + factory.setCreateMd5File(false); + final File wontBeUsed = new File("wontBeUsed.tmp"); + final int maxRecsInRam = 271828; + factory.setMaxRecordsInRam(maxRecsInRam); + factory.setTempDirectory(wontBeUsed); + final SAMFileHeader header = new SAMFileHeader(); + header.setSortOrder(SAMFileHeader.SortOrder.coordinate); + header.addSequence(new 
SAMSequenceRecord("chr1", 123)); + try (final SAMFileWriter writer = factory.makeBAMWriter(header, false, new ByteArrayOutputStream())) { + Assert.assertEquals(maxRecsInRam, ((SAMFileWriterImpl) writer).getMaxRecordsInRam()); + Assert.assertEquals(wontBeUsed, ((SAMFileWriterImpl) writer).getTempDirectory()); + } + try (final SAMFileWriter writer = factory.makeSAMWriter(header, false, new ByteArrayOutputStream())) { + Assert.assertEquals(maxRecsInRam, ((SAMFileWriterImpl) writer).getMaxRecordsInRam()); + Assert.assertEquals(wontBeUsed, ((SAMFileWriterImpl) writer).getTempDirectory()); + } + } + private int fillSmallBam(SAMFileWriter writer) { final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); builder.addUnmappedFragment("HiMom!"); diff --git a/src/test/java/htsjdk/samtools/SAMFlagTest.java b/src/test/java/htsjdk/samtools/SAMFlagTest.java index 7b5a5539f..86dd8f0ee 100644 --- a/src/test/java/htsjdk/samtools/SAMFlagTest.java +++ b/src/test/java/htsjdk/samtools/SAMFlagTest.java @@ -24,10 +24,11 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; -public class SAMFlagTest { +public class SAMFlagTest extends HtsjdkTest { @Test public void testFlags() { Assert.assertTrue(SAMFlag.getFlags(83).contains(SAMFlag.READ_PAIRED)); diff --git a/src/test/java/htsjdk/samtools/SAMIntegerTagTest.java b/src/test/java/htsjdk/samtools/SAMIntegerTagTest.java index 133062a15..3fa38df62 100644 --- a/src/test/java/htsjdk/samtools/SAMIntegerTagTest.java +++ b/src/test/java/htsjdk/samtools/SAMIntegerTagTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.util.BinaryCodec; import htsjdk.samtools.util.CloserUtil; @@ -45,7 +46,7 @@ * * @author alecw@broadinstitute.org */ -public class SAMIntegerTagTest { +public class SAMIntegerTagTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new 
File("src/test/resources/htsjdk/samtools/SAMIntegerTagTest"); private static final String BYTE_TAG = "BY"; diff --git a/src/test/java/htsjdk/samtools/SAMProgramRecordTest.java b/src/test/java/htsjdk/samtools/SAMProgramRecordTest.java new file mode 100644 index 000000000..99a26cc38 --- /dev/null +++ b/src/test/java/htsjdk/samtools/SAMProgramRecordTest.java @@ -0,0 +1,43 @@ +/* + * The MIT License + * + * Copyright (c) 2017 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package htsjdk.samtools; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * Test for SAMReadGroupRecordTest + */ +public class SAMProgramRecordTest extends HtsjdkTest { + + @Test + public void testGetSAMString() { + SAMProgramRecord r = new SAMProgramRecord("SW-eIV"); + r.setProgramName("telnet"); + r.setProgramVersion("0.17-40"); + r.setCommandLine("telnet towel.blinkenlights.nl"); + Assert.assertEquals("@PG\tID:SW-eIV\tPN:telnet\tVN:0.17-40\tCL:telnet towel.blinkenlights.nl", r.getSAMString()); + } +} diff --git a/src/test/java/htsjdk/samtools/SAMReadGroupRecordTest.java b/src/test/java/htsjdk/samtools/SAMReadGroupRecordTest.java new file mode 100644 index 000000000..0801f52a5 --- /dev/null +++ b/src/test/java/htsjdk/samtools/SAMReadGroupRecordTest.java @@ -0,0 +1,43 @@ +/* + * The MIT License + * + * Copyright (c) 2017 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * Test for SAMReadGroupRecordTest + */ +public class SAMReadGroupRecordTest extends HtsjdkTest { + + @Test + public void testGetSAMString() { + SAMReadGroupRecord r = new SAMReadGroupRecord("rg1"); + r.setSample("mysample"); + r.setPlatform("ILLUMINA"); + r.setDescription("my description"); + Assert.assertEquals("@RG\tID:rg1\tSM:mysample\tPL:ILLUMINA\tDS:my description", r.getSAMString()); + } +} diff --git a/src/test/java/htsjdk/samtools/SAMRecordDuplicateComparatorTest.java b/src/test/java/htsjdk/samtools/SAMRecordDuplicateComparatorTest.java index cb509258e..99d187a58 100644 --- a/src/test/java/htsjdk/samtools/SAMRecordDuplicateComparatorTest.java +++ b/src/test/java/htsjdk/samtools/SAMRecordDuplicateComparatorTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; @@ -37,7 +38,7 @@ * * @author nhomer */ -public class SAMRecordDuplicateComparatorTest { +public class SAMRecordDuplicateComparatorTest extends HtsjdkTest { private final static SAMRecordDuplicateComparator comparator = new SAMRecordDuplicateComparator(); diff --git a/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java b/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java index 951ecee78..5fa35f3e9 100644 --- a/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java +++ b/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java @@ -24,6 +24,7 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.BinaryCodec; import htsjdk.samtools.util.TestUtil; import org.testng.Assert; @@ -34,7 +35,7 @@ 
import java.util.Arrays; import java.util.List; -public class SAMRecordUnitTest { +public class SAMRecordUnitTest extends HtsjdkTest { @DataProvider(name = "serializationTestData") public Object[][] getSerializationTestData() { @@ -462,7 +463,7 @@ public void test_setAttribute_null_removes_tag() { } private SAMRecord createTestRecordHelper() { - return new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "3S9M", null, 2); + return new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "3S33M", null, 2); } @Test @@ -775,7 +776,7 @@ public void testNullHeaderRecordValidation() { } @Test - private void testNullHeaderDeepCopy() { + public void testNullHeaderDeepCopy() { SAMRecord sam = createTestRecordHelper(); sam.setHeader(null); final SAMRecord deepCopy = sam.deepCopy(); @@ -804,13 +805,13 @@ private void testNullHeaderCigar(SAMRecord rec) { } @Test - private void testNullHeadGetCigarSAM() { - SAMRecord sam = createTestRecordHelper(); + public void testNullHeadGetCigarSAM() { + final SAMRecord sam = createTestRecordHelper(); testNullHeaderCigar(sam); } @Test - private void testNullHeadGetCigarBAM() { + public void testNullHeadGetCigarBAM() { SAMRecord sam = createTestRecordHelper(); SAMRecordFactory factory = new DefaultSAMRecordFactory(); BAMRecord bamRec = factory.createBAMRecord( @@ -1038,4 +1039,36 @@ public SAMRecord createTestSamRec() { return(rec); } + + @DataProvider + public Object [][] readBasesArrayGetReadLengthData() { + return new Object[][]{ + { null, 0 }, + { SAMRecord.NULL_SEQUENCE, 0 }, + { new byte[] {'A', 'C'}, 2 } + }; + } + + @Test(dataProvider = "readBasesArrayGetReadLengthData") + public void testReadBasesGetReadLength(final byte[] readBases, final int readLength) { + final SAMRecord sam = createTestRecordHelper(); + sam.setReadBases(readBases); + Assert.assertEquals(sam.getReadLength(), readLength); + } + + @DataProvider + public Object [][] readBasesStringGetReadLengthData() { + return new Object[][]{ + { null, 0 }, + { 
SAMRecord.NULL_SEQUENCE_STRING, 0 }, + { "AC", 2 } + }; + } + + @Test(dataProvider = "readBasesStringGetReadLengthData") + public void testReadStringGetReadLength(final String readBases, final int readLength) { + final SAMRecord sam = createTestRecordHelper(); + sam.setReadString(readBases); + Assert.assertEquals(sam.getReadLength(), readLength); + } } diff --git a/src/test/java/htsjdk/samtools/SAMSequenceDictionaryCodecTest.java b/src/test/java/htsjdk/samtools/SAMSequenceDictionaryCodecTest.java index 32de1cd82..4257508cf 100644 --- a/src/test/java/htsjdk/samtools/SAMSequenceDictionaryCodecTest.java +++ b/src/test/java/htsjdk/samtools/SAMSequenceDictionaryCodecTest.java @@ -24,23 +24,24 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.LineReader; import htsjdk.samtools.util.StringLineReader; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import javax.sound.sampled.Line; import java.io.BufferedWriter; import java.io.StringWriter; import java.util.List; import java.util.Random; -import static org.testng.Assert.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; /** * @author Pavel_Silin@epam.com, EPAM Systems, Inc. 
*/ -public class SAMSequenceDictionaryCodecTest { +public class SAMSequenceDictionaryCodecTest extends HtsjdkTest { private static final Random random = new Random(); private SAMSequenceDictionary dictionary; diff --git a/src/test/java/htsjdk/samtools/SAMSequenceDictionaryTest.java b/src/test/java/htsjdk/samtools/SAMSequenceDictionaryTest.java index 0b1a50780..a8e60ed50 100644 --- a/src/test/java/htsjdk/samtools/SAMSequenceDictionaryTest.java +++ b/src/test/java/htsjdk/samtools/SAMSequenceDictionaryTest.java @@ -26,23 +26,22 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBException; +import javax.xml.bind.Marshaller; +import javax.xml.bind.Unmarshaller; import java.io.StringReader; import java.io.StringWriter; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; -import javax.xml.bind.JAXBContext; -import javax.xml.bind.JAXBException; -import javax.xml.bind.Marshaller; -import javax.xml.bind.Unmarshaller; - -public class SAMSequenceDictionaryTest { +public class SAMSequenceDictionaryTest extends HtsjdkTest { @Test public void testAliases() { final SAMSequenceRecord ssr1 = new SAMSequenceRecord("1", 1); @@ -144,4 +143,27 @@ public void testMergeDictionaries(final SAMSequenceRecord rec1, final SAMSequenc throw new Exception("Expected to not be able to merge dictionaries, but was able"); } } + + @DataProvider + public Object[][] testIsSameDictionaryData() { + + final SAMSequenceRecord rec1, rec2; + rec1 = new SAMSequenceRecord("chr1", 100); + rec2 = new SAMSequenceRecord("chr2", 101); + + return new Object[][]{ + new Object[]{Arrays.asList(rec1), Arrays.asList(rec1), true}, + new Object[]{Arrays.asList(rec1), Arrays.asList(rec2), false}, + new Object[]{Arrays.asList(rec1, rec2), Arrays.asList(rec1), false} + }; + } + + @Test(dataProvider = 
"testIsSameDictionaryData") + public void testIsSameDictionary(final List recs1, final List recs2, final boolean isSameDictionary) { + + final SAMSequenceDictionary dict1 = new SAMSequenceDictionary(recs1); + final SAMSequenceDictionary dict2 = new SAMSequenceDictionary(recs2); + + Assert.assertEquals(dict1.isSameDictionary(dict2), isSameDictionary); + } } diff --git a/src/test/java/htsjdk/samtools/SAMSequenceRecordTest.java b/src/test/java/htsjdk/samtools/SAMSequenceRecordTest.java new file mode 100644 index 000000000..89e6121d2 --- /dev/null +++ b/src/test/java/htsjdk/samtools/SAMSequenceRecordTest.java @@ -0,0 +1,86 @@ +/* + * The MIT License + * + * Copyright (c) 2017 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package htsjdk.samtools; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; + +/** + * Test for SAMReadGroupRecordTest + */ +public class SAMSequenceRecordTest extends HtsjdkTest { + + @Test + public void testGetSAMString() { + final SAMSequenceRecord r = new SAMSequenceRecord("chr5_but_without_a_prefix", 271828); + r.setSpecies("Psephophorus terrypratchetti"); + r.setAssembly("GRCt01"); + r.setMd5("7a6dd3d307de916b477e7bf304ac22bc"); + Assert.assertEquals("@SQ\tSN:chr5_but_without_a_prefix\tLN:271828\tSP:Psephophorus terrypratchetti\tAS:GRCt01\tM5:7a6dd3d307de916b477e7bf304ac22bc", r.getSAMString()); + } + + @DataProvider + public Object[][] testIsSameSequenceData() { + final SAMSequenceRecord rec1 = new SAMSequenceRecord("chr1", 100); + final SAMSequenceRecord rec2 = new SAMSequenceRecord("chr2", 101); + final SAMSequenceRecord rec3 = new SAMSequenceRecord("chr3", 0); + final SAMSequenceRecord rec4 = new SAMSequenceRecord("chr1", 100); + + final String md5One = "1"; + final String md5Two = "2"; + final int index1 = 1; + final int index2 = 2; + + return new Object[][]{ + new Object[]{rec1, rec1, md5One, md5One, index1, index1, true}, + new Object[]{rec1, null, md5One, md5One, index1, index1, false}, + new Object[]{rec1, rec4, md5One, md5One, index1, index1, true}, + new Object[]{rec1, rec4, md5One, md5One, index1, index2, false}, + new Object[]{rec1, rec3, md5One, md5Two, index1, index1, false}, + new Object[]{rec1, rec2, md5One, md5Two, index1, index1, false}, + new Object[]{rec1, rec4, md5One, null, index1, index1, true}, + new Object[]{rec1, rec4, null, md5One, index1, index1, true}, + new Object[]{rec1, rec4, md5One, md5One, index1, index2, false} + }; + } + + @Test(dataProvider = "testIsSameSequenceData") + public void testIsSameSequence(final SAMSequenceRecord rec1 , final SAMSequenceRecord rec2, final String md5One, final String md5Two, + 
final int index1, final int index2, final boolean isSame) { + if (rec2 != null) { + rec2.setMd5(md5Two); + rec2.setSequenceIndex(index2); + } + + if (rec1 != null) { + rec1.setMd5(md5One); + rec1.setSequenceIndex(index1); + Assert.assertEquals(rec1.isSameSequence(rec2), isSame); + } + } +} diff --git a/src/test/java/htsjdk/samtools/SAMTextReaderTest.java b/src/test/java/htsjdk/samtools/SAMTextReaderTest.java index c80924b65..142eea32c 100644 --- a/src/test/java/htsjdk/samtools/SAMTextReaderTest.java +++ b/src/test/java/htsjdk/samtools/SAMTextReaderTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloserUtil; import org.testng.Assert; @@ -31,7 +32,7 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -public class SAMTextReaderTest { +public class SAMTextReaderTest extends HtsjdkTest { // Simple input, spot check that parsed correctly, and make sure nothing blows up. 
@Test public void testBasic() throws Exception { diff --git a/src/test/java/htsjdk/samtools/SAMTextWriterTest.java b/src/test/java/htsjdk/samtools/SAMTextWriterTest.java index 123ab6ba1..5c9ff28cd 100644 --- a/src/test/java/htsjdk/samtools/SAMTextWriterTest.java +++ b/src/test/java/htsjdk/samtools/SAMTextWriterTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; @@ -31,7 +32,7 @@ import java.util.Iterator; import java.util.Map; -public class SAMTextWriterTest { +public class SAMTextWriterTest extends HtsjdkTest { private SAMRecordSetBuilder getSAMReader(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) { final SAMRecordSetBuilder ret = new SAMRecordSetBuilder(sortForMe, sortOrder); diff --git a/src/test/java/htsjdk/samtools/SAMUtilsTest.java b/src/test/java/htsjdk/samtools/SAMUtilsTest.java index 3be7e390c..28e89f755 100644 --- a/src/test/java/htsjdk/samtools/SAMUtilsTest.java +++ b/src/test/java/htsjdk/samtools/SAMUtilsTest.java @@ -23,12 +23,15 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.Arrays; import java.util.List; -public class SAMUtilsTest { +public class SAMUtilsTest extends HtsjdkTest { @Test public void testCompareMapqs() { Assert.assertEquals(SAMUtils.compareMapqs(0, 0), 0); @@ -244,7 +247,41 @@ public void testOtherCanonicalAlignments() { Assert.assertEquals(other.getAttribute(SAMTagUtil.getSingleton().NM),null); Assert.assertEquals(other.getCigarString(),"8M2S"); Assert.assertEquals(other.getInferredInsertSize(),-91);//100(mate) - 191(other) + } + + @Test() + public void testBytesToCompressedBases() { + final byte[] bases = new byte[]{'=', 'a', 'A', 'c', 'C', 'g', 'G', 't', 'T', 'n', 'N', '.', 'M', 'm', + 'R', 'r', 'S', 's', 'V', 'v', 'W', 'w', 'Y', 'y', 'H', 'h', 'K', 'k', 'D', 'd', 'B', 'b'}; + final byte[] 
compressedBases = SAMUtils.bytesToCompressedBases(bases); + String expectedCompressedBases = "[1, 18, 36, 72, -113, -1, 51, 85, 102, 119, -103, -86, -69, -52, -35, -18]"; + Assert.assertEquals(Arrays.toString(compressedBases), expectedCompressedBases); + } + + @DataProvider + public Object[][] testBadBase() { + return new Object[][]{ + {new byte[]{'>', 'A'}, '>'}, + {new byte[]{'A', '>'} , '>'} + }; + } + @Test(dataProvider = "testBadBase", expectedExceptions = IllegalArgumentException.class) + public void testBytesToCompressedBasesException(final byte[] bases, final char failingBase) { + try { + SAMUtils.bytesToCompressedBases(bases); + } catch ( final IllegalArgumentException ex ) { + Assert.assertTrue(ex.getMessage().contains(Character.toString(failingBase))); + throw ex; + } } + @Test + public void testCompressedBasesToBytes() { + final byte[] compressedBases = new byte[]{1, 18, 36, 72, -113, -1, 51, 85, 102, 119, -103, -86, -69, -52, -35, -18}; + final byte[] bytes = SAMUtils.compressedBasesToBytes(2*compressedBases.length, compressedBases, 0); + final byte[] expectedBases = new byte[]{'=', 'A', 'A', 'C', 'C', 'G', 'G', 'T', 'T', 'N', 'N', 'N', 'M', 'M', + 'R', 'R', 'S', 'S', 'V', 'V', 'W', 'W', 'Y', 'Y', 'H', 'H', 'K', 'K', 'D', 'D', 'B', 'B'}; + Assert.assertEquals(new String(bytes), new String(expectedBases)); + } } diff --git a/src/test/java/htsjdk/samtools/SamFileHeaderMergerTest.java b/src/test/java/htsjdk/samtools/SamFileHeaderMergerTest.java index 6e4fd750f..5c55c0b82 100644 --- a/src/test/java/htsjdk/samtools/SamFileHeaderMergerTest.java +++ b/src/test/java/htsjdk/samtools/SamFileHeaderMergerTest.java @@ -25,6 +25,7 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.SequenceUtil; @@ -58,7 +59,7 @@ *

    * Tests the ability of the SamFileHeaderMerger class to merge sequence dictionaries. */ -public class SamFileHeaderMergerTest { +public class SamFileHeaderMergerTest extends HtsjdkTest { private static File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); diff --git a/src/test/java/htsjdk/samtools/SamFilesTest.java b/src/test/java/htsjdk/samtools/SamFilesTest.java index 443a4d1e9..e7c1919d4 100644 --- a/src/test/java/htsjdk/samtools/SamFilesTest.java +++ b/src/test/java/htsjdk/samtools/SamFilesTest.java @@ -1,6 +1,8 @@ package htsjdk.samtools; import java.nio.file.Path; + +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -12,7 +14,7 @@ * Test valid combinations of bam/cram vs bai/crai files. * Created by vadim on 10/08/2015. */ -public class SamFilesTest { +public class SamFilesTest extends HtsjdkTest { private static final String TEST_DATA = "src/test/resources/htsjdk/samtools/BAMFileIndexTest/"; private static final File BAM_FILE = new File(TEST_DATA + "index_test.bam"); diff --git a/src/test/java/htsjdk/samtools/SamFlagFieldTest.java b/src/test/java/htsjdk/samtools/SamFlagFieldTest.java index f09e63683..36008cf69 100644 --- a/src/test/java/htsjdk/samtools/SamFlagFieldTest.java +++ b/src/test/java/htsjdk/samtools/SamFlagFieldTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -7,7 +8,7 @@ /** * @author nhomer */ -public class SamFlagFieldTest { +public class SamFlagFieldTest extends HtsjdkTest { @Test public void testAllFlags() { @@ -147,4 +148,4 @@ public void testIllegalHexadecimalFlagCharacter(){ public void testIllegalStringFlagCharacterExclamation(){ SamFlagField.STRING.parse("pmMr!F1s"); } -} \ No newline at end of file +} diff --git a/src/test/java/htsjdk/samtools/SamHeaderRecordComparatorTest.java 
b/src/test/java/htsjdk/samtools/SamHeaderRecordComparatorTest.java index c11be38a1..da93add5b 100644 --- a/src/test/java/htsjdk/samtools/SamHeaderRecordComparatorTest.java +++ b/src/test/java/htsjdk/samtools/SamHeaderRecordComparatorTest.java @@ -24,11 +24,12 @@ * THE SOFTWARE. */ +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -public class SamHeaderRecordComparatorTest { +public class SamHeaderRecordComparatorTest extends HtsjdkTest { @DataProvider(name="UsualSuspects") public Object[][] createData() { diff --git a/src/test/java/htsjdk/samtools/SamIndexesTest.java b/src/test/java/htsjdk/samtools/SamIndexesTest.java index d13001f67..f78b0f371 100644 --- a/src/test/java/htsjdk/samtools/SamIndexesTest.java +++ b/src/test/java/htsjdk/samtools/SamIndexesTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.CRAIEntry; import htsjdk.samtools.cram.CRAIIndex; import htsjdk.samtools.seekablestream.SeekableFileStream; @@ -19,7 +20,7 @@ import java.util.List; import java.util.zip.GZIPOutputStream; -public class SamIndexesTest { +public class SamIndexesTest extends HtsjdkTest { @Test public void testEmptyBai() throws IOException { diff --git a/src/test/java/htsjdk/samtools/SamPairUtilTest.java b/src/test/java/htsjdk/samtools/SamPairUtilTest.java index 80841c906..f5c288a54 100644 --- a/src/test/java/htsjdk/samtools/SamPairUtilTest.java +++ b/src/test/java/htsjdk/samtools/SamPairUtilTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SamPairUtil.SetMateInfoIterator; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -32,7 +33,7 @@ import java.util.List; -public class SamPairUtilTest { +public class SamPairUtilTest extends HtsjdkTest { @Test(dataProvider = "testGetPairOrientation") public void testGetPairOrientation(final String testName, diff --git 
a/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java b/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java index 31ad5c259..c244f3c8b 100644 --- a/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java +++ b/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java @@ -1,11 +1,18 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.seekablestream.ISeekableStreamFactory; import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.seekablestream.SeekableHTTPStream; import htsjdk.samtools.seekablestream.SeekableStreamFactory; import htsjdk.samtools.util.*; +import java.net.URI; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Paths; +import java.util.function.Function; +import htsjdk.samtools.util.zip.InflaterFactory; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -22,8 +29,9 @@ import java.util.List; import java.util.Set; import java.util.function.BiFunction; +import java.util.zip.Inflater; -public class SamReaderFactoryTest { +public class SamReaderFactoryTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); private static final Log LOG = Log.getInstance(SamReaderFactoryTest.class); @@ -37,6 +45,34 @@ public void variousFormatReaderTest(final String inputFile) throws IOException { reader.close(); } + @Test + public void variousFormatReaderInflatorFactoryTest() throws IOException { + final String inputFile = "compressed.bam"; + final int[] inflateCalls = {0}; //Note: using an array is a HACK to fool the compiler + class MyInflater extends Inflater { + MyInflater(boolean gzipCompatible){ + super(gzipCompatible); + } + @Override + public int inflate(byte[] b, int off, int len) throws java.util.zip.DataFormatException { + inflateCalls[0]++; + return super.inflate(b, off, len); + } + } + final 
InflaterFactory myInflaterFactory = new InflaterFactory() { + @Override + public Inflater makeInflater(final boolean gzipCompatible) { + return new MyInflater(gzipCompatible); + } + }; + + final File input = new File(TEST_DATA_DIR, inputFile); + try (final SamReader reader = SamReaderFactory.makeDefault().inflaterFactory(myInflaterFactory).open(input)) { + for (final SAMRecord ignored : reader) { } + } + Assert.assertNotEquals(inflateCalls[0], 0, "Not using Inflater from InflateFactory on file : " + inputFile); + } + private int countRecordsInQueryInterval(final SamReader reader, final QueryInterval query) { final SAMRecordIterator iter = reader.queryOverlapping(new QueryInterval[] { query }); int count = 0; @@ -59,6 +95,40 @@ private int countRecords(final SamReader reader) { return count; } + private static SeekableByteChannel addHeader(SeekableByteChannel input) { + try { + int total = (int)input.size(); + final String comment = "@HD\tVN:1.0 SO:unsorted\n" + + "@SQ\tSN:chr1\tLN:101\n" + + "@SQ\tSN:chr2\tLN:101\n" + + "@SQ\tSN:chr3\tLN:101\n" + + "@RG\tID:0\tSM:JP was here\n"; + + byte[] commentBuf = comment.getBytes(); + ByteBuffer buf = ByteBuffer.allocate(total + commentBuf.length); + buf.put(commentBuf); + input.position(0); + while (input.read(buf)>0) { + // read until EOF + } + buf.flip(); + return new SeekableByteChannelFromBuffer(buf); + } catch (IOException x) { + throw new RuntimeException(x); + } + } + + @Test + public void testWrap() throws IOException { + final Path input = Paths.get(TEST_DATA_DIR.getPath(), "noheader.sam"); + final SamReader wrappedReader = + SamReaderFactory + .makeDefault() + .open(input, SamReaderFactoryTest::addHeader, null); + int records = countRecords(wrappedReader); + Assert.assertEquals(10, records); + } + // See https://github.com/samtools/htsjdk/issues/76 @Test(dataProvider = "queryIntervalIssue76TestCases") public void queryIntervalIssue76(final String sequenceName, final int start, final int end, final int 
expectedCount) throws IOException { @@ -94,11 +164,13 @@ public void queryIntervalIssue76(final String sequenceName, final int start, fin int samRecordsCreated; int bamRecordsCreated; + @Override public SAMRecord createSAMRecord(final SAMFileHeader header) { ++samRecordsCreated; return super.createSAMRecord(header); } + @Override public BAMRecord createBAMRecord(final SAMFileHeader header, final int referenceSequenceIndex, final int alignmentStart, final short readNameLength, final short mappingQuality, final int indexingBin, final int cigarLen, final int flags, final int readLen, final int mateReferenceSequenceIndex, final int mateAlignmentStart, final int insertSize, final byte[] variableLengthBlock) { ++bamRecordsCreated; return super.createBAMRecord(header, referenceSequenceIndex, alignmentStart, readNameLength, mappingQuality, indexingBin, cigarLen, flags, readLen, mateReferenceSequenceIndex, mateAlignmentStart, insertSize, variableLengthBlock); @@ -194,7 +266,7 @@ private InputResource composeInputResourceForType(final InputResource.Type type, case FILE: return new FileInputResource(f); case PATH: - return new PathInputResource(f.toPath()); + return new PathInputResource(f.toPath(), Function.identity()); case URL: return new UrlInputResource(url); case SEEKABLE_STREAM: @@ -250,7 +322,6 @@ public void openPath() throws IOException { } } - final Set> observedRecordOrdering1 = new HashSet>(); final Set> observedRecordOrdering3 = new HashSet>(); final Set> observedRecordOrdering20 = new HashSet>(); diff --git a/src/test/java/htsjdk/samtools/SamReaderSortTest.java b/src/test/java/htsjdk/samtools/SamReaderSortTest.java index 584410fd0..4d712100b 100755 --- a/src/test/java/htsjdk/samtools/SamReaderSortTest.java +++ b/src/test/java/htsjdk/samtools/SamReaderSortTest.java @@ -3,7 +3,7 @@ /* * The MIT License * - * Copyright (c) 2009 The Broad Institute + * Copyright (c) 2009-2016 The Broad Institute * * Permission is hereby granted, free of charge, to any person 
obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -24,6 +24,8 @@ * THE SOFTWARE. */ +import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.ref.ReferenceSource; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -35,11 +37,17 @@ * * @author ktibbett@broadinstitute.org */ -public class SamReaderSortTest { +public class SamReaderSortTest extends HtsjdkTest { - public static final String COORDINATE_SORTED_FILE = "src/test/resources/htsjdk/samtools/coordinate_sorted.sam"; - public static final String QUERYNAME_SORTED_FILE = "src/test/resources/htsjdk/samtools/queryname_sorted.sam"; - public static final String QUERYNAME_SORTED_NO_HEADER_SORT = "src/test/resources/htsjdk/samtools/unsorted.sam"; + private static final String COORDINATE_SORTED_FILE = "src/test/resources/htsjdk/samtools/coordinate_sorted.sam"; + private static final String QUERYNAME_SORTED_FILE = "src/test/resources/htsjdk/samtools/queryname_sorted.sam"; + private static final String QUERYNAME_SORTED_NO_HEADER_SORT = "src/test/resources/htsjdk/samtools/unsorted.sam"; + private static final String CRAM_FILE = "src/test/resources/htsjdk/samtools/cram_query_sorted.cram"; + private static final String CRAM_REFERENCE = "src/test/resources/htsjdk/samtools/cram_query_sorted.fasta"; + private static final String CRAM_FILE_COORDINATE = "src/test/resources/htsjdk/samtools/cram/ce#tag_depadded.2.1.cram"; + private static final String CRAM_REFERENCE_COORDINATE = "src/test/resources/htsjdk/samtools/cram/ce.fa"; + private static final String CRAM_FILE_UNSORTED = "src/test/resources/htsjdk/samtools/cram/xx#unsorted.3.0.cram"; + private static final String CRAM_REFERENCE_UNSORTED = "src/test/resources/htsjdk/samtools/cram/xx.fa"; @Test(expectedExceptions = IllegalStateException.class) public void testSortsDisagree() throws Exception { @@ -93,6 +101,49 @@ public void testSortAssertionFails(String file, 
SAMFileHeader.SortOrder order) t } } + private CRAMFileReader getCramFileReader(String file, String fileReference) { + final ReferenceSource referenceSource = new ReferenceSource(new File(fileReference)); + return new CRAMFileReader(new File(file), referenceSource); + } + + @Test(dataProvider = "sortsCramWithoutIndex") + public void testCramSort(String file, String fileReference, SAMFileHeader.SortOrder order) throws Exception { + final CRAMFileReader cramFileReader = getCramFileReader(file, fileReference); + final SAMRecordIterator samRecordIterator = cramFileReader.getIterator().assertSorted(order); + Assert.assertTrue(samRecordIterator.hasNext()); + while (samRecordIterator.hasNext()) { + Assert.assertNotNull(samRecordIterator.next()); + } + } + + @Test(dataProvider = "sortsFailCramWithoutIndex", expectedExceptions = IllegalStateException.class) + public void testCramSortFail(String file, String fileReference, SAMFileHeader.SortOrder order) throws Exception { + final CRAMFileReader cramFileReader = getCramFileReader(file, fileReference); + final SAMRecordIterator samRecordIterator = cramFileReader.getIterator().assertSorted(order); + Assert.assertTrue(samRecordIterator.hasNext()); + while (samRecordIterator.hasNext()) { + Assert.assertNotNull(samRecordIterator.next()); + } + } + + @DataProvider(name = "sortsFailCramWithoutIndex") + public Object[][] getSortsFailCramWithoutIndex() { + return new Object[][]{ + {CRAM_FILE, CRAM_REFERENCE, SAMFileHeader.SortOrder.coordinate}, + {CRAM_FILE_COORDINATE, CRAM_REFERENCE_COORDINATE, SAMFileHeader.SortOrder.queryname}, + {CRAM_FILE_UNSORTED, CRAM_REFERENCE_UNSORTED, SAMFileHeader.SortOrder.coordinate} + }; + } + + @DataProvider(name = "sortsCramWithoutIndex") + public Object[][] getSortsCramWithoutIndex() { + return new Object[][]{ + {CRAM_FILE, CRAM_REFERENCE, SAMFileHeader.SortOrder.queryname}, + {CRAM_FILE_COORDINATE, CRAM_REFERENCE_COORDINATE, SAMFileHeader.SortOrder.coordinate}, + {CRAM_FILE_UNSORTED, 
CRAM_REFERENCE_UNSORTED, SAMFileHeader.SortOrder.unsorted} + }; + } + @DataProvider(name = "invalidSorts") public Object[][] getInvalidSorts() { return new Object[][]{ diff --git a/src/test/java/htsjdk/samtools/SamReaderTest.java b/src/test/java/htsjdk/samtools/SamReaderTest.java index 093dffbeb..4d4d05634 100644 --- a/src/test/java/htsjdk/samtools/SamReaderTest.java +++ b/src/test/java/htsjdk/samtools/SamReaderTest.java @@ -23,7 +23,7 @@ */ package htsjdk.samtools; -import htsjdk.samtools.cram.CRAMException; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloserUtil; import org.testng.Assert; @@ -32,7 +32,7 @@ import java.io.File; -public class SamReaderTest { +public class SamReaderTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); @Test(dataProvider = "variousFormatReaderTestCases") @@ -104,11 +104,13 @@ public void CRAMNoIndexTest(final String inputFile, final String referenceFile) int samRecordsCreated; int bamRecordsCreated; + @Override public SAMRecord createSAMRecord(final SAMFileHeader header) { ++samRecordsCreated; return super.createSAMRecord(header); } + @Override public BAMRecord createBAMRecord(final SAMFileHeader header, final int referenceSequenceIndex, final int alignmentStart, final short readNameLength, final short mappingQuality, final int indexingBin, final int cigarLen, final int flags, final int readLen, final int mateReferenceSequenceIndex, final int mateAlignmentStart, final int insertSize, final byte[] variableLengthBlock) { ++bamRecordsCreated; return super.createBAMRecord(header, referenceSequenceIndex, alignmentStart, readNameLength, mappingQuality, indexingBin, cigarLen, flags, readLen, mateReferenceSequenceIndex, mateAlignmentStart, insertSize, variableLengthBlock); diff --git a/src/test/java/htsjdk/samtools/SamSpecIntTest.java b/src/test/java/htsjdk/samtools/SamSpecIntTest.java index 8305065da..2ebc24e70 100644 --- 
a/src/test/java/htsjdk/samtools/SamSpecIntTest.java +++ b/src/test/java/htsjdk/samtools/SamSpecIntTest.java @@ -24,6 +24,7 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloserUtil; import org.testng.Assert; import org.testng.annotations.Test; @@ -33,7 +34,7 @@ import java.util.ArrayList; import java.util.List; -public class SamSpecIntTest { +public class SamSpecIntTest extends HtsjdkTest { private static final File SAM_INPUT = new File("src/test/resources/htsjdk/samtools/inttest.sam"); private static final File BAM_INPUT = new File("src/test/resources/htsjdk/samtools/inttest.bam"); diff --git a/src/test/java/htsjdk/samtools/SamStreamsTest.java b/src/test/java/htsjdk/samtools/SamStreamsTest.java index c92d6dbc0..48a074a8e 100644 --- a/src/test/java/htsjdk/samtools/SamStreamsTest.java +++ b/src/test/java/htsjdk/samtools/SamStreamsTest.java @@ -24,6 +24,7 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.seekablestream.SeekableStreamFactory; @@ -34,7 +35,7 @@ import java.io.*; import java.net.URL; -public class SamStreamsTest { +public class SamStreamsTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); @@ -121,4 +122,4 @@ public void sourceLikeBam( SeekableStreamFactory.getInstance().getStreamFor(new URL(resourceName)); Assert.assertEquals(SamStreams.sourceLikeBam(strm), expected); } -} \ No newline at end of file +} diff --git a/src/test/java/htsjdk/samtools/SeekableByteChannelFromBuffer.java b/src/test/java/htsjdk/samtools/SeekableByteChannelFromBuffer.java new file mode 100644 index 000000000..63b036902 --- /dev/null +++ b/src/test/java/htsjdk/samtools/SeekableByteChannelFromBuffer.java @@ -0,0 +1,85 @@ +package htsjdk.samtools; + +import java.io.IOException; +import java.nio.Buffer; +import java.nio.ByteBuffer; +import 
java.nio.channels.ClosedChannelException; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.StandardOpenOption; + +/** + * A buffer-backed SeekableByteChannel, for testing. + */ +public class SeekableByteChannelFromBuffer implements SeekableByteChannel { + + private ByteBuffer buf; + private boolean open = true; + + public SeekableByteChannelFromBuffer(ByteBuffer buf) { + this.buf = buf; + } + + @Override + public int read(ByteBuffer dst) throws IOException { + if (buf.position() == buf.limit()) { + // signal EOF + return -1; + } + int before = dst.position(); + dst.put(buf); + return dst.position() - before; + } + + @Override + public int write(ByteBuffer src) throws IOException { + throw new IOException("read-only channel"); + } + + @Override + public long position() throws IOException { + checkOpen(); + return buf.position(); + } + + @Override + public SeekableByteChannel position(long newPosition) throws IOException { + checkOpen(); + buf.position((int)newPosition); + return this; + } + + @Override + public long size() throws IOException { + checkOpen(); + return buf.limit(); + } + + @Override + public SeekableByteChannel truncate(long size) throws IOException { + checkOpen(); + if (size <0) { + throw new IllegalArgumentException("negative size"); + } + if (size > buf.limit()) { + throw new IllegalArgumentException("size larger than current"); + } + buf.limit((int)size); + return null; + } + + @Override + public boolean isOpen() { + return open; + } + + @Override + public void close() throws IOException { + open = false; + } + + private void checkOpen() throws IOException { + if (!open) { + throw new ClosedChannelException(); + } + } +} diff --git a/src/test/java/htsjdk/samtools/SequenceNameTruncationAndValidationTest.java b/src/test/java/htsjdk/samtools/SequenceNameTruncationAndValidationTest.java index 2c3a95c6a..01999c481 100644 --- a/src/test/java/htsjdk/samtools/SequenceNameTruncationAndValidationTest.java +++ 
b/src/test/java/htsjdk/samtools/SequenceNameTruncationAndValidationTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloserUtil; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -36,7 +37,7 @@ * * @author alecw@broadinstitute.org */ -public class SequenceNameTruncationAndValidationTest { +public class SequenceNameTruncationAndValidationTest extends HtsjdkTest { private static File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); @Test(expectedExceptions = {SAMException.class}, dataProvider = "badSequenceNames") diff --git a/src/test/java/htsjdk/samtools/ValidateSamFileTest.java b/src/test/java/htsjdk/samtools/ValidateSamFileTest.java index 4ce0b7a29..8aac6e2e3 100644 --- a/src/test/java/htsjdk/samtools/ValidateSamFileTest.java +++ b/src/test/java/htsjdk/samtools/ValidateSamFileTest.java @@ -24,9 +24,11 @@ package htsjdk.samtools; +import htsjdk.HtsjdkTest; import htsjdk.samtools.BamIndexValidator.IndexValidationStringency; import htsjdk.samtools.metrics.MetricBase; import htsjdk.samtools.metrics.MetricsFile; +import htsjdk.samtools.reference.FastaSequenceFile; import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.util.CloserUtil; @@ -57,7 +59,7 @@ * * @author Doug Voet */ -public class ValidateSamFileTest { +public class ValidateSamFileTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/ValidateSamFileTest"); private static final int TERMINATION_GZIP_BLOCK_SIZE = 28; private static final int RANDOM_NUMBER_TRUNC_BYTE = 128; @@ -70,6 +72,20 @@ public void testValidSamFile() throws Exception { } @Test + public void testValidCRAMFileWithoutSeqDict() throws Exception { + final File reference = new File(TEST_DATA_DIR, "nm_tag_validation.fa"); + final SamReader samReader = SamReaderFactory + .makeDefault() + 
.validationStringency(ValidationStringency.SILENT) + .referenceSequence(reference) + .open(new File(TEST_DATA_DIR, "nm_tag_validation.cram")); + final Histogram results = executeValidation(samReader, + new FastaSequenceFile(reference, true), + IndexValidationStringency.EXHAUSTIVE); + Assert.assertTrue(!results.isEmpty()); + } + + @Test public void testSamFileVersion1pt5() throws Exception { final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(new File(TEST_DATA_DIR, "test_samfile_version_1pt5.bam")); final Histogram results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE); @@ -103,7 +119,9 @@ public void testVerbose() throws IOException { validator.validateSamFileVerbose(samBuilder.getSamReader(), null); final int lineCount = results.toString().split("\n").length; - Assert.assertEquals(lineCount, 11); + Assert.assertEquals(lineCount, 11); // 1 extra message added to indicate maximum number of errors + Assert.assertEquals(validator.getNumErrors(), 6); + Assert.assertEquals(validator.getNumWarnings(), 4); } @Test @@ -129,6 +147,7 @@ public void testUnpairedRecords() throws IOException { Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_FIRST_OF_PAIR.getHistogramString()).getValue(), 1.0); Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_SECOND_OF_PAIR.getHistogramString()).getValue(), 1.0); Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_MATE_REF_INDEX.getHistogramString()).getValue(), 1.0); + Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_UNPAIRED_MATE_REFERENCE.getHistogramString()).getValue(), 1.0); } @Test @@ -155,6 +174,7 @@ public void testPairedRecords() throws IOException { Assert.assertEquals(results.get(SAMValidationError.Type.MISMATCH_FLAG_MATE_UNMAPPED.getHistogramString()).getValue(), 1.0); 
Assert.assertEquals(results.get(SAMValidationError.Type.MISMATCH_MATE_ALIGNMENT_START.getHistogramString()).getValue(), 2.0); Assert.assertEquals(results.get(SAMValidationError.Type.MISMATCH_MATE_REF_INDEX.getHistogramString()).getValue(), 2.0); + Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_UNALIGNED_MATE_START.getHistogramString()).getValue(), 1.0); } @Test(dataProvider = "missingMateTestCases") @@ -214,6 +234,7 @@ public void testMappedRecords() throws IOException { Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_CIGAR.getHistogramString()).getValue(), 1.0); Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_READ_UNMAPPED.getHistogramString()).getValue(), 1.0); Assert.assertEquals(results.get(SAMValidationError.Type.MISSING_TAG_NM.getHistogramString()).getValue(), 1.0); + Assert.assertEquals(results.get(SAMValidationError.Type.MISMATCH_CIGAR_SEQ_LENGTH.getHistogramString()).getValue(), 1.0); } @Test @@ -237,26 +258,32 @@ public void testNmFlagValidation() throws IOException { final Histogram results = executeValidation(samBuilder.getSamReader(), new ReferenceSequenceFile() { private int index = 0; + @Override public SAMSequenceDictionary getSequenceDictionary() { return null; } + @Override public ReferenceSequence nextSequence() { final byte[] bases = new byte[10000]; Arrays.fill(bases, (byte) 'A'); return new ReferenceSequence("foo", index++, bases); } + @Override public void reset() { this.index = 0; } + @Override public boolean isIndexed() { return false; } + @Override public ReferenceSequence getSequence(final String contig) { throw new UnsupportedOperationException(); } + @Override public ReferenceSequence getSubsequenceAt(final String contig, final long start, final long stop) { throw new UnsupportedOperationException(); } @@ -276,11 +303,10 @@ public void testMateCigarScenarios(final String scenario, final String inputFile throws Exception { final SamReader reader = 
SamReaderFactory.makeDefault().open(new File(TEST_DATA_DIR, inputFile)); final Histogram results = executeValidation(reader, null, IndexValidationStringency.EXHAUSTIVE); - Assert.assertNotNull(results.get(expectedError.getHistogramString())); - Assert.assertEquals(results.get(expectedError.getHistogramString()).getValue(), 1.0); + Assert.assertNotNull(results.get(expectedError.getHistogramString()), scenario); + Assert.assertEquals(results.get(expectedError.getHistogramString()).getValue(), 1.0, scenario); } - @DataProvider(name = "testMateCigarScenarios") public Object[][] testMateCigarScenarios() { return new Object[][]{ @@ -294,8 +320,8 @@ public void testTruncated(final String scenario, final String inputFile, final S throws Exception { final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(new File(TEST_DATA_DIR, inputFile)); final Histogram results = executeValidation(reader, null, IndexValidationStringency.EXHAUSTIVE); - Assert.assertNotNull(results.get(expectedError.getHistogramString())); - Assert.assertEquals(results.get(expectedError.getHistogramString()).getValue(), 1.0); + Assert.assertNotNull(results.get(expectedError.getHistogramString()), scenario); + Assert.assertEquals(results.get(expectedError.getHistogramString()).getValue(), 1.0, scenario); } @DataProvider(name = "testTruncatedScenarios") @@ -376,9 +402,20 @@ public void testRedundantTags() throws Exception { public void testHeaderValidation() throws Exception { final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT) .open(new File(TEST_DATA_DIR, "buggyHeader.sam")); - final Histogram results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE); + final File referenceFile = new File(TEST_DATA_DIR, "../hg19mini.fasta"); + final ReferenceSequenceFile reference = new FastaSequenceFile(referenceFile, false); + final Histogram results = executeValidation(samReader, 
reference, IndexValidationStringency.EXHAUSTIVE); Assert.assertEquals(results.get(SAMValidationError.Type.UNRECOGNIZED_HEADER_TYPE.getHistogramString()).getValue(), 3.0); Assert.assertEquals(results.get(SAMValidationError.Type.HEADER_TAG_MULTIPLY_DEFINED.getHistogramString()).getValue(), 1.0); + Assert.assertEquals(results.get(SAMValidationError.Type.MISMATCH_FILE_SEQ_DICT.getHistogramString()).getValue(), 1.0); + } + + @Test + public void testSeqQualMismatch() throws Exception { + final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT) + .open(new File(TEST_DATA_DIR, "seq_qual_len_mismatch.sam")); + final Histogram results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE); + Assert.assertEquals(results.get(SAMValidationError.Type.MISMATCH_SEQ_QUAL_LENGTH.getHistogramString()).getValue(), 8.0); } @Test @@ -475,10 +512,24 @@ public void duplicateReadsOutOfOrder() throws Exception { "@RG\tID:0\tSM:Hi,Mom!\n" + "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA"; + final String SOTagCorrectlyProcessTestData = + "@HD\tVN:1.0\tSO:NOTKNOWN\n" + + "@SQ\tSN:chr1\tLN:101\n" + + "@RG\tID:0\tSM:Hi,Mom!\n" + + "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA"; + + final String GOTagCorrectlyProcessTestData = + "@HD\tVN:1.0\tGO:NOTKNOWN\n" + + "@SQ\tSN:chr1\tLN:101\n" + + "@RG\tID:0\tSM:Hi,Mom!\n" + + "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA"; + return new Object[][]{ {E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.E2_BASE_EQUALS_PRIMARY_BASE}, {E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_E2_LENGTH}, - {U2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH} + {U2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH}, + {SOTagCorrectlyProcessTestData.getBytes(), 
SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE}, + {GOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE} }; } @@ -500,16 +551,18 @@ public void tagCorrectlyProcessTest(byte[] bytesFromFile, @DataProvider(name = "validateBamFileTerminationData") public Object[][] validateBamFileTerminationData() throws IOException { return new Object[][]{ - {getBrokenFile(TERMINATION_GZIP_BLOCK_SIZE), SAMValidationError.Type.BAM_FILE_MISSING_TERMINATOR_BLOCK}, - {getBrokenFile(RANDOM_NUMBER_TRUNC_BYTE), SAMValidationError.Type.TRUNCATED_FILE} + {getBrokenFile(TERMINATION_GZIP_BLOCK_SIZE), SAMValidationError.Type.BAM_FILE_MISSING_TERMINATOR_BLOCK, 1, 0}, + {getBrokenFile(RANDOM_NUMBER_TRUNC_BYTE), SAMValidationError.Type.TRUNCATED_FILE, 0, 1} }; } @Test(dataProvider = "validateBamFileTerminationData") - public void validateBamFileTerminationTest(File file, SAMValidationError.Type errorType) throws IOException { + public void validateBamFileTerminationTest(final File file, final SAMValidationError.Type errorType, final int numWarnings, final int numErrors) throws IOException { final SamFileValidator samFileValidator = new SamFileValidator(new PrintWriter(System.out), 8000); samFileValidator.validateBamFileTermination(file); Assert.assertEquals(samFileValidator.getErrorsByType().get(errorType).getValue(), 1.0); + Assert.assertEquals(samFileValidator.getNumWarnings(), numWarnings); + Assert.assertEquals(samFileValidator.getNumErrors(), numErrors); } private Histogram executeValidation(final SamReader samReader, final ReferenceSequenceFile reference, diff --git a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java index 6cf49344b..d43f2fc14 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram; +import htsjdk.HtsjdkTest; import 
htsjdk.samtools.cram.structure.Container; import htsjdk.samtools.cram.structure.Slice; import org.testng.Assert; @@ -12,7 +13,7 @@ /** * Created by vadim on 25/08/2015. */ -public class CRAIEntryTest { +public class CRAIEntryTest extends HtsjdkTest { @Test public void testFromContainer() { diff --git a/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java b/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java index 7ebdb75e1..9e48d6b4e 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java @@ -1,23 +1,14 @@ package htsjdk.samtools.cram; -import htsjdk.samtools.BAMFileSpan; -import htsjdk.samtools.CRAMCRAIIndexer; -import htsjdk.samtools.DiskBasedBAMFileIndex; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMSequenceDictionary; -import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.HtsjdkTest; +import htsjdk.samtools.*; import htsjdk.samtools.seekablestream.SeekableBufferedStream; import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.seekablestream.SeekableStream; import org.testng.Assert; import org.testng.annotations.Test; -import java.io.BufferedInputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; +import java.io.*; import java.util.ArrayList; import java.util.List; import java.util.function.BiFunction; @@ -26,7 +17,7 @@ /** * Created by vadim on 25/08/2015. 
*/ -public class CRAIIndexTest { +public class CRAIIndexTest extends HtsjdkTest { @Test public void testFind() throws IOException, CloneNotSupportedException { diff --git a/src/test/java/htsjdk/samtools/cram/LosslessRoundTripTest.java b/src/test/java/htsjdk/samtools/cram/LosslessRoundTripTest.java index 67cd4833c..1ae8e142a 100644 --- a/src/test/java/htsjdk/samtools/cram/LosslessRoundTripTest.java +++ b/src/test/java/htsjdk/samtools/cram/LosslessRoundTripTest.java @@ -1,31 +1,18 @@ package htsjdk.samtools.cram; -import htsjdk.samtools.CRAMFileReader; -import htsjdk.samtools.CRAMFileWriter; -import htsjdk.samtools.Cigar; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMReadGroupRecord; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.ValidationStringency; +import htsjdk.HtsjdkTest; +import htsjdk.samtools.*; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.reference.InMemoryReferenceSequenceFile; import org.testng.Assert; import org.testng.annotations.Test; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; +import java.io.*; /** * Created by vadim on 19/02/2016. 
*/ -public class LosslessRoundTripTest { +public class LosslessRoundTripTest extends HtsjdkTest { @Test public void test_MD_NM() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); diff --git a/src/test/java/htsjdk/samtools/cram/VersionTest.java b/src/test/java/htsjdk/samtools/cram/VersionTest.java index 0602eb376..be2851eb6 100644 --- a/src/test/java/htsjdk/samtools/cram/VersionTest.java +++ b/src/test/java/htsjdk/samtools/cram/VersionTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram; +import htsjdk.HtsjdkTest; import htsjdk.samtools.CRAMFileWriter; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; @@ -26,7 +27,7 @@ /** * Created by vadim on 18/02/2016. */ -public class VersionTest { +public class VersionTest extends HtsjdkTest { /** * The test purpose is to ensure that a CRAM written by {@link CRAMFileWriter} adheres to CRAM3 specs expectations: * 1. version 3.+, via both actual byte comparison and CramIO API diff --git a/src/test/java/htsjdk/samtools/cram/build/CompressionHeaderFactoryTest.java b/src/test/java/htsjdk/samtools/cram/build/CompressionHeaderFactoryTest.java index a3d91cdc7..8e39d9f76 100644 --- a/src/test/java/htsjdk/samtools/cram/build/CompressionHeaderFactoryTest.java +++ b/src/test/java/htsjdk/samtools/cram/build/CompressionHeaderFactoryTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.build; +import htsjdk.HtsjdkTest; import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.cram.encoding.readfeatures.Substitution; import htsjdk.samtools.cram.structure.CompressionHeader; @@ -17,7 +18,7 @@ /** * Created by vadim on 07/01/2016. 
*/ -public class CompressionHeaderFactoryTest { +public class CompressionHeaderFactoryTest extends HtsjdkTest { @Test public void testAllEncodingsPresent() { final CompressionHeader header = new CompressionHeaderFactory().build(new ArrayList<>(), new SubstitutionMatrix(new long[256][256]), true); diff --git a/src/test/java/htsjdk/samtools/cram/build/ContainerFactoryTest.java b/src/test/java/htsjdk/samtools/cram/build/ContainerFactoryTest.java index cb004a729..cf4f91e51 100644 --- a/src/test/java/htsjdk/samtools/cram/build/ContainerFactoryTest.java +++ b/src/test/java/htsjdk/samtools/cram/build/ContainerFactoryTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.build; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMSequenceRecord; @@ -19,7 +20,7 @@ /** * Created by vadim on 15/12/2015. */ -public class ContainerFactoryTest { +public class ContainerFactoryTest extends HtsjdkTest { @Test public void testUnmapped() throws IOException, IllegalAccessException { diff --git a/src/test/java/htsjdk/samtools/cram/build/ContainerParserTest.java b/src/test/java/htsjdk/samtools/cram/build/ContainerParserTest.java index fe25ce667..b16dc0f15 100644 --- a/src/test/java/htsjdk/samtools/cram/build/ContainerParserTest.java +++ b/src/test/java/htsjdk/samtools/cram/build/ContainerParserTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.build; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.ValidationStringency; @@ -24,7 +25,7 @@ /** * Created by vadim on 11/01/2016. 
*/ -public class ContainerParserTest { +public class ContainerParserTest extends HtsjdkTest { @Test public void testEOF() throws IOException, IllegalAccessException { diff --git a/src/test/java/htsjdk/samtools/cram/build/CramIOTest.java b/src/test/java/htsjdk/samtools/cram/build/CramIOTest.java index 1035f242e..bab50dc44 100644 --- a/src/test/java/htsjdk/samtools/cram/build/CramIOTest.java +++ b/src/test/java/htsjdk/samtools/cram/build/CramIOTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.build; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.cram.common.CramVersions; @@ -15,7 +16,7 @@ /** * Created by vadim on 25/08/2015. */ -public class CramIOTest { +public class CramIOTest extends HtsjdkTest { @Test public void testCheckHeaderAndEOF_v2() throws IOException { final String id = "testid"; diff --git a/src/test/java/htsjdk/samtools/cram/build/Sam2CramRecordFactoryTest.java b/src/test/java/htsjdk/samtools/cram/build/Sam2CramRecordFactoryTest.java new file mode 100644 index 000000000..088f4f32f --- /dev/null +++ b/src/test/java/htsjdk/samtools/cram/build/Sam2CramRecordFactoryTest.java @@ -0,0 +1,109 @@ +package htsjdk.samtools.cram.build; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMUtils; +import htsjdk.samtools.cram.common.CramVersions; +import htsjdk.samtools.cram.encoding.readfeatures.ReadBase; +import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature; +import htsjdk.samtools.cram.encoding.readfeatures.Substitution; +import htsjdk.samtools.cram.structure.CramCompressionRecord; +import htsjdk.samtools.util.SequenceUtil; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + +/** + * Created by vadim on 06/06/2017. 
+ */ +public class Sam2CramRecordFactoryTest { + + /** + * This checks that all read bases returned in the record from {@link Sam2CramRecordFactory#createCramRecord(SAMRecord)} + * are from the BAM read base set. + */ + @Test + public void testReadBaseNormalization() { + final SAMFileHeader header = new SAMFileHeader(); + + final SAMRecord record = new SAMRecord(header); + record.setReadName("test"); + record.setReadUnmappedFlag(true); + record.setReadBases(SequenceUtil.getIUPACCodesString().getBytes()); + record.setBaseQualities(SAMRecord.NULL_QUALS); + + final Sam2CramRecordFactory sam2CramRecordFactory = new Sam2CramRecordFactory(null, header, CramVersions.CRAM_v3); + final CramCompressionRecord cramRecord = sam2CramRecordFactory.createCramRecord(record); + + Assert.assertNotEquals(cramRecord.readBases, record.getReadBases()); + Assert.assertEquals(cramRecord.readBases, SequenceUtil.toBamReadBasesInPlace(record.getReadBases())); + } + + @DataProvider(name = "emptyFeatureListProvider") + public Object[][] testPositive() { + return new Object[][]{ + // a matching base + {"A", "A", "!"}, + // a matching ambiguity base + {"R", "R", "!"}, + }; + } + + @Test(dataProvider = "emptyFeatureListProvider") + public void testAddMismatchReadFeaturesNoReadFeaturesForMatch(final String refBases, final String readBases, final String fastqScores) { + final List readFeatures = buildMatchOrMismatchReadFeatures(refBases, readBases, fastqScores); + Assert.assertTrue(readFeatures.isEmpty()); + } + + /** + * Test the outcome of a ACGTN mismatch. + * The result should always be a {@link Substitution} read feature. 
+ */ + @Test + public void testAddMismatchReadFeaturesSingleSubstitution() { + final List readFeatures = buildMatchOrMismatchReadFeatures("A", "C", "!"); + + Assert.assertEquals(1, readFeatures.size()); + + final ReadFeature rf = readFeatures.get(0); + Assert.assertTrue(rf instanceof Substitution); + final Substitution substitution = (Substitution) rf; + Assert.assertEquals(1, substitution.getPosition()); + Assert.assertEquals('C', substitution.getBase()); + Assert.assertEquals('A', substitution.getReferenceBase()); + } + + /** + * Test the outcome of non-ACGTN ref and read bases mismatching each other. + * The result should be explicit read base and score capture via {@link ReadBase}. + */ + @Test + public void testAddMismatchReadFeaturesAmbiguityMismatch() { + final List readFeatures = buildMatchOrMismatchReadFeatures("R", "F", "1"); + Assert.assertEquals(1, readFeatures.size()); + + final ReadFeature rf = readFeatures.get(0); + Assert.assertTrue(rf instanceof ReadBase); + final ReadBase readBaseFeature = (ReadBase) rf; + Assert.assertEquals(1, readBaseFeature.getPosition()); + Assert.assertEquals('F', readBaseFeature.getBase()); + Assert.assertEquals(SAMUtils.fastqToPhred('1'), readBaseFeature.getQualityScore()); + } + + private List buildMatchOrMismatchReadFeatures(final String refBases, final String readBases, final String scores) { + final SAMFileHeader header = new SAMFileHeader(); + final CramCompressionRecord record = new CramCompressionRecord(); + record.alignmentStart = 1; + final List readFeatures = new ArrayList<>(); + final int fromPosInRead = 0; + final int alignmentStartOffset = 0; + final int nofReadBases = 1; + + final Sam2CramRecordFactory sam2CramRecordFactory = new Sam2CramRecordFactory(refBases.getBytes(), header, CramVersions.CRAM_v3); + sam2CramRecordFactory.addMismatchReadFeatures(record.alignmentStart, readFeatures, fromPosInRead, alignmentStartOffset, nofReadBases, readBases.getBytes(), SAMUtils.fastqToPhred(scores)); + return 
readFeatures; + } +} diff --git a/src/test/java/htsjdk/samtools/cram/encoding/huffman/codec/HuffmanTest.java b/src/test/java/htsjdk/samtools/cram/encoding/huffman/codec/HuffmanTest.java index f2ca2f2b1..fd24c6b8e 100644 --- a/src/test/java/htsjdk/samtools/cram/encoding/huffman/codec/HuffmanTest.java +++ b/src/test/java/htsjdk/samtools/cram/encoding/huffman/codec/HuffmanTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.encoding.huffman.codec; +import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.io.DefaultBitInputStream; import htsjdk.samtools.cram.io.DefaultBitOutputStream; import htsjdk.samtools.cram.structure.ReadTag; @@ -13,7 +14,7 @@ /** * Created by vadim on 22/04/2015. */ -public class HuffmanTest { +public class HuffmanTest extends HtsjdkTest { @Test public void testHuffmanIntHelper() throws IOException { int size = 1000000; diff --git a/src/test/java/htsjdk/samtools/cram/encoding/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/encoding/rans/RansTest.java index ca846863b..8e05a12f1 100644 --- a/src/test/java/htsjdk/samtools/cram/encoding/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/encoding/rans/RansTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.encoding.rans; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; @@ -10,7 +11,7 @@ /** * Created by vadim on 22/04/2015. 
*/ -public class RansTest { +public class RansTest extends HtsjdkTest { @Test public void testEmpty() { roundTrip(new byte[0]); diff --git a/src/test/java/htsjdk/samtools/cram/io/ITF8Test.java b/src/test/java/htsjdk/samtools/cram/io/ITF8Test.java index 5d95d2cc7..a206ad1f0 100644 --- a/src/test/java/htsjdk/samtools/cram/io/ITF8Test.java +++ b/src/test/java/htsjdk/samtools/cram/io/ITF8Test.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.io; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.Tuple; import org.testng.Assert; import org.testng.annotations.BeforeClass; @@ -16,7 +17,7 @@ /** * Created by vadim on 03/02/2015. */ -public class ITF8Test { +public class ITF8Test extends HtsjdkTest { private ExposedByteArrayOutputStream testBAOS; private ByteArrayInputStream testBAIS; diff --git a/src/test/java/htsjdk/samtools/cram/io/LTF8Test.java b/src/test/java/htsjdk/samtools/cram/io/LTF8Test.java index 510379732..03d310dde 100644 --- a/src/test/java/htsjdk/samtools/cram/io/LTF8Test.java +++ b/src/test/java/htsjdk/samtools/cram/io/LTF8Test.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.io; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeMethod; @@ -14,7 +15,7 @@ /** * Created by vadim on 03/02/2015. 
*/ -public class LTF8Test { +public class LTF8Test extends HtsjdkTest { private ExposedByteArrayOutputStream ltf8TestBAOS; private ByteArrayInputStream ltf8TestBAIS; diff --git a/src/test/java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java b/src/test/java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java index 34b4676d9..73859a46a 100644 --- a/src/test/java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java +++ b/src/test/java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.lossy; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamInputResource; @@ -23,7 +24,7 @@ import static org.testng.Assert.*; -public class QualityScorePreservationTest { +public class QualityScorePreservationTest extends HtsjdkTest { @Test public void test1() { @@ -96,12 +97,10 @@ public void test2() { } } - private SAMFileHeader samFileHeader = new SAMFileHeader(); - private SAMRecord buildSAMRecord(String seqName, String line) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); try { - baos.write("@HD\tVN:1.0\tGO:none SO:coordinate\n".getBytes()); + baos.write("@HD\tVN:1.0\tGO:none\tSO:coordinate\n".getBytes()); baos.write(("@SQ\tSN:" + seqName + "\tLN:247249719\n").getBytes()); baos.write(line.replaceAll("\\s+", "\t").getBytes()); baos.close(); @@ -120,7 +119,7 @@ private SAMRecord buildSAMRecord(String seqName, String line) { @Test public void test3() { - String line1 = "98573 0 20 1 10 40M * 0 0 AAAAAAAAAA !!!!!!!!!!"; + String line1 = "98573 0 20 1 10 10M * 0 0 AAAAAAAAAA !!!!!!!!!!"; String seqName = "20"; byte[] ref = new byte[40]; diff --git a/src/test/java/htsjdk/samtools/cram/ref/EnaRefServiceTest.java b/src/test/java/htsjdk/samtools/cram/ref/EnaRefServiceTest.java index 852a513b4..7f537843e 100644 --- a/src/test/java/htsjdk/samtools/cram/ref/EnaRefServiceTest.java +++ 
b/src/test/java/htsjdk/samtools/cram/ref/EnaRefServiceTest.java @@ -1,11 +1,12 @@ package htsjdk.samtools.cram.ref; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; import java.io.IOException; -public class EnaRefServiceTest { +public class EnaRefServiceTest extends HtsjdkTest { @Test public void test() throws IOException, EnaRefService.GaveUpException { diff --git a/src/test/java/htsjdk/samtools/cram/ref/ReferenceSourceTest.java b/src/test/java/htsjdk/samtools/cram/ref/ReferenceSourceTest.java new file mode 100644 index 000000000..34ae95b1d --- /dev/null +++ b/src/test/java/htsjdk/samtools/cram/ref/ReferenceSourceTest.java @@ -0,0 +1,33 @@ +package htsjdk.samtools.cram.ref; + +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.reference.InMemoryReferenceSequenceFile; +import htsjdk.samtools.util.SequenceUtil; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.Arrays; + +/** + * Created by vadim on 29/06/2017. 
+ */ +public class ReferenceSourceTest { + + @Test + public void testReferenceSourceUpperCasesBases() { + final String sequenceName = "1"; + final String nonIupacCharacters = "1=eE"; + final byte[] originalRefBases = (nonIupacCharacters + SequenceUtil.getIUPACCodesString()).getBytes(); + SAMSequenceRecord sequenceRecord = new SAMSequenceRecord(sequenceName, originalRefBases.length); + + InMemoryReferenceSequenceFile memoryReferenceSequenceFile = new InMemoryReferenceSequenceFile(); + memoryReferenceSequenceFile.add(sequenceName, Arrays.copyOf(originalRefBases, originalRefBases.length)); + Assert.assertEquals(memoryReferenceSequenceFile.getSequence(sequenceName).getBases(), originalRefBases); + + ReferenceSource referenceSource = new ReferenceSource(memoryReferenceSequenceFile); + byte[] refBasesFromSource = referenceSource.getReferenceBases(sequenceRecord, false); + + Assert.assertNotEquals(refBasesFromSource, originalRefBases); + Assert.assertEquals(refBasesFromSource, SequenceUtil.upperCase(originalRefBases)); + } +} diff --git a/src/test/java/htsjdk/samtools/cram/structure/CramCompressionRecordTest.java b/src/test/java/htsjdk/samtools/cram/structure/CramCompressionRecordTest.java index 03360bd6b..a455476fa 100644 --- a/src/test/java/htsjdk/samtools/cram/structure/CramCompressionRecordTest.java +++ b/src/test/java/htsjdk/samtools/cram/structure/CramCompressionRecordTest.java @@ -1,11 +1,8 @@ package htsjdk.samtools.cram.structure; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.cram.encoding.readfeatures.Deletion; -import htsjdk.samtools.cram.encoding.readfeatures.InsertBase; -import htsjdk.samtools.cram.encoding.readfeatures.Insertion; -import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature; -import htsjdk.samtools.cram.encoding.readfeatures.SoftClip; +import htsjdk.samtools.cram.encoding.readfeatures.*; import org.testng.Assert; import org.testng.annotations.Test; @@ -14,7 +11,7 @@ /** * Created by vadim on 
28/09/2015. */ -public class CramCompressionRecordTest { +public class CramCompressionRecordTest extends HtsjdkTest { @Test public void test_getAlignmentEnd() { CramCompressionRecord r = new CramCompressionRecord(); diff --git a/src/test/java/htsjdk/samtools/cram/structure/ReadTagTest.java b/src/test/java/htsjdk/samtools/cram/structure/ReadTagTest.java index 3ed0b4006..314fd2498 100644 --- a/src/test/java/htsjdk/samtools/cram/structure/ReadTagTest.java +++ b/src/test/java/htsjdk/samtools/cram/structure/ReadTagTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.cram.structure; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.ValidationStringency; @@ -31,14 +32,9 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; -public class ReadTagTest { +public class ReadTagTest extends HtsjdkTest { @Test public void test () { diff --git a/src/test/java/htsjdk/samtools/cram/structure/SliceTests.java b/src/test/java/htsjdk/samtools/cram/structure/SliceTests.java index c52dccba1..eeb34ee09 100644 --- a/src/test/java/htsjdk/samtools/cram/structure/SliceTests.java +++ b/src/test/java/htsjdk/samtools/cram/structure/SliceTests.java @@ -1,7 +1,7 @@ package htsjdk.samtools.cram.structure; +import htsjdk.HtsjdkTest; import htsjdk.samtools.CRAMFileReader; -import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.cram.CRAMException; @@ -17,7 +17,7 @@ /** * Created by vadim on 07/12/2015. 
*/ -public class SliceTests { +public class SliceTests extends HtsjdkTest { @Test public void testUnmappedValidateRef() { Slice slice = new Slice(); diff --git a/src/test/java/htsjdk/samtools/cram/structure/SubstitutionMatrixTest.java b/src/test/java/htsjdk/samtools/cram/structure/SubstitutionMatrixTest.java index 31e770832..625118923 100644 --- a/src/test/java/htsjdk/samtools/cram/structure/SubstitutionMatrixTest.java +++ b/src/test/java/htsjdk/samtools/cram/structure/SubstitutionMatrixTest.java @@ -1,17 +1,15 @@ package htsjdk.samtools.cram.structure; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.BeforeGroups; import org.testng.annotations.DataProvider; -import org.testng.annotations.Parameters; import org.testng.annotations.Test; -import java.util.Arrays; - /** * Created by Vadim on 12/03/2015. */ -public class SubstitutionMatrixTest { +public class SubstitutionMatrixTest extends HtsjdkTest { SubstitutionMatrix m; long[][] freqs; diff --git a/src/test/java/htsjdk/samtools/fastq/FastqEncoderTest.java b/src/test/java/htsjdk/samtools/fastq/FastqEncoderTest.java new file mode 100644 index 000000000..c367397a3 --- /dev/null +++ b/src/test/java/htsjdk/samtools/fastq/FastqEncoderTest.java @@ -0,0 +1,76 @@ +/* + * The MIT License + * + * Copyright (c) 2016 Daniel Gomez-Sanchez + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.fastq; + +import htsjdk.HtsjdkTest; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordSetBuilder; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * @author Daniel Gomez-Sanchez (magicDGS) + */ +public class FastqEncoderTest extends HtsjdkTest { + + @Test + public void testAsFastqRecord() throws Exception { + final SAMRecord record = new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "10M", null, 2); + record.setReadPairedFlag(true); + // test first of pair encoding + record.setFirstOfPairFlag(true); + testRecord(record.getReadName() + FastqConstants.FIRST_OF_PAIR, FastqEncoder.asFastqRecord(record), record); + record.setFirstOfPairFlag(false); + record.setSecondOfPairFlag(true); + testRecord(record.getReadName() + FastqConstants.SECOND_OF_PAIR, FastqEncoder.asFastqRecord(record), record); + record.setSecondOfPairFlag(false); + testRecord(record.getReadName(), FastqEncoder.asFastqRecord(record), record); + } + + private void testRecord(final String expectedReadName, final FastqRecord fastqRecord, final SAMRecord samRecord) { + Assert.assertEquals(fastqRecord.getReadName(), expectedReadName); + Assert.assertEquals(fastqRecord.getBaseQualities(), samRecord.getBaseQualities()); + Assert.assertEquals(fastqRecord.getReadBases(), samRecord.getReadBases()); + Assert.assertNull(fastqRecord.getBaseQualityHeader()); + } + + @Test + public void testAsSAMRecord() throws Exception { + // create a random record + 
final SAMRecord samRecord = new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "10M", null, 2); + FastqRecord fastqRecord = new FastqRecord(samRecord.getReadName(), samRecord.getReadBases(), "", samRecord.getBaseQualities()); + testConvertedSAMRecord(FastqEncoder.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); + fastqRecord = new FastqRecord(samRecord.getReadName() + FastqConstants.FIRST_OF_PAIR, samRecord.getReadBases(), "", samRecord.getBaseQualities()); + testConvertedSAMRecord(FastqEncoder.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); + fastqRecord = new FastqRecord(samRecord.getReadName() + FastqConstants.SECOND_OF_PAIR, samRecord.getReadBases(), "", samRecord.getBaseQualities()); + testConvertedSAMRecord(FastqEncoder.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); + } + + private void testConvertedSAMRecord(final SAMRecord converted, final SAMRecord original) { + Assert.assertEquals(converted.getReadName(), original.getReadName()); + Assert.assertEquals(converted.getBaseQualities(), original.getBaseQualities()); + Assert.assertEquals(converted.getReadBases(), original.getReadBases()); + Assert.assertTrue(converted.getReadUnmappedFlag()); + } +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/fastq/FastqRecordTest.java b/src/test/java/htsjdk/samtools/fastq/FastqRecordTest.java index f6f238eab..9a47a8688 100644 --- a/src/test/java/htsjdk/samtools/fastq/FastqRecordTest.java +++ b/src/test/java/htsjdk/samtools/fastq/FastqRecordTest.java @@ -1,9 +1,13 @@ package htsjdk.samtools.fastq; +import htsjdk.HtsjdkTest; +import htsjdk.samtools.util.TestUtil; import org.testng.Assert; import org.testng.annotations.Test; -public final class FastqRecordTest { +import java.util.ArrayList; + +public final class FastqRecordTest extends HtsjdkTest { @Test public void testBasic() { @@ -15,7 +19,7 @@ public void testBasic() { Assert.assertNull(fastqRecord.getBaseQualityHeader()); - 
Assert.assertEquals(fastqRecord.getReadHeader(), seqHeaderPrefix); + Assert.assertEquals(fastqRecord.getReadName(), seqHeaderPrefix); Assert.assertEquals(fastqRecord.getBaseQualityString(), qualLine); Assert.assertEquals(fastqRecord.getReadString(), seqLine); Assert.assertNotNull(fastqRecord.toString());//just check not nullness @@ -25,9 +29,9 @@ public void testBasic() { Assert.assertEquals(fastqRecord, fastqRecord); Assert.assertNotEquals(fastqRecord, "fred"); Assert.assertNotEquals("fred", fastqRecord); - Assert.assertEquals(fastqRecord.length(), seqLine.length()); + Assert.assertEquals(fastqRecord.getReadLength(), seqLine.length()); Assert.assertEquals(fastqRecord.getBaseQualityString().length(), fastqRecord.getReadString().length()); - Assert.assertEquals(fastqRecord.getReadString().length(), fastqRecord.length()); + Assert.assertEquals(fastqRecord.getReadString().length(), fastqRecord.getReadLength()); } @Test @@ -37,7 +41,7 @@ public void testBasicEmptyHeaderPrefix() { final String qualHeaderPrefix = ""; final String qualLine = ";<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"; final FastqRecord fastqRecord = new FastqRecord(seqHeaderPrefix, seqLine, qualHeaderPrefix, qualLine); - Assert.assertNull(fastqRecord.getReadHeader()); + Assert.assertNull(fastqRecord.getReadName()); Assert.assertNull(fastqRecord.getBaseQualityHeader()); } @@ -57,6 +61,11 @@ public void testCopy() { Assert.assertSame(fastqRecord.getBaseQualityHeader(), fastqRecordCopy.getBaseQualityHeader()); } + @Test(expectedExceptions = IllegalArgumentException.class) + public void testNullCopy() { + new FastqRecord(null); + } + @Test public void testNullSeq() { final String seqHeaderPrefix = "header"; @@ -201,4 +210,14 @@ public void testNotEqualLengths() { new FastqRecord("header", seqLine1, "qualHeaderPrefix", qualLine1); //Note: this does not blow up now but it will once we enforce that seqLine and qualLine be the same length } -} \ No newline at end of file + + @Test + 
public void testFastqSerialize() throws Exception { + final ArrayList records = new ArrayList<>(); + records.add(new FastqRecord("q1", "ACGTACGT", "", "########")); + records.add(new FastqRecord("q2", "CCAGCGTAATA", "", "????????###")); + records.add(new FastqRecord("q3", "NNNNNNNNNNNN", "", "############")); + + Assert.assertEquals(TestUtil.serializeAndDeserialize(records),records); + } +} diff --git a/src/test/java/htsjdk/samtools/fastq/FastqWriterTest.java b/src/test/java/htsjdk/samtools/fastq/FastqWriterTest.java deleted file mode 100644 index eba5c5b9f..000000000 --- a/src/test/java/htsjdk/samtools/fastq/FastqWriterTest.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * The MIT License - * - * Pierre Lindenbaum PhD - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -package htsjdk.samtools.fastq; - -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import htsjdk.samtools.util.TestUtil; - -import java.io.File; -import java.util.ArrayList; - -/** - * test fastq - */ -public class FastqWriterTest { - private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/util/QualityEncodingDetectorTest"); - - @DataProvider(name = "fastqsource") - public Object[][] createTestData() { - return new Object[][]{ - {"solexa_full_range_as_solexa.fastq"}, - {"5k-30BB2AAXX.3.aligned.sam.fastq"} - }; - } - - @Test(dataProvider = "fastqsource") - public void testReadReadWriteFastq(final String basename) throws Exception { - final File tmpFile = File.createTempFile("test.", ".fastq"); - tmpFile.deleteOnExit(); - final FastqReader fastqReader = new FastqReader(new File(TEST_DATA_DIR,basename)); - final FastqWriterFactory writerFactory = new FastqWriterFactory(); - final FastqWriter fastqWriter = writerFactory.newWriter(tmpFile); - for(final FastqRecord rec: fastqReader) fastqWriter.write(rec); - fastqWriter.close(); - fastqReader.close(); - } - - @Test(dataProvider = "fastqsource") - public void testFastqSerialize(final String basename) throws Exception { - //write - final ArrayList records = new ArrayList<>(); - final FastqReader fastqReader = new FastqReader(new File(TEST_DATA_DIR,basename)); - for(final FastqRecord rec: fastqReader) { - records.add(rec); - if(records.size()>100) break; - } - fastqReader.close(); - Assert.assertEquals(TestUtil.serializeAndDeserialize(records),records); - } -} diff --git a/src/test/java/htsjdk/samtools/filter/FailsVendorReadQualityFilterTest.java b/src/test/java/htsjdk/samtools/filter/FailsVendorReadQualityFilterTest.java index cb2cb0545..ed83f094b 100644 --- a/src/test/java/htsjdk/samtools/filter/FailsVendorReadQualityFilterTest.java +++ b/src/test/java/htsjdk/samtools/filter/FailsVendorReadQualityFilterTest.java @@ -23,13 
+23,14 @@ */ package htsjdk.samtools.filter; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordSetBuilder; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -public class FailsVendorReadQualityFilterTest { +public class FailsVendorReadQualityFilterTest extends HtsjdkTest { private final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); private final FailsVendorReadQualityFilter filter = new FailsVendorReadQualityFilter(); diff --git a/src/test/java/htsjdk/samtools/filter/InsertSizeFilterTest.java b/src/test/java/htsjdk/samtools/filter/InsertSizeFilterTest.java index fc4937da4..48d8edc15 100644 --- a/src/test/java/htsjdk/samtools/filter/InsertSizeFilterTest.java +++ b/src/test/java/htsjdk/samtools/filter/InsertSizeFilterTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.filter; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordSetBuilder; import org.testng.Assert; @@ -7,7 +8,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -public class InsertSizeFilterTest { +public class InsertSizeFilterTest extends HtsjdkTest { private static final int READ_LENGTH = 20; private final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); diff --git a/src/test/java/htsjdk/samtools/filter/IntervalKeepPairFilterTest.java b/src/test/java/htsjdk/samtools/filter/IntervalKeepPairFilterTest.java index 3d30255f5..7d3c23e79 100644 --- a/src/test/java/htsjdk/samtools/filter/IntervalKeepPairFilterTest.java +++ b/src/test/java/htsjdk/samtools/filter/IntervalKeepPairFilterTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.filter; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMRecordSetBuilder; import htsjdk.samtools.util.CollectionUtil; import org.testng.Assert; @@ -11,7 +12,7 @@ import java.util.ArrayList; import java.util.stream.StreamSupport; -public class IntervalKeepPairFilterTest { +public class 
IntervalKeepPairFilterTest extends HtsjdkTest { private static final int READ_LENGTH = 151; private final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); diff --git a/src/test/java/htsjdk/samtools/filter/JavascriptSamRecordFilterTest.java b/src/test/java/htsjdk/samtools/filter/JavascriptSamRecordFilterTest.java index 78355760a..043f24d46 100644 --- a/src/test/java/htsjdk/samtools/filter/JavascriptSamRecordFilterTest.java +++ b/src/test/java/htsjdk/samtools/filter/JavascriptSamRecordFilterTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.filter; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMRecordIterator; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; @@ -39,7 +40,7 @@ * @author Pierre Lindenbaum PhD Institut du Thorax - INSERM - Nantes - France */ -public class JavascriptSamRecordFilterTest { +public class JavascriptSamRecordFilterTest extends HtsjdkTest { final File testDir = new File("./src/test/resources/htsjdk/samtools"); @DataProvider diff --git a/src/test/java/htsjdk/samtools/filter/MappingQualityFilterTest.java b/src/test/java/htsjdk/samtools/filter/MappingQualityFilterTest.java index 2bffcd64a..9d9f7b819 100644 --- a/src/test/java/htsjdk/samtools/filter/MappingQualityFilterTest.java +++ b/src/test/java/htsjdk/samtools/filter/MappingQualityFilterTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.filter; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordSetBuilder; import org.testng.Assert; @@ -7,7 +8,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -public class MappingQualityFilterTest { +public class MappingQualityFilterTest extends HtsjdkTest { private final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); @BeforeTest diff --git a/src/test/java/htsjdk/samtools/filter/OverclippedReadFilterTest.java b/src/test/java/htsjdk/samtools/filter/OverclippedReadFilterTest.java index bff84918c..e154e40ec 100644 --- 
a/src/test/java/htsjdk/samtools/filter/OverclippedReadFilterTest.java +++ b/src/test/java/htsjdk/samtools/filter/OverclippedReadFilterTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.filter; +import htsjdk.HtsjdkTest; import htsjdk.samtools.Cigar; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordSetBuilder; @@ -31,7 +32,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -public class OverclippedReadFilterTest { +public class OverclippedReadFilterTest extends HtsjdkTest { private final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); private final int unclippedBasesThreshold = 30; diff --git a/src/test/java/htsjdk/samtools/filter/SolexaNoiseFilterTest.java b/src/test/java/htsjdk/samtools/filter/SolexaNoiseFilterTest.java index 96fa324b9..5ea20d406 100644 --- a/src/test/java/htsjdk/samtools/filter/SolexaNoiseFilterTest.java +++ b/src/test/java/htsjdk/samtools/filter/SolexaNoiseFilterTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.filter; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordSetBuilder; import org.testng.Assert; @@ -32,7 +33,7 @@ /** * Basic test for the SolexaNoiseFilter */ -public class SolexaNoiseFilterTest { +public class SolexaNoiseFilterTest extends HtsjdkTest { private final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); private final SolexaNoiseFilter filter = new SolexaNoiseFilter(); diff --git a/src/test/java/htsjdk/samtools/filter/TagFilterTest.java b/src/test/java/htsjdk/samtools/filter/TagFilterTest.java index 6e0c70293..d885cbe9f 100644 --- a/src/test/java/htsjdk/samtools/filter/TagFilterTest.java +++ b/src/test/java/htsjdk/samtools/filter/TagFilterTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.filter; +import htsjdk.HtsjdkTest; import htsjdk.samtools.ReservedTagConstants; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordSetBuilder; @@ -36,7 +37,7 @@ /** * Tests for the TagFilter class */ 
-public class TagFilterTest { +public class TagFilterTest extends HtsjdkTest { private final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); @@ -74,4 +75,4 @@ public void testTagFilter(final String testName, final String tag, final List expected = Files.lines(expectedFai.toPath()); + final Stream created = Files.lines(createdFai.toPath())) { + final List expectedLines = expected.filter(String::isEmpty).collect(Collectors.toList()); + final List createdLines = created.filter(String::isEmpty).collect(Collectors.toList()); + Assert.assertEquals(expectedLines, createdLines); + } + + // load the tmp index and check that both are the same + Assert.assertEquals(new FastaSequenceIndex(createdFai), new FastaSequenceIndex(expectedFai)); + } + +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java b/src/test/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java index bfef121df..c6fa1384a 100644 --- a/src/test/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java +++ b/src/test/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java @@ -24,19 +24,26 @@ package htsjdk.samtools.reference; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMException; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; +import java.io.FileReader; +import java.nio.file.Files; import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Test the fasta sequence index reader. 
*/ -public class FastaSequenceIndexTest { +public class FastaSequenceIndexTest extends HtsjdkTest { private static File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/reference"); @DataProvider(name="homosapiens") @@ -253,4 +260,30 @@ public void testSpecialCharacters(FastaSequenceIndex specialCharactersIndex) { Assert.assertEquals(ent.getBasesPerLine(),70,"Contig file:gi|17981852|ref|NC_001807.4| bases per line is not correct"); Assert.assertEquals(ent.getBytesPerLine(),71,"Contig file:gi|17981852|ref|NC_001807.4| bytes per line is not correct"); } + + @Test + public void testWrite() throws Exception { + // gets the original file and index + final File originalFile = new File(TEST_DATA_DIR, "testing.fai"); + final FastaSequenceIndex originalIndex = new FastaSequenceIndex(originalFile); + + // write the index to a temp file and test if files are the same + final File fileToWrite = File.createTempFile("testing.toWrite", "fai"); + fileToWrite.deleteOnExit(); + originalIndex.write(fileToWrite.toPath()); + + // read all the files and compare line by line + try(final Stream original = Files.lines(originalFile.toPath()); + final Stream written = Files.lines(fileToWrite.toPath())) { + final List originalLines = original.filter(s -> ! 
s.isEmpty()).collect(Collectors.toList()); + final List actualLines = written.filter(s -> !s.isEmpty()).collect(Collectors.toList()); + Assert.assertEquals(actualLines, originalLines); + } + + // load the tmp index and check that both are the same + final FastaSequenceIndex writtenIndex = new FastaSequenceIndex(fileToWrite); + Assert.assertEquals(writtenIndex, originalIndex); + } + + } diff --git a/src/test/java/htsjdk/samtools/reference/IndexedFastaSequenceFileTest.java b/src/test/java/htsjdk/samtools/reference/IndexedFastaSequenceFileTest.java index 086b3be63..d352a0331 100644 --- a/src/test/java/htsjdk/samtools/reference/IndexedFastaSequenceFileTest.java +++ b/src/test/java/htsjdk/samtools/reference/IndexedFastaSequenceFileTest.java @@ -24,6 +24,7 @@ package htsjdk.samtools.reference; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMException; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.StringUtil; @@ -37,7 +38,7 @@ /** * Test the indexed fasta sequence file reader. 
*/ -public class IndexedFastaSequenceFileTest{ +public class IndexedFastaSequenceFileTest extends HtsjdkTest { private static File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/reference"); private static File SEQUENCE_FILE = new File(TEST_DATA_DIR,"Homo_sapiens_assembly18.trimmed.fasta"); private static File SEQUENCE_FILE_NODICT = new File(TEST_DATA_DIR,"Homo_sapiens_assembly18.trimmed.nodict.fasta"); diff --git a/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java b/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java index 6eeae7b99..5d827f890 100644 --- a/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java +++ b/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java @@ -1,6 +1,8 @@ package htsjdk.samtools.reference; +import htsjdk.HtsjdkTest; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -8,7 +10,7 @@ /** * Simple tests for the reference sequence file factory */ -public class ReferenceSequenceFileFactoryTests { +public class ReferenceSequenceFileFactoryTests extends HtsjdkTest { public static final File hg18 = new File("src/test/resources/htsjdk/samtools/reference/Homo_sapiens_assembly18.trimmed.fasta"); @Test public void testPositivePath() { @@ -36,4 +38,21 @@ Assert.assertTrue(f instanceof IndexedFastaSequenceFile, "Got non-indexed reader by default."); } + + @DataProvider + public Object[][] fastaNames() { + return new Object[][] { + {"break.fa", "break.dict"}, + {"break.txt.txt", "break.txt.dict"}, + {"break.fasta.fasta", "break.fasta.dict"}, + {"break.fa.gz", "break.dict"}, + {"break.txt.gz.txt.gz", "break.txt.gz.dict"}, + {"break.fasta.gz.fasta.gz", "break.fasta.gz.dict"} + }; + } + + @Test(dataProvider = "fastaNames") + public void testGetDefaultDictionaryForReferenceSequence(final String fastaFile, final String expectedDict) throws Exception { + 
Assert.assertEquals(ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(new File(fastaFile)), new File(expectedDict)); + } } diff --git a/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileWalkerTest.java b/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileWalkerTest.java index fa746d625..e6299c3c0 100644 --- a/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileWalkerTest.java +++ b/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileWalkerTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.reference; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMException; import htsjdk.samtools.util.CloserUtil; import org.testng.Assert; @@ -11,7 +12,7 @@ /** * Created by farjoun on 2/14/14. */ -public class ReferenceSequenceFileWalkerTest { +public class ReferenceSequenceFileWalkerTest extends HtsjdkTest { @DataProvider(name = "TestReference") diff --git a/src/test/java/htsjdk/samtools/reference/ReferenceSequenceTests.java b/src/test/java/htsjdk/samtools/reference/ReferenceSequenceTests.java index 797b0b33c..4bb922c54 100644 --- a/src/test/java/htsjdk/samtools/reference/ReferenceSequenceTests.java +++ b/src/test/java/htsjdk/samtools/reference/ReferenceSequenceTests.java @@ -24,6 +24,7 @@ package htsjdk.samtools.reference; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -38,7 +39,7 @@ * * @author Tim Fennell */ -public class ReferenceSequenceTests { +public class ReferenceSequenceTests extends HtsjdkTest { private static final byte[] BASES = "acgtACGTN".getBytes(); private final Random random = new Random(); diff --git a/src/test/java/htsjdk/samtools/seekablestream/ByteArraySeekableStreamTest.java b/src/test/java/htsjdk/samtools/seekablestream/ByteArraySeekableStreamTest.java new file mode 100644 index 000000000..04a228f94 --- /dev/null +++ b/src/test/java/htsjdk/samtools/seekablestream/ByteArraySeekableStreamTest.java @@ -0,0 
+1,116 @@ +/* + * The MIT License + * + * Copyright (c) 2017 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + */ + +package htsjdk.samtools.seekablestream; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.IOException; + +/** + * Created by farjoun on 5/27/17. 
+ */ +public class ByteArraySeekableStreamTest extends HtsjdkTest { + private final byte[] bytes = "ABCDE12345".getBytes(); + + @Test + public void testNormalBehavior() throws IOException { + ByteArraySeekableStream byteArraySeekableStream = new ByteArraySeekableStream(bytes); + + Assert.assertEquals(byteArraySeekableStream.length(), 10); + for (int i = 0; i < 10; i++) { + Assert.assertFalse(byteArraySeekableStream.eof()); + Assert.assertEquals(byteArraySeekableStream.position(), i); + Assert.assertEquals(byteArraySeekableStream.read(), bytes[i]); + } + + Assert.assertTrue(byteArraySeekableStream.eof()); + Assert.assertEquals(byteArraySeekableStream.position(), 10); + Assert.assertEquals(byteArraySeekableStream.read(), -1); + + final long i = 0; + byteArraySeekableStream.seek(i); + + Assert.assertEquals(byteArraySeekableStream.position(), i); + Assert.assertEquals(byteArraySeekableStream.read(), bytes[(int) i]); + + byte[] copy = new byte[10]; + + Assert.assertEquals(byteArraySeekableStream.read(copy), 9); + Assert.assertEquals(byteArraySeekableStream.position(), 10); + + byteArraySeekableStream.seek(0L); + + Assert.assertEquals(byteArraySeekableStream.read(copy), 10); + Assert.assertEquals(byteArraySeekableStream.position(), 10); + + Assert.assertEquals(copy, bytes); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testCantSeekNegative() throws IOException { + + ByteArraySeekableStream byteArraySeekableStream = new ByteArraySeekableStream(bytes); + + byteArraySeekableStream.seek(-1L); + + // if allowed to seek, this will throw OutOfBounds + final int f = byteArraySeekableStream.read(); + } + + @Test + public void testCantReadPostEof() throws IOException { + + ByteArraySeekableStream byteArraySeekableStream = new ByteArraySeekableStream(bytes); + byte[] copy = new byte[10]; + + byteArraySeekableStream.seek(10); + Assert.assertEquals(byteArraySeekableStream.read(copy), -1); + Assert.assertEquals(byteArraySeekableStream.read(), -1); + 
} + + @DataProvider(name = "abnormalReadRequests") + public Object[][] abnormalReadRequestsProvider() { + return new Object[][]{ + {new byte[10], -1, 0}, + {new byte[10], -1, -1}, + {new byte[10], 0, -1}, + {new byte[10], 0, -1}, + {new byte[10], 0, 11}, + {new byte[10], 6, 6}, + {new byte[10], 11, 0}, + }; + } + + @Test(dataProvider = "abnormalReadRequests", expectedExceptions = IndexOutOfBoundsException.class) + public void testAbnormalReadRequest(final byte[] b, final int off, final int length) throws IOException { + + ByteArraySeekableStream byteArraySeekableStream = new ByteArraySeekableStream(bytes); + int i = byteArraySeekableStream.read(b, off, length); + + Assert.assertEquals(i, -2); ///impossible + } +} diff --git a/src/test/java/htsjdk/samtools/seekablestream/SeekableBufferedStreamTest.java b/src/test/java/htsjdk/samtools/seekablestream/SeekableBufferedStreamTest.java index 9720218a0..8de587327 100644 --- a/src/test/java/htsjdk/samtools/seekablestream/SeekableBufferedStreamTest.java +++ b/src/test/java/htsjdk/samtools/seekablestream/SeekableBufferedStreamTest.java @@ -24,6 +24,7 @@ package htsjdk.samtools.seekablestream; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; @@ -33,7 +34,7 @@ import static org.testng.Assert.assertEquals; -public class SeekableBufferedStreamTest { +public class SeekableBufferedStreamTest extends HtsjdkTest { // private final File BAM_INDEX_FILE = new File("testdata/htsjdk/samtools/BAMFileIndexTest/index_test.bam.bai"); private final File BAM_FILE = new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam"); diff --git a/src/test/java/htsjdk/samtools/seekablestream/SeekableFTPStreamTest.java b/src/test/java/htsjdk/samtools/seekablestream/SeekableFTPStreamTest.java index 966c18e2a..4dfc8b78b 100644 --- a/src/test/java/htsjdk/samtools/seekablestream/SeekableFTPStreamTest.java +++ b/src/test/java/htsjdk/samtools/seekablestream/SeekableFTPStreamTest.java @@ -23,6 +23,7 @@ 
*/ package htsjdk.samtools.seekablestream; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -35,7 +36,7 @@ * @author Jim Robinson * @since 10/3/11 */ -public class SeekableFTPStreamTest { +public class SeekableFTPStreamTest extends HtsjdkTest { static String urlString = "ftp://ftp.broadinstitute.org/pub/igv/TEST/test.txt"; diff --git a/src/test/java/htsjdk/samtools/seekablestream/SeekableFileStreamTest.java b/src/test/java/htsjdk/samtools/seekablestream/SeekableFileStreamTest.java index 35e154524..ddf54ef60 100644 --- a/src/test/java/htsjdk/samtools/seekablestream/SeekableFileStreamTest.java +++ b/src/test/java/htsjdk/samtools/seekablestream/SeekableFileStreamTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.seekablestream; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.BufferedLineReader; import org.testng.Assert; import org.testng.annotations.Test; @@ -36,7 +37,7 @@ * Time: 11:13:19 AM * To change this template use File | Settings | File Templates. 
*/ -public class SeekableFileStreamTest { +public class SeekableFileStreamTest extends HtsjdkTest { @Test public void testSeek() throws Exception { diff --git a/src/test/java/htsjdk/samtools/seekablestream/SeekableMemoryStreamTest.java b/src/test/java/htsjdk/samtools/seekablestream/SeekableMemoryStreamTest.java index 09ad92d21..428090c9c 100644 --- a/src/test/java/htsjdk/samtools/seekablestream/SeekableMemoryStreamTest.java +++ b/src/test/java/htsjdk/samtools/seekablestream/SeekableMemoryStreamTest.java @@ -24,13 +24,14 @@ package htsjdk.samtools.seekablestream; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; import java.io.EOFException; import java.io.IOException; -public class SeekableMemoryStreamTest { +public class SeekableMemoryStreamTest extends HtsjdkTest { @Test public void test_getSource() { diff --git a/src/test/java/htsjdk/samtools/seekablestream/SeekablePathStreamTest.java b/src/test/java/htsjdk/samtools/seekablestream/SeekablePathStreamTest.java index 067f5be9f..8b1f151d4 100644 --- a/src/test/java/htsjdk/samtools/seekablestream/SeekablePathStreamTest.java +++ b/src/test/java/htsjdk/samtools/seekablestream/SeekablePathStreamTest.java @@ -26,10 +26,12 @@ import java.io.File; import java.nio.file.Files; import java.nio.file.Path; + +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; -public class SeekablePathStreamTest { +public class SeekablePathStreamTest extends HtsjdkTest { @Test public void testRead() throws Exception { diff --git a/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java b/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java index 5eb0af6f4..82f8f1cd9 100644 --- a/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java +++ b/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.seekablestream; +import htsjdk.HtsjdkTest; import 
htsjdk.samtools.util.TestUtil; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -9,7 +10,7 @@ import java.io.IOException; import java.net.URL; -public class SeekableStreamFactoryTest { +public class SeekableStreamFactoryTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); @Test diff --git a/src/test/java/htsjdk/samtools/sra/AbstractSRATest.java b/src/test/java/htsjdk/samtools/sra/AbstractSRATest.java index 297b892c5..eeba1d2ea 100644 --- a/src/test/java/htsjdk/samtools/sra/AbstractSRATest.java +++ b/src/test/java/htsjdk/samtools/sra/AbstractSRATest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.sra; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordIterator; import org.testng.Assert; @@ -12,7 +13,7 @@ import java.util.NoSuchElementException; @Test(groups = "sra") -public abstract class AbstractSRATest { +public abstract class AbstractSRATest extends HtsjdkTest { private static boolean canResolveNetworkAccession = false; private static String checkAccession = "SRR000123"; @@ -24,14 +25,14 @@ public final void checkIfCanResolve() { canResolveNetworkAccession = SRAAccession.isValid(checkAccession); } - @BeforeMethod + @BeforeMethod(groups = "sra") public final void assertSRAIsSupported() { if(SRAAccession.checkIfInitialized() != null){ throw new SkipException("Skipping SRA Test because SRA native code is unavailable."); } } - @BeforeMethod + @BeforeMethod(groups = "sra") public final void skipIfCantResolve(Method method, Object[] params) { String accession = null; diff --git a/src/test/java/htsjdk/samtools/util/AbstractLocusInfoTest.java b/src/test/java/htsjdk/samtools/util/AbstractLocusInfoTest.java index a21c74361..b20be4f39 100644 --- a/src/test/java/htsjdk/samtools/util/AbstractLocusInfoTest.java +++ b/src/test/java/htsjdk/samtools/util/AbstractLocusInfoTest.java @@ -24,6 +24,7 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; 
import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMSequenceRecord; @@ -36,7 +37,7 @@ * @author Mariia_Zueva@epam.com, EPAM Systems, Inc. */ -public class AbstractLocusInfoTest { +public class AbstractLocusInfoTest extends HtsjdkTest { private final byte[] qualities = {30, 50, 50, 60, 60, 70, 70, 70, 80, 90, 30, 50, 50, 60, 60, 70, 70, 70, 80, 90}; private byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T', 'T', 'C', 'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T', 'T', 'C'}; private EdgingRecordAndOffset typedRecordAndOffset; diff --git a/src/test/java/htsjdk/samtools/util/AbstractLocusIteratorTestTemplate.java b/src/test/java/htsjdk/samtools/util/AbstractLocusIteratorTestTemplate.java index 0c08436e5..d1e2f0f2e 100644 --- a/src/test/java/htsjdk/samtools/util/AbstractLocusIteratorTestTemplate.java +++ b/src/test/java/htsjdk/samtools/util/AbstractLocusIteratorTestTemplate.java @@ -25,6 +25,7 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecordSetBuilder; import htsjdk.samtools.SAMSequenceDictionary; @@ -36,7 +37,7 @@ * @author Mariia_Zueva@epam.com, EPAM Systems, Inc. 
* */ -public abstract class AbstractLocusIteratorTestTemplate { +public abstract class AbstractLocusIteratorTestTemplate extends HtsjdkTest { /** Coverage for tests with the same reads */ final static int coverage = 2; @@ -65,4 +66,4 @@ static SAMRecordSetBuilder getRecordBuilder() { public abstract void testEmitUncoveredLoci(); public abstract void testSimpleGappedAlignment(); public abstract void testOverlappingGappedAlignmentsWithoutIndels(); -} \ No newline at end of file +} diff --git a/src/test/java/htsjdk/samtools/util/AbstractRecordAndOffsetTest.java b/src/test/java/htsjdk/samtools/util/AbstractRecordAndOffsetTest.java index 568c84c7c..372a590d3 100644 --- a/src/test/java/htsjdk/samtools/util/AbstractRecordAndOffsetTest.java +++ b/src/test/java/htsjdk/samtools/util/AbstractRecordAndOffsetTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import org.testng.annotations.BeforeTest; @@ -36,7 +37,7 @@ * */ -public class AbstractRecordAndOffsetTest { +public class AbstractRecordAndOffsetTest extends HtsjdkTest { private final byte[] qualities = {30, 40, 50, 60, 70, 80 ,90, 70, 80, 90}; private byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T', 'T', 'C'}; @@ -52,12 +53,11 @@ public void setUp(){ @Test public void testConstructor(){ - AbstractRecordAndOffset abstractRecordAndOffset = new AbstractRecordAndOffset(record, 0, 10, 3); + AbstractRecordAndOffset abstractRecordAndOffset = new AbstractRecordAndOffset(record, 0); assertArrayEquals(qualities, abstractRecordAndOffset.getBaseQualities()); assertArrayEquals(bases, abstractRecordAndOffset.getRecord().getReadBases()); assertEquals('A', abstractRecordAndOffset.getReadBase()); assertEquals(30, abstractRecordAndOffset.getBaseQuality()); assertEquals(0, abstractRecordAndOffset.getOffset()); - assertEquals(-1, abstractRecordAndOffset.getRefPos()); } } diff --git 
a/src/test/java/htsjdk/samtools/util/AsyncBlockCompressedInputStreamTest.java b/src/test/java/htsjdk/samtools/util/AsyncBlockCompressedInputStreamTest.java new file mode 100644 index 000000000..a1f9881a0 --- /dev/null +++ b/src/test/java/htsjdk/samtools/util/AsyncBlockCompressedInputStreamTest.java @@ -0,0 +1,92 @@ +/* + * The MIT License + * + * Copyright (c) 2016 Daniel Cameron + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package htsjdk.samtools.util; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +import htsjdk.samtools.seekablestream.SeekableFileStream; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +public class AsyncBlockCompressedInputStreamTest extends HtsjdkTest { + private final File BAM_FILE = new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam"); + @Test + public void testAsync() throws Exception { + BlockCompressedInputStream sync = new BlockCompressedInputStream(new SeekableFileStream(BAM_FILE)); + List expected = new ArrayList<>(); + List virtualOffset = new ArrayList<>(); + List length = new ArrayList<>(); + byte[] buffer = new byte[BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE / 2]; + virtualOffset.add(sync.getFilePointer()); + int len = sync.read(buffer); + length.add(len); + while (len > 0) { + expected.add(buffer); + buffer = new byte[buffer.length]; + len = sync.read(buffer); + length.add(len); + virtualOffset.add(sync.getFilePointer()); + } + sync.close(); + buffer = new byte[buffer.length]; + List list = new ArrayList<>(); + for (int i = 0; i < 8; i++) { + list.add(new AsyncBlockCompressedInputStream(new SeekableFileStream(BAM_FILE))); + } + // read till EOF + for (int i = 0; i < expected.size(); i++) { + for (BlockCompressedInputStream async : list) { + len = async.read(buffer); + Assert.assertEquals(len, (int)length.get(i)); + Assert.assertEquals(buffer[0], expected.get(i)[0]); + } + } + for (int j = 0; j < 128; j++) { + // seek and read + for (BlockCompressedInputStream async : list) { + async.seek(virtualOffset.get(0)); + } + for (int i = 0; i < Math.min(expected.size(), 8); i++) { + for (BlockCompressedInputStream async : list) { + len = async.read(buffer); + Assert.assertEquals(len, (int)length.get(i)); + Assert.assertEquals(buffer[0], expected.get(i)[0]); + } + } + } + for (BlockCompressedInputStream async : list) { + async.close(); + 
} + } + @Test + public void testFilePointer() throws Exception { + BlockCompressedInputStream sync = new BlockCompressedInputStream(BAM_FILE); + Assert.assertEquals(sync.getFilePointer(), 0); + sync.close(); + } +} diff --git a/src/test/java/htsjdk/samtools/util/AsyncBufferedIteratorTest.java b/src/test/java/htsjdk/samtools/util/AsyncBufferedIteratorTest.java index 817c60e54..e35dadc94 100644 --- a/src/test/java/htsjdk/samtools/util/AsyncBufferedIteratorTest.java +++ b/src/test/java/htsjdk/samtools/util/AsyncBufferedIteratorTest.java @@ -23,10 +23,11 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; -public class AsyncBufferedIteratorTest { +public class AsyncBufferedIteratorTest extends HtsjdkTest { private static class TestCloseableIterator implements CloseableIterator { private int[] results; private volatile int offset = 0; @@ -73,9 +74,15 @@ public void testBackgroundBlocks() throws InterruptedException { TestCloseableIterator it = new TestCloseableIterator(new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }); AsyncBufferedIterator abi = new AsyncBufferedIterator(it, 3, 2, "testBackgroundBlocks"); Assert.assertNotNull(getThreadWithName("testBackgroundBlocks")); - Thread.sleep(10); // how do we write this test and not be subject to race conditions? + // how do we write this test and not be subject to race conditions? 
// should have read 9 records: 2*3 in the buffers, and another 3 read but - // blocking waiting to be added + // blocking waiting to be added + for (int i = 0; i < 64; i++) { + if (it.consumed() >= 9) { + break; + } + Thread.sleep(1); + } Assert.assertEquals(it.consumed(), 9); abi.close(); } diff --git a/src/test/java/htsjdk/samtools/util/AsyncWriterTest.java b/src/test/java/htsjdk/samtools/util/AsyncWriterTest.java index c807ceffb..1d2c3043f 100644 --- a/src/test/java/htsjdk/samtools/util/AsyncWriterTest.java +++ b/src/test/java/htsjdk/samtools/util/AsyncWriterTest.java @@ -23,10 +23,11 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; -public class AsyncWriterTest { +public class AsyncWriterTest extends HtsjdkTest { private static class MyException extends RuntimeException { final Integer item; public MyException(Integer item) { diff --git a/src/test/java/htsjdk/samtools/util/BinaryCodecTest.java b/src/test/java/htsjdk/samtools/util/BinaryCodecTest.java index 91e114729..b59c9527d 100644 --- a/src/test/java/htsjdk/samtools/util/BinaryCodecTest.java +++ b/src/test/java/htsjdk/samtools/util/BinaryCodecTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; @@ -44,7 +45,7 @@ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
*/ -public class BinaryCodecTest { +public class BinaryCodecTest extends HtsjdkTest { public final static String TEST_BASENAME = "htsjdk-BinaryCodecTest"; @Test diff --git a/src/test/java/htsjdk/samtools/util/BlockCompressedFilePointerUtilTest.java b/src/test/java/htsjdk/samtools/util/BlockCompressedFilePointerUtilTest.java index 850b4bf62..38c3ec374 100644 --- a/src/test/java/htsjdk/samtools/util/BlockCompressedFilePointerUtilTest.java +++ b/src/test/java/htsjdk/samtools/util/BlockCompressedFilePointerUtilTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -31,8 +32,7 @@ import java.util.List; -public class BlockCompressedFilePointerUtilTest -{ +public class BlockCompressedFilePointerUtilTest extends HtsjdkTest { @Test public void basicTest() { diff --git a/src/test/java/htsjdk/samtools/util/BlockCompressedInputStreamTest.java b/src/test/java/htsjdk/samtools/util/BlockCompressedInputStreamTest.java new file mode 100644 index 000000000..4c9d532d0 --- /dev/null +++ b/src/test/java/htsjdk/samtools/util/BlockCompressedInputStreamTest.java @@ -0,0 +1,201 @@ +package htsjdk.samtools.util; + +import htsjdk.HtsjdkTest; +import htsjdk.samtools.seekablestream.SeekableFileStream; +import htsjdk.samtools.util.zip.InflaterFactory; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.*; +import java.net.URL; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.zip.Inflater; + +public class BlockCompressedInputStreamTest extends HtsjdkTest { + // random data pulled from /dev/random then compressed using bgzip from tabix + private static final File BLOCK_UNCOMPRESSED = new File("src/test/resources/htsjdk/samtools/util/random.bin"); + private static final File BLOCK_COMPRESSED = new 
File("src/test/resources/htsjdk/samtools/util/random.bin.gz"); + private static final long[] BLOCK_COMPRESSED_OFFSETS = new long[] { 0, 0xfc2e, 0x1004d, 0x1fc7b, 0x2009a, }; + private static final long[] BLOCK_UNCOMPRESSED_END_POSITIONS = new long[] { 64512, 65536, 130048 }; + @Test + public void stream_should_match_uncompressed_stream() throws Exception { + byte[] uncompressed = Files.readAllBytes(BLOCK_UNCOMPRESSED.toPath()); + try (BlockCompressedInputStream stream = new BlockCompressedInputStream(new FileInputStream(BLOCK_COMPRESSED))) { + for (int i = 0; i < uncompressed.length; i++) { + Assert.assertEquals(stream.read(), Byte.toUnsignedInt(uncompressed[i])); + } + Assert.assertTrue(stream.endOfBlock()); + } + } + @Test + public void endOfBlock_should_be_true_only_when_entire_block_is_read() throws Exception { + long size = BLOCK_UNCOMPRESSED.length(); + // input file contains 5 blocks + List offsets = new ArrayList<>(); + for (int i = 0; i < BLOCK_UNCOMPRESSED_END_POSITIONS.length; i++) { + offsets.add(BLOCK_UNCOMPRESSED_END_POSITIONS[i]); + } + List endOfBlockTrue = new ArrayList<>(); + try (BlockCompressedInputStream stream = new BlockCompressedInputStream(new FileInputStream(BLOCK_COMPRESSED))) { + for (long i = 0; i < size; i++) { + if (stream.endOfBlock()) { + endOfBlockTrue.add(i); + } + stream.read(); + } + } + Assert.assertEquals(endOfBlockTrue, offsets); + } + @Test + public void decompression_should_cross_block_boundries() throws Exception { + byte[] uncompressed = Files.readAllBytes(BLOCK_UNCOMPRESSED.toPath()); + try (BlockCompressedInputStream stream = new BlockCompressedInputStream(new FileInputStream(BLOCK_COMPRESSED))) { + byte[] decompressed = new byte[uncompressed.length]; + stream.read(decompressed); + Assert.assertEquals(decompressed, uncompressed); + Assert.assertTrue(stream.endOfBlock()); + Assert.assertEquals(stream.read(), -1); + } + } + @Test + public void seek_should_read_block() throws Exception { + byte[] uncompressed = 
Files.readAllBytes(BLOCK_UNCOMPRESSED.toPath()); + try (SeekableFileStream sfs = new SeekableFileStream(BLOCK_COMPRESSED)) { + try (BlockCompressedInputStream stream = new BlockCompressedInputStream(sfs)) { + // seek to the start of the first block + for (int i = 0; i < BLOCK_COMPRESSED_OFFSETS.length-1; i++) { + stream.seek(BLOCK_COMPRESSED_OFFSETS[i] << 16); + Assert.assertEquals(sfs.position(), BLOCK_COMPRESSED_OFFSETS[i + 1]); + // check + byte[] actual = new byte[uncompressed.length]; + int len = stream.read(actual); + actual = Arrays.copyOf(actual, len); + byte[] expected = Arrays.copyOfRange(uncompressed, uncompressed.length - actual.length, uncompressed.length); + Assert.assertEquals(actual, expected); + } + } + } + } + @Test + public void available_should_return_number_of_bytes_left_in_current_block() throws Exception { + try (BlockCompressedInputStream stream = new BlockCompressedInputStream(BLOCK_COMPRESSED)) { + for (int i = 0; i < BLOCK_UNCOMPRESSED_END_POSITIONS[0]; i++) { + Assert.assertEquals(stream.available(), BLOCK_UNCOMPRESSED_END_POSITIONS[0] - i); + stream.read(); + } + } + } + + private static class CountingInflater extends Inflater { + // Must be static unfortunately, since there's no way to reach down into an inflater instance given a stream + static int inflateCalls = 0; + + CountingInflater(boolean gzipCompatible) { + super(gzipCompatible); + } + @Override + public int inflate(byte[] b, int off, int len) throws java.util.zip.DataFormatException { + inflateCalls++; + return super.inflate(b, off, len); + } + } + + private static class CountingInflaterFactory extends InflaterFactory { + @Override + public Inflater makeInflater( boolean gzipCompatible ) { + return new CountingInflater(gzipCompatible); + } + } + + @FunctionalInterface + private interface CheckedExceptionInputStreamSupplier { + InputStream get() throws IOException; + } + + private List writeTempBlockCompressedFileForInflaterTest( final File tempFile ) throws IOException { + 
final List linesWritten = new ArrayList<>(); + try ( final BlockCompressedOutputStream bcos = new BlockCompressedOutputStream(tempFile, 5) ) { + String s = "Hi, Mom!\n"; + bcos.write(s.getBytes()); //Call 1 + linesWritten.add(s); + s = "Hi, Dad!\n"; + bcos.write(s.getBytes()); //Call 2 + linesWritten.add(s); + bcos.flush(); + final StringBuilder sb = new StringBuilder(BlockCompressedStreamConstants.DEFAULT_UNCOMPRESSED_BLOCK_SIZE * 2); + s = "1234567890123456789012345678901234567890123456789012345678901234567890\n"; + while ( sb.length() <= BlockCompressedStreamConstants.DEFAULT_UNCOMPRESSED_BLOCK_SIZE ) { + sb.append(s); + linesWritten.add(s); + } + bcos.write(sb.toString().getBytes()); //Call 3 + } + return linesWritten; + } + + @DataProvider(name = "customInflaterInput") + public Object[][] customInflateInput() throws IOException { + final File tempFile = File.createTempFile("testCustomInflater.", ".bam"); + tempFile.deleteOnExit(); + final List linesWritten = writeTempBlockCompressedFileForInflaterTest(tempFile); + + final InflaterFactory countingInflaterFactory = new CountingInflaterFactory(); + + return new Object[][]{ + // set the default InflaterFactory to a CountingInflaterFactory + {(CheckedExceptionInputStreamSupplier) () -> new BlockCompressedInputStream(new FileInputStream(tempFile), false), linesWritten, 4, countingInflaterFactory}, + {(CheckedExceptionInputStreamSupplier) () -> new BlockCompressedInputStream(tempFile), linesWritten, 4, countingInflaterFactory}, + {(CheckedExceptionInputStreamSupplier) () -> new AsyncBlockCompressedInputStream(tempFile), linesWritten, 4, countingInflaterFactory}, + {(CheckedExceptionInputStreamSupplier) () -> new BlockCompressedInputStream(new URL("http://broadinstitute.github.io/picard/testdata/index_test.bam")), null, 21, countingInflaterFactory}, + // provide a CountingInflaterFactory explicitly + {(CheckedExceptionInputStreamSupplier) () -> new BlockCompressedInputStream(new FileInputStream(tempFile), false, 
countingInflaterFactory), linesWritten, 4, null}, + {(CheckedExceptionInputStreamSupplier) () -> new BlockCompressedInputStream(tempFile, countingInflaterFactory), linesWritten, 4, null}, + {(CheckedExceptionInputStreamSupplier) () -> new AsyncBlockCompressedInputStream(tempFile, countingInflaterFactory), linesWritten, 4, null}, + {(CheckedExceptionInputStreamSupplier) () -> new BlockCompressedInputStream(new URL("http://broadinstitute.github.io/picard/testdata/index_test.bam"), countingInflaterFactory), null, 21, null} + }; + } + + @Test(dataProvider = "customInflaterInput", singleThreaded = true) + public void testCustomInflater(final CheckedExceptionInputStreamSupplier bcisSupplier, + final List expectedOutput, + final int expectedInflateCalls, + final InflaterFactory customDefaultInflaterFactory) throws Exception + { + // clear inflate call counter in CountingInflater + CountingInflater.inflateCalls = 0; + + // If requested, set the global default InflaterFactory to a custom factory. Otherwise, set it to the default. 
+ if ( customDefaultInflaterFactory != null ) { + BlockGunzipper.setDefaultInflaterFactory(customDefaultInflaterFactory); + } + else { + BlockGunzipper.setDefaultInflaterFactory(new InflaterFactory()); + } + + try (final BufferedReader reader = new BufferedReader(new InputStreamReader(bcisSupplier.get()))) { + String line; + for (int i = 0; (line = reader.readLine()) != null; ++i) { + // check expected output, if provided + if (expectedOutput != null) { + Assert.assertEquals(line + "\n", expectedOutput.get(i)); + } + } + } + + // verify custom inflater was used by checking number of inflate calls + Assert.assertEquals(CountingInflater.inflateCalls, expectedInflateCalls, "inflate calls"); + + // Reset the default InflaterFactory back to the default value + BlockGunzipper.setDefaultInflaterFactory(new InflaterFactory()); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testSetNullInflaterFactory() { + // test catching null InflaterFactory + BlockGunzipper.setDefaultInflaterFactory(null); + } +} diff --git a/src/test/java/htsjdk/samtools/util/BlockCompressedOutputStreamTest.java b/src/test/java/htsjdk/samtools/util/BlockCompressedOutputStreamTest.java index 8a0d97ffe..35175cd1d 100644 --- a/src/test/java/htsjdk/samtools/util/BlockCompressedOutputStreamTest.java +++ b/src/test/java/htsjdk/samtools/util/BlockCompressedOutputStreamTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import htsjdk.samtools.FileTruncatedException; import htsjdk.samtools.util.zip.DeflaterFactory; import org.testng.Assert; @@ -39,7 +40,7 @@ import java.util.Random; import java.util.zip.Deflater; -public class BlockCompressedOutputStreamTest { +public class BlockCompressedOutputStreamTest extends HtsjdkTest { private static final String HTSJDK_TRIBBLE_RESOURCES = "src/test/resources/htsjdk/tribble/"; @@ -80,6 +81,7 @@ public void testBasic() throws Exception { Assert.assertEquals(bcis2.read(buffer), available, "Should read to 
end of block"); Assert.assertTrue(bcis2.endOfBlock(), "Should be at end of block"); bcis2.close(); + Assert.assertEquals(bcis2.read(buffer), -1, "Should be end of file"); } @DataProvider(name = "seekReadExceptionsData") @@ -88,24 +90,32 @@ public void testBasic() throws Exception { return new Object[][]{ {HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.truncated.gz", FileTruncatedException.class, BlockCompressedInputStream.PREMATURE_END_MSG + System.getProperty("user.dir") + "/" + - HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.truncated.gz", true, false, 0}, + HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.truncated.gz", true, false, false, 0}, {HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.truncated.hdr.gz", IOException.class, BlockCompressedInputStream.INCORRECT_HEADER_SIZE_MSG + System.getProperty("user.dir") + "/" + - HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.truncated.hdr.gz", true, false, 0}, + HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.truncated.hdr.gz", true, false, false, 0}, {HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.gz", IOException.class, - BlockCompressedInputStream.CANNOT_SEEK_STREAM_MSG, false, true, 0}, + BlockCompressedInputStream.CANNOT_SEEK_STREAM_MSG, false, true, false, 0}, + {HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.gz", IOException.class, + BlockCompressedInputStream.CANNOT_SEEK_CLOSED_STREAM_MSG, false, true, true, 0}, {HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.gz", IOException.class, BlockCompressedInputStream.INVALID_FILE_PTR_MSG + 1000 + " for " + System.getProperty("user.dir") + "/" + - HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.gz", true, true, 1000 } + HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.gz", true, true, false, 1000 } }; } @Test(dataProvider = "seekReadExceptionsData") - public void testSeekReadExceptions(final String filePath, final Class c, final String msg, final boolean isFile, final boolean isSeek, final int pos) throws Exception { + public void testSeekReadExceptions(final String filePath, final Class c, final String msg, final 
boolean isFile, final boolean isSeek, final boolean isClosed, + final int pos) throws Exception { final BlockCompressedInputStream bcis = isFile ? new BlockCompressedInputStream(new File(filePath)) : new BlockCompressedInputStream(new FileInputStream(filePath)); + + if ( isClosed ) { + bcis.close(); + } + boolean haveException = false; try { if ( isSeek ) { @@ -170,8 +180,8 @@ public void testCustomDeflater() throws Exception { final int[] deflateCalls = {0}; //Note: using and array is a HACK to fool the compiler class MyDeflater extends Deflater{ - MyDeflater(int level, boolean nowrap){ - super(level, nowrap); + MyDeflater(int level, boolean gzipCompatible){ + super(level, gzipCompatible); } @Override public int deflate(byte[] b, int off, int len) { @@ -181,8 +191,9 @@ public int deflate(byte[] b, int off, int len) { } final DeflaterFactory myDeflaterFactory= new DeflaterFactory(){ - public Deflater makeDeflater(final int compressionLevel, final boolean nowrap) { - return new MyDeflater(compressionLevel, nowrap); + @Override + public Deflater makeDeflater(final int compressionLevel, final boolean gzipCompatible) { + return new MyDeflater(compressionLevel, gzipCompatible); } }; final List linesWritten = new ArrayList<>(); @@ -210,5 +221,6 @@ public Deflater makeDeflater(final int compressionLevel, final boolean nowrap) { } bcis.close(); Assert.assertEquals(deflateCalls[0], 3, "deflate calls"); + Assert.assertEquals(reader.readLine(), null); } } diff --git a/src/test/java/htsjdk/samtools/util/BlockCompressedTerminatorTest.java b/src/test/java/htsjdk/samtools/util/BlockCompressedTerminatorTest.java index 5b5837229..4a14bd920 100644 --- a/src/test/java/htsjdk/samtools/util/BlockCompressedTerminatorTest.java +++ b/src/test/java/htsjdk/samtools/util/BlockCompressedTerminatorTest.java @@ -23,37 +23,103 @@ */ package htsjdk.samtools.util; +import com.google.common.jimfs.Configuration; +import com.google.common.jimfs.Jimfs; +import htsjdk.HtsjdkTest; +import 
htsjdk.samtools.SeekableByteChannelFromBuffer; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.EOFException; import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; /** * @author alecw@broadinstitute.org */ -public class BlockCompressedTerminatorTest { +public class BlockCompressedTerminatorTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/util"); + private static final File DEFECTIVE = new File(TEST_DATA_DIR, "defective_bgzf.bam"); + private static final File NO_TERMINATOR = new File(TEST_DATA_DIR, "no_bgzf_terminator.bam"); - @Test - public void testFileWithTerminator() throws Exception { + @DataProvider + public Object[][] getFiles() throws IOException { + return new Object[][]{ + {getValidCompressedFile(), BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK}, + {NO_TERMINATOR, BlockCompressedInputStream.FileTermination.HAS_HEALTHY_LAST_BLOCK}, + {DEFECTIVE, BlockCompressedInputStream.FileTermination.DEFECTIVE} + }; + } + + @Test( dataProvider = "getFiles") + public void testCheckTerminationForFiles(File compressedFile, BlockCompressedInputStream.FileTermination expected) throws IOException { + Assert.assertEquals(BlockCompressedInputStream.checkTermination(compressedFile), expected); + } + + @Test( dataProvider = "getFiles") + public void testCheckTerminationForPaths(File compressedFile, BlockCompressedInputStream.FileTermination expected) throws IOException { + try(FileSystem fs = Jimfs.newFileSystem("test", Configuration.unix())){ + final Path compressedFileInJimfs = Files.copy(compressedFile.toPath(), fs.getPath("something")); + Assert.assertEquals(BlockCompressedInputStream.checkTermination(compressedFileInJimfs), expected); + 
} + } + + @Test( dataProvider = "getFiles") + public void testCheckTerminationForSeekableByteChannels(File compressedFile, BlockCompressedInputStream.FileTermination expected) throws IOException { + try(SeekableByteChannel channel = Files.newByteChannel(compressedFile.toPath())){ + Assert.assertEquals(BlockCompressedInputStream.checkTermination(channel), expected); + } + } + + @Test(dataProvider = "getFiles") + public void testChannelPositionIsRestored(File compressedFile, BlockCompressedInputStream.FileTermination expected) throws IOException { + final long position = 50; + try(SeekableByteChannel channel = Files.newByteChannel(compressedFile.toPath())){ + channel.position(position); + Assert.assertEquals(channel.position(), position); + Assert.assertEquals(BlockCompressedInputStream.checkTermination(channel), expected); + Assert.assertEquals(channel.position(), position); + } + } + + private static File getValidCompressedFile() throws IOException { final File tmpCompressedFile = File.createTempFile("test.", ".bgzf"); tmpCompressedFile.deleteOnExit(); final BlockCompressedOutputStream os = new BlockCompressedOutputStream(tmpCompressedFile); os.write("Hi, Mom!\n".getBytes()); os.close(); - Assert.assertEquals(BlockCompressedInputStream.checkTermination(tmpCompressedFile), - BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK); + return tmpCompressedFile; } @Test - public void testValidFileWithoutTerminator() throws Exception { - Assert.assertEquals(BlockCompressedInputStream.checkTermination(new File(TEST_DATA_DIR, "no_bgzf_terminator.bam")), - BlockCompressedInputStream.FileTermination.HAS_HEALTHY_LAST_BLOCK); + public void testReadFullyReadsBytesCorrectly() throws IOException { + try(final SeekableByteChannel channel = Files.newByteChannel(DEFECTIVE.toPath())){ + final ByteBuffer readBuffer = ByteBuffer.allocate(10); + Assert.assertTrue(channel.size() > readBuffer.capacity()); + BlockCompressedInputStream.readFully(channel, readBuffer); + + ByteBuffer 
expected = ByteBuffer.allocate(10); + channel.position(0).read(expected); + Assert.assertEquals(readBuffer.array(), expected.array()); + } } - @Test - public void testDefectiveFile() throws Exception { - Assert.assertEquals(BlockCompressedInputStream.checkTermination(new File(TEST_DATA_DIR, "defective_bgzf.bam")), - BlockCompressedInputStream.FileTermination.DEFECTIVE); + @Test(expectedExceptions = EOFException.class) + public void testReadFullyThrowWhenItCantReadEnough() throws IOException { + try(final SeekableByteChannel channel = Files.newByteChannel(DEFECTIVE.toPath())){ + final ByteBuffer readBuffer = ByteBuffer.allocate(1000); + Assert.assertTrue(channel.size() < readBuffer.capacity()); + BlockCompressedInputStream.readFully(channel, readBuffer); + } } + + + } diff --git a/src/test/java/htsjdk/samtools/util/CigarElementUnitTest.java b/src/test/java/htsjdk/samtools/util/CigarElementUnitTest.java new file mode 100644 index 000000000..23607ac83 --- /dev/null +++ b/src/test/java/htsjdk/samtools/util/CigarElementUnitTest.java @@ -0,0 +1,43 @@ +package htsjdk.samtools.util; + + +import htsjdk.HtsjdkTest; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public class CigarElementUnitTest extends HtsjdkTest { + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testNegativeLengthCheck(){ + final CigarElement element = new CigarElement(-1, CigarOperator.M); + } + + + @DataProvider + public Object[][] elementsForEquals() { + final CigarElement mElement = new CigarElement(10, CigarOperator.M); + return new Object[][] { + // same object + {mElement, mElement, true}, + // different equal objects + {mElement, new CigarElement(mElement.getLength(), mElement.getOperator()), true}, + // different lengths + {mElement, new CigarElement(mElement.getLength() + 1, mElement.getOperator()), false}, + // different 
operators + {mElement, new CigarElement(mElement.getLength(), CigarOperator.X), false}, + // different class + {mElement, mElement.toString(), false} + }; + } + + @Test(dataProvider = "elementsForEquals") + public void testEqualsAndHashCode(final CigarElement element, final Object other, final boolean isEquals) { + Assert.assertEquals(element.equals(other), isEquals); + if (isEquals) { + Assert.assertEquals(element.hashCode(), other.hashCode()); + } + } +} diff --git a/src/test/java/htsjdk/samtools/util/CigarUtilTest.java b/src/test/java/htsjdk/samtools/util/CigarUtilTest.java index 0aca3951a..6fe7b7199 100644 --- a/src/test/java/htsjdk/samtools/util/CigarUtilTest.java +++ b/src/test/java/htsjdk/samtools/util/CigarUtilTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import htsjdk.samtools.Cigar; import htsjdk.samtools.CigarElement; import htsjdk.samtools.TextCigarCodec; @@ -40,7 +41,7 @@ * * @author Martha Borkan mborkan@broadinstitute.org */ -public class CigarUtilTest { +public class CigarUtilTest extends HtsjdkTest { @Test(dataProvider="clipData") public void basicTest(final String testName, final int start, final String inputCigar, final boolean negativeStrand, diff --git a/src/test/java/htsjdk/samtools/util/CloseableIteratorTest.java b/src/test/java/htsjdk/samtools/util/CloseableIteratorTest.java index b96d1f67c..102b82436 100644 --- a/src/test/java/htsjdk/samtools/util/CloseableIteratorTest.java +++ b/src/test/java/htsjdk/samtools/util/CloseableIteratorTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; @@ -8,7 +9,7 @@ import java.util.List; import java.util.stream.Collectors; -public class CloseableIteratorTest { +public class CloseableIteratorTest extends HtsjdkTest { @Test public void testToList() { final List expected = Arrays.asList(1,2,3,4,5); diff --git a/src/test/java/htsjdk/samtools/util/CodeUtilTest.java 
b/src/test/java/htsjdk/samtools/util/CodeUtilTest.java index e8b9957d2..c4978c196 100644 --- a/src/test/java/htsjdk/samtools/util/CodeUtilTest.java +++ b/src/test/java/htsjdk/samtools/util/CodeUtilTest.java @@ -1,9 +1,10 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; -public class CodeUtilTest { +public class CodeUtilTest extends HtsjdkTest { @Test public void getOrElseTest() { diff --git a/src/test/java/htsjdk/samtools/util/ComparableTupleTest.java b/src/test/java/htsjdk/samtools/util/ComparableTupleTest.java index 7e8b082a5..708058d70 100644 --- a/src/test/java/htsjdk/samtools/util/ComparableTupleTest.java +++ b/src/test/java/htsjdk/samtools/util/ComparableTupleTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import htsjdk.variant.variantcontext.Allele; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -8,7 +9,7 @@ /** * Created by farjoun on 1/28/16. */ -public class ComparableTupleTest { +public class ComparableTupleTest extends HtsjdkTest { private enum Tenum { Hi, diff --git a/src/test/java/htsjdk/samtools/util/CoordSpanInputSteamTest.java b/src/test/java/htsjdk/samtools/util/CoordSpanInputSteamTest.java index 1b9088220..07de15873 100644 --- a/src/test/java/htsjdk/samtools/util/CoordSpanInputSteamTest.java +++ b/src/test/java/htsjdk/samtools/util/CoordSpanInputSteamTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import htsjdk.samtools.seekablestream.ByteArraySeekableStream; import org.testng.Assert; import org.testng.annotations.Test; @@ -15,7 +16,7 @@ /** * Created by vadim on 25/03/2015. 
*/ -public class CoordSpanInputSteamTest { +public class CoordSpanInputSteamTest extends HtsjdkTest { @Test public void test_first_3_bytes() throws IOException { diff --git a/src/test/java/htsjdk/samtools/util/DateParserTest.java b/src/test/java/htsjdk/samtools/util/DateParserTest.java new file mode 100644 index 000000000..11ab2a6f8 --- /dev/null +++ b/src/test/java/htsjdk/samtools/util/DateParserTest.java @@ -0,0 +1,151 @@ +// DateParser.java +// $Id: DateParser.java,v 1.3 2001/01/04 13:26:19 bmahe Exp $ +// (c) COPYRIGHT MIT, INRIA and Keio, 2000. + +/* +W3C IPR SOFTWARE NOTICE + +Copyright 1995-1998 World Wide Web Consortium, (Massachusetts Institute of +Technology, Institut National de Recherche en Informatique et en +Automatique, Keio University). All Rights Reserved. +http://www.w3.org/Consortium/Legal/ + +This W3C work (including software, documents, or other related items) is +being provided by the copyright holders under the following license. By +obtaining, using and/or copying this work, you (the licensee) agree that you +have read, understood, and will comply with the following terms and +conditions: + +Permission to use, copy, and modify this software and its documentation, +with or without modification, for any purpose and without fee or royalty is +hereby granted, provided that you include the following on ALL copies of the +software and documentation or portions thereof, including modifications, +that you make: + + 1. The full text of this NOTICE in a location viewable to users of the + redistributed or derivative work. + 2. Any pre-existing intellectual property disclaimers, notices, or terms + and conditions. If none exist, a short notice of the following form + (hypertext is preferred, text is permitted) should be used within the + body of any redistributed or derivative code: "Copyright World Wide + Web Consortium, (Massachusetts Institute of Technology, Institut + National de Recherche en Informatique et en Automatique, Keio + University). 
All Rights Reserved. http://www.w3.org/Consortium/Legal/" + 3. Notice of any changes or modifications to the W3C files, including the + date changes were made. (We recommend you provide URIs to the location + from which the code is derived). + +In addition, creators of derivitive works must include the full text of this +NOTICE in a location viewable to users of the derivitive work. + +THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS +MAKE NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR +PURPOSE OR THAT THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE +ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. + +COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR +DOCUMENTATION. + +The name and trademarks of copyright holders may NOT be used in advertising +or publicity pertaining to the software without specific, written prior +permission. Title to copyright in this software and any associated +documentation will at all times remain with copyright holders. + +____________________________________ + +This formulation of W3C's notice and license became active on August 14 +1998. See the older formulation for the policy prior to this date. Please +see our Copyright FAQ for common questions about using materials from our +site, including specific terms and conditions for packages like libwww, +Amaya, and Jigsaw. Other questions about this notice can be directed to +site-policy@w3.org . 
+ + + + +webmaster +(last updated 14-Aug-1998) + + */ + +package htsjdk.samtools.util; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Date; + +/** + * NOTE: This code has been taken from w3.org, and modified slightly to handle timezones of the form [-+]DDDD, + * and also to fix a bug in the application of time zone to the parsed date. + * + * Date parser for ISO 8601 format + * http://www.w3.org/TR/1998/NOTE-datetime-19980827 + * @version $Revision: 1.3 $ + * @author bmahe@w3.org + */ + +public class DateParserTest extends HtsjdkTest { + + private static void test(final String isodate) { + Date date = DateParser.parse(isodate); + final String isodateRoundTrip = DateParser.getIsoDate(date); + + final Date orig = DateParser.parse(isodate); + final Date roundTrip = DateParser.parse(isodateRoundTrip); + + assertDatesAreClose(orig, roundTrip); + } + + private static void test(final Date date) { + String isodate; + isodate = DateParser.getIsoDate(date); + final Date dateRoundTrip = DateParser.parse(isodate); + + assertDatesAreClose(date, dateRoundTrip); + Assert.assertTrue(Math.abs(date.getTime() - dateRoundTrip.getTime()) < 10); + } + + @DataProvider(name="dateDate") + public Object[][] dateData() { + return new Object[][]{ + {"1997-07-16T19:20:30.45-02:00"}, + {"1997-07-16T19:20:30+01:00"}, + {"1997-07-16T19:20:30+01:00"}, + {"1997-07-16T19:20"}, + {"1997-07-16"}, + {"1997-07"}, + {"1997"}, + }; + } + + @Test(dataProvider = "dateDate") + public static void testString(final String string) { + test(string); + } + + @Test(dataProvider = "dateDate") + public static void testDates(final String string) { + test(DateParser.parse(string)); + } + + @Test + public static void testDate() { + test(new Date()); + } + + public static void assertDatesAreClose(final Date lhs, final Date rhs) { + Assert.assertEquals(lhs.getYear(), rhs.getYear()); + 
Assert.assertEquals(lhs.getMonth(), rhs.getMonth()); + Assert.assertEquals(lhs.getDate(), rhs.getDate()); + Assert.assertEquals(lhs.getDay(), rhs.getDay()); + Assert.assertEquals(lhs.getHours(), rhs.getHours()); + Assert.assertEquals(lhs.getMinutes(), rhs.getMinutes()); + Assert.assertEquals(lhs.getSeconds(), rhs.getSeconds()); + Assert.assertEquals(lhs.getTimezoneOffset(), rhs.getTimezoneOffset()); + } +} diff --git a/src/test/java/htsjdk/samtools/util/DiskBackedQueueTest.java b/src/test/java/htsjdk/samtools/util/DiskBackedQueueTest.java index 88b05e2b7..95966520b 100644 --- a/src/test/java/htsjdk/samtools/util/DiskBackedQueueTest.java +++ b/src/test/java/htsjdk/samtools/util/DiskBackedQueueTest.java @@ -50,7 +50,9 @@ }; } + @Override @BeforeMethod void setup() { resetTmpDir(); } + @Override @AfterMethod void tearDown() { resetTmpDir(); } /** @@ -59,6 +61,7 @@ * @param numStringsToGenerate * @param maxRecordsInRam */ + @Override @Test(dataProvider = "diskBackedQueueProvider") public void testPositive(final String testName, final int numStringsToGenerate, final int maxRecordsInRam) { final String[] strings = new String[numStringsToGenerate]; diff --git a/src/test/java/htsjdk/samtools/util/EdgingRecordAndOffsetTest.java b/src/test/java/htsjdk/samtools/util/EdgingRecordAndOffsetTest.java index a4f6478b4..eeca090d7 100644 --- a/src/test/java/htsjdk/samtools/util/EdgingRecordAndOffsetTest.java +++ b/src/test/java/htsjdk/samtools/util/EdgingRecordAndOffsetTest.java @@ -24,6 +24,7 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import org.testng.annotations.BeforeTest; @@ -39,7 +40,7 @@ * */ -public class EdgingRecordAndOffsetTest { +public class EdgingRecordAndOffsetTest extends HtsjdkTest { private final byte[] qualities = {30, 50, 50, 60, 60, 70 ,70, 70, 80, 90}; private final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T', 'T', 'C'}; private SAMRecord record; diff --git 
a/src/test/java/htsjdk/samtools/util/HistogramTest.java b/src/test/java/htsjdk/samtools/util/HistogramTest.java index 62b1441ac..ef4446958 100644 --- a/src/test/java/htsjdk/samtools/util/HistogramTest.java +++ b/src/test/java/htsjdk/samtools/util/HistogramTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -12,7 +13,7 @@ import static java.lang.Math.abs; import static java.lang.StrictMath.pow; -public class HistogramTest { +public class HistogramTest extends HtsjdkTest { @Test(dataProvider = "histogramData") public void testHistogramFunctions(final int[] values, final double mean, final double stdev, final Integer trimByWidth) { diff --git a/src/test/java/htsjdk/samtools/util/IntervalListTest.java b/src/test/java/htsjdk/samtools/util/IntervalListTest.java index 6c5fcd43c..e138ee0e1 100644 --- a/src/test/java/htsjdk/samtools/util/IntervalListTest.java +++ b/src/test/java/htsjdk/samtools/util/IntervalListTest.java @@ -24,8 +24,8 @@ package htsjdk.samtools.util; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.HtsjdkTest; +import htsjdk.samtools.*; import htsjdk.variant.vcf.VCFFileReader; import org.testng.Assert; import org.testng.annotations.BeforeTest; @@ -33,6 +33,7 @@ import org.testng.annotations.Test; import java.io.File; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -45,7 +46,7 @@ /** * Tests the IntervalList class */ -public class IntervalListTest { +public class IntervalListTest extends HtsjdkTest { final SAMFileHeader fileHeader; final IntervalList list1, list2, list3; @@ -75,6 +76,15 @@ public IntervalListTest() { list3.add(new Interval("3", 50, 470)); } + @Test + public void testIntervalListFrom() { + final String testPath = 
"src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTestComp.interval_list"; + final IntervalList fromFileList = IntervalList.fromFile(new File(testPath)); + final IntervalList fromPathList = IntervalList.fromPath(Paths.get(testPath)); + fromFileList.getHeader().getSequenceDictionary().assertSameDictionary(fromPathList.getHeader().getSequenceDictionary()); + Assert.assertEquals(CollectionUtil.makeCollection(fromFileList.iterator()), CollectionUtil.makeCollection(fromPathList.iterator())); + } + @DataProvider(name = "intersectData") public Object[][] intersectData() { final IntervalList intersect123 = new IntervalList(fileHeader); @@ -364,12 +374,97 @@ public void testSubtractSingletonIntervalLists(final IntervalList fromLists, fin } @Test(dataProvider = "subtractSingletonData") - public void testSubtractSingletonasListIntervalList(final IntervalList fromLists, final IntervalList whatLists, final IntervalList list) { + public void testSubtractSingletonAsListIntervalList(final IntervalList fromLists, final IntervalList whatLists, final IntervalList list) { Assert.assertEquals( CollectionUtil.makeCollection(IntervalList.subtract(Collections.singletonList(fromLists), Collections.singletonList(whatLists)).iterator()), CollectionUtil.makeCollection(list.iterator())); } + @DataProvider(name = "overlapsSingletonData") + public Object[][] overlapSingletonData() { + final IntervalList two_overlaps_one = new IntervalList(fileHeader); + final IntervalList three_overlaps_two = new IntervalList(fileHeader); + final IntervalList three_overlaps_one = new IntervalList(fileHeader); + final IntervalList one_overlaps_three = new IntervalList(fileHeader); + + // NB: commented lines below are there to show the intervals in the first list that will not be in the resulting list + + two_overlaps_one.add(new Interval("1", 50, 150)); + //two_overlaps_one.add(new Interval("1", 301, 500)); + two_overlaps_one.add(new Interval("2", 1, 150)); + two_overlaps_one.add(new 
Interval("2", 250, 270)); + two_overlaps_one.add(new Interval("2", 290, 400)); + + three_overlaps_two.add(new Interval("1", 25, 400)); + three_overlaps_two.add(new Interval("2", 200, 600)); + //three_overlaps_two.add(new Interval("3", 50, 470)); + + three_overlaps_one.add(new Interval("1", 25, 400)); + three_overlaps_one.add(new Interval("2", 200, 600)); + //three_overlaps_one.add(new Interval("3", 50, 470)); + + one_overlaps_three.add(new Interval("1", 1, 100)); + one_overlaps_three.add(new Interval("1", 101, 200)); + one_overlaps_three.add(new Interval("1", 202, 300)); + one_overlaps_three.add(new Interval("2", 200, 300)); + //one_overlaps_three.add(new Interval("2", 100, 150)); + + return new Object[][]{ + new Object[]{list1, list1, list1}, // should return itself + new Object[]{list1, IntervalList.invert(list1), new IntervalList(list1.getHeader())}, // should be empty + new Object[]{list2, list1, two_overlaps_one}, + new Object[]{list3, list2, three_overlaps_two}, + new Object[]{list3, list1, three_overlaps_one}, + new Object[]{list1, list3, one_overlaps_three} + }; + } + + @DataProvider(name = "overlapsData") + public Object[][] overlapData() { + final IntervalList three_overlaps_one_and_two = new IntervalList(fileHeader); + + three_overlaps_one_and_two.add(new Interval("1", 25, 400)); + three_overlaps_one_and_two.add(new Interval("2", 200, 600)); + //three_overlaps_one_and_two.add(new Interval("3", 50, 470)); + + return new Object[][]{ + new Object[]{CollectionUtil.makeList(list3), CollectionUtil.makeList(list1, list2), three_overlaps_one_and_two}, + }; + } + + @Test(dataProvider = "overlapsData") + public void testOverlapsIntervalLists(final List fromLists, final List whatLists, final IntervalList list) { + Assert.assertEquals( + CollectionUtil.makeCollection(IntervalList.overlaps(fromLists, whatLists).iterator()), + CollectionUtil.makeCollection(list.iterator())); + } + + @Test(dataProvider = "overlapsSingletonData") + public void 
testOverlapsSingletonIntervalLists(final IntervalList fromLists, final IntervalList whatLists, final IntervalList list) { + Assert.assertEquals( + CollectionUtil.makeCollection(IntervalList.overlaps(fromLists, whatLists).iterator()), + CollectionUtil.makeCollection(list.iterator())); + } + + @Test(dataProvider = "overlapsSingletonData") + public void testOverlapsSingletonAsListIntervalList(final IntervalList fromLists, final IntervalList whatLists, final IntervalList list) { + Assert.assertEquals( + CollectionUtil.makeCollection(IntervalList.overlaps(Collections.singletonList(fromLists), Collections.singletonList(whatLists)).iterator()), + CollectionUtil.makeCollection(list.iterator())); + } + + @Test(expectedExceptions = SAMException.class) + public void testOverlapsEmptyFirstList() { + IntervalList.overlaps(Collections.emptyList(), Collections.singletonList(list1)); + } + + @Test + public void testOverlapsEmptySecondList() { + Assert.assertEquals( + CollectionUtil.makeCollection(IntervalList.overlaps(Collections.singletonList(list1), Collections.emptyList()).iterator()), + Collections.emptyList()); + } + @DataProvider(name = "VCFCompData") public Object[][] VCFCompData() { return new Object[][]{ @@ -517,4 +612,16 @@ public void changeHeader() { Assert.assertTrue(false); } + + @Test public void uniqueIntervalsWithoutNames() { + final IntervalList test = new IntervalList(this.fileHeader); + test.add(new Interval("1", 100, 200)); + test.add(new Interval("1", 500, 600)); + test.add(new Interval("1", 550, 700)); + + for (final boolean concat : new boolean[]{true, false}) { + final IntervalList unique = test.uniqued(concat); + Assert.assertEquals(unique.size(), 2); + } + } } diff --git a/src/test/java/htsjdk/samtools/util/IntervalTreeMapTest.java b/src/test/java/htsjdk/samtools/util/IntervalTreeMapTest.java index 2e725ff43..5e975f917 100644 --- a/src/test/java/htsjdk/samtools/util/IntervalTreeMapTest.java +++ 
b/src/test/java/htsjdk/samtools/util/IntervalTreeMapTest.java @@ -23,12 +23,13 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; import java.util.Iterator; -public class IntervalTreeMapTest { +public class IntervalTreeMapTest extends HtsjdkTest { @Test public void testBasic() { IntervalTreeMap m=new IntervalTreeMap(); @@ -37,8 +38,8 @@ public void testBasic() { m.put(chr1Interval, chr1Interval); Interval chr2Interval = new Interval("chr2", 1,200); m.put(chr2Interval, chr2Interval); - - + + Assert.assertTrue(m.containsContained(new Interval("chr1", 9,101))); Assert.assertTrue(m.containsOverlapping(new Interval("chr1", 50,150))); Assert.assertFalse(m.containsOverlapping(new Interval("chr3", 1,100))); diff --git a/src/test/java/htsjdk/samtools/util/IntervalTreeTest.java b/src/test/java/htsjdk/samtools/util/IntervalTreeTest.java index 50d84c0b9..dcd225ec0 100644 --- a/src/test/java/htsjdk/samtools/util/IntervalTreeTest.java +++ b/src/test/java/htsjdk/samtools/util/IntervalTreeTest.java @@ -23,15 +23,21 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.Iterator; +import static htsjdk.samtools.util.IntervalTree.Node.HAS_OVERLAPPING_PART; + /** * @author alecw@broadinstitute.org */ -public class IntervalTreeTest { +@Test(singleThreaded=true) // to assure that the common resources aren't clobbered +public class IntervalTreeTest extends HtsjdkTest { @Test public void testNoMatches() { @@ -57,29 +63,147 @@ private int countElements(final Iterator> it) { return ret; } + private final IntervalTree intervalTree = new IntervalTree(); + + @BeforeMethod + public void init(){ //due to the destructive nature of removeMany test... 
+ intervalTree.clear(); + + // each interval has a "name:length" + intervalTree.put(1, 10, "foo1:10"); + intervalTree.put(2, 9, "foo2:8"); + intervalTree.put(3, 8, "foo3:6"); + intervalTree.put(4, 7, "foo4:4"); + intervalTree.put(5, 6, "foo5:2"); + intervalTree.put(1, 9, "foo6:9"); + } + + @Test + public void testLength(){ + + Iterator> iterator = intervalTree.iterator(); + Iterable> iterable = () -> iterator; + + for (IntervalTree.Node node : iterable) { + Assert.assertEquals(node.getLength(), Integer.parseInt(node.getValue().replaceAll(".*:", ""))); + } + } + + @DataProvider(name="adjacentIntervalsTestData") + public Object[][] adjacentIntervalsTestData() { + return new Object[][]{ + {1, 4, 5, 10, true}, + {1, 3, 5, 10, false}, + {1, 4, 6, 10, false}, + {1, 2, 6, 10, false}, + {1, 10, 6, 10, false}, + {1, 10, 11, 20, true}, + {1, 10, 11, 20, true}, + }; + } + + @Test(dataProvider = "adjacentIntervalsTestData") + public void testAdjacent(int start1, int end1, int start2, int end2, boolean areAdjacent){ + + final IntervalTree.Node node1 = new IntervalTree.Node<>(start1, end1, "one"); + final IntervalTree.Node node2 = new IntervalTree.Node<>(start2, end2, "two"); + + Assert.assertTrue(node1.isAdjacent(node2) == areAdjacent); + Assert.assertTrue(node2.isAdjacent(node1) == areAdjacent); + } + + + @Test + public void testRank() { + for (IntervalTree.Node node: intervalTree) { + Assert.assertEquals(intervalTree.findByIndex( + intervalTree.getIndex(node.getStart(), node.getEnd())), node); + } + } + + @Test + public void testIterator() { + + final IntervalTree.Node testNode = new IntervalTree.Node<>(3, 4, "foobar1"); + int count = 0; + Iterator> iterator = intervalTree.iterator(testNode.getStart(), testNode.getEnd()); + Iterable> iterable = () -> iterator; + for (IntervalTree.Node node : iterable) { + Assert.assertTrue(node.compare(testNode.getStart(), testNode.getEnd()) <= 0); + count++; + } + Assert.assertEquals(count, 3); // foobar3, foobar4, and foobar5 only. 
+ } + + @Test + public void testRemoveMany() { + Iterator> iterator = intervalTree.reverseIterator(); + Iterable> iterable = () -> iterator; + + for (IntervalTree.Node node : iterable) { + intervalTree.removeNode(node); + } + Assert.assertEquals(intervalTree.size(), 0); + } + + @Test + public void testRevIterator() { + + final IntervalTree.Node testNode = new IntervalTree.Node<>(3, 4, "foobar1"); + int count = 0; + Iterator> iterator = intervalTree.reverseIterator(testNode.getStart(), testNode.getEnd()); + Iterable> iterable = () -> iterator; + for (IntervalTree.Node node : iterable) { + Assert.assertTrue(node.compare(testNode.getStart(), testNode.getEnd()) >= 0); + count++; + } + Assert.assertEquals(count, 3); // foobar1, foobar2, and foobar6 + } + + + @Test + public void testOverlapIterator() { + + final IntervalTree.Node testNode = new IntervalTree.Node<>(3, 4, "foobar1"); + int count = 0; + Iterator> iterator = intervalTree.overlappers(testNode.getStart(), testNode.getEnd()); + Iterable> iterable = () -> iterator; + for (IntervalTree.Node node : iterable) { + Assert.assertTrue( (testNode.getRelationship(node) & HAS_OVERLAPPING_PART) != 0, String.format("%s with %s = %d", node.toString(), testNode.toString(), node.getRelationship(testNode))); + count++; + } + Assert.assertEquals(count, 5); // foobar1, foobar2, foobar3, foobar4, and foobar6 + } + + + @Test + public void testTotalRevIterator() { + + int count = 0; + Iterator> iterator = intervalTree.reverseIterator(); + Iterable> iterable = () -> iterator; + + for (IntervalTree.Node ignored : iterable) { + count++; + } + Assert.assertEquals(count, intervalTree.size()); // foobar1, foobar2, and foobar6 + } + @Test public void testMatches() { - final IntervalTree intervalTree = new IntervalTree(); - intervalTree.put(1, 10, "foo1"); - intervalTree.put(2, 9, "foo2"); - intervalTree.put(3, 8, "foo3"); - intervalTree.put(4, 7, "foo4"); - intervalTree.put(5, 6, "foo5"); - intervalTree.put(1, 9, "foo6"); - // Single match 
Assert.assertEquals(countElements(intervalTree.overlappers(10, 10)), 1, "Test single overlap"); - Assert.assertTrue(iteratorContains(intervalTree.overlappers(10, 10), "foo1"), "Test single overlap for correct overlapee"); + Assert.assertTrue(iteratorContains(intervalTree.overlappers(10, 10), "foo1:10"), "Test single overlap for correct overlapee"); // Multiple matches Assert.assertEquals(countElements(intervalTree.overlappers(7, 8)), 5, "Test multiple overlap"); - Assert.assertTrue(iteratorContains(intervalTree.overlappers(7, 8), "foo1"), "Test multiple overlap for correct overlapees"); - Assert.assertTrue(iteratorContains(intervalTree.overlappers(7, 8), "foo2"), "Test multiple overlap for correct overlapees"); - Assert.assertTrue(iteratorContains(intervalTree.overlappers(7, 8), "foo3"), "Test multiple overlap for correct overlapees"); - Assert.assertTrue(iteratorContains(intervalTree.overlappers(7, 8), "foo4"), "Test multiple overlap for correct overlapees"); - Assert.assertTrue(iteratorContains(intervalTree.overlappers(7, 8), "foo6"), "Test multiple overlap for correct overlapees"); - Assert.assertTrue(!iteratorContains(intervalTree.overlappers(7, 8), "foo5"), "Test multiple overlap for correct overlapees"); + Assert.assertTrue( iteratorContains(intervalTree.overlappers(7, 8), "foo1:10"), "Test multiple overlap for correct overlapees"); + Assert.assertTrue( iteratorContains(intervalTree.overlappers(7, 8), "foo2:8"), "Test multiple overlap for correct overlapees"); + Assert.assertTrue( iteratorContains(intervalTree.overlappers(7, 8), "foo3:6"), "Test multiple overlap for correct overlapees"); + Assert.assertTrue( iteratorContains(intervalTree.overlappers(7, 8), "foo4:4"), "Test multiple overlap for correct overlapees"); + Assert.assertTrue( iteratorContains(intervalTree.overlappers(7, 8), "foo6:9"), "Test multiple overlap for correct overlapees"); + Assert.assertTrue(!iteratorContains(intervalTree.overlappers(7, 8), "foo5:2"), "Test multiple overlap for correct 
overlapees"); } private boolean iteratorContains(final Iterator> nodeIterator, final String s) { @@ -184,4 +308,6 @@ public void testRemove() { Assert.assertEquals(intervalTree.remove(46402360, 46402594), "frob"); intervalTree.checkMaxEnds(); } + + } diff --git a/src/test/java/htsjdk/samtools/util/IoUtilTest.java b/src/test/java/htsjdk/samtools/util/IoUtilTest.java index 0e4cd7a1c..645d20d42 100644 --- a/src/test/java/htsjdk/samtools/util/IoUtilTest.java +++ b/src/test/java/htsjdk/samtools/util/IoUtilTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; @@ -41,7 +42,7 @@ import java.util.Arrays; import java.util.List; -public class IoUtilTest { +public class IoUtilTest extends HtsjdkTest { private static final File SLURP_TEST_FILE = new File("src/test/resources/htsjdk/samtools/io/slurptest.txt"); private static final File EMPTY_FILE = new File("src/test/resources/htsjdk/samtools/io/empty.txt"); diff --git a/src/test/java/htsjdk/samtools/util/Iso8601DateTest.java b/src/test/java/htsjdk/samtools/util/Iso8601DateTest.java index ce0ae08c1..93b9d6544 100644 --- a/src/test/java/htsjdk/samtools/util/Iso8601DateTest.java +++ b/src/test/java/htsjdk/samtools/util/Iso8601DateTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; @@ -31,7 +32,7 @@ /** * @author alecw@broadinstitute.org */ -public class Iso8601DateTest { +public class Iso8601DateTest extends HtsjdkTest { @Test public void testBasic() { final String dateStr = "2008-12-15"; diff --git a/src/test/java/htsjdk/samtools/util/IupacTest.java b/src/test/java/htsjdk/samtools/util/IupacTest.java index 64b78c003..86b0a410e 100644 --- a/src/test/java/htsjdk/samtools/util/IupacTest.java +++ b/src/test/java/htsjdk/samtools/util/IupacTest.java @@ -23,6 +23,7 @@ */ package 
htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import htsjdk.samtools.BamFileIoUtils; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileWriter; @@ -38,7 +39,7 @@ import java.io.File; import java.util.Arrays; -public class IupacTest { +public class IupacTest extends HtsjdkTest { @Test(dataProvider = "basicDataProvider") public void basic(final String tempFileExtension) throws Exception { final File outputFile = File.createTempFile("iupacTest.", tempFileExtension); diff --git a/src/test/java/htsjdk/samtools/util/LogTest.java b/src/test/java/htsjdk/samtools/util/LogTest.java new file mode 100644 index 000000000..a9b82b128 --- /dev/null +++ b/src/test/java/htsjdk/samtools/util/LogTest.java @@ -0,0 +1,41 @@ +package htsjdk.samtools.util; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.file.Files; +import java.util.List; + +public class LogTest extends HtsjdkTest { + + private final Log log = Log.getInstance(getClass()); + + @Test + public void testLogToFile() throws IOException { + final File logFile = File.createTempFile(getClass().getSimpleName(), ".tmp"); + logFile.deleteOnExit(); + + final Log.LogLevel originalLogLevel = Log.getGlobalLogLevel(); + final PrintStream originalStream = Log.getGlobalPrintStream(); + + try (final PrintStream stream = new PrintStream(new FileOutputStream(logFile.getPath(), true))) { + Log.setGlobalPrintStream(stream); + Log.setGlobalLogLevel(Log.LogLevel.DEBUG); + final String words = "Hello World"; + log.info(words); + final List list = Files.readAllLines(logFile.toPath()); + Assert.assertEquals(Log.getGlobalLogLevel(), Log.LogLevel.DEBUG); + Assert.assertEquals(list.size(), 1); + Assert.assertTrue(list.get(0).contains(words)); + } finally { + Log.setGlobalLogLevel(originalLogLevel); + 
Log.setGlobalPrintStream(originalStream); + } + } +} diff --git a/src/test/java/htsjdk/samtools/util/MergingIteratorTest.java b/src/test/java/htsjdk/samtools/util/MergingIteratorTest.java index d36bb6d3b..e5964acf7 100644 --- a/src/test/java/htsjdk/samtools/util/MergingIteratorTest.java +++ b/src/test/java/htsjdk/samtools/util/MergingIteratorTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; @@ -34,7 +35,7 @@ import java.util.LinkedList; import java.util.Queue; -public class MergingIteratorTest { +public class MergingIteratorTest extends HtsjdkTest { private static class QueueBackedIterator implements CloseableIterator { diff --git a/src/test/java/htsjdk/samtools/util/OverlapDetectorTest.java b/src/test/java/htsjdk/samtools/util/OverlapDetectorTest.java index ecde96560..d8adf2e2d 100644 --- a/src/test/java/htsjdk/samtools/util/OverlapDetectorTest.java +++ b/src/test/java/htsjdk/samtools/util/OverlapDetectorTest.java @@ -1,12 +1,13 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.*; -public class OverlapDetectorTest { +public class OverlapDetectorTest extends HtsjdkTest { @DataProvider(name="intervalsMultipleContigs") public Object[][] intervalsMultipleContigs(){ diff --git a/src/test/java/htsjdk/samtools/util/PositionalOutputStreamTest.java b/src/test/java/htsjdk/samtools/util/PositionalOutputStreamTest.java index 49de11d9c..939c74858 100644 --- a/src/test/java/htsjdk/samtools/util/PositionalOutputStreamTest.java +++ b/src/test/java/htsjdk/samtools/util/PositionalOutputStreamTest.java @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015 Daniel Gómez-Sánchez + * Copyright (c) 2015 Daniel Gomez-Sanchez * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated 
documentation files (the "Software"), to deal @@ -24,6 +24,7 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; @@ -33,7 +34,7 @@ /** * @author Daniel Gomez-Sanchez (magicDGS) */ -public class PositionalOutputStreamTest { +public class PositionalOutputStreamTest extends HtsjdkTest { @Test public void basicPositionTest() throws Exception { @@ -59,4 +60,4 @@ public void write(int b) throws IOException {} Assert.assertEquals(wrapped.getPosition(), position); } -} \ No newline at end of file +} diff --git a/src/test/java/htsjdk/samtools/util/QualityEncodingDetectorTest.java b/src/test/java/htsjdk/samtools/util/QualityEncodingDetectorTest.java index 9e014d7b7..071312d9b 100644 --- a/src/test/java/htsjdk/samtools/util/QualityEncodingDetectorTest.java +++ b/src/test/java/htsjdk/samtools/util/QualityEncodingDetectorTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMRecordSetBuilder; import htsjdk.samtools.SamReader; @@ -13,7 +14,7 @@ import java.util.Arrays; import java.util.List; -public class QualityEncodingDetectorTest { +public class QualityEncodingDetectorTest extends HtsjdkTest { private static class Testcase { private final File f; diff --git a/src/test/java/htsjdk/samtools/util/RelativeIso8601DateTest.java b/src/test/java/htsjdk/samtools/util/RelativeIso8601DateTest.java index e4e9ef993..0e0c9b265 100644 --- a/src/test/java/htsjdk/samtools/util/RelativeIso8601DateTest.java +++ b/src/test/java/htsjdk/samtools/util/RelativeIso8601DateTest.java @@ -1,5 +1,6 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; @@ -9,7 +10,7 @@ /** @author mccowan */ -public class RelativeIso8601DateTest { +public class RelativeIso8601DateTest extends HtsjdkTest { // 1 second resolution is ISO date private final static double DELTA_FOR_TIME = 1000; diff 
--git a/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java b/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java index 262b7c93f..5bcea40df 100644 --- a/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java +++ b/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java @@ -61,7 +61,7 @@ public void testBasicIterator() { int pos = startPosition; for (final SamLocusIterator.LocusInfo li : sli) { Assert.assertEquals(li.getPosition(), pos++); - Assert.assertEquals(li.getRecordAndPositions().size(), coverage); + Assert.assertEquals(li.getRecordAndOffsets().size(), coverage); Assert.assertEquals(li.size(), coverage); // make sure that we are not accumulating indels Assert.assertEquals(li.getDeletedInRecord().size(), 0); @@ -86,7 +86,7 @@ public void testMissingQualityString() { int pos = 165; for (final SamLocusIterator.LocusInfo li : sli) { Assert.assertEquals(li.getPosition(), pos++); - Assert.assertEquals(li.getRecordAndPositions().size(), 2); + Assert.assertEquals(li.getRecordAndOffsets().size(), 2); Assert.assertEquals(li.size(), 2); } } @@ -123,7 +123,7 @@ public void testEmitUncoveredLoci() { } else { expectedReads = 0; } - Assert.assertEquals(li.getRecordAndPositions().size(), expectedReads); + Assert.assertEquals(li.getRecordAndOffsets().size(), expectedReads); Assert.assertEquals(li.size(), expectedReads); // make sure that we are not accumulating indels Assert.assertEquals(li.getDeletedInRecord().size(), 0); @@ -161,7 +161,7 @@ public void testQualityFilter() { // make sure we accumulated depth coverage for even positions, coverage/2 for odd positions int pos = startPosition; for (final SamLocusIterator.LocusInfo li : sli) { - Assert.assertEquals(li.getRecordAndPositions().size(), (pos % 2 == 0) ? coverage / 2 : coverage); + Assert.assertEquals(li.getRecordAndOffsets().size(), (pos % 2 == 0) ? coverage / 2 : coverage); Assert.assertEquals(li.size(), (pos % 2 == 0) ? 
coverage / 2 : coverage); Assert.assertEquals(li.getPosition(), pos++); // make sure that we are not accumulating indels @@ -200,7 +200,7 @@ public void testSimpleDeletion() { Assert.assertEquals(li.getPosition(), pos++); if (isDeletedPosition) { // make sure there are no reads without indels - Assert.assertEquals(li.getRecordAndPositions().size(), 0); + Assert.assertEquals(li.getRecordAndOffsets().size(), 0); Assert.assertEquals(li.size(), coverage); // should include deletions // make sure that we are accumulating indels @@ -208,7 +208,7 @@ public void testSimpleDeletion() { Assert.assertEquals(li.getInsertedInRecord().size(), 0); } else { // make sure we are accumulating normal coverage - Assert.assertEquals(li.getRecordAndPositions().size(), coverage); + Assert.assertEquals(li.getRecordAndOffsets().size(), coverage); Assert.assertEquals(li.size(), coverage); // make sure that we are not accumulating indels @@ -241,7 +241,7 @@ public void testSimpleInsertion() { for (final SamLocusIterator.LocusInfo li : sli) { Assert.assertEquals(li.getPosition(), pos++); // make sure we are accumulating normal coverage - Assert.assertEquals(li.getRecordAndPositions().size(), coverage); + Assert.assertEquals(li.getRecordAndOffsets().size(), coverage); Assert.assertEquals(li.size(), coverage); // make sure that we are not accumulating deletions @@ -278,7 +278,7 @@ public void testStartWithInsertion() { for (final SamLocusIterator.LocusInfo li : sli) { Assert.assertEquals(li.getPosition(), pos); // accumulation of coverage - Assert.assertEquals(li.getRecordAndPositions().size(), (indelPosition) ? 0 : coverage); + Assert.assertEquals(li.getRecordAndOffsets().size(), (indelPosition) ? 0 : coverage); Assert.assertEquals(li.size(), (indelPosition) ? 
0 : coverage); // no accumulation of deletions @@ -319,7 +319,7 @@ public void testStartWithSoftClipAndInsertion() { for (final SamLocusIterator.LocusInfo li : sli) { Assert.assertEquals(li.getPosition(), pos); // accumulation of coverage - Assert.assertEquals(li.getRecordAndPositions().size(), (indelPosition) ? 0 : coverage); + Assert.assertEquals(li.getRecordAndOffsets().size(), (indelPosition) ? 0 : coverage); Assert.assertEquals(li.size(), (indelPosition) ? 0 : coverage); // no accumulation of deletions Assert.assertEquals(li.getDeletedInRecord().size(), 0); @@ -364,7 +364,7 @@ public void testNBeforeInsertion() { } Assert.assertEquals(li.getPosition(), pos); // accumulation of coverage - Assert.assertEquals(li.getRecordAndPositions().size(), (pos == endN) ? 0 : coverage); + Assert.assertEquals(li.getRecordAndOffsets().size(), (pos == endN) ? 0 : coverage); Assert.assertEquals(li.size(), (pos == endN) ? 0 : coverage); // no accumulation of deletions Assert.assertEquals(li.getDeletedInRecord().size(), 0); @@ -416,7 +416,7 @@ public void testNBeforeDeletion() { final boolean insideDeletion = incIndels && (pos >= startDel && pos <= endDel); Assert.assertEquals(li.getPosition(), pos); // accumulation of coverage - Assert.assertEquals(li.getRecordAndPositions().size(), (insideDeletion) ? 0 : coverage); + Assert.assertEquals(li.getRecordAndOffsets().size(), (insideDeletion) ? 0 : coverage); Assert.assertEquals(li.size(), coverage); // either will be all deletions, or all non-deletions, but always of size `coverage`. // accumulation of deletions Assert.assertEquals(li.getDeletedInRecord().size(), (insideDeletion) ? 
coverage : 0); @@ -506,18 +506,18 @@ public void testSimpleGappedAlignment() { if (inDelRange) { // check the coverage for insertion and normal records Assert.assertEquals(li.getDeletedInRecord().size(), coverage); - Assert.assertEquals(li.getRecordAndPositions().size(), 0); + Assert.assertEquals(li.getRecordAndOffsets().size(), 0); Assert.assertEquals(li.size(), coverage); // includes deletions // check the offset for the deletion Assert.assertEquals(li.getDeletedInRecord().get(0).getOffset(), expectedReadOffsets[i]); Assert.assertEquals(li.getDeletedInRecord().get(1).getOffset(), expectedReadOffsets[i]); } else { // if it is not a deletion, perform the same test as before - Assert.assertEquals(li.getRecordAndPositions().size(), coverage); + Assert.assertEquals(li.getRecordAndOffsets().size(), coverage); Assert.assertEquals(li.size(), coverage); // Assert.assertEquals(li.getDeletedInRecord().size(), 0); - Assert.assertEquals(li.getRecordAndPositions().get(0).getOffset(), expectedReadOffsets[i]); - Assert.assertEquals(li.getRecordAndPositions().get(1).getOffset(), expectedReadOffsets[i]); + Assert.assertEquals(li.getRecordAndOffsets().get(0).getOffset(), expectedReadOffsets[i]); + Assert.assertEquals(li.getRecordAndOffsets().get(1).getOffset(), expectedReadOffsets[i]); } ++i; } @@ -576,12 +576,12 @@ public void testOverlappingGappedAlignmentsWithoutIndels() { i = 0; for (final SamLocusIterator.LocusInfo li : sli) { - Assert.assertEquals(li.getRecordAndPositions().size(), expectedDepths[i]); + Assert.assertEquals(li.getRecordAndOffsets().size(), expectedDepths[i]); Assert.assertEquals(li.size(), expectedDepths[i]); Assert.assertEquals(li.getPosition(), expectedReferencePositions[i]); - Assert.assertEquals(li.getRecordAndPositions().size(), expectedReadOffsets[i].length); + Assert.assertEquals(li.getRecordAndOffsets().size(), expectedReadOffsets[i].length); for (int j = 0; j < expectedReadOffsets[i].length; ++j) { - 
Assert.assertEquals(li.getRecordAndPositions().get(j).getOffset(), expectedReadOffsets[i][j]); + Assert.assertEquals(li.getRecordAndOffsets().get(j).getOffset(), expectedReadOffsets[i][j]); } // make sure that we are not accumulating indels Assert.assertEquals(li.getDeletedInRecord().size(), 0); @@ -652,12 +652,12 @@ public void testOverlappingGappedAlignmentsWithIndels() { i = 0; for (final SamLocusIterator.LocusInfo li : sli) { // checking the same as without indels - Assert.assertEquals(li.getRecordAndPositions().size(), expectedDepths[i]); + Assert.assertEquals(li.getRecordAndOffsets().size(), expectedDepths[i]); Assert.assertEquals(li.size(), expectedDepths[i] + expectedDelDepths[i]); // include deletions Assert.assertEquals(li.getPosition(), expectedReferencePositions[i]); - Assert.assertEquals(li.getRecordAndPositions().size(), expectedReadOffsets[i].length); + Assert.assertEquals(li.getRecordAndOffsets().size(), expectedReadOffsets[i].length); for (int j = 0; j < expectedReadOffsets[i].length; ++j) { - Assert.assertEquals(li.getRecordAndPositions().get(j).getOffset(), expectedReadOffsets[i][j]); + Assert.assertEquals(li.getRecordAndOffsets().get(j).getOffset(), expectedReadOffsets[i][j]); } // check the deletions Assert.assertEquals(li.getDeletedInRecord().size(), expectedDelDepths[i]); @@ -670,4 +670,4 @@ public void testOverlappingGappedAlignmentsWithIndels() { } } -} \ No newline at end of file +} diff --git a/src/test/java/htsjdk/samtools/util/SequenceUtilTest.java b/src/test/java/htsjdk/samtools/util/SequenceUtilTest.java index 008cca507..81a949094 100644 --- a/src/test/java/htsjdk/samtools/util/SequenceUtilTest.java +++ b/src/test/java/htsjdk/samtools/util/SequenceUtilTest.java @@ -23,8 +23,8 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import htsjdk.samtools.*; -import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.reference.ReferenceSequenceFileFactory; 
import org.testng.Assert; @@ -32,14 +32,12 @@ import org.testng.annotations.Test; import java.io.File; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; +import java.util.*; /** * @author alecw@broadinstitute.org */ -public class SequenceUtilTest { +public class SequenceUtilTest extends HtsjdkTest { private static final String HEADER = "@HD\tVN:1.0\tSO:unsorted\n"; private static final String SEQUENCE_NAME= "@SQ\tSN:phix174.seq\tLN:5386\tUR:/seq/references/PhiX174/v0/PhiX174.fasta\tAS:PhiX174\tM5:3332ed720ac7eaa9b3655c06f6b9e196"; @@ -144,6 +142,11 @@ public void testCountMismatches(final String readString, final String cigar, fin final SAMRecord rec = new SAMRecord(null); rec.setReadName("test"); rec.setReadString(readString); + final byte[] byteArray = new byte[readString.length()]; + + Arrays.fill(byteArray, (byte)33); + + rec.setBaseQualities(byteArray); rec.setCigarString(cigar); final byte[] refBases = StringUtil.stringToBytes(reference); @@ -151,6 +154,9 @@ public void testCountMismatches(final String readString, final String cigar, fin final int nExact = SequenceUtil.countMismatches(rec, refBases, -1, false, false); Assert.assertEquals(nExact, expectedMismatchesExact); + final int sumMismatchesQualityExact = SequenceUtil.sumQualitiesOfMismatches(rec, refBases, -1, false); + Assert.assertEquals(sumMismatchesQualityExact, expectedMismatchesExact * 33); + final int nAmbiguous = SequenceUtil.countMismatches(rec, refBases, -1, false, true); Assert.assertEquals(nAmbiguous, expectedMismatchesAmbiguous); } @@ -175,6 +181,58 @@ public void testCountMismatches(final String readString, final String cigar, fin }; } + @DataProvider(name="mismatchBisulfiteCountsDataProvider") + public Object[][] mismatchBisulfiteCountsDataProvider() { + + List tests = new ArrayList<>(); + final List bases = Arrays.asList("A","C","T","G"); + + for (final String base : bases) { + for (final String ref : bases) { + for (final Boolean strand : Arrays.asList(true, 
false)) { + + final Integer count; + + if (base.equals(ref)) count = 0; + else if (base.equals("A") && ref.equals("G") && !strand) count = 0; + else if (base.equals("T") && ref.equals("C") && strand) count = 0; + else count = 1; + + tests.add(new Object[]{base, "1M", ref, strand, count}); + + } + } + } + return tests.toArray(new Object[1][]); + } + + + @Test(dataProvider = "mismatchBisulfiteCountsDataProvider") + public void testMismatchBisulfiteCounts(final String readString, final String cigar, final String reference, + final boolean positiveStrand, final int expectedMismatches) { + + final byte baseQuality = 30; + final SAMRecord rec = new SAMRecord(null); + rec.setReadName("test"); + rec.setReadString(readString); + rec.setReadNegativeStrandFlag(!positiveStrand); + final byte[] byteArray = new byte[readString.length()]; + + Arrays.fill(byteArray,baseQuality); + + rec.setBaseQualities(byteArray); + rec.setCigarString(cigar); + + final byte[] refBases = StringUtil.stringToBytes(reference); + + final int nExact = SequenceUtil.countMismatches(rec, refBases, -1, true, false); + Assert.assertEquals(nExact, expectedMismatches); + + final int sumMismatchesQualityExact = SequenceUtil.sumQualitiesOfMismatches(rec, refBases, -1, true); + Assert.assertEquals(sumMismatchesQualityExact, expectedMismatches * baseQuality); + + } + @Test(dataProvider = "countInsertedAndDeletedBasesTestCases") public void testCountInsertedAndDeletedBases(final String cigarString, final int insertedBases, final int deletedBases) { final Cigar cigar = TextCigarCodec.decode(cigarString); @@ -204,11 +262,11 @@ public void testCountInsertedAndDeletedBases(final String cigarString, final int @Test(dataProvider = "testKmerGenerationTestCases") public void testKmerGeneration(final int length, final String[] expectedKmers) { - final Set actualSet = new HashSet(); + final Set actualSet = new HashSet<>(); for (final byte[] kmer : SequenceUtil.generateAllKmers(length)) { 
actualSet.add(StringUtil.bytesToString(kmer)); } - final Set expectedSet = new HashSet(Arrays.asList(expectedKmers)); + final Set expectedSet = new HashSet<>(Arrays.asList(expectedKmers)); Assert.assertTrue(actualSet.equals(expectedSet)); } @@ -450,4 +508,115 @@ public void testCalculateNmTag() { } }); } + + @DataProvider(name = "testNmFromCigarProvider") + Object[][] testNmFromCigar() { + return new Object[][]{ + {"1M", 0}, + {"1S1D", 1}, + {"1H3X", 3}, + {"1H5=3M2X", 2}, + {"5P5M", 0}, + {"5S8I", 8} + }; + } + + @Test(dataProvider = "testNmFromCigarProvider") + public void testNmTagFromCigar(final String cigarString, final int expectedNmValue) { + final SAMRecord rec = new SAMRecord(null); + rec.setReadName("test"); + rec.setCigarString(cigarString); + + Assert.assertEquals(SequenceUtil.calculateSamNmTagFromCigar(rec),expectedNmValue); + } + + @Test + public void testReverseComplement() { + Assert.assertEquals(SequenceUtil.reverseComplement("ABCDEFGHIJKLMNOPQRSTUVWXYZ"),"ZYXWVUASRQPONMLKJIHCFEDGBT"); + Assert.assertEquals(SequenceUtil.reverseComplement("abcdefghijklmnopqrstuvwxy"),"yxwvuasrqponmlkjihcfedgbt"); //missing "z" on purpose so that we test both even-lengthed and odd-lengthed strings + } + + @Test + public void testUpperCase() { + Assert.assertEquals(SequenceUtil.upperCase(StringUtil.stringToBytes("ABCDEFGHIJKLMNOPQRSTUVWXYZ")), StringUtil.stringToBytes("ABCDEFGHIJKLMNOPQRSTUVWXYZ")); + Assert.assertEquals(SequenceUtil.upperCase(StringUtil.stringToBytes("abcdefghijklmnopqrstuvwxyz")), StringUtil.stringToBytes("ABCDEFGHIJKLMNOPQRSTUVWXYZ")); + Assert.assertEquals(SequenceUtil.upperCase(StringUtil.stringToBytes("1234567890!@#$%^&*()")), StringUtil.stringToBytes("1234567890!@#$%^&*()")); + } + + @Test + public void testReverseQualities() { + + final byte[] qualities1 = new byte[] {10, 20, 30, 40}; + SequenceUtil.reverseQualities(qualities1); + assertEquals(qualities1, new byte[] {40, 30, 20, 10}); + + final byte[] qualities2 = {10, 20, 30}; + 
SequenceUtil.reverseQualities(qualities2); + assertEquals(qualities2, new byte[]{30, 20, 10}); + } + + private void assertEquals(final byte[] actual, final byte[] expected) { + Assert.assertEquals(actual.length, expected.length, "Arrays do not have equal lengths"); + + for (int i = 0; i < actual.length; ++i) { + Assert.assertEquals(actual[i], expected[i], "Array differ at position " + i); + } + } + + @Test + public void testIsACGTN() { + for (byte base = Byte.MIN_VALUE; base < Byte.MAX_VALUE; base++) { + if (base == 'A' || base == 'C' || base == 'G' || base == 'T' || base == 'N') { + Assert.assertTrue(SequenceUtil.isUpperACGTN(base)); + } else { + Assert.assertFalse(SequenceUtil.isUpperACGTN(base)); + } + } + } + + @Test + public void testIsIUPAC() { + final String iupacString = ".aAbBcCdDgGhHkKmMnNrRsStTvVwWyY"; + for (byte code=0; code iterator() { return this; } + @Override public boolean hasNext() { return numElementsGenerated < numElementsToGenerate; } + @Override public String next() { ++numElementsGenerated; return Integer.toString(random.nextInt()); } + @Override public void remove() { throw new UnsupportedOperationException(); } @@ -150,6 +155,7 @@ public void remove() { static class StringComparator implements Comparator { + @Override public int compare(final String s, final String s1) { return s.compareTo(s1); } @@ -160,6 +166,7 @@ public int compare(final String s, final String s1) { OutputStream os; InputStream is; + @Override public SortingCollection.Codec clone() { return new StringCodec(); } @@ -169,6 +176,7 @@ public int compare(final String s, final String s1) { * * @param os */ + @Override public void setOutputStream(final OutputStream os) { this.os = os; } @@ -178,6 +186,7 @@ public void setOutputStream(final OutputStream os) { * * @param is */ + @Override public void setInputStream(final InputStream is) { this.is = is; } @@ -187,6 +196,7 @@ public void setInputStream(final InputStream is) { * * @param val what to write */ + @Override public 
void encode(final String val) { try { byteBuffer.clear(); @@ -204,6 +214,7 @@ public void encode(final String val) { * @return null if no more records. Should throw exception if EOF is encountered in the middle of * a record. */ + @Override public String decode() { try { byteBuffer.clear(); diff --git a/src/test/java/htsjdk/samtools/util/SortingLongCollectionTest.java b/src/test/java/htsjdk/samtools/util/SortingLongCollectionTest.java index 4817ef5b1..bcfa77e9c 100644 --- a/src/test/java/htsjdk/samtools/util/SortingLongCollectionTest.java +++ b/src/test/java/htsjdk/samtools/util/SortingLongCollectionTest.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.AfterTest; import org.testng.annotations.BeforeTest; @@ -36,7 +37,7 @@ /** * @author alecw@broadinstitute.org */ -public class SortingLongCollectionTest { +public class SortingLongCollectionTest extends HtsjdkTest { // Create a separate directory for files so it is possible to confirm that the directory is emptied private final File tmpDir = new File(System.getProperty("java.io.tmpdir") + "/" + System.getProperty("user.name"), "SortingCollectionTest"); diff --git a/src/test/java/htsjdk/samtools/util/StringLineReaderTest.java b/src/test/java/htsjdk/samtools/util/StringLineReaderTest.java index 9919f891b..f90565024 100644 --- a/src/test/java/htsjdk/samtools/util/StringLineReaderTest.java +++ b/src/test/java/htsjdk/samtools/util/StringLineReaderTest.java @@ -23,10 +23,11 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; -public class StringLineReaderTest { +public class StringLineReaderTest extends HtsjdkTest { private static final String[] TERMINATORS = {"\r", "\n", "\r\n"}; private static final boolean[] LAST_LINE_TERMINATED = {false, true}; diff --git a/src/test/java/htsjdk/samtools/util/StringUtilTest.java 
b/src/test/java/htsjdk/samtools/util/StringUtilTest.java deleted file mode 100644 index dbb2a0709..000000000 --- a/src/test/java/htsjdk/samtools/util/StringUtilTest.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -package htsjdk.samtools.util; - -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -/** - * @author alecw@broadinstitute.org - */ -public class StringUtilTest { - @Test(dataProvider = "provider") - public void testSplit(final String input, final String[] expectedResult, final boolean concatenateExcess) { - String[] ret = new String[expectedResult.length]; - int tokensExpected; - for (tokensExpected = 0; tokensExpected < expectedResult.length && expectedResult[tokensExpected] != null; - ++tokensExpected) { - } - final int tokensFound; - if (concatenateExcess) { - tokensFound = StringUtil.splitConcatenateExcessTokens(input, ret, ':'); - } else { - tokensFound = StringUtil.split(input, ret, ':'); - } - Assert.assertEquals(tokensFound, tokensExpected); - Assert.assertEquals(ret, expectedResult); - } - - @DataProvider(name="provider") - public Object[][] splitScenarios() { - return new Object[][] { - {"A:BB:C", new String[]{"A", "BB", "C"}, false}, - {"A:BB:C", new String[]{"A", "BB", "C"}, true}, - {"A:BB", new String[]{"A", "BB", null}, false}, - {"A:BB", new String[]{"A", "BB", null}, true}, - {"A:BB:", new String[]{"A", "BB", null}, false}, - {"A:BB:", new String[]{"A", "BB", null}, true}, - {"A:BB:C:DDD", new String[]{"A", "BB", "C"}, false}, - {"A:BB:C:DDD", new String[]{"A", "BB", "C:DDD"}, true}, - {"A:", new String[]{"A", null, null}, false}, - {"A:", new String[]{"A", null, null}, true}, - {"A", new String[]{"A", null, null}, false}, - {"A", new String[]{"A", null, null}, true}, - {"A:BB:C", new String[]{"A", "BB", "C"}, false}, - {"A:BB:C:", new String[]{"A", "BB", "C:"}, true}, - }; - } - - @DataProvider(name="withinHammingDistanceProvider") - public Object[][] isWithinHammingDistanceProvider() { - return new Object[][] { - {"ATAC", "GCAT", 3, true}, - {"ATAC", "GCAT", 2, false}, - {"ATAC", "GCAT", 1, false}, - {"ATAC", "GCAT", 0, false} - }; - } - - @Test(dataProvider = 
"withinHammingDistanceProvider") - public void testIsWithinHammingDistance(final String s1, final String s2, final int maxHammingDistance, final boolean expectedResult) { - Assert.assertEquals(StringUtil.isWithinHammingDistance(s1, s2, maxHammingDistance), expectedResult); - } - - @DataProvider(name="withinHammingDistanceExceptionProvider") - public Object[][] isWithinHammingDistanceException() { - return new Object[][] { - {"ATAC", "GCT" , 3}, - {"ATAC", "AT" , 2}, - {"ATAC", "T" , 1}, - {"" , "GCAT", 0} - }; - } - - @Test(dataProvider = "withinHammingDistanceExceptionProvider", expectedExceptions = IllegalArgumentException.class) - public void testIsWithinHammingDistanceExceptions(final String s1, final String s2, final int maxHammingDistance) { - StringUtil.isWithinHammingDistance(s1, s2, maxHammingDistance); - } - - @Test(dataProvider = "withinHammingDistanceExceptionProvider", expectedExceptions = IllegalArgumentException.class) - public void testHammingDistanceExceptions(final String s1, final String s2, final int maxHammingDistance) { - StringUtil.hammingDistance(s1, s2); - } - - @DataProvider(name="hammingDistanceProvider") - public Object[][] hammingDistance() { - return new Object[][] { - {"ATAC" , "GCAT" , 3}, - {"ATAGC", "ATAGC", 0}, - {"ATAC" , "atac" , 4}, // Hamming distance is case sensitive. - {"" , "" , 0}, // Two empty strings should have Hamming distance of 0. - {"nAGTN", "nAGTN", 0} // Ensure that matching Ns are not counted as mismatches. 
- }; - } - - @Test(dataProvider = "hammingDistanceProvider") - public void testHammingDistance(final String s1, final String s2, final int expectedResult) { - Assert.assertEquals(StringUtil.hammingDistance(s1, s2), expectedResult); - } - -} diff --git a/src/test/java/htsjdk/samtools/util/TrimmingUtilTest.java b/src/test/java/htsjdk/samtools/util/TrimmingUtilTest.java index 12cffc671..811083976 100644 --- a/src/test/java/htsjdk/samtools/util/TrimmingUtilTest.java +++ b/src/test/java/htsjdk/samtools/util/TrimmingUtilTest.java @@ -23,13 +23,14 @@ */ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; /** * Tests for a simple phred-style quality trimming algorithm. */ -public class TrimmingUtilTest { +public class TrimmingUtilTest extends HtsjdkTest { @Test public void testEasyCases() { Assert.assertEquals(TrimmingUtil.findQualityTrimPoint(byteArray(30,30,30,30,30, 2, 2, 2, 2, 2), 15), 5); diff --git a/src/test/java/htsjdk/samtools/util/TupleTest.java b/src/test/java/htsjdk/samtools/util/TupleTest.java index bed4550f1..431466ddf 100644 --- a/src/test/java/htsjdk/samtools/util/TupleTest.java +++ b/src/test/java/htsjdk/samtools/util/TupleTest.java @@ -1,12 +1,13 @@ package htsjdk.samtools.util; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.Test; /** * Created by farjoun on 1/29/16. 
*/ -public class TupleTest { +public class TupleTest extends HtsjdkTest { @Test public void testEquals() throws Exception { @@ -59,4 +60,4 @@ public void testToString() throws Exception { Assert.assertEquals(new Tuple<>(null, null).toString(), "[null, null]"); } -} \ No newline at end of file +} diff --git a/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java b/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java index 6d65e9dfc..947d319fe 100644 --- a/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java @@ -1,5 +1,9 @@ package htsjdk.tribble; +import com.google.common.jimfs.Configuration; +import com.google.common.jimfs.Jimfs; +import htsjdk.HtsjdkTest; +import htsjdk.samtools.FileTruncatedException; import htsjdk.samtools.util.TestUtil; import htsjdk.tribble.bed.BEDCodec; import htsjdk.tribble.bed.BEDFeature; @@ -15,6 +19,10 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.*; +import java.util.function.Function; import static org.testng.Assert.*; @@ -22,11 +30,25 @@ * @author jacob * @date 2013-Apr-10 */ -public class AbstractFeatureReaderTest { +public class AbstractFeatureReaderTest extends HtsjdkTest { final static String HTTP_INDEXED_VCF_PATH = TestUtil.BASE_URL_FOR_HTTP_TESTS + "ex2.vcf"; final static String LOCAL_MIRROR_HTTP_INDEXED_VCF_PATH = VariantBaseTest.variantTestDataRoot + "ex2.vcf"; + //the "mangled" versions of the files have an extra byte added to the front of the file that makes them invalid + private static final String TEST_PATH = "src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/"; + private static final String MANGLED_VCF = TEST_PATH + "mangledBaseVariants.vcf"; + private static final String MANGLED_VCF_INDEX = TEST_PATH + "mangledBaseVariants.vcf.idx"; + private static final String VCF = TEST_PATH + 
"baseVariants.vcf"; + private static final String VCF_INDEX = TEST_PATH + "baseVariants.vcf.idx"; + private static final String VCF_TABIX_BLOCK_GZIPPED = TEST_PATH + "baseVariants.vcf.gz"; + private static final String VCF_TABIX_INDEX = TEST_PATH + "baseVariants.vcf.gz.tbi"; + private static final String MANGLED_VCF_TABIX_BLOCK_GZIPPED = TEST_PATH + "baseVariants.mangled.vcf.gz"; + private static final String MANGLED_VCF_TABIX_INDEX = TEST_PATH + "baseVariants.mangled.vcf.gz.tbi"; + + //wrapper which skips the first byte of a file and leaves the rest unchanged + private static final Function WRAPPER = SkippingByteChannel::new; + /** * Asserts readability and correctness of VCF over HTTP. The VCF is indexed and requires and index. */ @@ -65,12 +87,12 @@ public void testLoadBEDFTP() throws Exception { }; } - @Test(enabled = true, dataProvider = "blockCompressedExtensionExtensionStrings") + @Test(dataProvider = "blockCompressedExtensionExtensionStrings") public void testBlockCompressionExtensionString(final String testString, final boolean expected) { Assert.assertEquals(AbstractFeatureReader.hasBlockCompressedExtension(testString), expected); } - @Test(enabled = true, dataProvider = "blockCompressedExtensionExtensionStrings") + @Test(dataProvider = "blockCompressedExtensionExtensionStrings") public void testBlockCompressionExtensionFile(final String testString, final boolean expected) { Assert.assertEquals(AbstractFeatureReader.hasBlockCompressedExtension(new File(testString)), expected); } @@ -103,10 +125,147 @@ public void testBlockCompressionExtensionFile(final String testString, final boo }; } - @Test(enabled = true, dataProvider = "blockCompressedExtensionExtensionURIStrings") + @Test(dataProvider = "blockCompressedExtensionExtensionURIStrings") public void testBlockCompressionExtension(final String testURIString, final boolean expected) throws URISyntaxException { URI testURI = URI.create(testURIString); 
Assert.assertEquals(AbstractFeatureReader.hasBlockCompressedExtension(testURI), expected); } + + @DataProvider(name = "vcfFileAndWrapperCombinations") + private static Object[][] vcfFileAndWrapperCombinations(){ + return new Object[][] { + {VCF, VCF_INDEX, null, null}, + {MANGLED_VCF, MANGLED_VCF_INDEX, WRAPPER, WRAPPER}, + {VCF, MANGLED_VCF_INDEX, null, WRAPPER}, + {MANGLED_VCF, VCF_INDEX, WRAPPER, null}, + {MANGLED_VCF_TABIX_BLOCK_GZIPPED, MANGLED_VCF_TABIX_INDEX, WRAPPER, WRAPPER}, + {VCF_TABIX_BLOCK_GZIPPED, MANGLED_VCF_TABIX_INDEX, null, WRAPPER}, + {MANGLED_VCF_TABIX_BLOCK_GZIPPED, VCF_TABIX_INDEX, WRAPPER, null}, + {VCF_TABIX_BLOCK_GZIPPED, VCF_TABIX_INDEX, null, null}, + }; + } + + @Test(dataProvider = "vcfFileAndWrapperCombinations") + public void testGetFeatureReaderWithPathAndWrappers(String file, String index, + Function wrapper, + Function indexWrapper) throws IOException, URISyntaxException { + try(FileSystem fs = Jimfs.newFileSystem("test", Configuration.unix()); + final AbstractFeatureReader featureReader = getFeatureReader(file, index, wrapper, + indexWrapper, + new VCFCodec(), + fs)){ + Assert.assertTrue(featureReader.hasIndex()); + Assert.assertEquals(featureReader.iterator().toList().size(), 26); + Assert.assertEquals(featureReader.query("1", 190, 210).toList().size(), 3); + Assert.assertEquals(featureReader.query("2", 190, 210).toList().size(), 1); + } + } + + @DataProvider(name = "failsWithoutWrappers") + private static Object[][] failsWithoutWrappers(){ + return new Object[][] { + {MANGLED_VCF, MANGLED_VCF_INDEX}, + {VCF, MANGLED_VCF_INDEX}, + {MANGLED_VCF, VCF_INDEX}, + {MANGLED_VCF_TABIX_BLOCK_GZIPPED, MANGLED_VCF_TABIX_INDEX}, + {VCF_TABIX_BLOCK_GZIPPED, MANGLED_VCF_TABIX_INDEX}, + {MANGLED_VCF_TABIX_BLOCK_GZIPPED, VCF_TABIX_INDEX}, + }; + } + + @Test(dataProvider = "failsWithoutWrappers", expectedExceptions = {TribbleException.class, FileTruncatedException.class}) + public void testFailureIfNoWrapper(String file, String index) throws 
IOException, URISyntaxException { + try(final FileSystem fs = Jimfs.newFileSystem("test", Configuration.unix()); + final FeatureReader reader = getFeatureReader(file, index, null, null, new VCFCodec(), fs)){ + // should have exploded by now + } + } + + private static AbstractFeatureReader getFeatureReader(String vcf, String index, + Function wrapper, + Function indexWrapper, + FeatureCodec codec, + FileSystem fileSystem) throws IOException, URISyntaxException { + final Path vcfInJimfs = getTribbleFileInJimfs(vcf, index, fileSystem); + return AbstractFeatureReader.getFeatureReader( + vcfInJimfs.toUri().toString(), + null, + codec, + true, + wrapper, + indexWrapper); + } + + /** + * skip the first byte of a SeekableByteChannel + */ + private static class SkippingByteChannel implements SeekableByteChannel{ + private final int toSkip; + private final SeekableByteChannel input; + + private SkippingByteChannel(SeekableByteChannel input) { + this.toSkip = 1; + try { + this.input = input; + input.position(toSkip); + } catch (final IOException e){ + throw new RuntimeException(e); + } + } + + @Override + public boolean isOpen() { + return input.isOpen(); + } + + @Override + public void close() throws IOException { + input.close(); + } + + @Override + public int read(ByteBuffer dst) throws IOException { + return input.read(dst); + } + + @Override + public int write(ByteBuffer src) throws IOException { + throw new UnsupportedOperationException("Read only"); + } + + @Override + public long position() throws IOException { + return input.position() - toSkip; + } + + @Override + public SeekableByteChannel position(long newPosition) throws IOException { + if (newPosition < 0 ){ + throw new RuntimeException("negative position not allowed"); + } + return input.position( newPosition + toSkip); + } + + @Override + public long size() throws IOException { + return input.size() - toSkip; + } + + @Override + public SeekableByteChannel truncate(long size) throws IOException { + return 
input.truncate(size + toSkip); + } + }; + + private static Path getTribbleFileInJimfs(String vcf, String index, FileSystem fileSystem) throws IOException, URISyntaxException { + final FileSystem fs = fileSystem; + final Path root = fs.getPath("/"); + final Path vcfPath = Paths.get(vcf); + final Path idxPath = Paths.get(index); + final Path idxDestination = Paths.get(AbstractFeatureReader.isTabix(vcf, index) ? Tribble.tabixIndexFile(vcf) : Tribble.indexFile(vcf)); + Files.copy(idxPath, root.resolve(idxDestination.getFileName().toString())); + return Files.copy(vcfPath, root.resolve(vcfPath.getFileName().toString())); + } + } diff --git a/src/test/java/htsjdk/tribble/BinaryFeaturesTest.java b/src/test/java/htsjdk/tribble/BinaryFeaturesTest.java index eff8939d8..eac19742a 100644 --- a/src/test/java/htsjdk/tribble/BinaryFeaturesTest.java +++ b/src/test/java/htsjdk/tribble/BinaryFeaturesTest.java @@ -1,5 +1,6 @@ package htsjdk.tribble; +import htsjdk.HtsjdkTest; import htsjdk.tribble.bed.BEDCodec; import htsjdk.tribble.example.ExampleBinaryCodec; import htsjdk.tribble.readers.LineIterator; @@ -13,7 +14,7 @@ import java.util.List; -public class BinaryFeaturesTest { +public class BinaryFeaturesTest extends HtsjdkTest { @DataProvider(name = "BinaryFeatureSources") public Object[][] createData1() { return new Object[][] { diff --git a/src/test/java/htsjdk/tribble/FeatureReaderTest.java b/src/test/java/htsjdk/tribble/FeatureReaderTest.java index d62693c19..f43b5b15d 100644 --- a/src/test/java/htsjdk/tribble/FeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/FeatureReaderTest.java @@ -1,5 +1,6 @@ package htsjdk.tribble; +import htsjdk.HtsjdkTest; import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.LocationAware; @@ -22,7 +23,7 @@ import java.util.List; -public class FeatureReaderTest { +public class FeatureReaderTest extends HtsjdkTest { private final static File asciiBedFile = new 
File(TestUtils.DATA_DIR + "test.bed"); private File binaryBedFile; private final static File tabixBedFile = new File(TestUtils.DATA_DIR + "test.tabix.bed.gz"); diff --git a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java index afdd827e6..37a5295dc 100644 --- a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java @@ -1,7 +1,7 @@ package htsjdk.tribble; +import htsjdk.HtsjdkTest; import htsjdk.tribble.readers.LineIterator; -import htsjdk.tribble.TestUtils; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFCodec; import org.testng.Assert; @@ -9,56 +9,9 @@ import org.testng.annotations.Test; import java.io.IOException; -import java.net.URISyntaxException; -import static org.testng.Assert.assertEquals; - -public class TribbleIndexFeatureReaderTest { - - @DataProvider(name = "extensionURIStrings") - public Object[][] createBlockCompressedExtensionURIs() { - return new Object[][]{ - {"testzip.gz", true}, - {"testzip.GZ", true}, - {"testzip.gZ", true}, - {"testzip.Gz", true}, - - {"test", false}, - {"test.gzip", false}, - {"test.bgz", false}, - {"test.bgzf", false}, - {"test.bzip2", false}, - - {"file://testzip.gz", true}, - {"file://apath/testzip.gz", true}, - - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gz", true}, - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.GZ", true}, - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gzip", false}, - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bgz", false}, - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bgzf", false}, - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bzip2", false}, - 
{"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877", false}, - - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gz?alt=media", true}, - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.GZ?alt=media", true}, - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gzip?alt=media", false}, - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bgz?alt=media", false}, - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bgzf?alt=media", false}, - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bzip2?alt=media", false}, - - {"ftp://ftp.broadinstitute.org/distribution/igv/TEST/cpgIslands.hg18.gz", true}, - {"ftp://ftp.broadinstitute.org/distribution/igv/TEST/cpgIslands.hg18.bed", false}, - - {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gz", true}, - }; - } - - @Test(enabled = true, dataProvider = "extensionURIStrings") - public void testGZExtension(final String testString, final boolean expected) throws URISyntaxException { - Assert.assertEquals(TribbleIndexedFeatureReader.isGZIPPath(testString), expected); - } +public class TribbleIndexFeatureReaderTest extends HtsjdkTest { @DataProvider(name = "featureFileStrings") public Object[][] createFeatureFileStrings() { @@ -74,7 +27,7 @@ public void testGZExtension(final String testString, final boolean expected) thr public void testIndexedGZIPVCF(final String testPath, final int expectedCount) throws IOException { final VCFCodec codec = new VCFCodec(); try (final TribbleIndexedFeatureReader featureReader = - new TribbleIndexedFeatureReader(testPath, codec, false)) { + new TribbleIndexedFeatureReader<>(testPath, codec, false)) { final CloseableTribbleIterator localIterator = featureReader.iterator(); int count = 0; for (final Feature feat : 
featureReader.iterator()) { diff --git a/src/test/java/htsjdk/tribble/TribbleTest.java b/src/test/java/htsjdk/tribble/TribbleTest.java index e8366c4b0..3874c7f71 100644 --- a/src/test/java/htsjdk/tribble/TribbleTest.java +++ b/src/test/java/htsjdk/tribble/TribbleTest.java @@ -1,5 +1,6 @@ package htsjdk.tribble; +import htsjdk.HtsjdkTest; import htsjdk.tribble.util.TabixUtils; import org.testng.Assert; import org.testng.annotations.Test; @@ -7,7 +8,7 @@ import java.io.File; -public class TribbleTest { +public class TribbleTest extends HtsjdkTest { @Test public void testStandardIndex() { diff --git a/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java b/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java index dbf23a0e5..cc0255b62 100644 --- a/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java +++ b/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java @@ -24,6 +24,7 @@ package htsjdk.tribble.bed; +import htsjdk.HtsjdkTest; import htsjdk.tribble.AbstractFeatureReader; import htsjdk.tribble.Feature; import htsjdk.tribble.TestUtils; @@ -43,7 +44,7 @@ import java.io.IOException; import java.util.List; -public class BEDCodecTest { +public class BEDCodecTest extends HtsjdkTest { @Test public void testSimpleDecode() { diff --git a/src/test/java/htsjdk/tribble/gelitext/GeliTextTest.java b/src/test/java/htsjdk/tribble/gelitext/GeliTextTest.java deleted file mode 100644 index c670bf182..000000000 --- a/src/test/java/htsjdk/tribble/gelitext/GeliTextTest.java +++ /dev/null @@ -1,100 +0,0 @@ -package htsjdk.tribble.gelitext; - -import htsjdk.tribble.AbstractFeatureReader; -import htsjdk.tribble.FeatureReader; -import htsjdk.tribble.TestUtils; -import htsjdk.tribble.index.Index; -import htsjdk.tribble.index.IndexFactory; -import org.testng.Assert; -import org.testng.annotations.BeforeSuite; -import org.testng.annotations.Test; - -import java.io.File; -import java.io.IOException; -import java.util.Iterator; - - -/** - * @author aaron - *

    - * Class GeliTextTest - *

    - * test out the geli text source codec and feature - */ -@Deprecated -public class GeliTextTest { - public static final File testFile = new File(TestUtils.DATA_DIR + "testGeliText.txt"); - public static Index index; - private FeatureReader source; - - // setup a new source before each class - - @BeforeSuite - public void beforeTest() { - index = IndexFactory.createLinearIndex(testFile, new GeliTextCodec()); - source = AbstractFeatureReader.getFeatureReader(testFile.getAbsolutePath(), new GeliTextCodec(), index); - } - - @Test - public void testReadAllLines() { - // Query - try { - Iterator iter = source.query("22", 14438070, 14592250); - int count = 0; - while (iter.hasNext()) { - GeliTextFeature feat = iter.next(); - count++; - } - Assert.assertEquals(count, 50); - } catch (IOException e) { - Assert.fail("failed to generate iterator from feature source"); - } - } - - @Test - public void testGetSubRegion() { - // Query - try { - Iterator iter = source.query("22", 14438070, 14539060); // should be the first 41 records - int count = 0; - while (iter.hasNext()) { - GeliTextFeature feat = iter.next(); - count++; - } - Assert.assertEquals(count, 41); - } catch (IOException e) { - Assert.fail("failed to generate iterator from feature source"); - } - } - - @Test - public void testFirstRecord() { - // Query - try { - Iterator iter = source.query("22", 14438070, 14592250); - int count = 0; - - GeliTextFeature feat = iter.next(); - // check the first records contents - // 22 14438070 A 0 0 GG 33.2618 33.2618 0 0 0 0 0 0 0 33.2618 0 0 - Assert.assertTrue("22".equals(feat.getContig())); - Assert.assertEquals(feat.getStart(), 14438070); - Assert.assertEquals('A', feat.getRefBase()); - Assert.assertEquals(feat.getDepthOfCoverage(), 0.0, 0.0001); - Assert.assertEquals(feat.getMaximumMappingQual(), 0.0, 0.0001); - Assert.assertTrue(DiploidGenotype.GG.equals(feat.getGenotype())); - Assert.assertEquals(feat.getDepthOfCoverage(), 0.0, 0.0001); - 
Assert.assertEquals(feat.getLODBestToReference(), 33.2618, 0.0001); - Assert.assertEquals(feat.getLODBestToNext(), 33.2618, 0.0001); - for (int x = 0; x < feat.getLikelihoods().length; x++) { - if (x == DiploidGenotype.GG.ordinal()) - Assert.assertEquals(feat.getLikelihoods()[x], 33.2618, 0.0001); - else - Assert.assertEquals(feat.getLikelihoods()[x], 0, 0.0001); - } - - } catch (IOException e) { - Assert.fail("failed to generate iterator from feature source"); - } - } -} diff --git a/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java b/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java index 016049f32..964a3c3d6 100644 --- a/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java +++ b/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java @@ -23,16 +23,14 @@ */ package htsjdk.tribble.index; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.TestUtils; import htsjdk.tribble.TribbleException; import htsjdk.tribble.bed.BEDCodec; -import htsjdk.tribble.index.linear.LinearIndex; import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.index.tabix.TabixIndex; -import htsjdk.tribble.util.LittleEndianOutputStream; import htsjdk.variant.vcf.VCFCodec; import htsjdk.variant.vcf.VCFFileReader; import org.testng.Assert; @@ -40,15 +38,13 @@ import org.testng.annotations.Test; import java.io.File; -import java.io.IOException; -import java.io.OutputStream; import java.util.List; /** * User: jacob * Date: 2012-Aug-23 */ -public class IndexFactoryTest { +public class IndexFactoryTest extends HtsjdkTest { final File sortedBedFile = new File(TestUtils.DATA_DIR + "bed/Unigene.sample.bed"); final File unsortedBedFile = new File(TestUtils.DATA_DIR + "bed/unsorted.bed"); diff --git a/src/test/java/htsjdk/tribble/index/IndexTest.java b/src/test/java/htsjdk/tribble/index/IndexTest.java index aa179a9a2..d1ff18eb7 100644 --- 
a/src/test/java/htsjdk/tribble/index/IndexTest.java +++ b/src/test/java/htsjdk/tribble/index/IndexTest.java @@ -1,12 +1,15 @@ package htsjdk.tribble.index; +import com.google.common.jimfs.Configuration; +import com.google.common.jimfs.Jimfs; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.FeatureCodec; import htsjdk.tribble.TestUtils; import htsjdk.tribble.Tribble; import htsjdk.tribble.bed.BEDCodec; +import htsjdk.tribble.index.interval.IntervalTreeIndex; import htsjdk.tribble.index.linear.LinearIndex; -import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.index.tabix.TabixIndex; import htsjdk.tribble.util.LittleEndianOutputStream; import htsjdk.tribble.util.TabixUtils; @@ -18,11 +21,13 @@ import java.io.File; import java.io.IOException; import java.io.OutputStream; +import java.nio.file.FileSystem; +import java.nio.file.Path; import java.util.ArrayList; import java.util.List; -public class IndexTest { +public class IndexTest extends HtsjdkTest { private final static String CHR = "1"; private final static File MassiveIndexFile = new File(TestUtils.DATA_DIR + "Tb.vcf.idx"); @@ -94,4 +99,36 @@ public void testWriteIndex(final File inputFile, final IndexFactory.IndexType ty index.write(new LittleEndianOutputStream(nullOutputStrem)); } + @Test(dataProvider = "writeIndexData") + public void testWritePathIndex(final File inputFile, final IndexFactory.IndexType type, final FeatureCodec codec) throws Exception { + try (final FileSystem fs = Jimfs.newFileSystem("test", Configuration.unix())) { + // create the index + final Index index = IndexFactory.createIndex(inputFile, codec, type); + final Path path = fs.getPath(inputFile.getName() + ".index"); + // write the index to a file + index.write(path); + + // test if the index does not blow up with the path constructor + switch (type) { + case TABIX: + new TabixIndex(path); + break; + case LINEAR: + new LinearIndex(path); + break; + case INTERVAL_TREE: + new 
IntervalTreeIndex(path); + break; + } + } + } + + @Test(dataProvider = "writeIndexData") + public void testWriteBasedOnNonRegularFeatureFile(final File inputFile, final IndexFactory.IndexType type, final FeatureCodec codec) throws Exception { + final File tmpFolder = IOUtil.createTempDir("NonRegultarFeatureFile", null); + // create the index + final Index index = IndexFactory.createIndex(inputFile, codec, type); + // try to write based on the tmpFolder + Assert.assertThrows(IOException.class, () -> index.writeBasedOnFeatureFile(tmpFolder)); + } } diff --git a/src/test/java/htsjdk/tribble/index/interval/IntervalTreeTest.java b/src/test/java/htsjdk/tribble/index/interval/IntervalTreeTest.java index ca4708933..9a8a0a68e 100644 --- a/src/test/java/htsjdk/tribble/index/interval/IntervalTreeTest.java +++ b/src/test/java/htsjdk/tribble/index/interval/IntervalTreeTest.java @@ -18,6 +18,7 @@ package htsjdk.tribble.index.interval; +import htsjdk.HtsjdkTest; import htsjdk.tribble.AbstractFeatureReader; import htsjdk.tribble.CloseableTribbleIterator; import htsjdk.tribble.FeatureReader; @@ -42,7 +43,7 @@ * User: jrobinso * Date: Mar 24, 2010 */ -public class IntervalTreeTest { +public class IntervalTreeTest extends HtsjdkTest { static IntervalTree tree; diff --git a/src/test/java/htsjdk/tribble/index/linear/LinearIndexTest.java b/src/test/java/htsjdk/tribble/index/linear/LinearIndexTest.java index 09f920e41..e20dc1589 100644 --- a/src/test/java/htsjdk/tribble/index/linear/LinearIndexTest.java +++ b/src/test/java/htsjdk/tribble/index/linear/LinearIndexTest.java @@ -18,6 +18,7 @@ package htsjdk.tribble.index.linear; +import htsjdk.HtsjdkTest; import htsjdk.tribble.AbstractFeatureReader; import htsjdk.tribble.CloseableTribbleIterator; import htsjdk.tribble.FeatureReader; @@ -38,7 +39,7 @@ import java.util.List; import java.util.Set; -public class LinearIndexTest { +public class LinearIndexTest extends HtsjdkTest { private static final File RANDOM_FILE = new File("notMeaningful"); 
private final static Block CHR1_B1 = new Block(1, 10); diff --git a/src/test/java/htsjdk/tribble/index/tabix/TabixIndexTest.java b/src/test/java/htsjdk/tribble/index/tabix/TabixIndexTest.java index 6981b8751..0473a3d90 100644 --- a/src/test/java/htsjdk/tribble/index/tabix/TabixIndexTest.java +++ b/src/test/java/htsjdk/tribble/index/tabix/TabixIndexTest.java @@ -23,6 +23,7 @@ */ package htsjdk.tribble.index.tabix; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.BlockCompressedOutputStream; import htsjdk.tribble.index.IndexFactory; import htsjdk.tribble.util.LittleEndianOutputStream; @@ -40,7 +41,7 @@ import java.io.IOException; import java.util.Iterator; -public class TabixIndexTest { +public class TabixIndexTest extends HtsjdkTest { private static final File SMALL_TABIX_FILE = new File("src/test/resources/htsjdk/tribble/tabix/trioDup.vcf.gz.tbi"); private static final File BIGGER_TABIX_FILE = new File("src/test/resources/htsjdk/tribble/tabix/bigger.vcf.gz.tbi"); diff --git a/src/test/java/htsjdk/tribble/readers/AsciiLineReaderTest.java b/src/test/java/htsjdk/tribble/readers/AsciiLineReaderTest.java index 822f6cf6a..b0a8de371 100644 --- a/src/test/java/htsjdk/tribble/readers/AsciiLineReaderTest.java +++ b/src/test/java/htsjdk/tribble/readers/AsciiLineReaderTest.java @@ -1,10 +1,11 @@ package htsjdk.tribble.readers; +import htsjdk.HtsjdkTest; import htsjdk.tribble.TestUtils; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; +import org.testng.Assert; import org.testng.annotations.Test; +import java.io.ByteArrayInputStream; import java.io.FileInputStream; import java.io.InputStream; @@ -15,17 +16,7 @@ * User: jacob * Date: 2012/05/09 */ -public class AsciiLineReaderTest { - @BeforeMethod - public void setUp() throws Exception { - - } - - @AfterMethod - public void tearDown() throws Exception { - - } - +public class AsciiLineReaderTest extends HtsjdkTest { /** * Test that we read the correct number of lines * from a file @@ 
-49,4 +40,32 @@ public void testReadLines() throws Exception { assertEquals(expectedNumber, actualLines); } + + @Test public void voidTestLineEndingLength() throws Exception { + final String input = "Hello\nThis\rIs A Silly Test\r\nSo There"; + final InputStream is = new ByteArrayInputStream(input.getBytes()); + final AsciiLineReader in = new AsciiLineReader(is); + + Assert.assertEquals(in.getLineTerminatorLength(), -1); + Assert.assertEquals(in.readLine(), "Hello"); + Assert.assertEquals(in.getLineTerminatorLength(), 1); + Assert.assertEquals(in.readLine(), "This"); + Assert.assertEquals(in.getLineTerminatorLength(), 1); + Assert.assertEquals(in.readLine(), "Is A Silly Test"); + Assert.assertEquals(in.getLineTerminatorLength(), 2); + Assert.assertEquals(in.readLine(), "So There"); + Assert.assertEquals(in.getLineTerminatorLength(), 0); + } + + @Test public void voidTestLineEndingLengthAtEof() throws Exception { + final String input = "Hello\nWorld\r\n"; + final InputStream is = new ByteArrayInputStream(input.getBytes()); + final AsciiLineReader in = new AsciiLineReader(is); + + Assert.assertEquals(in.getLineTerminatorLength(), -1); + Assert.assertEquals(in.readLine(), "Hello"); + Assert.assertEquals(in.getLineTerminatorLength(), 1); + Assert.assertEquals(in.readLine(), "World"); + Assert.assertEquals(in.getLineTerminatorLength(), 2); + } } diff --git a/src/test/java/htsjdk/tribble/readers/LongLineBufferedReaderTest.java b/src/test/java/htsjdk/tribble/readers/LongLineBufferedReaderTest.java index 6c4c94673..3e498e17c 100644 --- a/src/test/java/htsjdk/tribble/readers/LongLineBufferedReaderTest.java +++ b/src/test/java/htsjdk/tribble/readers/LongLineBufferedReaderTest.java @@ -1,5 +1,6 @@ package htsjdk.tribble.readers; +import htsjdk.HtsjdkTest; import htsjdk.tribble.TestUtils; import org.testng.Assert; import org.testng.annotations.Test; @@ -11,7 +12,7 @@ /** * @author mccowan */ -public class LongLineBufferedReaderTest { +public class LongLineBufferedReaderTest 
extends HtsjdkTest { /** * Test that we read the correct number of lines diff --git a/src/test/java/htsjdk/tribble/readers/PositionalBufferedStreamTest.java b/src/test/java/htsjdk/tribble/readers/PositionalBufferedStreamTest.java index 3dd7cf38e..8d9db2a7b 100644 --- a/src/test/java/htsjdk/tribble/readers/PositionalBufferedStreamTest.java +++ b/src/test/java/htsjdk/tribble/readers/PositionalBufferedStreamTest.java @@ -1,5 +1,6 @@ package htsjdk.tribble.readers; +import htsjdk.HtsjdkTest; import htsjdk.tribble.TestUtils; import org.testng.Assert; import org.testng.annotations.AfterMethod; @@ -20,7 +21,7 @@ * User: jacob * Date: 2012/05/09 */ -public class PositionalBufferedStreamTest { +public class PositionalBufferedStreamTest extends HtsjdkTest { InputStream FileIs; long expectedBytes; diff --git a/src/test/java/htsjdk/tribble/readers/ReaderTest.java b/src/test/java/htsjdk/tribble/readers/ReaderTest.java index d700e041b..7ac1d5787 100644 --- a/src/test/java/htsjdk/tribble/readers/ReaderTest.java +++ b/src/test/java/htsjdk/tribble/readers/ReaderTest.java @@ -1,6 +1,7 @@ package htsjdk.tribble.readers; +import htsjdk.HtsjdkTest; import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; @@ -18,7 +19,7 @@ /** * Tests for streams and readers */ -public class ReaderTest { +public class ReaderTest extends HtsjdkTest { @BeforeClass public void setup() throws IOException { } diff --git a/src/test/java/htsjdk/tribble/readers/SynchronousLineReaderUnitTest.java b/src/test/java/htsjdk/tribble/readers/SynchronousLineReaderUnitTest.java index fbb5d188a..0c0deab41 100644 --- a/src/test/java/htsjdk/tribble/readers/SynchronousLineReaderUnitTest.java +++ b/src/test/java/htsjdk/tribble/readers/SynchronousLineReaderUnitTest.java @@ -1,5 +1,6 @@ package htsjdk.tribble.readers; +import htsjdk.HtsjdkTest; import htsjdk.tribble.TestUtils; import org.testng.Assert; import org.testng.annotations.Test; @@ -12,7 +13,7 @@ /** * @author 
mccowan */ -public class SynchronousLineReaderUnitTest { +public class SynchronousLineReaderUnitTest extends HtsjdkTest { @Test public void testLineReaderIterator_streamConstructor() throws Exception { final File filePath = new File(TestUtils.DATA_DIR + "gwas/smallp.gwas"); diff --git a/src/test/java/htsjdk/tribble/readers/TabixReaderTest.java b/src/test/java/htsjdk/tribble/readers/TabixReaderTest.java index d7b36dfab..3b47f417b 100644 --- a/src/test/java/htsjdk/tribble/readers/TabixReaderTest.java +++ b/src/test/java/htsjdk/tribble/readers/TabixReaderTest.java @@ -1,6 +1,7 @@ package htsjdk.tribble.readers; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.TestUtil; import htsjdk.tribble.TestUtils; import org.testng.Assert; @@ -23,7 +24,7 @@ * Time: 8:57:40 PM * To change this template use File | Settings | File Templates. */ -public class TabixReaderTest { +public class TabixReaderTest extends HtsjdkTest { static String tabixFile = TestUtils.DATA_DIR + "tabix/trioDup.vcf.gz"; static TabixReader tabixReader; diff --git a/src/test/java/htsjdk/tribble/util/ParsingUtilsTest.java b/src/test/java/htsjdk/tribble/util/ParsingUtilsTest.java index 1c3ad1f54..c974790dd 100644 --- a/src/test/java/htsjdk/tribble/util/ParsingUtilsTest.java +++ b/src/test/java/htsjdk/tribble/util/ParsingUtilsTest.java @@ -1,12 +1,17 @@ package htsjdk.tribble.util; +import com.google.common.jimfs.Configuration; +import com.google.common.jimfs.Jimfs; +import htsjdk.HtsjdkTest; +import htsjdk.samtools.util.IOUtil; import org.testng.Assert; import org.testng.annotations.Test; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; +import java.io.*; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.Arrays; import java.util.List; @@ -14,7 +19,7 @@ /** * Parsing utils tests */ -public class ParsingUtilsTest { +public class ParsingUtilsTest extends HtsjdkTest { static final String AVAILABLE_FTP_URL = 
"ftp://ftp.broadinstitute.org/pub/igv/TEST/test.txt"; static final String UNAVAILABLE_FTP_URL = "ftp://www.example.com/file.txt"; @@ -118,6 +123,37 @@ public void testSplitJoinEmptyFirst() { } @Test + public void testFileDoesExist() throws IOException{ + File tempFile = File.createTempFile(getClass().getSimpleName(), ".tmp"); + tempFile.deleteOnExit(); + tstExists(tempFile.getAbsolutePath(), true); + tstExists(tempFile.toURI().toString(), true); + } + + @Test + public void testFileDoesNotExist() throws IOException{ + File tempFile = File.createTempFile(getClass().getSimpleName(), ".tmp"); + tempFile.delete(); + tstExists(tempFile.getAbsolutePath(), false); + tstExists(tempFile.toURI().toString(), false); + } + + @Test + public void testInMemoryNioFileDoesExist() throws IOException{ + FileSystem fs = Jimfs.newFileSystem(Configuration.unix()); + Path file = fs.getPath("/file"); + Files.createFile(file); + tstExists(file.toUri().toString(), true); + } + + @Test + public void testInMemoryNioFileDoesNotExist() throws IOException{ + FileSystem fs = Jimfs.newFileSystem(Configuration.unix()); + Path file = fs.getPath("/file"); + tstExists(file.toUri().toString(), false); + } + + @Test public void testFTPDoesExist() throws IOException{ tstExists(AVAILABLE_FTP_URL, true); } @@ -143,6 +179,26 @@ private void tstExists(String path, boolean expectExists) throws IOException{ } @Test + public void testFileOpenInputStream() throws IOException{ + File tempFile = File.createTempFile(getClass().getSimpleName(), ".tmp"); + tempFile.deleteOnExit(); + OutputStream os = IOUtil.openFileForWriting(tempFile); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); + writer.write("hello"); + writer.close(); + tstStream(tempFile.getAbsolutePath()); + tstStream(tempFile.toURI().toString()); + } + + @Test + public void testInMemoryNioFileOpenInputStream() throws IOException{ + FileSystem fs = Jimfs.newFileSystem(Configuration.unix()); + Path file = fs.getPath("/file"); + 
Files.write(file, "hello".getBytes("UTF-8")); + tstStream(file.toUri().toString()); + } + + @Test public void testFTPOpenInputStream() throws IOException{ tstStream(AVAILABLE_FTP_URL); } diff --git a/src/test/java/htsjdk/tribble/util/ftp/FTPClientTest.java b/src/test/java/htsjdk/tribble/util/ftp/FTPClientTest.java index 3979b0858..6b77f913e 100644 --- a/src/test/java/htsjdk/tribble/util/ftp/FTPClientTest.java +++ b/src/test/java/htsjdk/tribble/util/ftp/FTPClientTest.java @@ -1,5 +1,6 @@ package htsjdk.tribble.util.ftp; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.ftp.FTPClient; import htsjdk.samtools.util.ftp.FTPReply; import org.testng.Assert; @@ -15,7 +16,7 @@ * @author Jim Robinson * @since 10/3/11 */ -public class FTPClientTest { +public class FTPClientTest extends HtsjdkTest { static String host = "ftp.broadinstitute.org"; static String file = "/pub/igv/TEST/test.txt"; diff --git a/src/test/java/htsjdk/tribble/util/ftp/FTPUtilsTest.java b/src/test/java/htsjdk/tribble/util/ftp/FTPUtilsTest.java index a5f3b0e58..87000ee14 100644 --- a/src/test/java/htsjdk/tribble/util/ftp/FTPUtilsTest.java +++ b/src/test/java/htsjdk/tribble/util/ftp/FTPUtilsTest.java @@ -1,5 +1,6 @@ package htsjdk.tribble.util.ftp; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.ftp.FTPUtils; import org.testng.annotations.Test; @@ -12,7 +13,7 @@ * @author Jim Robinson * @since 10/4/11 */ -public class FTPUtilsTest { +public class FTPUtilsTest extends HtsjdkTest { @Test public void testResourceAvailable() throws Exception { diff --git a/src/test/java/htsjdk/tribble/util/popgen/HardyWeinbergCalculationTest.java b/src/test/java/htsjdk/tribble/util/popgen/HardyWeinbergCalculationTest.java index fcf1bea0b..d2b54555c 100644 --- a/src/test/java/htsjdk/tribble/util/popgen/HardyWeinbergCalculationTest.java +++ b/src/test/java/htsjdk/tribble/util/popgen/HardyWeinbergCalculationTest.java @@ -1,5 +1,6 @@ package htsjdk.tribble.util.popgen; +import htsjdk.HtsjdkTest; import 
org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -7,7 +8,7 @@ /** * Created by farjoun on 7/18/14. */ -public class HardyWeinbergCalculationTest { +public class HardyWeinbergCalculationTest extends HtsjdkTest { @DataProvider public Object[][] testHwCalculateData() { diff --git a/src/test/java/htsjdk/variant/PrintVariantsExampleTest.java b/src/test/java/htsjdk/variant/PrintVariantsExampleTest.java index c82f2dbf3..9f273a94d 100644 --- a/src/test/java/htsjdk/variant/PrintVariantsExampleTest.java +++ b/src/test/java/htsjdk/variant/PrintVariantsExampleTest.java @@ -25,20 +25,19 @@ package htsjdk.variant; +import htsjdk.HtsjdkTest; import htsjdk.samtools.util.IOUtil; import htsjdk.variant.example.PrintVariantsExample; import org.testng.Assert; -import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.List; -import java.util.OptionalInt; import java.util.stream.IntStream; -public class PrintVariantsExampleTest { +public class PrintVariantsExampleTest extends HtsjdkTest { @Test public void testExampleWriteFile() throws IOException { final File tempFile = File.createTempFile("example", ".vcf"); diff --git a/src/test/java/htsjdk/variant/VariantBaseTest.java b/src/test/java/htsjdk/variant/VariantBaseTest.java index 87345a054..7a3417b52 100644 --- a/src/test/java/htsjdk/variant/VariantBaseTest.java +++ b/src/test/java/htsjdk/variant/VariantBaseTest.java @@ -25,6 +25,7 @@ package htsjdk.variant; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.variant.variantcontext.Genotype; @@ -44,7 +45,7 @@ /** * Base class for test classes within org.broadinstitute.variant */ -public class VariantBaseTest { +public class VariantBaseTest extends HtsjdkTest { public static final String variantTestDataRoot = new 
File("src/test/resources/htsjdk/variant/").getAbsolutePath() + "/"; diff --git a/src/test/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractorTest.java b/src/test/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractorTest.java index 9fb13e802..af3241112 100644 --- a/src/test/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractorTest.java +++ b/src/test/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractorTest.java @@ -23,6 +23,7 @@ */ package htsjdk.variant.utils; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.SequenceUtil; import org.testng.annotations.DataProvider; @@ -34,7 +35,7 @@ /** * @author farjoun on 4/9/14. */ -public class SAMSequenceDictionaryExtractorTest { +public class SAMSequenceDictionaryExtractorTest extends HtsjdkTest { String path = "src/test/resources/htsjdk/variant/utils/SamSequenceDictionaryExtractor/"; @DataProvider(name = "testExtractDictionaries") diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java index 613dec57a..b8476592e 100644 --- a/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java +++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java @@ -25,6 +25,7 @@ package htsjdk.variant.variantcontext; +import htsjdk.HtsjdkTest; import htsjdk.tribble.FeatureCodec; import htsjdk.tribble.FeatureCodecHeader; import htsjdk.tribble.Tribble; @@ -69,7 +70,7 @@ * @author Your Name * @since Date created */ -public class VariantContextTestProvider { +public class VariantContextTestProvider extends HtsjdkTest { final private static boolean ENABLE_GENOTYPE_TESTS = true; final private static boolean ENABLE_A_AND_G_TESTS = true; final private static boolean ENABLE_VARARRAY_TESTS = true; @@ -1011,4 +1012,4 @@ public static void main( String argv[] ) { throw new RuntimeException(e); } } -} \ No newline at end of file +} diff --git 
a/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java index 14056f833..3d6851598 100644 --- a/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java @@ -178,8 +178,8 @@ public void testMultipleSNPAlleleOrdering() { final List allelesUnnaturalOrder = Arrays.asList(Aref, T, C); VariantContext naturalVC = snpBuilder.alleles(allelesNaturalOrder).make(); VariantContext unnaturalVC = snpBuilder.alleles(allelesUnnaturalOrder).make(); - Assert.assertEquals(new ArrayList(naturalVC.getAlleles()), allelesNaturalOrder); - Assert.assertEquals(new ArrayList(unnaturalVC.getAlleles()), allelesUnnaturalOrder); + Assert.assertEquals(new ArrayList<>(naturalVC.getAlleles()), allelesNaturalOrder); + Assert.assertEquals(new ArrayList<>(unnaturalVC.getAlleles()), allelesUnnaturalOrder); } @Test @@ -371,7 +371,7 @@ public void testBadConstructorArgs3() { @Test (expectedExceptions = Throwable.class) public void testBadConstructorArgs4() { - new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Collections.emptyList()).make(); + new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Collections.emptyList()).make(); } @Test (expectedExceptions = Exception.class) @@ -528,7 +528,7 @@ public void testFilters() { Assert.assertTrue(vc.filtersWereApplied()); Assert.assertNotNull(vc.getFiltersMaybeNull()); - Set filters = new HashSet(Arrays.asList("BAD_SNP_BAD!", "REALLY_BAD_SNP", "CHRIST_THIS_IS_TERRIBLE")); + Set filters = new HashSet<>(Arrays.asList("BAD_SNP_BAD!", "REALLY_BAD_SNP", "CHRIST_THIS_IS_TERRIBLE")); vc = new VariantContextBuilder(vc).filters(filters).make(); Assert.assertFalse(vc.isNotFiltered()); @@ -570,12 +570,16 @@ public void testVCFfromGenotypes() { Genotype g5 = GenotypeBuilder.create("AC", Arrays.asList(Aref, C)); VariantContext vc = new 
VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make(); - VariantContext vc12 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true); - VariantContext vc1 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName())), true); - VariantContext vc23 = vc.subContextFromSamples(new HashSet(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true); - VariantContext vc4 = vc.subContextFromSamples(new HashSet(Arrays.asList(g4.getSampleName())), true); - VariantContext vc14 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true); - VariantContext vc125 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName(), g2.getSampleName(), g5.getSampleName())), true); + VariantContext vc12 = vc.subContextFromSamples( + new HashSet<>(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true); + VariantContext vc1 = vc.subContextFromSamples(new HashSet<>(Arrays.asList(g1.getSampleName())), true); + VariantContext vc23 = vc.subContextFromSamples( + new HashSet<>(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true); + VariantContext vc4 = vc.subContextFromSamples(new HashSet<>(Arrays.asList(g4.getSampleName())), true); + VariantContext vc14 = vc.subContextFromSamples( + new HashSet<>(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true); + VariantContext vc125 = vc.subContextFromSamples( + new HashSet<>(Arrays.asList(g1.getSampleName(), g2.getSampleName(), g5.getSampleName())), true); Assert.assertTrue(vc12.isPolymorphicInSamples()); Assert.assertTrue(vc23.isPolymorphicInSamples()); @@ -676,7 +680,7 @@ public String toString() { @DataProvider(name = "getAlleles") public Object[][] mergeAllelesData() { - List tests = new ArrayList(); + List tests = new ArrayList<>(); tests.add(new Object[]{new GetAllelesTest("A*", Aref)}); tests.add(new Object[]{new GetAllelesTest("A*/C", Aref, C)}); @@ -747,7 
+751,7 @@ public String toString() { VariantContext sites = new VariantContextBuilder("sites", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).make(); VariantContext genotypes = new VariantContextBuilder(sites).source("genotypes").genotypes(g1, g2, g3).make(); - List tests = new ArrayList(); + List tests = new ArrayList<>(); tests.add(new Object[]{new SitesAndGenotypesVC("sites", sites)}); tests.add(new Object[]{new SitesAndGenotypesVC("genotypes", genotypes)}); @@ -822,7 +826,7 @@ public void runModifyVCTests(SitesAndGenotypesVC cfg) { boolean updateAlleles; private SubContextTest(Collection samples, boolean updateAlleles) { - this.samples = new HashSet(samples); + this.samples = new HashSet<>(samples); this.updateAlleles = updateAlleles; } @@ -833,10 +837,10 @@ public String toString() { @DataProvider(name = "SubContextTest") public Object[][] MakeSubContextTest() { - List tests = new ArrayList(); + List tests = new ArrayList<>(); for ( boolean updateAlleles : Arrays.asList(true, false)) { - tests.add(new Object[]{new SubContextTest(Collections.emptySet(), updateAlleles)}); + tests.add(new Object[]{new SubContextTest(Collections.emptySet(), updateAlleles)}); tests.add(new Object[]{new SubContextTest(Collections.singleton("MISSING"), updateAlleles)}); tests.add(new Object[]{new SubContextTest(Collections.singleton("AA"), updateAlleles)}); tests.add(new Object[]{new SubContextTest(Collections.singleton("AT"), updateAlleles)}); @@ -871,7 +875,7 @@ public void runSubContextTest(SubContextTest cfg) { Assert.assertEquals(sub.getID(), vc.getID()); Assert.assertEquals(sub.getAttributes(), vc.getAttributes()); - Set expectedGenotypes = new HashSet(); + Set expectedGenotypes = new HashSet<>(); if ( cfg.samples.contains(g1.getSampleName()) ) expectedGenotypes.add(g1); if ( cfg.samples.contains(g2.getSampleName()) ) expectedGenotypes.add(g2); if ( cfg.samples.contains(g3.getSampleName()) ) expectedGenotypes.add(g3); @@ -881,10 +885,10 @@ public void 
runSubContextTest(SubContextTest cfg) { // these values depend on the results of sub if ( cfg.updateAlleles ) { // do the work to see what alleles should be here, and which not - List expectedAlleles = new ArrayList(); + List expectedAlleles = new ArrayList<>(); expectedAlleles.add(Aref); - Set genotypeAlleles = new HashSet(); + Set genotypeAlleles = new HashSet<>(); for ( final Genotype g : expectedGC ) genotypeAlleles.addAll(g.getAlleles()); genotypeAlleles.remove(Aref); @@ -925,7 +929,7 @@ public String toString() { @DataProvider(name = "SampleNamesTest") public Object[][] MakeSampleNamesTest() { - List tests = new ArrayList(); + List tests = new ArrayList<>(); tests.add(new Object[]{new SampleNamesTest(Arrays.asList("1"), Arrays.asList("1"))}); tests.add(new Object[]{new SampleNamesTest(Arrays.asList("2", "1"), Arrays.asList("1", "2"))}); @@ -959,7 +963,7 @@ public void runSampleNamesTest(SampleNamesTest cfg) { VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make(); // same sample names => success - Assert.assertTrue(vc.getSampleNames().equals(new HashSet(cfg.sampleNames)), "vc.getSampleNames() = " + vc.getSampleNames()); + Assert.assertTrue(vc.getSampleNames().equals(new HashSet<>(cfg.sampleNames)), "vc.getSampleNames() = " + vc.getSampleNames()); Assert.assertEquals(vc.getSampleNamesOrderedByName(), cfg.sampleNamesInOrder, "vc.getSampleNamesOrderedByName() = " + vc.getSampleNamesOrderedByName()); assertGenotypesAreInOrder(vc.getGenotypesOrderedByName(), cfg.sampleNamesInOrder); @@ -1147,7 +1151,7 @@ private VariantContext createTestVariantContextRsIds(final String rsId) { fullyDecoded, toValidate); } private Set makeRsIDsSet(final String... 
rsIds) { - return new HashSet(Arrays.asList(rsIds)); + return new HashSet<>(Arrays.asList(rsIds)); } @@ -1226,14 +1230,14 @@ private VariantContext createValidateAlternateAllelesContext(final List /** AN : total number of alleles in called genotypes **/ // with AN set and hom-ref, we expect AN to be 2 for Aref/Aref - final Map attributesAN = new HashMap(); + final Map attributesAN = new HashMap<>(); attributesAN.put(VCFConstants.ALLELE_NUMBER_KEY, "2"); final VariantContext vcANSet = createValidateChromosomeCountsContext(Arrays.asList(Aref), attributesAN, homRef); // with AN set, one no-call (no-calls get ignored by getCalledChrCount() in VariantContext) // we expect AN to be 1 for Aref/no-call - final Map attributesANNoCall = new HashMap(); + final Map attributesANNoCall = new HashMap<>(); attributesANNoCall.put(VCFConstants.ALLELE_NUMBER_KEY, "1"); final VariantContext vcANSetNoCall = createValidateChromosomeCountsContext(Arrays.asList(Aref), attributesANNoCall, homRefNoCall); @@ -1241,42 +1245,42 @@ private VariantContext createValidateAlternateAllelesContext(final List /** AC : allele count in genotypes, for each ALT allele, in the same order as listed **/ // with AC set, and T/T, we expect AC to be 2 (for 2 counts of ALT T) - final Map attributesAC = new HashMap(); + final Map attributesAC = new HashMap<>(); attributesAC.put(VCFConstants.ALLELE_COUNT_KEY, "2"); final VariantContext vcACSet = createValidateChromosomeCountsContext(Arrays.asList(Aref, T), attributesAC, homVarT); // with AC set and no ALT (GT is 0/0), we expect AC count to be 0 - final Map attributesACNoAlts = new HashMap(); + final Map attributesACNoAlts = new HashMap<>(); attributesACNoAlts.put(VCFConstants.ALLELE_COUNT_KEY, "0"); final VariantContext vcACSetNoAlts = createValidateChromosomeCountsContext(Arrays.asList(Aref), attributesACNoAlts, homRef); // with AC set, and two different ALTs (T and C), with GT of 1/2, we expect a count of 1 for each. 
// With two ALTs, a list is expected, so we set the attribute as a list of 1,1 - final Map attributesACTwoAlts = new HashMap(); + final Map attributesACTwoAlts = new HashMap<>(); attributesACTwoAlts.put(VCFConstants.ALLELE_COUNT_KEY, Arrays.asList("1", "1")); final VariantContext vcACSetTwoAlts = createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesACTwoAlts, hetVarTC); // with AC set, and two different ALTs (T and C), with no GT, we expect a 2 count values. - final Map attributesACNoGtTwoAlts = new HashMap(); + final Map attributesACNoGtTwoAlts = new HashMap<>(); attributesACNoGtTwoAlts.put(VCFConstants.ALLELE_COUNT_KEY, Arrays.asList("1", "1")); final VariantContext vcACNoGtSetTwoAlts = - createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesACNoGtTwoAlts, null); + createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesACNoGtTwoAlts, (Genotype[]) null); // with AF set, and two different ALTs (T and C), with GT of 1/2, we expect two frequncy values. // With two ALTs, a list is expected, so we set the attribute as a list of 0.5,0.5 - final Map attributesAFTwoAlts = new HashMap(); + final Map attributesAFTwoAlts = new HashMap<>(); attributesAFTwoAlts.put(VCFConstants.ALLELE_FREQUENCY_KEY, Arrays.asList("0.5", "0.5")); final VariantContext vcAFSetTwoAlts = createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesAFTwoAlts, hetVarTC); // with AF set, and two different ALTs (T and C), with no GT, we expect two frequency values. 
- final Map attributesAFNoGtTwoAlts = new HashMap(); + final Map attributesAFNoGtTwoAlts = new HashMap<>(); attributesAFNoGtTwoAlts.put(VCFConstants.ALLELE_FREQUENCY_KEY, Arrays.asList("0.5", "0.5")); final VariantContext vcAFNoGtSetTwoAlts = - createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesAFNoGtTwoAlts, null); + createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesAFNoGtTwoAlts, (Genotype[]) null); return new Object[][]{ {vcNoGenotypes}, @@ -1284,6 +1288,7 @@ private VariantContext createValidateAlternateAllelesContext(final List {vcANSetNoCall}, {vcACSet}, {vcACSetNoAlts}, + {vcACSetTwoAlts}, {vcACNoGtSetTwoAlts}, {vcAFSetTwoAlts}, {vcAFNoGtSetTwoAlts} @@ -1303,60 +1308,60 @@ public void testValidateChromosomeCounts(final VariantContext vc) { /** AN : total number of alleles in called genotypes **/ // with AN set and hom-ref, we expect AN to be 2 for Aref/Aref, so 3 will fail - final Map attributesAN = new HashMap(); + final Map attributesAN = new HashMap<>(); attributesAN.put(VCFConstants.ALLELE_NUMBER_KEY, "3"); final VariantContext vcANSet = createValidateChromosomeCountsContext(Arrays.asList(Aref), attributesAN, homRef); // with AN set, one no-call (no-calls get ignored by getCalledChrCount() in VariantContext) // we expect AN to be 1 for Aref/no-call, so 2 will fail - final Map attributesANNoCall = new HashMap(); + final Map attributesANNoCall = new HashMap<>(); attributesANNoCall.put(VCFConstants.ALLELE_NUMBER_KEY, "2"); final VariantContext vcANSetNoCall = createValidateChromosomeCountsContext(Arrays.asList(Aref), attributesANNoCall, homRefNoCall); /** AC : allele count in genotypes, for each ALT allele, in the same order as listed **/ // with AC set but no ALTs, we expect a count of 0, so the wrong count will fail here - final Map attributesACWrongCount = new HashMap(); + final Map attributesACWrongCount = new HashMap<>(); attributesACWrongCount.put(VCFConstants.ALLELE_COUNT_KEY, "2"); final 
VariantContext vcACWrongCount = createValidateChromosomeCountsContext(Arrays.asList(Aref), attributesACWrongCount, homRef); // with AC set, two ALTs, but AC is not a list with count for each ALT - final Map attributesACTwoAlts = new HashMap(); + final Map attributesACTwoAlts = new HashMap<>(); attributesACTwoAlts.put(VCFConstants.ALLELE_COUNT_KEY, "1"); final VariantContext vcACSetTwoAlts = createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesACTwoAlts, hetVarTC); // with AC set, two ALTs, and a list is correctly used, but wrong counts (we expect counts to be 1,1) - final Map attributesACTwoAltsWrongCount = new HashMap(); + final Map attributesACTwoAltsWrongCount = new HashMap<>(); attributesACTwoAltsWrongCount.put(VCFConstants.ALLELE_COUNT_KEY, Arrays.asList("1", "2")); final VariantContext vcACSetTwoAltsWrongCount = createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesACTwoAltsWrongCount, hetVarTC); // with AC set, two ALTs, but only count for one ALT (we expect two items in the list: 1,1) - final Map attributesACTwoAltsOneAltCount = new HashMap(); + final Map attributesACTwoAltsOneAltCount = new HashMap<>(); attributesACTwoAltsOneAltCount.put(VCFConstants.ALLELE_COUNT_KEY, Arrays.asList("1")); final VariantContext vcACSetTwoAltsOneAltCount = createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesACTwoAltsOneAltCount, hetVarTC); // with AC set, no GT, two ALTs, but only count for one ALT (we expect two items in the list: 1,1) - final Map attributesACNoGtTwoAltsOneAltCount = new HashMap(); + final Map attributesACNoGtTwoAltsOneAltCount = new HashMap<>(); attributesACNoGtTwoAltsOneAltCount.put(VCFConstants.ALLELE_COUNT_KEY, Arrays.asList("1")); final VariantContext vcACNoGtSetTwoAltsOneAltCount = - createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesACNoGtTwoAltsOneAltCount, null); + createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), 
attributesACNoGtTwoAltsOneAltCount, (Genotype[])null); // with AF set, two ALTs, but only frequency for one ALT (we expect two items in the list - final Map attributesAFTwoAltsWrongFreq = new HashMap(); + final Map attributesAFTwoAltsWrongFreq = new HashMap<>(); attributesAFTwoAltsWrongFreq.put(VCFConstants.ALLELE_FREQUENCY_KEY, Arrays.asList("0.5")); final VariantContext vcAFSetTwoAltsWrongFreq = createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesAFTwoAltsWrongFreq, hetVarTC); // with AF set, no GT, two ALTs, but only frequency for one ALT (we expect two items in the list - final Map attributesAFNoGtTwoAltsWrongCount = new HashMap(); + final Map attributesAFNoGtTwoAltsWrongCount = new HashMap<>(); attributesAFNoGtTwoAltsWrongCount.put(VCFConstants.ALLELE_FREQUENCY_KEY, Arrays.asList("0.5")); final VariantContext vcAFNoGtSetTwoAltsWrongFreq = - createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesAFNoGtTwoAltsWrongCount, null); + createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesAFNoGtTwoAltsWrongCount, (Genotype[])null); return new Object[][]{ {vcANSet}, diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java index bebd39384..78bf565a7 100644 --- a/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java @@ -31,14 +31,10 @@ import htsjdk.variant.vcf.VCFConstants; import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; +import java.util.*; /** @@ -55,6 +51,10 @@ private static final VariantContextUtils.JexlVCMatchExp exp = new 
VariantContextUtils.JexlVCMatchExp("name", VariantContextUtils.engine.get().createExpression("QUAL > 500.0")); + private static final JexlVCMatchExp missingValueExpression = new VariantContextUtils.JexlVCMatchExp( + "Zis10", VariantContextUtils.engine.get().createExpression("Z==10")); + + // SNP alleles: A[ref]/T[alt] at chr1:10. One (crappy) sample, one (bare minimum) VC. private static final SimpleFeature eventLoc = new SimpleFeature("chr1", 10, 10); private static final Allele Aref = Allele.create("A", true); @@ -87,7 +87,45 @@ public void testGetValue() { // eval our known expression Assert.assertTrue(!jexlMap.get(exp)); } - + + @Test(dataProvider = "getMissingValueTestData") + public void testMissingBehaviorThroughMatch(VariantContext vc, JexlMissingValueTreatment missingValueTreatment, boolean expected, Class expectedException){ + if(expectedException == null) { + Assert.assertEquals(VariantContextUtils.match(vc, null, missingValueExpression, missingValueTreatment), expected); + } else { + Assert.assertThrows(expectedException, () -> VariantContextUtils.match(vc, null, missingValueExpression, missingValueTreatment)); + } + } + + @Test(dataProvider = "getMissingValueTestData") + public void testMissingBehavior(VariantContext vc, JexlMissingValueTreatment missingValueTreatment, boolean expected, Class expectedException){ + final JEXLMap jexlMap = new JEXLMap(Collections.singletonList(missingValueExpression), vc, null, missingValueTreatment); + if(expectedException == null) { + Assert.assertEquals((boolean) jexlMap.get(missingValueExpression), expected); + } else { + Assert.assertThrows(expectedException, () -> jexlMap.get(missingValueExpression)); + } + } + + @DataProvider + public Object[][] getMissingValueTestData(){ + final List alleles = Arrays.asList(Aref, Talt); + VariantContextBuilder vcb = new VariantContextBuilder("test", "chr1", 10, 10, alleles); + VariantContext noZ = vcb.make(); + VariantContext hasZ = vcb.attribute("Z", 0).make(); + + return new 
Object[][]{ + {noZ, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT, false, null}, + {hasZ, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT, false, null}, //the value isn't missing but the expression is false + {noZ, JexlMissingValueTreatment.TREAT_AS_MATCH, true, null}, + {hasZ, JexlMissingValueTreatment.TREAT_AS_MATCH, false, null}, //the value isn't missing but the expression is false + {noZ, JexlMissingValueTreatment.TREAT_AS_MISMATCH, false, null}, + {hasZ, JexlMissingValueTreatment.TREAT_AS_MISMATCH, false, null}, + {noZ, JexlMissingValueTreatment.THROW, false, IllegalArgumentException.class}, + {hasZ, JexlMissingValueTreatment.THROW, false, null} + }; + } + // Testing the new 'FT' and 'isPassFT' expressions in the JEXL map @Test public void testJEXLGenotypeFilters() { diff --git a/src/test/java/htsjdk/variant/variantcontext/filter/CompoundFilterTest.java b/src/test/java/htsjdk/variant/variantcontext/filter/CompoundFilterTest.java index 0a4985373..efa788efb 100644 --- a/src/test/java/htsjdk/variant/variantcontext/filter/CompoundFilterTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/filter/CompoundFilterTest.java @@ -1,5 +1,6 @@ package htsjdk.variant.variantcontext.filter; +import htsjdk.HtsjdkTest; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; @@ -15,7 +16,7 @@ /** * Created by farjoun on 9/9/15. 
*/ -public class CompoundFilterTest { +public class CompoundFilterTest extends HtsjdkTest { static AllPassFilter pass = new AllPassFilter(); static AllFailFilter fail = new AllFailFilter(); @@ -75,4 +76,4 @@ public void testCompoundFilter(final VariantContextFilter filter, final boolean shouldPass) { Assert.assertEquals(filter.test(vc), shouldPass, filter.toString()); } -} \ No newline at end of file +} diff --git a/src/test/java/htsjdk/variant/variantcontext/filter/FilteringVariantContextIteratorTest.java b/src/test/java/htsjdk/variant/variantcontext/filter/FilteringVariantContextIteratorTest.java index d8decfdd9..eeb221378 100644 --- a/src/test/java/htsjdk/variant/variantcontext/filter/FilteringVariantContextIteratorTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/filter/FilteringVariantContextIteratorTest.java @@ -24,6 +24,7 @@ package htsjdk.variant.variantcontext.filter; +import htsjdk.HtsjdkTest; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFFileReader; import org.testng.Assert; @@ -36,7 +37,7 @@ * Tests for testing the (VariantContext)FilteringVariantContextIterator, and the HeterozygosityFilter */ -public class FilteringVariantContextIteratorTest { +public class FilteringVariantContextIteratorTest extends HtsjdkTest { final File testDir = new File("src/test/resources/htsjdk/variant"); @DataProvider diff --git a/src/test/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilterTest.java b/src/test/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilterTest.java index 809133ff3..a615f8140 100644 --- a/src/test/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilterTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilterTest.java @@ -23,6 +23,7 @@ */ package htsjdk.variant.variantcontext.filter; +import htsjdk.HtsjdkTest; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.GenotypeBuilder; import 
htsjdk.variant.variantcontext.VariantContext; @@ -37,7 +38,7 @@ import java.util.Iterator; import java.util.List; -public class GenotypeQualityFilterTest { +public class GenotypeQualityFilterTest extends HtsjdkTest { Allele refA = Allele.create("A", true); Allele G = Allele.create("G", false); diff --git a/src/test/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java b/src/test/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java index b4cd3a84f..e2e988184 100644 --- a/src/test/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java @@ -23,6 +23,7 @@ */ package htsjdk.variant.variantcontext.filter; +import htsjdk.HtsjdkTest; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.GenotypeBuilder; import htsjdk.variant.variantcontext.VariantContext; @@ -37,7 +38,7 @@ import java.util.Iterator; import java.util.List; -public class HeterozygosityFilterTest { +public class HeterozygosityFilterTest extends HtsjdkTest { Allele refA = Allele.create("A", true); Allele G = Allele.create("G", false); diff --git a/src/test/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilterTest.java b/src/test/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilterTest.java index 3993b792f..7fb98c33b 100644 --- a/src/test/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilterTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilterTest.java @@ -23,6 +23,7 @@ */ package htsjdk.variant.variantcontext.filter; +import htsjdk.HtsjdkTest; import htsjdk.variant.vcf.VCFFileReader; import org.testng.Assert; @@ -36,7 +37,7 @@ * @author Pierre Lindenbaum PhD Institut du Thorax - INSERM - Nantes - France */ -public class JavascriptVariantFilterTest { +public class JavascriptVariantFilterTest extends HtsjdkTest { final File testDir = new 
File("src/test/resources/htsjdk/variant"); @DataProvider diff --git a/src/test/java/htsjdk/variant/variantcontext/filter/PassingVariantFilterTest.java b/src/test/java/htsjdk/variant/variantcontext/filter/PassingVariantFilterTest.java index 3cbb60ca3..da2826495 100644 --- a/src/test/java/htsjdk/variant/variantcontext/filter/PassingVariantFilterTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/filter/PassingVariantFilterTest.java @@ -1,5 +1,6 @@ package htsjdk.variant.variantcontext.filter; +import htsjdk.HtsjdkTest; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; @@ -16,7 +17,7 @@ /** * Created by farjoun on 9/10/15. */ -public class PassingVariantFilterTest { +public class PassingVariantFilterTest extends HtsjdkTest { Allele refA = Allele.create("A", true); Allele G = Allele.create("G", false); @@ -43,4 +44,4 @@ public void testPassingVariantFilter(final VariantContext vc, final boolean shou Assert.assertEquals(passingVariantFilter.test(vc), shouldPass, vc.toString()); } -} \ No newline at end of file +} diff --git a/src/test/java/htsjdk/variant/variantcontext/filter/SnpFilterTest.java b/src/test/java/htsjdk/variant/variantcontext/filter/SnpFilterTest.java index 74f1bb5de..e091ca0b6 100644 --- a/src/test/java/htsjdk/variant/variantcontext/filter/SnpFilterTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/filter/SnpFilterTest.java @@ -1,5 +1,6 @@ package htsjdk.variant.variantcontext.filter; +import htsjdk.HtsjdkTest; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; @@ -16,7 +17,7 @@ /** * Created by farjoun on 9/9/15. 
*/ -public class SnpFilterTest { +public class SnpFilterTest extends HtsjdkTest { Allele refA = Allele.create("A", true); Allele refAG = Allele.create("AG", true); diff --git a/src/test/java/htsjdk/variant/variantcontext/writer/TabixOnTheFlyIndexCreationTest.java b/src/test/java/htsjdk/variant/variantcontext/writer/TabixOnTheFlyIndexCreationTest.java index 2fd1520ba..f8c8fd193 100644 --- a/src/test/java/htsjdk/variant/variantcontext/writer/TabixOnTheFlyIndexCreationTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/writer/TabixOnTheFlyIndexCreationTest.java @@ -23,6 +23,7 @@ */ package htsjdk.variant.variantcontext.writer; +import htsjdk.HtsjdkTest; import htsjdk.tribble.AbstractFeatureReader; import htsjdk.tribble.CloseableTribbleIterator; import htsjdk.tribble.FeatureReader; @@ -36,7 +37,7 @@ import java.io.File; import java.util.EnumSet; -public class TabixOnTheFlyIndexCreationTest { +public class TabixOnTheFlyIndexCreationTest extends HtsjdkTest { private static final File SMALL_VCF = new File("src/test/resources/htsjdk/tribble/tabix/trioDup.vcf.gz"); @Test public void simpleTest() throws Exception { diff --git a/src/test/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java index 179c4cb2f..5e33e5c44 100644 --- a/src/test/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java @@ -396,4 +396,12 @@ public void testModifyOption() { Assert.assertFalse(builder.isOptionSet(option)); // has been unset } } + + @Test + public void testStdOut() { + final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile("/dev/stdout").clearOptions().build(); + OutputStream s = ((VCFWriter) writer).getOutputStream(); + Assert.assertNotNull(((VCFWriter) writer).getOutputStream()); + 
Assert.assertNotEquals(((VCFWriter) writer).getStreamName(), IndexingVariantContextWriter.DEFAULT_READER_NAME); + } } diff --git a/src/test/java/htsjdk/variant/vcf/VCFEncoderTest.java b/src/test/java/htsjdk/variant/vcf/VCFEncoderTest.java index 2c4ff0f08..6d4c23b9d 100644 --- a/src/test/java/htsjdk/variant/vcf/VCFEncoderTest.java +++ b/src/test/java/htsjdk/variant/vcf/VCFEncoderTest.java @@ -1,5 +1,6 @@ package htsjdk.variant.vcf; +import htsjdk.HtsjdkTest; import htsjdk.tribble.util.ParsingUtils; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.GenotypeBuilder; @@ -18,7 +19,7 @@ import java.util.Set; import java.util.TreeSet; -public class VCFEncoderTest { +public class VCFEncoderTest extends HtsjdkTest { @DataProvider(name = "VCFWriterDoubleFormatTestData") public Object[][] makeVCFWriterDoubleFormatTestData() { diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java index e9135cc72..89d07f48a 100644 --- a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java +++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java @@ -37,6 +37,7 @@ import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -120,10 +121,18 @@ public void testVCFHeaderSampleRenamingSingleSampleVCF() throws Exception { } } - @Test - public void testVCFHeaderDictionaryMerging() { - VCFHeader headerOne = new VCFFileReader(new File(variantTestDataRoot + "dbsnp_135.b37.1000.vcf"), false).getFileHeader(); - VCFHeader headerTwo = new VCFHeader(headerOne); // deep copy + @DataProvider + public Object[][] testVCFHeaderDictionaryMergingData() { + return new Object[][]{ + {"diagnosis_targets_testfile.vcf"}, // numerically ordered contigs + {"dbsnp_135.b37.1000.vcf"} // lexicographically ordered contigs + }; + } + + @Test(dataProvider = 
"testVCFHeaderDictionaryMergingData") + public void testVCFHeaderDictionaryMerging(final String vcfFileName) { + final VCFHeader headerOne = new VCFFileReader(new File(variantTestDataRoot + vcfFileName), false).getFileHeader(); + final VCFHeader headerTwo = new VCFHeader(headerOne); // deep copy final List sampleList = new ArrayList(); sampleList.addAll(headerOne.getSampleNamesInOrder()); diff --git a/src/test/resources/htsjdk/samtools/SequenceUtil/upper_and_lowercase_read.sam b/src/test/resources/htsjdk/samtools/SequenceUtil/upper_and_lowercase_read.sam index 82efe858e..335d8159c 100644 --- a/src/test/resources/htsjdk/samtools/SequenceUtil/upper_and_lowercase_read.sam +++ b/src/test/resources/htsjdk/samtools/SequenceUtil/upper_and_lowercase_read.sam @@ -7,4 +7,4 @@ read1 0 chr1 1 0 16M * 0 0 AcGtAcGTaCGtAcGt AAAAAAAAAAAAAAAA NM:i:0 read2 0 chr1 1 0 16M * 0 0 AcGtAcGTaCGtAcGt AAAAAAAAAAAAAAAA NM:i:0 read3 0 chr2 1 0 16M * 0 0 AcGtAcGTaCGtAcGt AAAAAAAAAAAAAAAA NM:i:8 MD:Z:0T2A0T2A0t2a0t2a0 read4 0 chr2 1 0 8M * 0 0 TCGATCGA AAAAAAAA NM:i:0 -read5 0 chr2 1 0 4M1D2M1S * 0 0 TCGACGAA AAAAAAAA NM:i:1 MD:Z:4^T2 +read5 0 chr2 1 0 4M1D2M2S * 0 0 TCGACGAA AAAAAAAA NM:i:1 MD:Z:4^T2 diff --git a/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.cram b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.cram new file mode 100644 index 000000000..57c58dfd0 Binary files /dev/null and b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.cram differ diff --git a/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.fa b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.fa new file mode 100644 index 000000000..7c2ec2a88 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.fa @@ -0,0 +1,71 @@ +>17 17:1-4200 +AAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAA +TGTGCTCTGGGGTCTCTGGGGTCTCACCCACGACCAACTCCCTGGGCCTGGCACCAGGGA 
+GCTTAACAAACATCTGTCCAGCGAATACCTGCATCCCTAGAAGTGAAGCCACCGCCCAAA +GACACGCCCATGTCCAGCTTAACCTGCATCCCTAGAAGTGAAGGCACCGCCCAAAGACAC +GCCCATGTCCAGCTTATTCTGCCCAGTTCCTCTCCAGAAAGGCTGCATGGTTGACACACA +GTGCCTGCGACAAAGCTGAATGCTATCATTTAAAAACTCCTTGCTGGTTTGAGAGGCAGA +AAATGATATCTCATAGTTGCTTTACTTTGCATATTTTAAAATTGTGACTTTCATGGCATA +AATAATACTGGTTTATTACAGAAGCACTAGAAAATGCATGTGGACAAAAGTTGGGATTAG +GAGAGAGAAATGAAGACATATGTCCACACAAAAACCTGTTCATTGCAGCTTTCTACCATC +ACCAAAAATTGCAAACAACCACACGCCCTTCAACTGGGGAACTCATCAACAACAAACTTG +TGGTTTACCCACACAATGGAAGACCACTTAGCAACAAAAAGGACCAAACTCCTGGTACAT +GCAACTGACAGATGAATCTCAAACGCATTCCTCCGTGTGAAAGAAGCCGGACTCACAGGG +CAACACACTATCTGACTGTTTCATGGGAAAGTCTGGAAACGGCAACACCATTGAGACAGA +AAACAGGTGAGTGGTTGCCTGGGGCCAGGGAACTTTCTGGGGTCATATTCTCTGTGTTGA +TTCTGGTGGTGGAAACAAGACTGTCCCAGCCTGGGTGATACAGCGAGACCCCATCTCTAC +CAAAAAATTAAAAATTAGCTGGGCATGGTGGTGCATGCCTGTAGTCCCAGCTATTCACAG +TGCTGAGGTGGGAAGATGCTTGAGCCCAGGAGTTCAAGGCTGCAATGAGCTATGATTGCG +CCACTGCACTTTGGCCTGGACAACAGAGCAAAACCCTGTCTCTAAAAAAAGAAAAGAAAA +GAAAAACTCACTGGATATGAATGATACAGGTTGAGGATCCATTATCTGAAATGCTTGGAC +CAGATGTTTTGAATTTTGGATTTTTTCATATTTTGTAATCTTTGCAGTATATTTACCAGT +TCAGCATCCCTAACTCAAAAATTCAAAAATCTGAAATCCCAAACGCGCCAATAAGCATTC +CCTTTGAGCGTCATGTCGGTGCTTGGAATGTTTGGGGTTTTGGATTTACAGCTTTGGGAC +GCTCAACCTGTACCTCAATAAACCTGATTTTAAAAAAGTTTGGGGGGATTCCCCTAAGCC +CGCCACCCGGAGACAGCGGATTTCCTTAGTTACTTACTATGCTCCTTGGCCATTTCTCTA +GGTATTGGTATATTGTGTCTGCTGTGAACTGTCCTTGGCCTGTTTGGTGACGGGTGAGGA +GCAGGGACAGAAGGGTCCTGCGTGCCCTGCCTTCACAAGCCCCTGGAAGGAAAGTTGTTT +TGGGATCTCTGCACCCTCAGCCTGGACAACTTGTGCCCATCTGGTGACCCCTCACTCAGC +CACCAGACTTCCACGACAGGCTCCAGCCTCGGCACCTTCAGCCATGGACAGTTCCGCCAG +CGTTGCCCTCTGTTCTGCTGTTTTCTCTACCAGAAGTGCCCTTCCCTCCTCACCTGACCA +CTCTGGGGAAATCCCTCAGCACCCTCCCTGAGCATACCCTACTCTGGCACAAGCCCACCC +TGCAAAGCCCCTGAGGCCCGCCCTGTGGCGTCTCTCCCTCCCTTGCTGTCAGGACAGTGG +TCCTGGCCACCGGGGCTCACGGAGCCGCCCTGTGCCGTGTACCTCTGAGCCCTCTGCACA +GTGCCTTCTGCTTGCCTGTGGCTTTGAGAAGAAACCCCTTCTGGTTATACATAAGACAGC +CAGAGAAGGGAGTTGCCCAGGGTGGCACAGCACGTTGCTGCCAGTTACTGCCATTTTCAC 
+GGGCATGAAATGGAGATAACAACAGGAGCGACCGCACAGGCTGCTGAGCGCGTCACACGC +AGCCATCGCGCAGCTCAGGGATATTACGTGTAACTCGACATGTCAGCGATTGTCACAGGC +ACTGCTACTCCTGGGGTTTTCCATCAAACCCTCAAGAGCTGGGCCTGGGGTCAACTTCCG +GCCTGGGGAAACTGGGGCAAGTATCACCAGAGATGAGCTTTATAAAAATAATGGTGCTAG +CTGGGCATGGTGGCTTGCACCTGTAATCCCAGCACTTTGGGAGGCCGAGCTAGGAGGATC +GTTTGAGTCCAGCAGTTTGAGACCAGCCTGGCCAATACGGCAAAACCCAGTCTCTACAAA +AAATACAAAAAACAACTAGCCAGGCGTGGTGGTGCACACCTGTAGTCCCAGCTACTCAGG +AGGCTGAGGGGGAAGGACTGCTTGAGCCCAGGAGTTTGAGGCTGCTGTGAGCTGTGATCG +CATCACTGCATTCCAGCCCGGTGACAGAGTGAGTCACTGTCTCAAAAAAGAAAGGAAGAA +ATAAAGAAAACAAATAAAAATAATAGTGCAGACAAAAGGCCTTGACCCATCTAGCTTTGG +CCCTCAGCATCAACCGCTAGATACGTCCCTCCCTTTCTTCTGGGGCACAGGTCACACTCT +CTTCCAGGTCTAGGATGCAGCTGAGGGGTGCCCCTCTTACCATCTAATCTGTGCCCTTAT +TTCCTCTGCTTTAGTGAGGAAGAGGCCCCTGGTCCATGAAGGGGCCTTTCAGAGACGGGG +ACCCCTGAGGAGCCCCGAGCAGCAGCCGTCGTGTCTCACCCAGGGTGTCTGAAACAGATG +TGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCT +TTGAAAGGCCACGTGACCTGGCCCACGGCTGGCAGGTGGGACCCAGCTGCAGGGGTCCAG +CAGCACCCACAGCAGCCACCTGTGGCAGGGAGGAGCTTGTGGTACAGTGGACAGGCCCTG +CCCAGATGGCCCCCCGCCTGCCTGTGGAAGTTGACCAGACCATCTGTCACAGCAGGTAAG +ACTCTGCTTTCTGGGCAACCCAGCAGGTGACCCTGGAATTCCTGTCCATCTGGCAGGTGG +GCATTGAAACTGGTTTAAAAATGTCACACCATAGGCCGGGCACAGTGGCTCACGCCTGTA +ATCCCAGCCCTTTGGGAGGCCAGGGTGGGTGGATCACTTGAGGTCAGGAGTTCAAGACCA +GCCTGGCCAACATGGTGAAACCCCGTCTACTAAAAATACAAAAATTAGCCTGGCGTGGTG +GCGCATGCCTGTAATCCCAGCTACTTGGGAAGCTGAGGGATGAGAACTGCTTGAACCTGG +GAGGCAGACGTTGCAGTGAGCTGAGATCACGCCACTGCACTCCAGCCTGGGCAACAGAGT +AAGACTCTGTCTCAAAAAAAAAAAAATCACACCATTTTGGCTTCAGATTGCATATCCTCC +TGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAGAAAAAACATATAT +ATACGCAAACCAGTATCCTACTGTGTGTGTCGTTTGTTGTGTTTTCGACAGCTGTCCGTG +TTATAATAATTCCTCTAGTTCAAATTTATTCATTTTTAACTTCATAGTACCACATTCTAC +ACACTGCCCATGTCCCCTCAAGCTTCCCCTGGCTCCTGCAACCACAAATCTACTCTCTGC +CTCTGTGGGTTGACCTATTCTGGACACGTCATAGAAATAGAGTCCTGCAACACGTGGCCG +TCTGTGTCTGGCTTCTCTCGCTTAGCATCTTGTTTCCAAGGTCCTCCCACAGTGTAGCAT +GCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAT 
+GGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACACACCCGCT +ACACTCCTTCTTAGGGCTGATATTCCACGCACCCGCTACACTCCTTCTTAGGGCTGATAT +TCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTT +CTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCAC diff --git a/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.fa.fai b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.fa.fai new file mode 100644 index 000000000..c2112667e --- /dev/null +++ b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.fa.fai @@ -0,0 +1 @@ +17 4200 14 60 61 diff --git a/src/test/resources/htsjdk/samtools/ValidateSamFileTest/seq_qual_len_mismatch.sam b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/seq_qual_len_mismatch.sam new file mode 100644 index 000000000..3c689b135 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/seq_qual_len_mismatch.sam @@ -0,0 +1,21 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr2 LN:101 +@SQ SN:chr3 LN:101 +@SQ SN:chr4 LN:101 +@SQ SN:chr5 LN:101 +@SQ SN:chr6 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:Hi,Mom! LB:my-library PL:ILLUMINA +@RG ID:1 SM:Hi,Mom! LB:my-library PL:ILLUMINA +@RG ID:2 SM:Hi,Mom! LB:my-library PL:Illumina +@PG ID:1 PN:Hey! 
VN:2.0 +both_reads_align_clip_marked 1107 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/ RG:Z:0 PG:Z:1 NM:i:0 MQ:i:255 XT:Z:foo OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 +both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/ RG:Z:1 PG:Z:1 NM:i:3 MQ:i:255 XT:Z:foo OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 +read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/ RG:Z:2 PG:Z:1 NM:i:8 MQ:i:255 XT:Z:foo2 OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 +both_reads_align_clip_adapter 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/ RG:Z:1 PG:Z:1 NM:i:1 MQ:i:255 XT:Z:foo2 OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 +both_reads_align_clip_adapter 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/ RG:Z:1 PG:Z:1 NM:i:1 MQ:i:255 XT:Z:foo2 
OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 +both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1' RG:Z:0 PG:Z:1 NM:i:5 MQ:i:255 OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 +read_2_too_many_gaps 163 chr7 302 255 10M1D10M5I76M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1' RG:Z:2 PG:Z:1 NM:i:6 MQ:i:255 OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 +both_reads_present_only_first_aligns 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1' RG:Z:1 PG:Z:1 diff --git a/src/test/resources/htsjdk/samtools/cram/CEUTrio.HiSeq.WGS.b37.NA12878.20.first.8000.bam b/src/test/resources/htsjdk/samtools/cram/CEUTrio.HiSeq.WGS.b37.NA12878.20.first.8000.bam new file mode 100644 index 000000000..9df4f69dd Binary files /dev/null and b/src/test/resources/htsjdk/samtools/cram/CEUTrio.HiSeq.WGS.b37.NA12878.20.first.8000.bam differ diff --git a/src/test/resources/htsjdk/samtools/cram/amb#amb.2.1.cram b/src/test/resources/htsjdk/samtools/cram/amb#amb.2.1.cram new file mode 100644 index 000000000..6b19d5cd3 Binary files /dev/null and b/src/test/resources/htsjdk/samtools/cram/amb#amb.2.1.cram differ diff --git a/src/test/resources/htsjdk/samtools/cram/amb#amb.3.0.cram b/src/test/resources/htsjdk/samtools/cram/amb#amb.3.0.cram new file mode 100644 index 000000000..e683dc858 
Binary files /dev/null and b/src/test/resources/htsjdk/samtools/cram/amb#amb.3.0.cram differ diff --git a/src/test/resources/htsjdk/samtools/cram/amb#amb.sam b/src/test/resources/htsjdk/samtools/cram/amb#amb.sam new file mode 100644 index 000000000..0640c90d2 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/cram/amb#amb.sam @@ -0,0 +1,57 @@ +@HD VN:1.4 GO:none SO:coordinate +@SQ SN:iupac LN:31 M5:f88a72084e90c68cc7aa569bbf257e70 +@RG ID:ID SM:foo +read_A 0 iupac 1 86 30M * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ****************************** +read_B 0 iupac 1 86 30M * 0 0 BBBBBBBBBBBBBBBBBBBBBBBBBBBBBB ****************************** +read_C 0 iupac 1 86 30M * 0 0 CCCCCCCCCCCCCCCCCCCCCCCCCCCCCC ****************************** +read_D 0 iupac 1 86 30M * 0 0 DDDDDDDDDDDDDDDDDDDDDDDDDDDDDD ****************************** +read_E 4 iupac 1 86 30M * 0 0 EEEEEEEEEEEEEEEEEEEEEEEEEEEEEE ****************************** +read_F 0 iupac 1 86 30M * 0 0 FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF ****************************** +read_G 0 iupac 1 86 30M * 0 0 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGG ****************************** +read_H 0 iupac 1 86 30M * 0 0 HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH ****************************** +read_I 0 iupac 1 86 30M * 0 0 IIIIIIIIIIIIIIIIIIIIIIIIIIIIII ****************************** +read_J 0 iupac 1 86 30M * 0 0 JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ ****************************** +read_K 0 iupac 1 86 30M * 0 0 KKKKKKKKKKKKKKKKKKKKKKKKKKKKKK ****************************** +read_L 0 iupac 1 86 30M * 0 0 LLLLLLLLLLLLLLLLLLLLLLLLLLLLLL ****************************** +read_M 0 iupac 1 86 30M * 0 0 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM ****************************** +read_N 0 iupac 1 86 30M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN ****************************** +read_O 0 iupac 1 86 30M * 0 0 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOO ****************************** +read_P 0 iupac 1 86 30M * 0 0 PPPPPPPPPPPPPPPPPPPPPPPPPPPPPP ****************************** +read_Q 0 iupac 1 86 30M * 0 0 
QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ ****************************** +read_R 0 iupac 1 86 30M * 0 0 RRRRRRRRRRRRRRRRRRRRRRRRRRRRRR ****************************** +read_S 0 iupac 1 86 30M * 0 0 SSSSSSSSSSSSSSSSSSSSSSSSSSSSSS ****************************** +read_T 0 iupac 1 86 30M * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTTTTTTT ****************************** +read_U 0 iupac 1 86 30M * 0 0 UUUUUUUUUUUUUUUUUUUUUUUUUUUUUU ****************************** +read_V 0 iupac 1 86 30M * 0 0 VVVVVVVVVVVVVVVVVVVVVVVVVVVVVV ****************************** +read_W 0 iupac 1 86 30M * 0 0 WWWWWWWWWWWWWWWWWWWWWWWWWWWWWW ****************************** +read_X 0 iupac 1 86 30M * 0 0 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ****************************** +read_Y 0 iupac 1 86 30M * 0 0 YYYYYYYYYYYYYYYYYYYYYYYYYYYYYY ****************************** +read_Z 0 iupac 1 86 30M * 0 0 ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ ****************************** +read_a 0 iupac 1 86 30M * 0 0 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ****************************** +read_b 0 iupac 1 86 30M * 0 0 bbbbbbbbbbbbbbbbbbbbbbbbbbbbbb ****************************** +read_c 0 iupac 1 86 30M * 0 0 cccccccccccccccccccccccccccccc ****************************** +read_d 0 iupac 1 86 30M * 0 0 dddddddddddddddddddddddddddddd ****************************** +read_e 0 iupac 1 86 30M * 0 0 eeeeeeeeeeeeeeeeeeeeeeeeeeeeee ****************************** +read_f 0 iupac 1 86 30M * 0 0 ffffffffffffffffffffffffffffff ****************************** +read_g 0 iupac 1 86 30M * 0 0 gggggggggggggggggggggggggggggg ****************************** +read_h 0 iupac 1 86 30M * 0 0 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhh ****************************** +read_i 0 iupac 1 86 30M * 0 0 iiiiiiiiiiiiiiiiiiiiiiiiiiiiii ****************************** +read_j 0 iupac 1 86 30M * 0 0 jjjjjjjjjjjjjjjjjjjjjjjjjjjjjj ****************************** +read_k 0 iupac 1 86 30M * 0 0 kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk ****************************** +read_l 0 iupac 1 86 30M * 0 0 llllllllllllllllllllllllllllll 
****************************** +read_m 0 iupac 1 86 30M * 0 0 mmmmmmmmmmmmmmmmmmmmmmmmmmmmmm ****************************** +read_n 0 iupac 1 86 30M * 0 0 nnnnnnnnnnnnnnnnnnnnnnnnnnnnnn ****************************** +read_o 0 iupac 1 86 30M * 0 0 oooooooooooooooooooooooooooooo ****************************** +read_p 0 iupac 1 86 30M * 0 0 pppppppppppppppppppppppppppppp ****************************** +read_q 0 iupac 1 86 30M * 0 0 qqqqqqqqqqqqqqqqqqqqqqqqqqqqqq ****************************** +read_r 0 iupac 1 86 30M * 0 0 rrrrrrrrrrrrrrrrrrrrrrrrrrrrrr ****************************** +read_s 0 iupac 1 86 30M * 0 0 ssssssssssssssssssssssssssssss ****************************** +read_t 0 iupac 1 86 30M * 0 0 tttttttttttttttttttttttttttttt ****************************** +read_u 0 iupac 1 86 30M * 0 0 uuuuuuuuuuuuuuuuuuuuuuuuuuuuuu ****************************** +read_v 0 iupac 1 86 30M * 0 0 vvvvvvvvvvvvvvvvvvvvvvvvvvvvvv ****************************** +read_w 0 iupac 1 86 30M * 0 0 wwwwwwwwwwwwwwwwwwwwwwwwwwwwww ****************************** +read_x 0 iupac 1 86 30M * 0 0 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ****************************** +read_y 0 iupac 1 86 30M * 0 0 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyy ****************************** +read_z 0 iupac 1 86 30M * 0 0 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzz ****************************** +read_dot 0 iupac 1 86 30M * 0 0 .............................. 
****************************** +read_equals 0 iupac 1 86 30M * 0 0 ============================== ****************************** diff --git a/src/test/resources/htsjdk/samtools/cram/amb.fa b/src/test/resources/htsjdk/samtools/cram/amb.fa new file mode 100644 index 000000000..040dd1c27 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/cram/amb.fa @@ -0,0 +1,2 @@ +>iupac 31 857152f076709b2c6067edcbaaba65c7 +.aAbBcCdDgGhHkKmMnNrRsStTvVwWyY diff --git a/src/test/resources/htsjdk/samtools/cram/amb.fa.fai b/src/test/resources/htsjdk/samtools/cram/amb.fa.fai new file mode 100644 index 000000000..89701fd82 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/cram/amb.fa.fai @@ -0,0 +1 @@ +iupac 31 49 31 32 diff --git a/src/test/resources/htsjdk/samtools/cram/ambiguityCodes.fasta b/src/test/resources/htsjdk/samtools/cram/ambiguityCodes.fasta new file mode 100644 index 000000000..430b59a1d --- /dev/null +++ b/src/test/resources/htsjdk/samtools/cram/ambiguityCodes.fasta @@ -0,0 +1,2 @@ +>Sheila +GCTAGCRM.gactAAAAAAAAAA diff --git a/src/test/resources/htsjdk/samtools/cram/ambiguityCodes.fasta.fai b/src/test/resources/htsjdk/samtools/cram/ambiguityCodes.fasta.fai new file mode 100644 index 000000000..d35aa7ed6 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/cram/ambiguityCodes.fasta.fai @@ -0,0 +1 @@ +Sheila 23 8 23 24 diff --git a/src/test/resources/htsjdk/samtools/cram/fieldarith.sam b/src/test/resources/htsjdk/samtools/cram/fieldarith.sam deleted file mode 100644 index 180d1e8ff..000000000 --- a/src/test/resources/htsjdk/samtools/cram/fieldarith.sam +++ /dev/null @@ -1,15 +0,0 @@ -@SQ SN:one LN:1000 -@SQ SN:two LN:500 -@CO For each SAM record that has each listed aux field, performs these tests: -@CO XQ is the expected result for bam_cigar2qlen() -@CO XR is the expected result for bam_cigar2rlen() -@CO XE is the expected result for bam_endpos() -@CO (Note that these are all zero-based, while POS is one-based in SAM) -r1 0 one 50 20 8M * 0 0 ATGCATGC qqqqqqqq 
XQ:i:8 XR:i:8 XE:i:57 -r2 0 one 100 20 50M * 0 0 ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq XQ:i:50 XR:i:50 XE:i:149 -unmapped 5 two 200 0 * two 200 0 ATGCATGC qqqqqqqq XQ:i:0 XR:i:0 XE:i:200 -hascigar 5 two 200 0 6M2S two 200 0 ATGCATGC qqqqqqqq XQ:i:8 XR:i:6 XE:i:200 -s1 0 one 300 20 2M * 0 0 AT qq XQ:i:2 XR:i:2 XE:i:301 -su1 4 * 0 0 * * 0 0 AT qq XQ:i:0 XR:i:0 XE:i:0 -su2 5 two 400 0 * two 400 0 AT qq XQ:i:0 XR:i:0 XE:i:400 -su3 4 one 500 0 2M * 0 0 AT qq XQ:i:2 XR:i:2 XE:i:500 diff --git a/src/test/resources/htsjdk/samtools/cram/human_g1k_v37.20.subset.dict b/src/test/resources/htsjdk/samtools/cram/human_g1k_v37.20.subset.dict new file mode 100644 index 000000000..cfab32082 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/cram/human_g1k_v37.20.subset.dict @@ -0,0 +1,2 @@ +@HD VN:1.0 SO:unsorted +@SQ SN:20 LN:9000 M5:46ad963a88a95089707c4639ad196126 UR:file:///Users/cmn/projects/cram/final/hum.20.fasta diff --git a/src/test/resources/htsjdk/samtools/cram/human_g1k_v37.20.subset.fasta b/src/test/resources/htsjdk/samtools/cram/human_g1k_v37.20.subset.fasta new file mode 100644 index 000000000..27ed99408 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/cram/human_g1k_v37.20.subset.fasta @@ -0,0 +1,151 @@ +>20 +TTGTCCCTGTATCATTTACTGAAAAGACTCTTCTTTTCCCCATTAGATGGTCTTGAAACG +TTGTTAAAATTCAACTGACCATAGGTGTATTGGTTTATTTCTGTACTCTTAGTAGATTCC +ATTGACCTATATCTCTATCCTTATGCCAGTACCACAGTTTTGTTTACTACAGCTTTGTAG +TAAATTTTGAACTCTAAAGTGTTAGTTCTCTAACTTTGTTTGTTTTTCAAGAGTGTTTTG +ACTCTTCTTACTGCATCCCTTGCATTTCCATATGGACTTTATAATCAGCCATGTCAACTT +CTGCAAGAAAGACAGCTAGGATTTTGATAAGGATTGTGTTGAATCTGTAGTCCAATTTTG +GGAATACTACCATGTTAACATCGTCTTCCCATCCATGCACGTGCAATAGCTTACCATTTA +TTTGGTCTTCCTCAATTTCTGTCAACAATGATTTGTAGTTTTCAGTTGCAAGTCTTGCAC +TTCTTTTGTTAAACTTTTTCCAAATATTTATTCTTCTTAATTACATAATTCTCATAATTA +ATAAAATTCTGAAATTTTCTTAATTTCATTTTTGTGGCCATGCACTTTAAAACTTGCCTT +TTAACAAGACTTCCAGATGATTCTTGTGAACATTAAGTTTGTGAAGTGCTTCTCTATTGA 
+CAAACTGAACTCACGTGACATTCCACACAGCACTGGCAAATTCTGTCCCGTAACTCCGCT +AGCTCTCCAACAAGAAGTGACTTGACGCAGCCCAAGGTTACTACTTAACACAATGAATTA +AATGTTTTTAAATAAGAGGAAGCAATAAGATTCTAAAGGCTTTTCTGTTTTAATTTTCAT +GCAATGGAAAACTGGTATTAAATATCTATTTAATTAGGAGGAAACTACAATGCTGACTTT +TGTCTGAATTATGTAGATAAGTGATTCATTTGAAACAATTATTTTGATAATTGTCAATTA +TCCATTTCATTTTAATGCATTTTTTATTCTTTTTTCAAAAATAGCAACAATTACAACAGT +TAAACCTTATAATGAATATGTTTCCTAAACCCTGTTCTACTTTCTGGTTCCAGATCTGAC +ACCAATTACCTTTCTGATTTTGGACAAACCACTTAATATTTGTAACTTACAATTACTTCA +ACTGAATAATAAAAGAATTGGACTAGATTTCTCCAACATCTCTCTCTTTTGGCTTTATGT +TAGATAATGCTAAATTTTCATCATATCCAAACATGCTATATAATTTTATGAACTGTTACA +GAGTCAGACATAAGCAATATAAAGTATGATTCTGAATAAAGGCTGTGAAGTCTCGTTTTT +CATACATAATTACAGGAACCGATCAAATTCAATAAAGCATTTATAGTCATGACAATATAT +TCTCTTAACTTGCAATGTGGTTTTAGGAAACAATGAAACAAATTAAACAAATGCATGATT +CCTTAAATTTTGTTGACTCGGGATTTAGTTACATATAAGACTCTTTTCCCCCAGCCAGAT +TAACCCTGCTCTGTATATGTAATACGCATCTTCCCCCCAAATTTCACAATAATAATTTTA +TGAAAATCAGTATGTAAGTTGCTGCATTGGCATGAAAGGCTACTAATGTGATATCTTTCA +AAGATAAAGTGAACGTTTTTGTAAAAAGCCACCACACTGGAGGCATTTAACAATATTCAT +CTTCCTATGGCATTACTACCCTAGATGTACTTTGCAATATTAATCAAGCCTTGTCTTACT +TCAGGCTTCCTAGAAATAGAGCACAAGGCAGGGATTCTTTCTTGCTCGGTGATTTATTGA +GGAAGTGCTCTCAGAAGAAATTGGTAAGATGCTAAGGAGAACAGCAAAGGACAGGAGAAG +GGGGCTGAGCAGAGATGGGGATCTAACTGGAATCTGGCCTTTGCTGATTACCAGAGCAGC +TCTGGGTGCGAATGGTGAAGTGCTGTTCTATTGACAAACTCCACTCAAGTGACATTCCAT +GCAGCACTGCCAAATTCTGTCCCATAACTCTGCTAGCTCTTCAACAAGAAGTAACTTGAG +GCAGCACAGTTACAGAAAACAGCACAAGAACCAAGGATACTAAACAGTGACTTAAATGTT +TTTAGATAAGAGGAAGCAATGAGATACTATAGGGTTTTAATTTCTGTGCAATGGAAAACT +AGTATTAAATATTTATTAATTAGGAGGAAACGACAATGCTGACTTCTGTCTGCATTATAG +ACAAAATTGAGTTCTCCAACCATGAGGCAAGGTGCTGGCTTTTTGTACCCCTGCACTATT +CATTTGATGGGTGAGTGCACTGTAACTTCCAGACGTCTCTTGGTAGGTGGGCCCCACTGG +CCAAGGGCATTTCCCGGGAGAGTGAACGGCTGTGAGCTACTAGCAGTAGCAGTCACAGCA +GCTGGGAGGAAACACACTGGTCAGGTAAAGGGCCAGCATCTACCAAGGTCCAGTTCCACA +ATTAGTGGAGAAATATTTAATAACATTATTTTTGAAAATAATTAGGCCATATGACTTTGA +CACTTTTTCTCTAGCAAAGTGACTAAAGAGAAGCAGGTTTTTAACTGGGTTTTTATTTCT 
+GTTGTGTCTCTAGCCAAAGCCATGCTGATGTTTGATAGTTTTTTTTTTTCTTTTTTTAAG +AGATGGGGTTTCACCATGTTGGCCATGCTGGTCGTGAACTCCTGACCTCAATGGCCTCCC +AAACTGTTGGGATTACAGACGTGAGCCACCACGCCCAGCCAGAATTTTTTTTCCTAAAGA +ATAGAACAACATTATAAAATTTTAGGCATTAAGGACAAATTTTATTTTATGATTTTCATT +TCGGTGAGTCACAAGATATTCAACACAAAATGAAACTTCACAAAATTTCACAAAATGAAA +AATTCATTTCAGATTCAATATCCACCTCCATCATCCATATTCATTCTTCCAATGTCTCAG +GCCAGAAATTTGGAGTATGTGGCTTCTCCACCTCACACAATTTGCCCTAACTTTAATATA +TACTCAGAAATTACTGGCTTTTCACTGTTTCTATGATTCCCATGTGTAATATACAATACT +CACCATGCATACAATAATACAATAATTCTGTGTCACAACCACACCTAAATTGGTAAGTTT +ATAAGGTTATAAGCTGAGAGGTTTTGCTGATCTTGGCTGAGCTCAGCTGGGCAGGTCTTC +CGGTCTTGGCTGGGGTTCACTGACACACAAGCAGCTGACAGTTGGCTGATCTAGGATGGC +CTCAGCTGGGATGACAGGCTGTTTCCTCACCTTCCAGCAGGCAAGCCAGTCCCAAGAAAG +AGAAGGGTGAAACATGGAGGCCATTAATTGAGCCCATCGCATGAAACGCATCTGTGACAC +CATCACCGTCTGATATCAACTCTCACCTGGATGTTTGCAATCGCCTAACTTTTCTCCTTT +CATTCACTATGCTGCCTTACAAACCTATTCTCCACAAATCAGCTAGAGCAAACCTTTTAA +ATCCTAAGTAGAATGCTACCATTCCTCTGCTCAAACTACTGGAGAGGAGATGCCACAGTC +TTTACTATGGTCTTCAAGACCCTATGGGAGGTAGCCCTGTCTTACAACCATAGCCTCCTA +CCACTGCCCCAACATGCACACTGAGTTCCGGCCACAGCATCTGCTTCCTCTTTATTGATC +TGCCAAGGACATGAATGACTCAGAATCTCAAACTTATTCTTCCCTCAGAATCCACTGAGC +CTACTCCCTCTTTTTCTCAAATGGCAACTTCTCAGAGATGCCTTCTCTGGCTAACGTATA +TGTACTAAAACCTCCATCCAGCACTCTATGTCATCCTTACTATGGTTTATTTTTCTTCAT +AGCACCTATCAATTGGTGAAGTATTAAATATGCATTTTTGTTTGTATGCATCTCTCTCCC +CGCACCAGTATGTGAACTCCACCAGACTGGAGAGCGTGTTTATTTTGTTCACTGCTGTAA +CTCCAGTGTCTAGAACAGTGCCTGGCACACAGTAGGTGTTTAATAATGATTTGTTAAGCT +AAGCCAATGAATAAATATTCTTTTGCCCATGAGATGATGTTAAAAATTTTTCAATTATTC +ATAACTGCCTGTAAGGAATGAATGTTAGTGAATTACTGTGATACCAATGAAAGTTAAATG +ATGCAATGAAGATTGGGTGGATCACAAGGTCAGGAGTTTGAGACCATCCTGGCCAATATG +GTGAAACCCCGTCTCTACTAAAAATACCAAAATTAGCTGGGCATCGTGGCAGGACCCTGT +AGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCGCTTGAACCCAGGAGGCGGAGGTT +GCAGTGAGCCGAGATCACGCCACTGCACTCCAGCCTGGCCGACAGAGTGAGATTCTGTCT +CCAAAAAAAAAAAAAAAAAAGATGTAAATATTATTATTGTTTGCCATCACCCTTATGTGC +TGTCTTGGCTTTATGTCGTGCTTAGATCTTTTCCAGGTGCAATTTGAATTGATGCAACTA 
+ATGGAAACTACAACAGAAGTTCATATTGCCCTATTGTATTACACTATTTGACTCATCTTT +CAGTATCACCAGGTGCATAAGGAAAATTTCAAAAGTAAATCAAGAGAAAGAAATATGATC +GCAGCTTAACACAATGTAAATTTATTATTTGTACTTTTTGTCTAAATGGTTTGCCTAAAA +GACTGAAAGACATTTTATATTAGTTAGAATACTTGAGGATAATAACATAAAAACTTTCCT +TTCCAACTTGTTTATAAAAGGAAATCTTCACTGTTTTGAACATCAGTTATTTTAAACTTT +TAAGTTGTTAGCACAGCAAAAGCAACAAAATTCTAAGTGCAGTAATCACTTTACTGCGTG +GTCATATGAAATCAAGGCAATGTTATGAGTATTACTGGAAAGCTGGACAGAGTAACGGGA +AAAGTGACTAAAACTATGCAAAACTATGCAAAACTAAGCAGATTGTGTCTCTAGAGTATT +TCCCATCTCAAGTTTAGTTATTTACTAATTTGGCAACATCTGACCTATCTTTAATTGTGA +GAAAATAAACAAACACATAAGCCAACTCTCAGAATATGGTTATACATAGGTGTAGCCTAT +GACTTTGAATGTATTTGTTTGAATAGCGTAAAACAAAATAAAAATAAAATCTTGTTACAG +TGCAAGAAACGGCAGTCATCAAACTAAGATGAGGCAAGTGTCATGAAGTATGAAAATATG +GTACCTGAATTCTATTTATTAGAAAGTCTTCACTGAGCTGAGCATGTTTTTTTTAACAAA +TTCAATTACTGATTTGAATATTTATTATACTTAATTATTGCAGCCATGAAAAGAGGTGCT +GGCTGAGGCTGCATTTAATAAAAACATTTAATCAGCTTGAGGTTAGTAAACCATTTAATT +TGTTTTTTCATGAAGATTTAACTTCTAGAATAATTTCATTTATGTATTTTTAGGTATAGC +CCTAGATTCTGGTCTACATAGTATACAAATCATTTTAGAATGACACTAGGTTATTTCAAC +TGCTTTTCTACAGAAGTGTTAAATAAGGGAGTAAAGTGTTGGCTTTTTCCATAATTGAAA +TAAATGCACAATGAGCAGTAACATCCTGATTTCACTGCTATTTTGTTTAATCAACATAAT +GATGTAGATTTACTCTGTATATATATGGAAGAGTGAAAGAAGGTTGGGAAGGAATAACTA +TCAATTAATATAGGTGATATAGTAGTTATTTTTGCAAATCAACTATAATTTCTGAATGGA +TATTCAGACCATATTTACATTACATAGAAGAGGCACACACCAAAAGATTTAACAAATGTG +CCAAATATTGGTGAATATTTAGTTAGGTACCAAAAGGATGTTGTATAAATTAGGATGCTT +TCAACCATAAGAGACTCATCTCAAAAATGGCTTGAAAATGTGGGGAATTTTTATCTCAGT +GTGAAGTTAAAGGTAGGGCAAGTCCAAATCAATTCAAGTTAATTAAAGCCCCAAGTTCTT +GGAATTTGCTCTGCTAGTCTTAGCTGGTTAGCCTTTGTCCTGAAGTTTGTAACTTCATGA +CCAGAAGATGATCGCAATATTTTCTAAGTATAAAGCCTGAAGGTGTAAACCAGGTAGTCT +CAGCAAAACCAATGATGCATGGTCACCTTTCCCTTAGTTGACAAATACTTGCCTTCCAGA +TACTTTGAATCAGAATGGGCATTTTAACTAAGATCCAGTGCAACTAAAGGAATAATCAAA +CAAATGATAAAATAATTATTCTGAGCTGAAAACACAAGTCTGAATATTGGAAGAGGTTCC +TGATTTCCAGGCAGAATAGATAAGCAAAGATATCAACCTAAACACATCCTGGTTATAGTC +TAAAATTTAGAGAATAAATGGGAAAAGATTATGAACTTTAAGGAAGAAATAACAACTTAC 
+ACATAAAACAAAAAGGAAAAACTATCTGTTGACAGATTTATCACCTGCTAGATAAGAGCA +GAATAACCATTCACTAGGAGAAAATGGGAGAAGCTAGAAGATACTGGAGTAATATTTATA +GAGTACTGAAGAAAAAACAAAAAACAGAAATTCAATACACAGCTAAGATATCATTTACCT +GTAAATGATACATGGTAAAATAAAGGGTAAAATAAGGATATTTGCAGATTCACAAGGAGA +AGTTAGCCTTCACATACTCAGCTGAGGAAAATCCTAGAGACAAAACTCTAACAAAAACAA +ACTAGCCGGGTGCAGTGGTGCAGGCCTGTAGTTCCAGCTACTCAAGAGGCTGAAGTAAGA +GGATCACTGGAGCCCAGGGGTTCTGGGCTGTACTGTGCTATGCTGATCAGGTGTCCACAC +TTAGCTCGCCATCAATATGATGACCTCCCAGGTGGGAAATGGAGCAGGTGAAAACTCCCA +TGCTGATCAGTAGTGGGATCATGTCTGTGAATAATCACTGCACTCCAACTGGGAAACATA +GCAAGACCCCATCTCTAAAAAACAAACAAACAAACAAACAAACAAACAAAAATGTTTCAG +ACAAATGTCAAGATAGAGTAAAAGAAGAAAGTACTGAAGGCCCTTCAACATAAATTGGAT +CAGATAATAAAAATAATAGCAAAGTTCTTTTCATGCTGTATCCTTAATTCTTCACCATAA +TCTTAGGAAGTGAATGTATTAATTATCTTTTGCTATATAACAAATTACTCCCAAAACTTG +GCGGCTTAAAACAACAAATATTATTTCACAATTTCTGTGGGTCAAGAATTTGGAAGTAGT +GACTCTGGCTCAGGGTCTCATTTAAGGTGGTAGTTCAGGATGCCAGTCAGGGCTGCAGGC +ACTGAGGCTGCTTCCTCAATGGCCCACTCACATGGCTGTTGGCTGGAGGCCTCTCTTTCT +CACCACACGGGCCTCTTCATAGGACTGCCCGAGTGTCCTTACAGCGTGGCAGCTGGCTTC +CCCCAGAGTGAACATTCTGAGAGAGAGAGAAAGAGAGAGTTAGAAGGCATGCTGTCACTT +CCACTGTATCCCATTCACCAGAAGTGAGAGACTAAATTCACCAAACAGAAAAGTGAAGGA +GAATGAGGCTCCAGTTTTTTGGGTGAGAGTTGAAGAATGTATGGATATTTTGAACAACCA +CAATATAATTCTTCTTTTCACAGAAGCACAAAAAAATTTATTTAACTTGTCCAAGTTTAC +ACAGTTAGCAAGCAACACCTTTGAGAAAAAAATCCATGTAGTCTGATACAAGCACCCAAA +CTCATAACCACAATGTGAATCTAACTGCTTTTCAATTAAAAAAGAAAGAAAGATTCCCTT +CAAATCTGGCATATGCATTCACATGGAGCATTCATACTGCCAGTGACAGTACCATAGTTA +TATGGAATTAGAAGTTCTAACTTATCTTGGCCAAACTAAAGACTTAGGGCTGGGTAGAAG +GTTGGAGGGATGTAAGGTCATTCTCAAGATCTCATCTAGGAGAAGAAAACAAAATGGGGA +AGTAGAAGACAAAATGCTTTTTTAGGTTGGGAAAGGACTGGGAGAATCAAGCATCTAGAA +ATGGGCACAAAGAGTTACCTTATTTTATTTAAAAGAAAATAAATGTTTGACTATTAATGC +CTGAGAACGGAAGGTGATTATTAATGAGATGAAAAAGTTAATCAGATTCTCCAAGTTAGG +AGGGACTTGAAGACCAAATTGATAAAAATAAAAAAAAAGATGTCATAGTAGAATAATCTA +GATAATAAGCAATCAATGAGACTGAAAAAATAAAATCAAGTATATCATTTGTTACACTAA +ATATTAATATACCAGATTCTCTCATTAAAAAAACAGAGAAAGTCAAATTGGATTAAATAA 
+GAACAAAAAGTTAGCTATATAGTATTTATCAGAAACATTCTTATAAACAAATTGATAATG +AAAGATTAAAAATAAGAGATTTGAGGCAAGGCAAGCAAAAAGAAATAAATGTTAAACAAG +GAGAAATTAAAGGCTACGGACATTACCTAAGGAAAAGGATGACATAGAGTTACAGTGGCA +AAAGTTAGGAAGCAGATGACATAAATCTATATGCACAAACAGTATGGCCACAAAATACAT +TAATTAAAAATTACTAGAAATATAAGATGACTTTGATTAAAATACACTGATTACAAGGGA +TTTAACATATAAAAATTAGGCTGATGTGGTAAATTTAAATATAATCAAATATTTAGGAAA +ATAGAACAACACAACAAAGTTGATTACATATATTCATTTTCCAGATAGTATACTTTATGC +CTATGAAATAGTTCTTAAAATCAATTATATATGGCCGGGTGCAGTGGCCCACGCCTGTAA +TCCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCACGAGGTCAGGAGATCGAGACCATCC +TGGCTAACACAGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAAAAAAATTAGCTGG +GCGTGGTGGCTGGAACCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGT diff --git a/src/test/resources/htsjdk/samtools/cram/human_g1k_v37.20.subset.fasta.fai b/src/test/resources/htsjdk/samtools/cram/human_g1k_v37.20.subset.fasta.fai new file mode 100644 index 000000000..77ced1f80 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/cram/human_g1k_v37.20.subset.fasta.fai @@ -0,0 +1 @@ +20 9000 4 60 61 diff --git a/src/test/resources/htsjdk/samtools/cram/samtoolsSliceMD5WithAmbiguityCodesTest.cram b/src/test/resources/htsjdk/samtools/cram/samtoolsSliceMD5WithAmbiguityCodesTest.cram new file mode 100644 index 000000000..e72731880 Binary files /dev/null and b/src/test/resources/htsjdk/samtools/cram/samtoolsSliceMD5WithAmbiguityCodesTest.cram differ diff --git a/src/test/resources/htsjdk/samtools/cram/test.dict b/src/test/resources/htsjdk/samtools/cram/test.dict deleted file mode 100644 index dfb98d63a..000000000 --- a/src/test/resources/htsjdk/samtools/cram/test.dict +++ /dev/null @@ -1,2 +0,0 @@ -@HD VN:1.4 SO:unsorted -@SQ SN:Sheila LN:20 M5:7ddd8a4b4f2c1dec43476a738b1a9b72 UR:file:/Users/edwardk/Documents/htsjdk/testdata/htsjdk/samtools/cram/auxf.fa diff --git a/src/test/resources/htsjdk/samtools/cram/test.fa b/src/test/resources/htsjdk/samtools/cram/test.fa deleted file mode 100644 index 11d25dda6..000000000 --- a/src/test/resources/htsjdk/samtools/cram/test.fa 
+++ /dev/null @@ -1,2 +0,0 @@ ->Sheila -GCTAGCTCAGAAAAAAAAAA diff --git a/src/test/resources/htsjdk/samtools/cram/test.fa.fai b/src/test/resources/htsjdk/samtools/cram/test.fa.fai deleted file mode 100644 index f3cdedb55..000000000 --- a/src/test/resources/htsjdk/samtools/cram/test.fa.fai +++ /dev/null @@ -1 +0,0 @@ -Sheila 20 8 20 21 diff --git a/src/test/resources/htsjdk/samtools/cram/test2.dict b/src/test/resources/htsjdk/samtools/cram/test2.dict deleted file mode 100644 index dfb98d63a..000000000 --- a/src/test/resources/htsjdk/samtools/cram/test2.dict +++ /dev/null @@ -1,2 +0,0 @@ -@HD VN:1.4 SO:unsorted -@SQ SN:Sheila LN:20 M5:7ddd8a4b4f2c1dec43476a738b1a9b72 UR:file:/Users/edwardk/Documents/htsjdk/testdata/htsjdk/samtools/cram/auxf.fa diff --git a/src/test/resources/htsjdk/samtools/cram/test2.fa b/src/test/resources/htsjdk/samtools/cram/test2.fa deleted file mode 100644 index 11d25dda6..000000000 --- a/src/test/resources/htsjdk/samtools/cram/test2.fa +++ /dev/null @@ -1,2 +0,0 @@ ->Sheila -GCTAGCTCAGAAAAAAAAAA diff --git a/src/test/resources/htsjdk/samtools/cram/test2.fa.fai b/src/test/resources/htsjdk/samtools/cram/test2.fa.fai deleted file mode 100644 index f3cdedb55..000000000 --- a/src/test/resources/htsjdk/samtools/cram/test2.fa.fai +++ /dev/null @@ -1 +0,0 @@ -Sheila 20 8 20 21 diff --git a/src/test/resources/htsjdk/samtools/noheader.sam b/src/test/resources/htsjdk/samtools/noheader.sam new file mode 100755 index 000000000..7ac0030d4 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/noheader.sam @@ -0,0 +1,10 @@ +A 73 chr2 1 255 10M * 0 0 CAACAGAAGC )'.*.+2,)) RG:Z:0 +A 133 * 0 0 * chr2 1 0 CAACAGAAGC )'.*.+2,)) RG:Z:0 +B 99 chr1 1 255 10M = 26 35 CAACAGAAGC )'.*.+2,)) RG:Z:0 +B 147 chr1 26 255 10M = 1 -35 CAACAGAAGC )'.*.+2,)) RG:Z:0 +C 99 chr2 1 255 10M = 26 35 CAACAGAAGC )'.*.+2,)) RG:Z:0 +C 147 chr2 26 255 10M = 1 -35 CAACAGAAGC )'.*.+2,)) RG:Z:0 +D 99 chr3 1 255 10M = 25 35 CAACAGAAGC )'.*.+2,)) RG:Z:0 +D 147 chr3 26 255 10M = 1 -35 
CAACAGAAGC )'.*.+2,)) RG:Z:0 +E 99 chr1 2 255 10M = 15 30 CAACAGAAGC )'.*.+2,)) RG:Z:0 +E 147 chr1 15 255 10M = 2 -30 CAACAGAAGC )'.*.+2,)) RG:Z:0 diff --git a/src/test/resources/htsjdk/samtools/reference/Homo_sapiens_assembly18.trimmed.fasta.gz b/src/test/resources/htsjdk/samtools/reference/Homo_sapiens_assembly18.trimmed.fasta.gz new file mode 100644 index 000000000..aa8ef591b Binary files /dev/null and b/src/test/resources/htsjdk/samtools/reference/Homo_sapiens_assembly18.trimmed.fasta.gz differ diff --git a/src/test/resources/htsjdk/samtools/reference/Homo_sapiens_assembly18.trimmed.fasta.gz.fai b/src/test/resources/htsjdk/samtools/reference/Homo_sapiens_assembly18.trimmed.fasta.gz.fai new file mode 100644 index 000000000..04a438b94 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/reference/Homo_sapiens_assembly18.trimmed.fasta.gz.fai @@ -0,0 +1,2 @@ +chrM 16571 6 60 61 +chr20 1000000 16861 60 61 diff --git a/src/test/resources/htsjdk/samtools/reference/Homo_sapiens_assembly18.trimmed.fasta.gz.gzi b/src/test/resources/htsjdk/samtools/reference/Homo_sapiens_assembly18.trimmed.fasta.gz.gzi new file mode 100644 index 000000000..a53660233 Binary files /dev/null and b/src/test/resources/htsjdk/samtools/reference/Homo_sapiens_assembly18.trimmed.fasta.gz.gzi differ diff --git a/src/test/resources/htsjdk/samtools/reference/crlf.fasta b/src/test/resources/htsjdk/samtools/reference/crlf.fasta new file mode 100644 index 000000000..70c878536 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/reference/crlf.fasta @@ -0,0 +1,4 @@ +>a test CR+LF +ACTG +>b test CR+LF +ACTG diff --git a/src/test/resources/htsjdk/samtools/reference/crlf.fasta.fai b/src/test/resources/htsjdk/samtools/reference/crlf.fasta.fai new file mode 100644 index 000000000..923386e04 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/reference/crlf.fasta.fai @@ -0,0 +1,2 @@ +a 4 15 4 6 +b 4 36 4 5 diff --git a/src/test/resources/htsjdk/samtools/reference/header_with_white_space.fasta 
b/src/test/resources/htsjdk/samtools/reference/header_with_white_space.fasta new file mode 100644 index 000000000..24cff02a9 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/reference/header_with_white_space.fasta @@ -0,0 +1,4 @@ +>a test white space +ACTG +>b test whitespace +ACTG diff --git a/src/test/resources/htsjdk/samtools/reference/header_with_white_space.fasta.fai b/src/test/resources/htsjdk/samtools/reference/header_with_white_space.fasta.fai new file mode 100644 index 000000000..bb15aa584 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/reference/header_with_white_space.fasta.fai @@ -0,0 +1,2 @@ +a 4 20 4 5 +b 4 44 4 5 diff --git a/src/test/resources/htsjdk/samtools/util/random.bin b/src/test/resources/htsjdk/samtools/util/random.bin new file mode 100644 index 000000000..f59b24766 Binary files /dev/null and b/src/test/resources/htsjdk/samtools/util/random.bin differ diff --git a/src/test/resources/htsjdk/samtools/util/random.bin.gz b/src/test/resources/htsjdk/samtools/util/random.bin.gz new file mode 100644 index 000000000..cd764e0b3 Binary files /dev/null and b/src/test/resources/htsjdk/samtools/util/random.bin.gz differ diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.mangled.vcf.gz b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.mangled.vcf.gz new file mode 100644 index 000000000..98f276a17 Binary files /dev/null and b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.mangled.vcf.gz differ diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.mangled.vcf.gz.tbi b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.mangled.vcf.gz.tbi new file mode 100644 index 000000000..333a50f6e Binary files /dev/null and b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.mangled.vcf.gz.tbi differ diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf 
b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf new file mode 100644 index 000000000..63667cacb --- /dev/null +++ b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf @@ -0,0 +1,37 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##source=SelectVariants +#CHROM POS ID REF ALT QUAL FILTER INFO +1 100 a G A 232.46 PASS . +1 199 b GG G 232.46 PASS . +1 200 c G A 232.46 PASS . +1 203 d GGGG G 232.46 PASS . +1 280 e G A 232.46 PASS . +1 284 f GGG G 232.46 PASS . +1 285 g G A 232.46 PASS . +1 286 h G A 232.46 PASS . +1 999 i G A 232.46 PASS . +1 1000 j G A 232.46 PASS . +1 1000 k GGGG G 232.46 PASS . +1 1076 l G A 232.46 PASS . +1 1150 m G A 232.46 PASS . +1 1176 n G A 232.46 PASS . +2 200 o G A 232.46 PASS . +2 525 p G A 232.46 PASS . +2 548 q GGG G 232.46 PASS . +2 640 r G A 232.46 PASS . +2 700 s G A 232.46 PASS . +3 1 t G A 232.46 PASS . +3 300 u G A 232.46 PASS . +3 300 v GGGG G 232.46 PASS . +3 400 w G A 232.46 PASS . +4 600 x G A 232.46 PASS . +4 775 y G A 232.46 PASS . +4 776 z GGGG G 232.46 PASS . 
diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz new file mode 100644 index 000000000..03ad18b9c Binary files /dev/null and b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz differ diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz.tbi b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz.tbi new file mode 100644 index 000000000..3441492b6 Binary files /dev/null and b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz.tbi differ diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.idx b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.idx new file mode 100644 index 000000000..6d5e546e2 Binary files /dev/null and b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.idx differ diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf new file mode 100644 index 000000000..8a6df0d2e --- /dev/null +++ b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf @@ -0,0 +1,37 @@ +!##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##source=SelectVariants +#CHROM POS ID REF ALT QUAL FILTER INFO +1 100 a G A 232.46 PASS . +1 199 b GG G 232.46 PASS . +1 200 c G A 232.46 PASS . +1 203 d GGGG G 232.46 PASS . +1 280 e G A 232.46 PASS . +1 284 f GGG G 232.46 PASS . +1 285 g G A 232.46 PASS . +1 286 h G A 232.46 PASS . +1 999 i G A 232.46 PASS . +1 1000 j G A 232.46 PASS . +1 1000 k GGGG G 232.46 PASS . +1 1076 l G A 232.46 PASS . +1 1150 m G A 232.46 PASS . +1 1176 n G A 232.46 PASS . +2 200 o G A 232.46 PASS . +2 525 p G A 232.46 PASS . 
+2 548 q GGG G 232.46 PASS . +2 640 r G A 232.46 PASS . +2 700 s G A 232.46 PASS . +3 1 t G A 232.46 PASS . +3 300 u G A 232.46 PASS . +3 300 v GGGG G 232.46 PASS . +3 400 w G A 232.46 PASS . +4 600 x G A 232.46 PASS . +4 775 y G A 232.46 PASS . +4 776 z GGGG G 232.46 PASS . diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf.idx b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf.idx new file mode 100644 index 000000000..4a20ddcf8 Binary files /dev/null and b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf.idx differ diff --git a/src/test/scala/htsjdk/UnitSpec.scala b/src/test/scala/htsjdk/UnitSpec.scala new file mode 100644 index 000000000..a2995d56c --- /dev/null +++ b/src/test/scala/htsjdk/UnitSpec.scala @@ -0,0 +1,25 @@ +package htsjdk + +import java.nio.file.{Files, Path} + +import org.scalatest.{FlatSpec, Matchers} + +/** Base class for all Scala tests. */ +class UnitSpec extends FlatSpec with Matchers { + /** Make a temporary file that will get cleaned up at the end of testing. */ + protected def makeTempFile(prefix: String, suffix: String): Path = { + val path = Files.createTempFile(prefix, suffix) + path.toFile.deleteOnExit() + path + } + + /** Implicit conversion from Java to Scala iterator. */ + implicit def javaIteratorAsScalaIterator[A](iter: java.util.Iterator[A]): Iterator[A] = { + scala.collection.JavaConverters.asScalaIterator(iter) + } + + /** Implicit conversion from Java to Scala iterable. 
*/ + implicit def javaIterableAsScalaIterable[A](iterable: java.lang.Iterable[A]): Iterable[A] = { + scala.collection.JavaConverters.iterableAsScalaIterable(iterable) + } +} diff --git a/src/test/scala/htsjdk/samtools/fastq/FastqReaderWriterTest.scala b/src/test/scala/htsjdk/samtools/fastq/FastqReaderWriterTest.scala new file mode 100644 index 000000000..00f62e91a --- /dev/null +++ b/src/test/scala/htsjdk/samtools/fastq/FastqReaderWriterTest.scala @@ -0,0 +1,180 @@ +package htsjdk.samtools.fastq + +import java.io.{BufferedReader, File, StringReader} + +import htsjdk.UnitSpec +import htsjdk.samtools.{SAMException, SAMUtils} +import htsjdk.samtools.util.IOUtil + +import scala.util.Random + +class FastqReaderWriterTest extends UnitSpec { + private val rng = new Random() + private val Bases = Array('A', 'C', 'G', 'T') + + /** Generates a random string of bases of the desired length. */ + def bases(length: Int): String = { + val chs = new Array[Char](length) + chs.indices.foreach(i => chs(i) = Bases(rng.nextInt(Bases.length))) + new String(chs) + } + + /** Generates a FastqRecord with random bases at a given length. 
*/ + def fq(name: String, length: Int, qual: Int = 30): FastqRecord = { + new FastqRecord(name, bases(length), "", SAMUtils.phredToFastq(qual).toString * length) + } + + "FastqWriter" should "write four lines per record to file" in { + val path = makeTempFile("test.", ".fastq") + val out = new FastqWriterFactory().newWriter(path.toFile) + val recs = Seq(fq("q1", 50), fq("q2", 48), fq("q3", 55)) + val Seq(q1, q2, q3) = recs + + recs.foreach(rec => out.write(rec)) + out.close() + + val lines = IOUtil.slurpLines(path.toFile) + lines should have size 12 + + lines.get(0) shouldBe "@q1" + lines.get(1) shouldBe q1.getReadString + lines.get(4) shouldBe "@q2" + lines.get(5) shouldBe q2.getReadString + lines.get(8) shouldBe "@q3" + lines.get(9) shouldBe q3.getReadString + } + + it should "write a record with only a single base" in { + val path = makeTempFile("test.", ".fastq") + val out = new FastqWriterFactory().newWriter(path.toFile) + out.write(fq("q1", 1)) + out.close() + val lines = IOUtil.slurpLines(path.toFile) + lines.get(1) should have length 1 + lines.get(3) should have length 1 + } + + it should "write a record with zero-length bases and quals" in { + val path = makeTempFile("test.", ".fastq") + val out = new FastqWriterFactory().newWriter(path.toFile) + out.write(fq("q1", 0)) + out.close() + val lines = IOUtil.slurpLines(path.toFile) + lines.get(1) should have length 0 + lines.get(3) should have length 0 + } + + + "FastqReader" should "read back a fastq file written by FastqWriter" in { + val path = makeTempFile("test.", ".fastq") + val out = new FastqWriterFactory().newWriter(path.toFile) + val recs = Seq(fq("q1", 50), fq("q2", 100), fq("q3", 150)) + recs.foreach(rec => out.write(rec)) + out.close() + + val in = new FastqReader(path.toFile) + val recs2 = in.iterator().toList + in.close() + recs2 should contain theSameElementsInOrderAs recs + } + + it should "throw an exception if the input fastq is garbled" in { + val fastq = + """ + |@q1 + |AACCGGTT + |+ + 
|######## + |@q2 + |ACGT + |#### + """.stripMargin.trim + + val in = new FastqReader(null, new BufferedReader(new StringReader(fastq))) + an[Exception] shouldBe thrownBy { in.next() } + } + + it should "throw an exception if the input file doesn't exist" in { + an[Exception] shouldBe thrownBy { new FastqReader(new File("/some/path/that/shouldnt/exist.fq"))} + } + + it should "read an empty file just fine" in { + val path = makeTempFile("empty.", ".fastq") + val in = new FastqReader(path.toFile) + while (in.hasNext) in.next() + an[Exception] shouldBe thrownBy { in.next() } + in.close() + } + + it should "honor skipBlankLines when requested" in { + val fastq = + """ + | + |@SL-XBG:1:1:4:1663#0/2 + |NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN + |+SL-XBG:1:1:4:1663#0/2 + |BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB + """.stripMargin + val reader = new BufferedReader(new StringReader(fastq)) + val in = new FastqReader(null, reader, true) + while (in.hasNext) in.next() + } + + it should "fail on blank lines when skipBlankLines is false" in { + val fastq = + """ + | + |@SL-XBG:1:1:4:1663#0/2 + |NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN + |+SL-XBG:1:1:4:1663#0/2 + |BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB + """.stripMargin + val reader = new BufferedReader(new StringReader(fastq)) + an[SAMException] shouldBe thrownBy { val in = new FastqReader(null, reader, false) } + } + + it should "fail on a truncated file" in { + val fastq = + """ + |@q1 + |AACCGGTT + |+ + |######## + """.stripMargin.trim + + Range.inclusive(1, 3).foreach { n => + val text = fastq.lines.take(n).mkString("\n") + val reader = new BufferedReader(new StringReader(text)) + an[Exception] shouldBe thrownBy { new FastqReader(null, reader).iterator().toSeq } + } + } + + it should "fail if the seq and qual lines are different lengths" in { + val fastq = + """ + |@q1 + |AACC + 
|+ + |######## + """.stripMargin.trim + + val reader = new BufferedReader(new StringReader(fastq)) + an[Exception] shouldBe thrownBy { new FastqReader(null, reader).iterator().toSeq } + } + + it should "fail if either header line is empty" in { + val fastq = + """ + |@q1 + |AACC + |+q1 + |######## + """.stripMargin.trim + + val noSeqHeader = new BufferedReader(new StringReader(fastq.replace("@q1", ""))) + val noQualHeader = new BufferedReader(new StringReader(fastq.replace("+q1", ""))) + an[Exception] shouldBe thrownBy { new FastqReader(noSeqHeader).iterator().toSeq } + an[Exception] shouldBe thrownBy { new FastqReader(noQualHeader).iterator().toSeq } + } + +} diff --git a/src/test/scala/htsjdk/samtools/util/StringUtilTest.scala b/src/test/scala/htsjdk/samtools/util/StringUtilTest.scala new file mode 100644 index 000000000..35957d681 --- /dev/null +++ b/src/test/scala/htsjdk/samtools/util/StringUtilTest.scala @@ -0,0 +1,134 @@ +/* + * The MIT License + * + * Copyright (c) 2017 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.util + +import htsjdk.UnitSpec + +class StringUtilTest extends UnitSpec { + "StringUtil.split" should "behave like String.split(char)" in { + Seq("A:BB:C", "A:BB", "A:BB:", "A:BB:C:DDD", "A:", "A", "A:BB:C").foreach { s => + val arr = new Array[String](10) + val count = StringUtil.split(s, arr, ':') + arr.take(count) shouldBe s.split(':') + } + } + + "StringUtil.splitConcatenateExcessTokens" should "behave like String.split(regex, limit)" in { + Seq("A:BB:C", "A:BB", "A:BB:", "A:BB:C:DDD", "A:", "A", "A:BB:C:").foreach { s => + val arr = new Array[String](3) + val count = StringUtil.splitConcatenateExcessTokens(s, arr, ':') + arr.take(count) shouldBe s.split(":", 3).filter(_.nonEmpty) + } + } + + "StringUtil.join" should "join tokens with a separator" in { + StringUtil.join(",", 1, "hello", 'T') shouldBe "1,hello,T" + StringUtil.join(",") shouldBe "" + } + + "StringUtil.hammingDistance" should "return zero for two empty sequences" in { + StringUtil.hammingDistance("", "") shouldBe 0 + } + + Seq(("ATAC", "GCAT", 3), ("ATAGC", "ATAGC", 0)).foreach { case (s1, s2, distance) => + it should s"return distance $distance between $s1 and $s2" in { + StringUtil.hammingDistance(s1, s2) shouldBe distance + } + } + + it should "be case sensitive" in { + StringUtil.hammingDistance("ATAC", "atac") shouldBe 4 + } + + it should "count Ns as matching when computing distance" in { + StringUtil.hammingDistance("nAGTN", "nAGTN") shouldBe 0 + } + + it should "throw an exception if two strings of different lengths are provided" in { + an[Exception] shouldBe thrownBy { StringUtil.hammingDistance("", "ABC")} + an[Exception] shouldBe thrownBy { StringUtil.hammingDistance("Abc", "wxyz")} + } 
+ + "StringUtil.isWithinHammingDistance" should "agree with StringUtil.hammingDistance" in { + Seq(("ATAC", "GCAT", 3), ("ATAC", "GCAT", 2), ("ATAC", "GCAT", 1), ("ATAC", "GCAT", 0)).foreach { case (s1, s2, within) => + StringUtil.isWithinHammingDistance(s1, s2, within) shouldBe (StringUtil.hammingDistance(s1, s2) <= within) + } + } + + it should "throw an exception if the two strings are of different lengths" in { + an[Exception] shouldBe thrownBy { StringUtil.isWithinHammingDistance("", "ABC", 2)} + an[Exception] shouldBe thrownBy { StringUtil.isWithinHammingDistance("Abc", "wxyz", 2)} + } + + "StringUtil.toLowerCase(byte)" should "work just like Character.toLowerCase" in { + 0 to 127 foreach {i => StringUtil.toLowerCase(i.toByte) shouldBe i.toChar.toLower.toByte } + } + + "StringUtil.toUpperCase(byte)" should "work just like Character.toUpperCase" in { + 0 to 127 foreach {i => StringUtil.toUpperCase(i.toByte) shouldBe i.toChar.toUpper.toByte } + } + + "StringUtil.toUpperCase(byte[])" should "do upper case characters" in { + val seq = "atACgtaCGTgatcCAtATATgATtatgacNryuAN" + val bytes = seq.getBytes + StringUtil.toUpperCase(bytes) + bytes shouldBe seq.toUpperCase.getBytes + } + + "StringUtil.assertCharactersNotInString" should "catch illegal characters" in { + an[Exception] shouldBe thrownBy { + StringUtil.assertCharactersNotInString("Hello World!", ' ', '!', '_') + } + } + + it should "not fail when there are no illegal characters present" in { + StringUtil.assertCharactersNotInString("HelloWorld", ' ', '!', '_') + } + + val textForWrapping: String = + """This is a little bit + |of text with nice short + |lines. 
+ 
  """.stripMargin.trim
+
+  "StringUtil.wordWrap" should "not wrap when lines are shorter than the given length" in {
+    StringUtil.wordWrap(textForWrapping, 50) shouldBe textForWrapping
+  }
+
+  it should "wrap text when lines are longer than the given length" in {
+    val result = StringUtil.wordWrap(textForWrapping, 15)
+    result.lines.size shouldBe 5
+    result.lines.foreach(line => line.length should be <= 15)
+  }
+
+  "StringUtil.intValuesToString(int[])" should "generate a CSV string of ints" in {
+    val ints = Array[Int](1, 2, 3, 11, 22, 33, Int.MinValue, 0, Int.MaxValue)
+    StringUtil.intValuesToString(ints) shouldBe ints.mkString(", ")
+  }
+
+  "StringUtil.intValuesToString(short[])" should "generate a CSV string of shorts" in {
+    val ints = Array[Short](1, 2, 3, 11, 22, 33, Short.MinValue, 0, Short.MaxValue)
+    StringUtil.intValuesToString(ints) shouldBe ints.mkString(", ")
+  }
+}