Expose a couple of protected methods and replace hard coded strings w… #854

Merged
merged 1 commit into from Apr 16, 2017
Jump to file or symbol
Failed to load files and symbols.
+217 −92
Split
@@ -41,6 +41,22 @@
* directly. It is provided so that this class can be used in a Java for-each loop.
*/
public class FastqReader implements Iterator<FastqRecord>, Iterable<FastqRecord>, Closeable {
+ /** Enum of the types of lines we see in Fastq. */
+ protected enum LineType {
+ SequenceHeader("Sequence Header"),
+ SequenceLine("Sequence Line"),
+ QualityHeader("Quality Header"),
+ QualityLine("Quality Line");
+
+ private String printable;
+
+ LineType(String printable) {
+ this.printable = printable;
+ }
+
+ @Override public String toString() { return this.printable; }
+ }
+
final private File fastqFile;
final private BufferedReader reader;
private FastqRecord nextRecord;
@@ -58,10 +74,7 @@ public FastqReader(final File file) {
* @param skipBlankLines whether blank lines should be skipped
*/
public FastqReader(final File file, final boolean skipBlankLines) {
- this.skipBlankLines=skipBlankLines;
- fastqFile = file;
- reader = IOUtil.openFileForBufferedReading(fastqFile);
- nextRecord = readNextRecord();
+ this(file, IOUtil.openFileForBufferedReading(file), skipBlankLines);
}
public FastqReader(final BufferedReader reader) {
@@ -87,31 +100,30 @@ public FastqReader(final File file, final BufferedReader reader) {
private FastqRecord readNextRecord() {
try {
-
// Read sequence header
final String seqHeader = readLineConditionallySkippingBlanks();
if (seqHeader == null) return null ;
if (StringUtil.isBlank(seqHeader)) {
throw new SAMException(error("Missing sequence header"));
}
if (!seqHeader.startsWith(FastqConstants.SEQUENCE_HEADER)) {
- throw new SAMException(error("Sequence header must start with "+ FastqConstants.SEQUENCE_HEADER+": "+seqHeader));
+ throw new SAMException(error("Sequence header must start with " + FastqConstants.SEQUENCE_HEADER + ": " + seqHeader));
}
// Read sequence line
final String seqLine = readLineConditionallySkippingBlanks();
- checkLine(seqLine,"sequence line");
+ checkLine(seqLine, LineType.SequenceLine);
// Read quality header
final String qualHeader = readLineConditionallySkippingBlanks();
- checkLine(qualHeader,"quality header");
+ checkLine(qualHeader, LineType.QualityHeader);
if (!qualHeader.startsWith(FastqConstants.QUALITY_HEADER)) {
- throw new SAMException(error("Quality header must start with "+ FastqConstants.QUALITY_HEADER+": "+qualHeader));
+ throw new SAMException(error("Quality header must start with " + FastqConstants.QUALITY_HEADER + ": "+ qualHeader));
}
// Read quality line
final String qualLine = readLineConditionallySkippingBlanks();
- checkLine(qualLine,"quality line");
+ checkLine(qualLine, LineType.QualityLine);
// Check sequence and quality lines are same length
if (seqLine.length() != qualLine.length()) {
@@ -165,21 +177,23 @@ public void close() {
try {
reader.close();
} catch (IOException e) {
- throw new SAMException("IO problem in fastq file "+getAbsolutePath(), e);
+ throw new SAMException("IO problem in fastq file " + getAbsolutePath(), e);
}
}
- private void checkLine(final String line, final String kind) {
+ /** Checks that the line is neither null (representing EOF) nor empty (blank line in file). */
+ protected void checkLine(final String line, final LineType kind) {
if (line == null) {
- throw new SAMException(error("File is too short - missing "+kind+" line"));
+ throw new SAMException(error("File is too short - missing " + kind));
}
if (StringUtil.isBlank(line)) {
- throw new SAMException(error("Missing "+kind));
+ throw new SAMException(error("Missing " + kind));
}
}
- private String error(final String msg) {
- return msg + " at line "+line+" in fastq "+getAbsolutePath();
+ /** Generates an error message with line number information. */
+ protected String error(final String msg) {
+ return msg + " at line " + line + " in fastq " + getAbsolutePath();
}
private String getAbsolutePath() {
@@ -198,6 +212,6 @@ private String readLineConditionallySkippingBlanks() throws IOException {
@Override
public String toString() {
- return "FastqReader["+(this.fastqFile == null?"":this.fastqFile)+ " Line:"+getLineNumber()+"]";
+ return "FastqReader[" + (this.fastqFile == null ? "" : this.fastqFile) + " Line:" + getLineNumber() + "]";
}
}
@@ -1,9 +1,12 @@
package htsjdk.samtools.fastq;
import htsjdk.HtsjdkTest;
+import htsjdk.samtools.util.TestUtil;
import org.testng.Assert;
import org.testng.annotations.Test;
+import java.util.ArrayList;
+
public final class FastqRecordTest extends HtsjdkTest {
@Test
@@ -207,4 +210,14 @@ public void testNotEqualLengths() {
new FastqRecord("header", seqLine1, "qualHeaderPrefix", qualLine1);
//Note: this does not blow up now but it will once we enforce that seqLine and qualLine be the same length
}
+
+ @Test
+ public void testFastqSerialize() throws Exception {
+ final ArrayList<FastqRecord> records = new ArrayList<>();
+ records.add(new FastqRecord("q1", "ACGTACGT", "", "########"));
+ records.add(new FastqRecord("q2", "CCAGCGTAATA", "", "????????###"));
+ records.add(new FastqRecord("q3", "NNNNNNNNNNNN", "", "############"));
+
+ Assert.assertEquals(TestUtil.serializeAndDeserialize(records),records);
+ }
}
@@ -1,74 +0,0 @@
-/*
- * The MIT License
- *
- * Pierre Lindenbaum PhD
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-package htsjdk.samtools.fastq;
-
-import htsjdk.HtsjdkTest;
-import org.testng.Assert;
-import org.testng.annotations.DataProvider;
-import org.testng.annotations.Test;
-
-import htsjdk.samtools.util.TestUtil;
-
-import java.io.File;
-import java.util.ArrayList;
-
-/**
- * test fastq
- */
-public class FastqWriterTest extends HtsjdkTest {
- private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/util/QualityEncodingDetectorTest");
-
- @DataProvider(name = "fastqsource")
- public Object[][] createTestData() {
- return new Object[][]{
- {"solexa_full_range_as_solexa.fastq"},
- {"5k-30BB2AAXX.3.aligned.sam.fastq"}
- };
- }
-
- @Test(dataProvider = "fastqsource")
- public void testReadReadWriteFastq(final String basename) throws Exception {
- final File tmpFile = File.createTempFile("test.", ".fastq");
- tmpFile.deleteOnExit();
- final FastqReader fastqReader = new FastqReader(new File(TEST_DATA_DIR,basename));
- final FastqWriterFactory writerFactory = new FastqWriterFactory();
- final FastqWriter fastqWriter = writerFactory.newWriter(tmpFile);
- for(final FastqRecord rec: fastqReader) fastqWriter.write(rec);
- fastqWriter.close();
- fastqReader.close();
- }
-
- @Test(dataProvider = "fastqsource")
- public void testFastqSerialize(final String basename) throws Exception {
- //write
- final ArrayList<FastqRecord> records = new ArrayList<>();
- final FastqReader fastqReader = new FastqReader(new File(TEST_DATA_DIR,basename));
- for(final FastqRecord rec: fastqReader) {
- records.add(rec);
- if(records.size()>100) break;
- }
- fastqReader.close();
- Assert.assertEquals(TestUtil.serializeAndDeserialize(records),records);
- }
-}
@@ -1,6 +1,25 @@
package htsjdk
+import java.nio.file.{Files, Path}
+
import org.scalatest.{FlatSpec, Matchers}
/** Base class for all Scala tests. */
-class UnitSpec extends FlatSpec with Matchers
+class UnitSpec extends FlatSpec with Matchers {
+ /** Make a temporary file that will get cleaned up at the end of testing. */
+ protected def makeTempFile(prefix: String, suffix: String): Path = {
+ val path = Files.createTempFile(prefix, suffix)
+ path.toFile.deleteOnExit()
+ path
+ }
+
+ /** Implicit conversion from Java to Scala iterator. */
+ implicit def javaIteratorAsScalaIterator[A](iter: java.util.Iterator[A]): Iterator[A] = {
+ scala.collection.JavaConverters.asScalaIterator(iter)
+ }
+
+ /** Implicit conversion from Java to Scala iterable. */
+ implicit def javaIterableAsScalaIterable[A](iterable: java.lang.Iterable[A]): Iterable[A] = {
+ scala.collection.JavaConverters.iterableAsScalaIterable(iterable)
+ }
+}
@@ -0,0 +1,153 @@
+package htsjdk.samtools.fastq
+
+import java.io.{BufferedReader, File, StringReader}
+
+import htsjdk.UnitSpec
+import htsjdk.samtools.SAMUtils
+import htsjdk.samtools.util.IOUtil
+
+import scala.util.Random
+
+class FastqReaderWriterTest extends UnitSpec {
+ private val rng = new Random()
+ private val Bases = Array('A', 'C', 'G', 'T')
+
+ /** Generates a random string of bases of the desired length. */
+ def bases(length: Int): String = {
+ val chs = new Array[Char](length)
+ chs.indices.foreach(i => chs(i) = Bases(rng.nextInt(Bases.length)))
+ new String(chs)
+ }
+
+ /** Generates a FastqRecord with random bases at a given length. */
+ def fq(name: String, length: Int, qual: Int = 30): FastqRecord = {
+ new FastqRecord(name, bases(length), "", SAMUtils.phredToFastq(qual).toString * length)
+ }
+
+ "FastqWriter" should "write four lines per record to file" in {
+ val path = makeTempFile("test.", ".fastq")
+ val out = new FastqWriterFactory().newWriter(path.toFile)
+ val recs = Seq(fq("q1", 50), fq("q2", 48), fq("q3", 55))
+ val Seq(q1, q2, q3) = recs
+
+ recs.foreach(rec => out.write(rec))
+ out.close()
+
+ val lines = IOUtil.slurpLines(path.toFile)
+ lines should have size 12
+
+ lines.get(0) shouldBe "@q1"
+ lines.get(1) shouldBe q1.getReadString
+ lines.get(4) shouldBe "@q2"
+ lines.get(5) shouldBe q2.getReadString
+ lines.get(8) shouldBe "@q3"
+ lines.get(9) shouldBe q3.getReadString
+ }
+
+ it should "write a record with only a single base" in {
+ val path = makeTempFile("test.", ".fastq")
+ val out = new FastqWriterFactory().newWriter(path.toFile)
+ out.write(fq("q1", 1))
+ out.close()
+ val lines = IOUtil.slurpLines(path.toFile)
+ lines.get(1) should have length 1
+ lines.get(3) should have length 1
@yfarjoun

yfarjoun Apr 16, 2017

Contributor

thanks.

+ }
+
+ it should "write a record with zero-length bases and quals" in {
+ val path = makeTempFile("test.", ".fastq")
+ val out = new FastqWriterFactory().newWriter(path.toFile)
+ out.write(fq("q1", 0))
+ out.close()
+ val lines = IOUtil.slurpLines(path.toFile)
+ lines.get(1) should have length 0
+ lines.get(3) should have length 0
+ }
+
+
+ "FastqReader" should "read back a fastq file written by FastqWriter" in {
+ val path = makeTempFile("test.", ".fastq")
+ val out = new FastqWriterFactory().newWriter(path.toFile)
+ val recs = Seq(fq("q1", 50), fq("q2", 100), fq("q3", 150))
+ recs.foreach(rec => out.write(rec))
+ out.close()
+
+ val in = new FastqReader(path.toFile)
+ val recs2 = in.iterator().toList
+ in.close()
+ recs2 should contain theSameElementsInOrderAs recs
+ }
+
+ it should "throw an exception if the input fastq is garbled" in {
+ val fastq =
+ """
+ |@q1
+ |AACCGGTT
+ |+
+ |########
+ |@q2
+ |ACGT
+ |####
+ """.stripMargin.trim
+
+ val in = new FastqReader(null, new BufferedReader(new StringReader(fastq)))
+ an[Exception] shouldBe thrownBy { in.next() }
+ }
+
+ it should "throw an exception if the input file doesn't exist" in {
+ an[Exception] shouldBe thrownBy { new FastqReader(new File("/some/path/that/shouldnt/exist.fq"))}
+ }
+
+ it should "read an empty file just fine" in {
+ val path = makeTempFile("empty.", ".fastq")
+ val in = new FastqReader(path.toFile)
+ while (in.hasNext) in.next()
+ an[Exception] shouldBe thrownBy { in.next() }
+ in.close()
+ }
+
+ it should "fail on a truncated file" in {
+ val fastq =
+ """
+ |@q1
+ |AACCGGTT
+ |+
+ |########
+ """.stripMargin.trim
+
+ Range.inclusive(1, 3).foreach { n =>
+ val text = fastq.lines.take(n).mkString("\n")
+ val reader = new BufferedReader(new StringReader(text))
+ an[Exception] shouldBe thrownBy { new FastqReader(null, reader).iterator().toSeq }
+ }
+ }
+
+ it should "fail if the seq and qual lines are different lengths" in {
+ val fastq =
+ """
+ |@q1
+ |AACC
+ |+
+ |########
+ """.stripMargin.trim
+
+ val reader = new BufferedReader(new StringReader(fastq))
+ an[Exception] shouldBe thrownBy { new FastqReader(null, reader).iterator().toSeq }
+ }
+
+ it should "fail if either header line is empty" in {
+ val fastq =
+ """
+ |@q1
+ |AACC
+ |+q1
+ |########
+ """.stripMargin.trim
+
+ val noSeqHeader = new BufferedReader(new StringReader(fastq.replace("@q1", "")))
+ val noQualHeader = new BufferedReader(new StringReader(fastq.replace("+q1", "")))
+ an[Exception] shouldBe thrownBy { new FastqReader(noSeqHeader).iterator().toSeq }
+ an[Exception] shouldBe thrownBy { new FastqReader(noQualHeader).iterator().toSeq }
+ }
+
+}