Added the ability to query the length of the line terminator to Ascii… #843

Merged
merged 1 commit into from Apr 18, 2017
Jump to file or symbol
Failed to load files and symbols.
+48 −4
Split
@@ -40,8 +40,9 @@
private static final byte LINEFEED = (byte) ('\n' & 0xff);
private static final byte CARRIAGE_RETURN = (byte) ('\r' & 0xff);
- PositionalBufferedStream is;
- char[] lineBuffer;
+ private final PositionalBufferedStream is;
+ private char[] lineBuffer;
+ private int lineTerminatorLength = -1;
public AsciiLineReader(final InputStream is){
this(new PositionalBufferedStream(is));
@@ -65,6 +66,16 @@ public long getPosition(){
return is.getPosition();
}
+ /** Returns the length of the line terminator read after the last read line. Returns either:
+ * -1 if no line has been read
+ * 0 after the last line if the last line in the file had no CR or LF line ending
+ * 1 if the line ended with CR or LF
+ * 2 if the line ended with CR and LF
+ */
+ public int getLineTerminatorLength() {
+ return this.lineTerminatorLength;
+ }
+
/**
* Read a line of text. A line is considered to be terminated by any one
* of a line feed ('\n'), a carriage return ('\r'), or a carriage return
@@ -83,6 +94,7 @@ public final String readLine(final PositionalBufferedStream stream) throws IOExc
if (b == -1) {
// eof reached. Return the last line, or null if this is a new line
if (linePosition > 0) {
+ this.lineTerminatorLength = 0;
return new String(lineBuffer, 0, linePosition);
} else {
return null;
@@ -93,6 +105,10 @@ public final String readLine(final PositionalBufferedStream stream) throws IOExc
if (c == LINEFEED || c == CARRIAGE_RETURN) {
if (c == CARRIAGE_RETURN && stream.peek() == LINEFEED) {
stream.read(); // <= skip the trailing \n in case of \r\n termination
+ this.lineTerminatorLength = 2;
+ }
+ else {
@yfarjoun

yfarjoun Apr 5, 2017

Contributor

I detect an extra \n here :-)

@tfenne

tfenne Apr 5, 2017

Owner

How meta ;) In all seriousness though I thought putting the else onto the next line was user-choice in htsjdk & picard. Certainly the rule used to be the opposite (always put it on a new line).

@yfarjoun

yfarjoun Apr 5, 2017

Contributor

I don't know about "used to be" I was trained by @nh13 to always have } else { I'm not super picky, but I thought it appropriate due to the subject matter.

@tfenne

tfenne Apr 5, 2017

Owner

Since I've been around since the inception of htsjdk, I'm pretty sure I know about "used to be" 😛

+ this.lineTerminatorLength = 1;
}
return new String(lineBuffer, 0, linePosition);
@@ -2,10 +2,10 @@
import htsjdk.HtsjdkTest;
import htsjdk.tribble.TestUtils;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.BeforeMethod;
+import org.testng.Assert;
import org.testng.annotations.Test;
+import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
@@ -40,4 +40,32 @@ public void testReadLines() throws Exception {
assertEquals(expectedNumber, actualLines);
}
+
+ @Test public void voidTestLineEndingLength() throws Exception {
+ final String input = "Hello\nThis\rIs A Silly Test\r\nSo There";
@magicDGS

magicDGS Apr 5, 2017

Contributor

I think that will be nice to have also the ' \r\n' at the end for testing.

@tfenne

tfenne Apr 5, 2017

Owner

Good point, added.

+ final InputStream is = new ByteArrayInputStream(input.getBytes());
@yfarjoun

yfarjoun Apr 5, 2017

Contributor

What should happen if there's a stray carriage return with no linefeed? Could you add that to the test?

@yfarjoun

yfarjoun Apr 5, 2017

Contributor

also, I don't know what happens with the wrong order \n\r but it should be tested...

@tfenne

tfenne Apr 5, 2017

Owner

Seriously? Valid line endings in use today are CR, LF, CR+LF. AsciiLineReader specifically looks for those three endings for lines. The test already covers all three valid line ending (a solo CR, a solo LF and a CR+LF).

If they are in the wrong order, they will be treated as multiple lines (existing behaviour). I.e. foo\n\r would be tokenized as <foo><eol><empty><eol>.

@yfarjoun

yfarjoun Apr 5, 2017

Contributor

I'm not asking to modify the valid line-endings...I just want the tests to be explicit about it.

So the test will show an empty line and the correct size of line ending.

@nh13

nh13 Apr 5, 2017

Contributor

I don't think this test is necessary.

+ final AsciiLineReader in = new AsciiLineReader(is);
+
+ Assert.assertEquals(in.getLineTerminatorLength(), -1);
+ Assert.assertEquals(in.readLine(), "Hello");
+ Assert.assertEquals(in.getLineTerminatorLength(), 1);
+ Assert.assertEquals(in.readLine(), "This");
+ Assert.assertEquals(in.getLineTerminatorLength(), 1);
+ Assert.assertEquals(in.readLine(), "Is A Silly Test");
+ Assert.assertEquals(in.getLineTerminatorLength(), 2);
+ Assert.assertEquals(in.readLine(), "So There");
+ Assert.assertEquals(in.getLineTerminatorLength(), 0);
+ }
+
+ @Test public void voidTestLineEndingLengthAtEof() throws Exception {
+ final String input = "Hello\nWorld\r\n";
+ final InputStream is = new ByteArrayInputStream(input.getBytes());
+ final AsciiLineReader in = new AsciiLineReader(is);
+
+ Assert.assertEquals(in.getLineTerminatorLength(), -1);
+ Assert.assertEquals(in.readLine(), "Hello");
+ Assert.assertEquals(in.getLineTerminatorLength(), 1);
+ Assert.assertEquals(in.readLine(), "World");
+ Assert.assertEquals(in.getLineTerminatorLength(), 2);
+ }
}