@@ -32,16 +32,15 @@
import htsjdk.samtools.seekablestream.SeekableStream ;
import htsjdk.samtools.util.zip.InflaterFactory ;
-import java.io.ByteArrayOutputStream ;
-import java.io.File ;
-import java.io.IOException ;
-import java.io.InputStream ;
-import java.io.RandomAccessFile ;
+import java.io.* ;
import java.net.URL ;
import java.nio.ByteBuffer ;
import java.nio.ByteOrder ;
+import java.nio.channels.SeekableByteChannel ;
+import java.nio.file.Files ;
+import java.nio.file.Path ;
+import java.nio.file.StandardOpenOption ;
import java.util.Arrays ;
-import java.util.zip.Inflater ;
/* *
* Utility class for reading BGZF block compressed files. The caller can treat this file like any other InputStream.
@@ -587,41 +586,98 @@ private int unpackInt32(final byte[] buffer, final int offset) {
public enum FileTermination {HAS_TERMINATOR_BLOCK , HAS_HEALTHY_LAST_BLOCK , DEFECTIVE }
+ /* *
+ *
+ * @param file the file to check
+ * @return status of the last compressed block
+ * @throws IOException
+ */
public static FileTermination checkTermination (final File file ) throws IOException {
- final long fileSize = file. length();
+ return checkTermination(file == null ? null : file. toPath());
+ }
+
+ /* *
+ *
+ * @param path to the file to check
+ * @return status of the last compressed block
+ * @throws IOException
+ */
+ public static FileTermination checkTermination (final Path path ) throws IOException {
+ try ( final SeekableByteChannel channel = Files . newByteChannel(path, StandardOpenOption . READ ) ){
+ return checkTermination(channel);
+ }
+ }
+
+ /* *
+ * check the status of the final bzgipped block for the given bgzipped resource
+ *
+ * @param channel an open channel to read from,
+ * the channel will remain open and the initial position will be restored when the operation completes
+ * this makes no guarantee about the state of the channel if an exception is thrown during reading
+ *
+ * @return the status of the last compressed black
+ * @throws IOException
+ */
+ public static FileTermination checkTermination (SeekableByteChannel channel ) throws IOException {
+ final long fileSize = channel. size();
if (fileSize < BlockCompressedStreamConstants . EMPTY_GZIP_BLOCK. length) {
return FileTermination . DEFECTIVE ;
}
- final RandomAccessFile raFile = new RandomAccessFile (file, " r" );
+ final long initialPosition = channel. position();
+ boolean exceptionThrown = false ;
try {
- raFile. seek(fileSize - BlockCompressedStreamConstants . EMPTY_GZIP_BLOCK. length);
- byte [] buf = new byte [BlockCompressedStreamConstants . EMPTY_GZIP_BLOCK. length];
- raFile. readFully(buf);
- if (Arrays . equals(buf, BlockCompressedStreamConstants . EMPTY_GZIP_BLOCK )) {
+ channel. position(fileSize - BlockCompressedStreamConstants . EMPTY_GZIP_BLOCK. length);
+
+ // Check if the end of the file is an empty gzip block which is used as the terminator for a bgzipped file
+ final ByteBuffer lastBlockBuffer = ByteBuffer . allocate(BlockCompressedStreamConstants . EMPTY_GZIP_BLOCK. length);
+ readFully(channel, lastBlockBuffer);
+ if (Arrays . equals(lastBlockBuffer. array(), BlockCompressedStreamConstants . EMPTY_GZIP_BLOCK )) {
return FileTermination . HAS_TERMINATOR_BLOCK ;
}
- final int bufsize = (int )Math . min(fileSize, BlockCompressedStreamConstants . MAX_COMPRESSED_BLOCK_SIZE );
- buf = new byte [bufsize];
- raFile. seek(fileSize - bufsize);
- raFile. read(buf);
- for (int i = buf. length - BlockCompressedStreamConstants . EMPTY_GZIP_BLOCK. length;
- i >= 0 ; -- i) {
+
+ // if the last block isn't an empty gzip block, check to see if it is a healthy compressed block or if it's corrupted
+ final int bufsize = (int ) Math . min(fileSize, BlockCompressedStreamConstants . MAX_COMPRESSED_BLOCK_SIZE );
+ final byte [] bufferArray = new byte [bufsize];
+ channel. position(fileSize - bufsize);
+ readFully(channel, ByteBuffer . wrap(bufferArray));
+ for (int i = bufferArray. length - BlockCompressedStreamConstants . EMPTY_GZIP_BLOCK. length;
+ i >= 0 ; -- i) {
if (! preambleEqual(BlockCompressedStreamConstants . GZIP_BLOCK_PREAMBLE ,
- buf , i, BlockCompressedStreamConstants . GZIP_BLOCK_PREAMBLE. length)) {
+ bufferArray , i, BlockCompressedStreamConstants . GZIP_BLOCK_PREAMBLE. length)) {
continue ;
}
- final ByteBuffer byteBuffer = ByteBuffer . wrap(buf, i + BlockCompressedStreamConstants . GZIP_BLOCK_PREAMBLE. length, 4 );
+ final ByteBuffer byteBuffer = ByteBuffer . wrap(bufferArray,
+ i + BlockCompressedStreamConstants . GZIP_BLOCK_PREAMBLE. length,
+ 4 );
byteBuffer. order(ByteOrder . LITTLE_ENDIAN );
- final int totalBlockSizeMinusOne = byteBuffer. getShort() & 0xFFFF ;
- if (buf . length - i == totalBlockSizeMinusOne + 1 ) {
+ final int totalBlockSizeMinusOne = byteBuffer. getShort() & 0xFFFF ;
+ if (bufferArray . length - i == totalBlockSizeMinusOne + 1 ) {
return FileTermination . HAS_HEALTHY_LAST_BLOCK ;
} else {
return FileTermination . DEFECTIVE ;
}
}
return FileTermination . DEFECTIVE ;
+ } catch (final Throwable e) {
+ exceptionThrown = true ;
+ throw e;
} finally {
- raFile. close();
+ // if an exception was thrown we don't want to reset the position because that would be likely to throw again
+ // and suppress the initial exception
+ if (! exceptionThrown) {
+ channel. position(initialPosition);
+ }
+ }
+ }
+
+ /* *
+ * read as many bytes as dst's capacity into dst or throw if that's not possible
+ * @throws EOFException if channel has fewer bytes available than dst's capacity
+ */
+ static void readFully (SeekableByteChannel channel , ByteBuffer dst ) throws IOException {
+ final int bytesRead = channel. read(dst);
+ if (bytesRead < dst. capacity()){
+ throw new EOFException ();
}
}
0 comments on commit
98c2438