Skip to content

Commit

Permalink
JAVA-332: Have GridFSDBFile.MyInputStream.skip be smart about not fetching GridFS chunks that are being entirely skipped over
Browse files Browse the repository at this point in the history
  • Loading branch information
dgottlieb committed Feb 24, 2012
1 parent 68f5d7f commit e964822
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 6 deletions.
38 changes: 33 additions & 5 deletions src/main/com/mongodb/gridfs/GridFSDBFile.java
Expand Up @@ -133,13 +133,11 @@ public int read(byte[] b){
public int read(byte[] b, int off, int len){

if ( _data == null || _offset >= _data.length ){

if ( _nextChunk >= _numChunks )
if ( _currentChunkIdx + 1 >= _numChunks )
return -1;

_data = getChunk( _nextChunk );
_data = getChunk( ++_currentChunkIdx );
_offset = 0;
_nextChunk++;
}

int r = Math.min( len , _data.length - _offset );
Expand All @@ -148,9 +146,39 @@ public int read(byte[] b, int off, int len){
return r;
}

/**
 * Skips over and discards {@code numBytesToSkip} bytes, without fetching
 * any chunk that is skipped over in its entirety — only the chunk the
 * stream finally lands in is read from the server.
 *
 * @param numBytesToSkip number of bytes to skip; values &lt;= 0 are a no-op
 * @return the number of bytes actually skipped, which is smaller than the
 *         request when the skip runs off the end of the file
 * @throws IOException if an I/O error occurs
 */
public long skip(long numBytesToSkip) throws IOException {
    if (numBytesToSkip <= 0)
        return 0;

    //Before the first read, _currentChunkIdx is -1 but the stream is
    //logically positioned at the start of chunk 0. Normalize it so the
    //arithmetic below never calls getChunk(-1) and a later read does not
    //re-fetch chunk 0.
    if (_currentChunkIdx < 0)
        _currentChunkIdx = 0;

    long totalSkipped = 0;
    while (numBytesToSkip > 0) {
        if (_currentChunkIdx >= _numChunks)
            //We're skipping over the back end of the file; short-circuit and
            //don't count the excess bytes in the return value.
            return totalSkipped;

        if (_offset + numBytesToSkip <= _chunkSize) {
            //The target position lies inside the current chunk: adjust the
            //offset and fetch the chunk so the next read serves real data.
            _offset += numBytesToSkip;
            totalSkipped += numBytesToSkip;
            if (_data == null)
                _data = getChunk(_currentChunkIdx);
            return totalSkipped;
        }

        //We're skipping over the remainder of this chunk; advance to the
        //next chunk without fetching the current one.
        //NOTE(review): this assumes every chunk except possibly the last is
        //exactly _chunkSize bytes; for a short final chunk the returned
        //count can exceed the bytes actually available — confirm against
        //the file length if that matters to callers.
        long chunkRemainder = _chunkSize - _offset;
        totalSkipped += chunkRemainder;
        numBytesToSkip -= chunkRemainder;
        _offset = 0;
        _data = null;
        ++_currentChunkIdx;
    }
    return totalSkipped;
}

final int _numChunks;   // total number of chunks stored for this file

// NOTE(review): diff artifact — _nextChunk is the line REMOVED by this
// commit and _currentChunkIdx is its replacement; only one exists in the
// real file.
int _nextChunk = 0;
int _currentChunkIdx = -1;   // index of the chunk in _data; -1 until the first fetch
int _offset;                 // read position within _data
byte[] _data = null;         // bytes of the current chunk; null when not yet fetched
}
Expand Down
5 changes: 5 additions & 0 deletions src/main/com/mongodb/util/MyAsserts.java
Expand Up @@ -73,6 +73,11 @@ public static void assertEquals( short a , short b ){
if ( a != b )
throw new MyAssert( "" + a + " != " + b );
}

/**
 * Asserts that two byte values are equal.
 *
 * @param expected the expected value
 * @param result   the value to check
 * @throws MyAssert if {@code expected} and {@code result} differ
 */
public static void assertEquals( byte expected , byte result ) {
    if ( expected == result )
        return;
    throw new MyAssert( "" + expected + " != " + result );
}

public static void assertEquals( double a , double b , double diff ){
if ( Math.abs( a - b ) > diff )
Expand Down
63 changes: 62 additions & 1 deletion src/test/com/mongodb/gridfs/GridFSTest.java
Expand Up @@ -167,10 +167,11 @@ public void testBadChunkSize() throws Exception {
fileSize = 10 * 1024 * 1024;

byte[] randomBytes = new byte[fileSize];
for (int idx = 0; idx < 2 * GridFS.MAX_CHUNKSIZE; ++idx)
for (int idx = 0; idx < fileSize; ++idx)
randomBytes[idx] = (byte)(256 * Math.random());

GridFSInputFile inputFile = _fs.createFile(randomBytes);
inputFile.setFilename("bad_chunk_size.bin");
try{
inputFile.save(0);
fail("should have received an exception about a chunk size being zero");
Expand All @@ -197,6 +198,66 @@ public void testBadChunkSize() throws Exception {
assertArrayEquals(randomBytes, savedFileBytes);
}

/**
 * Exercises GridFSDBFile.MyInputStream.skip: within-chunk skips,
 * cross-chunk skips, no-op skips, skips landing exactly on a chunk
 * boundary, and skipping past end-of-file.
 */
@Test(groups = {"basic"})
public void testInputStreamSkipping() throws Exception {
    int chunkSize = GridFS.DEFAULT_CHUNKSIZE;
    int fileSize = 7 * chunkSize;

    //Fill with a repeating 0..250 pattern; 251 doesn't divide the chunk
    //size, so chunk boundaries never line up with the pattern and a
    //mis-positioned read is guaranteed to produce a wrong byte.
    byte[] fileBytes = new byte[fileSize];
    for (int idx = 0; idx < fileSize; ++idx)
        fileBytes[idx] = (byte)(idx % 251);

    GridFSInputFile inputFile = _fs.createFile(fileBytes);
    inputFile.setFilename("input_stream_skipping.bin");
    inputFile.save(chunkSize);

    GridFSDBFile savedFile = _fs.findOne(new BasicDBObject("_id", inputFile.getId()));
    GridFSDBFile.MyInputStream inputStream = (GridFSDBFile.MyInputStream)savedFile.getInputStream();

    //Quick run-through, make sure the file is as expected
    for (int idx = 0; idx < fileSize; ++idx)
        assertEquals((byte)(idx % 251), (byte)inputStream.read());

    inputStream = (GridFSDBFile.MyInputStream)savedFile.getInputStream();

    int position = 0;
    assertEquals((byte)(position++ % 251), (byte)inputStream.read());

    //Small skip that stays inside the current chunk
    long skipped = inputStream.skip(1);
    assertEquals(1, skipped);
    position += 1;
    assertEquals((byte)(position++ % 251), (byte)inputStream.read());

    //Skip that spans a chunk boundary
    skipped = inputStream.skip(chunkSize);
    assertEquals(chunkSize, skipped);
    position += chunkSize;
    assertEquals((byte)(position++ % 251), (byte)inputStream.read());

    //Negative and zero skips are no-ops
    skipped = inputStream.skip(-1);
    assertEquals(0, skipped);
    skipped = inputStream.skip(0);
    assertEquals(0, skipped);

    //Skip over several whole chunks at once
    skipped = inputStream.skip(3 * chunkSize);
    assertEquals(3 * chunkSize, skipped);
    position += 3 * chunkSize;
    assertEquals((byte)(position++ % 251), (byte)inputStream.read());

    //Make sure skipping works when we skip to an exact chunk boundary
    long toSkip = inputStream.available();
    skipped = inputStream.skip(toSkip);
    assertEquals(toSkip, skipped);
    position += toSkip;
    assertEquals((byte)(position++ % 251), (byte)inputStream.read());

    //Skipping past EOF reports only the bytes that actually remained
    skipped = inputStream.skip(2 * fileSize);
    assertEquals(fileSize - position, skipped);
    assertEquals(-1, inputStream.read());
}

final DB _db;
final GridFS _fs;

Expand Down

0 comments on commit e964822

Please sign in to comment.