Permalink
Browse files

discooutputstream: check outputstream size and hunk size

  • Loading branch information...
dangra committed Aug 24, 2011
1 parent 40786e1 commit 0d7815cf61d9fa7c5970534c057494c081619454
Showing with 42 additions and 1 deletion.
  1. +1 −1 lib/disco/fileutils.py
  2. +41 −0 tests/test_chunker.py
View
@@ -41,7 +41,7 @@ def chunks(self, records):
yield self.dumpout(out)
out = self.makeout()
out.append(record)
if out.hunk_size:
if out.size or out.hunk_size:
yield self.dumpout(out)
def dumpout(self, out):
View
@@ -0,0 +1,41 @@
from itertools import count
from cPickle import dumps
from unittest import TestCase
from disco.fileutils import Chunker, CHUNK_SIZE, HUNK_SIZE
def _records():
    """Yield an endless stream of ``(v, v)`` pairs for v = 0, 1, 2, ...

    The generator never terminates on its own; callers must stop
    consuming it themselves.
    """
    for v in count():
        yield (v, v)
def _sizelimitedinput(max_size):
    """Yield ``(v, v)`` records until their pickled size reaches *max_size*.

    Each record is yielded before its size is counted, so at least one
    record is always produced, and the record that makes the running
    total reach or exceed *max_size* is included in the output.

    max_size -- serialized-size budget, in bytes of ``dumps(record, 1)``.
    """
    size = 0
    for v in count():
        record = (v, v)
        yield record
        # Use the same serializer as DiscoOutputStream (pickle protocol 1)
        # so the accumulated size matches what the stream accounts for.
        size += len(dumps(record, 1))
        if size >= max_size:
            break
class ChunkerTestCase(TestCase):
    # Each test feeds size-limited input to a Chunker whose chunk size is
    # two hunks and expects every record to come back as exactly one chunk.

    def test_exactly_a_hunk(self):
        # Input whose serialized size just reaches one hunk.
        records = _sizelimitedinput(HUNK_SIZE)
        chunker = Chunker(chunk_size=HUNK_SIZE * 2)
        chunks = list(chunker.chunks(records))
        self.assertEqual(len(chunks), 1)

    def test_less_than_a_hunk(self):
        # Input of about half a hunk; floor division keeps the size
        # limit an integer even under true division.
        records = _sizelimitedinput(HUNK_SIZE // 2)
        chunker = Chunker(chunk_size=HUNK_SIZE * 2)
        chunks = list(chunker.chunks(records))
        self.assertEqual(len(chunks), 1)

    def test_more_than_a_hunk(self):
        # Input sized to the full chunk size (two hunks).
        chunk_size = HUNK_SIZE * 2
        records = _sizelimitedinput(chunk_size)
        chunker = Chunker(chunk_size=chunk_size)
        chunks = list(chunker.chunks(records))
        self.assertEqual(len(chunks), 1)

0 comments on commit 0d7815c

Please sign in to comment.