Skip to content

Commit

Permalink
Fixing bug where sampling number of blocks could be less than number …
Browse files Browse the repository at this point in the history
…of blocks to read
  • Loading branch information
prashmohan committed Aug 1, 2011
1 parent 2081232 commit 4ea1b22
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion datadriver/datablocker.py
Expand Up @@ -91,6 +91,12 @@ def get_blocks(self, records):
return [records[indices : indices + block_size] for indices in range(0, num_records, block_size)]


def _sample(dataset, sample_num):
if len(dataset) < sample_num:
return dataset
else:
return random.sample(dataset, sample_num)

class ResamplingBlocker(object):
@staticmethod
def get_blocks_gamma(records, num_blocks, block_size, gamma):
Expand All @@ -108,7 +114,7 @@ def get_blocks_gamma(records, num_blocks, block_size, gamma):
nonfull_blocks = range(num_blocks)

for record in records:
for block_no in random.sample(nonfull_blocks, gamma):
for block_no in _sample(nonfull_blocks, gamma):
blocks[block_no].append(record)
if len(blocks[block_no]) >= block_size:
# Remove block from contenders list if block is
Expand Down

0 comments on commit 4ea1b22

Please sign in to comment.