Permalink
Browse files

add a simple dsorted function to disco.util for easy disk-sorting

  • Loading branch information...
jflatow authored and Ville Tuulos committed Jun 29, 2011
1 parent ec4b25e commit 7b2a0208c44b70f864e03a8773bdaca981e55329
Showing with 24 additions and 0 deletions.
  1. +24 −0 lib/disco/util.py
View
@@ -45,6 +45,30 @@ def __str__((host, port)):
def chainify(iterable):
    """
    Concatenate the iterables contained in `iterable` into a single list.

    Each element of `iterable` must itself be iterable; their items are
    returned in order as one flat list (only one level is flattened).
    """
    flattened = chain.from_iterable(iterable)
    return list(flattened)
def dsorted(iterable, buffer_size=1e6, tempdir='.'):
    """
    Return an iterator over the items of `iterable` in sorted order,
    spilling sorted runs to temporary files on disk.

    The input is consumed in chunks of at most `buffer_size` items.  Each
    chunk is sorted in memory, pickled item by item into an anonymous
    temporary file created in `tempdir`, and the resulting sorted runs are
    lazily combined with :func:`heapq.merge`.

    :param iterable: items to sort; they must be picklable and mutually
                     comparable.
    :param buffer_size: maximum number of items sorted in memory at once.
                        Non-integer values (such as the default ``1e6``)
                        are truncated to an integer.
    :param tempdir: directory in which the temporary files are created
                    (passed as ``dir`` to :func:`tempfile.TemporaryFile`).
    :return: an iterator yielding the items in ascending order.
    :raises ValueError: if `buffer_size` is smaller than 1.
    """
    try:
        from cPickle import dump, load
    except ImportError:
        # cPickle is not available everywhere; pickle has the same interface.
        from pickle import dump, load
    from heapq import merge
    from itertools import islice
    from tempfile import TemporaryFile

    # islice needs an integer stop, but the default (1e6) is a float.
    chunk_size = int(buffer_size)
    if chunk_size < 1:
        # A chunk size of zero would never make progress through the input.
        raise ValueError("buffer_size must be at least 1")

    def read(handle):
        # Yield the pickled items back one at a time, and release the
        # temporary file as soon as the run is exhausted (or the generator
        # is closed/garbage collected after having been started).
        try:
            while True:
                try:
                    yield load(handle)
                except EOFError:
                    return
        finally:
            handle.close()

    iterator = iter(iterable)
    subiters = []
    while True:
        chunk = sorted(islice(iterator, chunk_size))
        if not chunk:
            # Input exhausted: don't create an empty temporary file.
            break
        handle = TemporaryFile(dir=tempdir)
        for item in chunk:
            dump(item, handle, -1)
        # Rewind so the reader starts from the first pickled item.
        handle.seek(0)
        subiters.append(read(handle))
        if len(chunk) < chunk_size:
            # A short chunk means the input has been fully consumed.
            break
    return merge(*subiters)
def flatten(iterable):
for item in iterable:
if isiterable(item):

0 comments on commit 7b2a020

Please sign in to comment.