Skip to content

Commit

Permalink
Implement postinglist splits in NewPostingList
Browse files Browse the repository at this point in the history
  • Loading branch information
BjarniRunar committed Oct 23, 2014
1 parent 0909912 commit 721718a
Showing 1 changed file with 35 additions and 2 deletions.
37 changes: 35 additions & 2 deletions mailpile/postinglist.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,41 @@ def save(self, split=True):
TIMERS['save_count'] += 1

def _splits(self):
# FIXME
return [self]
splits = [self]
if len(self.sig) < self.MAX_HASH_LEN:
total, sums = 0, {}
for sig, values in self.words.iteritems():
total += len(values)
if len(values) >= (self.MAX_ITEMS / 2):
nsig = sig[:self.MAX_HASH_LEN]
else:
nsig = sig[:len(self.sig)+1]
if nsig in sums:
sums[nsig] += len(values)
else:
sums[nsig] = len(values)

while total > self.MAX_ITEMS and sums:
skeys = sums.keys()
skeys.sort(key=lambda k: -sums[k])
nsig = skeys[0]
total -= sums[nsig]
del sums[nsig]
try:
fn = self._SaveFile(self.config, nsig)
if not os.path.exists(fn):
open(fn, 'w').close()

plc = PostingListContainer(self.session, nsig)
for sig in list(self.words.keys()):
if sig.startswith(nsig):
plc.add(sig, self.words[sig])
del self.words[sig]
splits.append(plc)
except (OSError, IOError):
pass

return splits

def _load(self):
t0 = time.time()
Expand Down

0 comments on commit 721718a

Please sign in to comment.