Permalink
Browse files

split: Prevent memory drain from excessively long shalists.

This avoids huge RAM usage when you're splitting a really huge object. Besides,
git probably doesn't work too well with single trees that contain millions
of objects anyway.
  • Loading branch information...
1 parent eacc2d3 commit a47f3b4d6e0287b69621a96f5e219132bb1220a5 @apenwarr apenwarr committed Jan 6, 2010
Showing with 33 additions and 4 deletions.
  1. +5 −0 cmd-split.py
  2. +20 −1 hashsplit.py
  3. +8 −3 test-sh
View
@@ -17,6 +17,7 @@
bench print benchmark timings to stderr
max-pack-size= maximum bytes in a single pack
max-pack-objects= maximum number of objects in a single pack
+fanout= maximum number of blobs in a single tree
"""
o = options.Options('bup split', optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])
@@ -34,6 +35,10 @@
hashsplit.max_pack_size = int(opt.max_pack_size)
if opt.max_pack_objects:
hashsplit.max_pack_objects = int(opt.max_pack_objects)
+if opt.fanout:
+ hashsplit.fanout = int(opt.fanout)
+if opt.blobs:
+ hashsplit.fanout = 0
start_time = time.time()
View
@@ -8,6 +8,7 @@
split_verbosely = 0
max_pack_size = 1000*1000*1000
max_pack_objects = 10*1000*1000
+fanout = 4096
class Buf:
def __init__(self):
@@ -122,8 +123,26 @@ def split_to_shalist(w, files):
yield ('100644', 'bup.chunk.%016x' % cn, sha)
+def _next(i):
+ try:
+ return i.next()
+ except StopIteration:
+ return None
+
+
def split_to_tree(w, files):
- shalist = list(split_to_shalist(w, files))
+ sl = iter(split_to_shalist(w, files))
+ if not fanout:
+ shalist = list(sl)
+ else:
+ shalist = []
+ tmplist = []
+ for e in sl:
+ tmplist.append(e)
+ if len(tmplist) >= fanout and len(tmplist) >= 3:
+ shalist.append(('40000', tmplist[0][1], w.new_tree(tmplist)))
+ tmplist = []
+ shalist += tmplist
tree = w.new_tree(shalist)
return (shalist, tree)
View
11 test-sh
@@ -16,8 +16,13 @@ bup init
bup split --bench -b <testfile1 >tags1.tmp
bup split -vvvv -b testfile2 >tags2.tmp
bup split -t testfile2 >tags2t.tmp
+bup split -t testfile2 --fanout 3 >tags2tf.tmp
bup split -r "$BUP_DIR" -c testfile2 >tags2c.tmp
diff -u tags1.tmp tags2.tmp || true
+if diff -q tags2t.tmp tags2tf.tmp; then
+ echo "fanout tree same as non-fanout tree!?"
+ false
+fi
wc -c testfile1 testfile2
wc -l tags1.tmp tags2.tmp
bup join $(cat tags1.tmp) >out1.tmp
@@ -32,11 +37,11 @@ diff -u testfile2 out2c.tmp
(
set -e
cd "$BUP_DIR" || exit 1
- git repack -Ad
- git prune
+ #git repack -Ad
+ #git prune
(cd "$TOP/t/sampledata" && bup save -vvn master .) || exit 1
n=$(git fsck --full --strict 2>&1 |
- grep -v 'dangling commit' |
+ egrep -v 'dangling (commit|tree)' |
tee -a /dev/stderr |
wc -l)
if [ "$n" != 0 ]; then

0 comments on commit a47f3b4

Please sign in to comment.