Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

woohoo! clone using partial pack caches works awesome

  • Loading branch information...
commit b11dac852e5ccfd288539707b244306f05908f32 1 parent 1b2946e
@schacon authored
Showing with 45 additions and 23 deletions.
  1. +35 −10 dulwich/agitmemnon.py
  2. +9 −12 dulwich/pack.py
  3. +1 −1  dulwich/server.py
View
45 dulwich/agitmemnon.py
@@ -114,7 +114,6 @@ def load_next_revtree_hunk(self):
o = self.get_super('RevTree', self.repo_name, 100000)
nilsha = '0000000000000000000000000000000000000000'
for col in o:
- print col.name
self.revtree[col.name] = []
for sup in col.columns:
objects = sup.value.split(":")
@@ -150,19 +149,33 @@ def fetch_objects(self, determine_wants, graph_walker, progress):
def partial_sender(self, objects, f, entries):
# PackCacheIndex (projectname) [(cache_key) => (list of objects/offset/size), ...]
- objs = {}
+ sent = set()
+ objs = set()
for sha, path in objects.itershas():
- objs[sha] = true
+ objs.add(sha)
+ index = a.get('PackCacheIndex', self.repo_name)
+
# parse cache_index entries, figure out what we need to pull
# (which caches have enough objects that we need)
# "sha:offset:size:base_sha\n"
-
- # pull each partial cache and send all the objects that are needed
- # cache = self.get('PackCache', cache_key)
-
- # add each sent object to the sent[] array to return
- # return the sent[] array
+ for cache in index:
+ # cache.name
+ cacheobjs = set()
+ entries = cache.value.split("\n")
+ if '' in entries:
+ entries.remove('')
+ for entry in entries:
+ (sha, offset, size, ref) = entry.split(":")
+ cacheobjs.add(sha)
+ if len(cacheobjs - objs) == 0:
+ # pull each partial cache and send all the objects that are needed
+ data = self.get_value('PackCache', cache.name, 'data')
+ data = base64.b64decode(data)
+ f.write(data)
+ sent = sent | cacheobjs # add each sent object to the sent[] array to return
+
+ return sent # return the sent[] array
def get_refs(self):
"""Get dictionary with all refs."""
@@ -228,11 +241,23 @@ def __init__(self):
self.partial_sender = self.repo.partial_sender
-#a = Agitmemnon()
+a = Agitmemnon()
#a.repo_name = 'fuzed2'
#a.load_next_revtree_hunk()
#print a.revtree
+#index = a.get('PackCacheIndex', 'fuzed2')
+#myset = set()
+#for cache in index:
+# print cache.name
+# entries = cache.value.split("\n")
+# if '' in entries:
+# entries.remove('')
+# for entry in entries:
+# (sha, offset, size, ref) = entry.split(":")
+# myset.add(sha)
+# print myset
+
#print a.get_object('7486f4075d2b9307d02e3905c69e28e456a51a32')[0].value
#print a['7486f4075d2b9307d02e3905c69e28e456a51a32'].get_parents()
#print a.get_object('7486f4075d2b9307d02e3905c69e28e456a51a32')
View
21 dulwich/pack.py
@@ -818,7 +818,7 @@ def write_pack(filename, objects, num_objects):
write_pack_index_v2(filename + ".idx", entries, data_sum)
-def write_pack_data(f, objects, num_objects, window=10, progress=None, partial_sender=None):
+def write_pack_data(f, objects, num_objects, window=10, progress=None, backend=None):
"""Write a new pack file.
:param f: File-like object that the new pack data is written to.
@@ -836,19 +836,16 @@ def write_pack_data(f, objects, num_objects, window=10, progress=None, partial_s
f.write(struct.pack(">L", 2)) # Pack version
f.write(struct.pack(">L", num_objects)) # Number of objects in pack
- #if partial_sender && (num_objects > 500):
- # objs = partial_sender(objects, f, entries)
- # objects.remove_objects(objs)
+ sent = set()
+ if backend and (num_objects > 500):
+ sent = backend.partial_sender(objects, f, entries)
- # NOT NECESSARY - DEBUGGING
- count = 0
+ shas = set()
for sha, path in objects.itershas():
- count = count + 1
- progress("looking for cached data: %d.\r" % count)
- progress("looking for cached data: %d.\n" % count)
- # END NOT NECESSARY
-
- for o, path in objects:
+ shas.add(sha)
+
+ for sha in (shas - sent):
+ o = backend.repo[sha]
offset, crc32 = write_pack_object(f, o.type, o.as_raw_string())
entries.append((o.sha().digest(), offset, crc32))
return entries, f.write_sha()
View
2  dulwich/server.py
@@ -172,7 +172,7 @@ def next(self):
progress("counting objects: %d, done.\n" % len(objects_iter))
progress("GitHub is collecting your data\n")
write_pack_data(ProtocolFile(None, write), objects_iter,
- len(objects_iter), 10, progress, self.backend.partial_sender)
+ len(objects_iter), 10, progress, self.backend)
progress("Have a nice day!\n")
# we are done
self.proto.write("0000")
Please sign in to comment.
Something went wrong with that request. Please try again.