Permalink
Browse files

Fixed packfile serialization and added test for it

  • Loading branch information...
1 parent 446f0d5 commit 0305882ecc904068518f271430ddf45a15282a52 @tarruda committed Jan 9, 2013
Showing with 166 additions and 75 deletions.
  1. +9 −2 Makefile
  2. +1 −0 package.json
  3. +3 −0 src/cpp/zlib.cpp
  4. +2 −0 src/js/common.js
  5. +1 −0 src/js/index.js
  6. +28 −18 src/js/pack.js
  7. +14 −13 src/js/zlib.js
  8. +108 −42 test/all.coffee
View
@@ -2,7 +2,14 @@ runtestdir = \
@ls $(1)/*.coffee | xargs \
./node_modules/.bin/mocha --compilers coffee:coffee-script -u $(2) --colors
-test:
+build-addon:
+ @./node_modules/.bin/node-gyp configure
+ @./node_modules/.bin/node-gyp build
+
+test: build-addon
$(call runtestdir, "./test", "tdd")
-.PHONY: test
+clean:
+ @./node_modules/.bin/node-gyp clean
+
+.PHONY: test build-addon clean
View
@@ -15,6 +15,7 @@
"git-objects"
],
"devDependencies": {
+ "node-gyp": "*",
"wrench": "*",
"less": "*",
"mocha": "*",
View
@@ -1,4 +1,7 @@
// helper zlib functions for easily inflating/deflating git packed objects
+// the default node.js zlib library cannot be used for parsing packfiles,
+// since it does not tell the number of bytes consumed from the input stream,
+// and this is needed when inflating pack entries
#include <node.h>
#include <node_buffer.h>
#include <zlib.h>
View
@@ -26,6 +26,8 @@ function invoke(fn, context, arg) {
}
function GitObject() {
+ // since when packing a git object graph we may end up visiting
+ // the same object twice, this id is used to avoid duplicates
this._id = ++id;
}
View
@@ -3,4 +3,5 @@ module.exports = {
, Tree: require('./tree')
, Commit: require('./commit')
, Tag: require('./tag')
+ , Pack: require('./pack')
};
View
@@ -8,10 +8,13 @@ function Pack(objects) {
this.objects = objects || [];
}
-// FIXME this function does not currently applies delta compression
+// FIXME this function does not currently apply delta compression to
+// similar objects in the pack, so it is mostly useful for sending
+// a relatively small amount of git objects to a remote repository
Pack.prototype.toBuffer = function() {
var key, object, buffer, header, typeBits, data, encodedHeader, packContent
- , hash, encodedHeaderBytes, deflated, checksum
+ , encodedHeaderBytes, deflated, checksum
+ , hash = crypto.createHash('sha1')
, contentArray = []
, processed = {};
@@ -29,32 +32,37 @@ Pack.prototype.toBuffer = function() {
header = new Buffer(12);
header.write("PACK");
header.writeUInt32BE(2, 4);
- header.writeUInt32BE(Object.keys(this.objects).length, 8);
+ header.writeUInt32BE(Object.keys(processed).length, 8);
contentArray.push(header);
+ hash.update(header);
// start packing objects
for (key in processed) {
buffer = processed[key];
// calculate the object header
- typeBits = obj.typeCode << 4;
- data = removeObjectHeader(obj.data);
+ typeBits = buffer.typeCode << 4;
+ // the header is only used for loose objects. in packfiles it
+ // should not be included
+ data = removeObjectHeader(buffer.data);
encodedHeaderBytes = encodePackEntrySize(data.length);
encodedHeaderBytes[0] = encodedHeaderBytes[0] | typeBits;
encodedHeader = new Buffer(encodedHeaderBytes);
- contentArray.push(encodedHeader);
deflated = zlib.deflate(data);
+ contentArray.push(encodedHeader);
+ contentArray.push(deflated);
+ hash.update(encodedHeader);
+ hash.update(deflated);
}
// prepare the buffer to be returned and calculate trailing checksum
- packContent = Buffer.concat([contentArray]);
- hash = crypto.createHash('sha1');
- hash.update(packContent);
+ packContent = Buffer.concat(contentArray);
checksum = new Buffer(hash.digest('hex'), 'hex');
+
return Buffer.concat([packContent, checksum]);
}
function removeObjectHeader(buffer) {
- var i;
+ var i = 0;
while (buffer[i] != 0)
i++;
@@ -64,12 +72,12 @@ function removeObjectHeader(buffer) {
function encodePackEntrySize(size) {
- // The first byte will only contain the first 4 bits
- // since 3 bits will be reserved for holding type information
- // and that will be added outside this function
- var current = size >>> 4
- , lastByte = size & 0xf
- , bytes = [lastByte];
+ // this is a variant of LEB128: http://en.wikipedia.org/wiki/LEB128
+ // with the difference that the first byte will contain type information
+ // in the first 3 bits
+ var lastByte = size & 0xf
+ , bytes = [lastByte]
+ , current = size >>> 4;
while (current > 0) {
// Set the most significant bit for the last processed byte to signal
@@ -84,8 +92,8 @@ function encodePackEntrySize(size) {
}
function decodePackEntrySize(buffer, offset) {
- var bits = 4;
- , byte = buffer[offset] & 0xf;
+ var bits = 4
+ , byte = buffer[offset] & 0xf
, rv = byte;
while (buffer[offset++] & 0x80) {
@@ -96,3 +104,5 @@ function decodePackEntrySize(buffer, offset) {
return [rv, pos];
}
+
+module.exports = Pack;
View
@@ -1,15 +1,20 @@
var _zlib = require('../../build/Release/binding.node')
, returnCodes = {
- Z_OK: binding.Z_OK,
- Z_STREAM_END: binding.Z_STREAM_END,
- Z_NEED_DICT: binding.Z_NEED_DICT,
- Z_ERRNO: binding.Z_ERRNO,
- Z_STREAM_ERROR: binding.Z_STREAM_ERROR,
- Z_DATA_ERROR: binding.Z_DATA_ERROR,
- Z_MEM_ERROR: binding.Z_MEM_ERROR,
- Z_BUF_ERROR: binding.Z_BUF_ERROR,
- Z_VERSION_ERROR: binding.Z_VERSION_ERROR
+ Z_OK: _zlib.Z_OK,
+ Z_STREAM_END: _zlib.Z_STREAM_END,
+ Z_NEED_DICT: _zlib.Z_NEED_DICT,
+ Z_ERRNO: _zlib.Z_ERRNO,
+ Z_STREAM_ERROR: _zlib.Z_STREAM_ERROR,
+ Z_DATA_ERROR: _zlib.Z_DATA_ERROR,
+ Z_MEM_ERROR: _zlib.Z_MEM_ERROR,
+ Z_BUF_ERROR: _zlib.Z_BUF_ERROR,
+ Z_VERSION_ERROR: _zlib.Z_VERSION_ERROR
};
+// idea stolen from node.js zlib bindings
+Object.keys(returnCodes).forEach(function(k) {
+ returnCodes[returnCodes[k]] = k;
+});
+
function checkBuffer(data) {
if (!(data instanceof Buffer))
@@ -43,9 +48,5 @@ function inflate(data, inflatedsize) {
}
-Object.keys(returnCodes).forEach(function(k) {
- returnCodes[returnCodes[k]] = k;
-});
-
exports.deflate = deflate;
exports.inflate = inflate;
View
@@ -5,7 +5,8 @@ zlib = require 'zlib'
wrench = require 'wrench'
{spawn} = require 'child_process'
{expect} = require 'chai'
-{Blob, Tree, Commit, Tag} = require '../src/js'
+{Blob, Tree, Commit, Tag, Pack} = require '../src/js'
+_zlib = require '../src/js/zlib'
createGitRepo = (done) ->
@@ -17,47 +18,18 @@ createGitRepo = (done) ->
deleteGitRepo = -> wrench.rmdirSyncRecursive(@path, true)
-suite 'git repository manipulation', ->
-
- setup createGitRepo
-
- teardown deleteGitRepo
-
- test 'write git graph to repository and check integrity', (done) ->
- d1 = new Date 1000000000
- d2 = new Date 2000000000
- d3 = new Date 3000000000
- b1 = new Blob 'test content\n'
- b2 = new Blob 'new test content\n'
- b3 = new Blob 'subdir test content\n'
- t1 = new Tree {
- 'file-under-tree': b3
- }
- t2 = new Tree {
- 'some-file': b1
- 'some-file.txt': b2
- 'sub-directory.d': t1
- }
- t3 = new Tree {
- 'another-file.txt': b1
- }
- author = 'Thiago de Arruda <user@domain.com>'
- c1 = new Commit t1, author, null, d1, "Artificial commit 1"
- c2 = new Commit t2, author, null, d2, "Artificial commit 2", [c1]
- c3 = new Commit t3, author, null, d3, "Artificial commit 3", [c2]
- writeGitGraph @path, c3, 'master', =>
- tag = new Tag c2, 'v0.0.1', author, d2, 'Tag second commit'
- writeGitGraph @path, tag, tag.name, =>
- buf = []
- git = spawn 'git', ['fsck'], cwd: @path
- git.stdout.setEncoding('utf8')
- push = (chunk) -> buf.push(chunk)
- end = =>
- expect(buf.join '' ).to.equal ''
- done()
- git.stderr.on('data', push)
- git.stderr.on('end', end)
-
+captureOutput = (child, cb) ->
+ out = []
+ err = []
+ child.stdout.setEncoding 'utf8'
+ child.stderr.setEncoding 'utf8'
+ child.stdout.on 'data', (chunk) ->
+ out.push chunk
+ child.stderr.on 'data', (chunk) ->
+ err.push chunk
+ child.stderr.on 'end', ->
+ cb(out.join(''), err.join(''))
+
writeGitGraph = (repo, root, refName, cb) ->
count = 0
writeCb = ->
@@ -87,3 +59,97 @@ writeGitBuffer = (repo, buffer, cb) ->
if typeof cb == 'function' then bufferFile.on('close', cb)
bufferFile.on 'error', (err) ->
if typeof cb == 'function' then cb()
+
+suite 'git repository manipulation', ->
+
+ suiteSetup createGitRepo
+
+ suiteTeardown deleteGitRepo
+
+ setup (done) ->
+ # some git objects to work on the tests
+ d1 = new Date 1000000000
+ d2 = new Date 2000000000
+ d3 = new Date 3000000000
+ b1 = new Blob 'test content\ntest content2\ntest content3\n'
+ # this encodes the second blob as a delta of the first in packfiles
+ b2 = new Blob 'test content\ntest content2\ntest content3\nappend'
+ b3 = new Blob 'subdir test content\n'
+ t1 = new Tree {
+ 'file-under-tree': b3
+ }
+ t2 = new Tree {
+ 'some-file': b1
+ 'some-file.txt': b2
+ 'sub-directory.d': t1
+ }
+ t3 = new Tree {
+ 'another-file.txt': b1
+ }
+ author = 'Git User <user@domain.com>'
+ c1 = new Commit t1, author, null, d1, "Artificial commit 1"
+ c2 = new Commit t2, author, null, d2, "Artificial commit 2", [c1]
+ # keep master head and tag references
+ @c3 = new Commit t3, author, null, d3, "Artificial commit 3", [c2]
+ @tag = new Tag c2, 'v0.0.1', author, d2, 'Tag second commit'
+ # write the objects to the repository
+ writeGitGraph @path, @c3, 'master', =>
+ writeGitGraph @path, @tag, @tag.name, done
+
+ test 'check repository integrity', (done) ->
+ gitFsck = spawn 'git', ['fsck', '--strict'], cwd: @path
+ captureOutput gitFsck, (stdout, stderr) ->
+ expect(stdout).to.equal ''
+ expect(stderr).to.equal ''
+ done()
+
+ test 'unpack objects in repository', (done) ->
+ # delete all git objects written so git-unpack-objects will
+ # actually unpack all objects
+ objectsDir = path.join(@path, '.git', 'objects')
+ find = spawn 'find', [objectsDir, '-type', 'f', '-delete']
+ captureOutput find, (stdout, stderr) =>
+ expect(stdout).to.equal ''
+ expect(stderr).to.equal ''
+ # git-fsck should report errors since there are broken refs
+ gitFsck = spawn 'git', ['fsck', '--strict'], cwd: @path
+ captureOutput gitFsck, (stdout, stderr) =>
+ expect(stdout).to.equal ''
+ expect(stderr).to.match /HEAD\:\s+invalid\s+sha1\s+pointer/
+ # let's invoke git-unpack-objects passing our packed stream
+ # so the repository will be repopulated
+ pack = new Pack [@c3, @tag]
+ gitUnpack = spawn 'git', ['unpack-objects', '-q', '--strict'],
+ cwd: @path
+ gitUnpack.stdin.end(pack.toBuffer())
+ captureOutput gitUnpack, (stdout, stderr) =>
+ expect(stdout).to.equal ''
+ expect(stderr).to.equal ''
+ # git-fsck should be happy again
+ gitFsck = spawn 'git', ['fsck', '--strict'], cwd: @path
+ captureOutput gitFsck, (stdout, stderr) =>
+ expect(stdout).to.equal ''
+ expect(stderr).to.equal ''
+ done()
+
+
+suite 'zlib binding', ->
+ test 'deflate/inflate some data synchronously', ->
+ data = new Buffer 30
+ data.fill 'a'
+ deflated = _zlib.deflate data
+ mixedData = Buffer.concat [
+ deflated
+ new Buffer([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+ ]
+ # the following code is the reason why this zlib binding was needed:
+ # packfiles contain deflated data mixed with other data, so to
+ # advance properly in the packfile stream, we need to know how
+ # many bytes each deflated sequence uses
+ # we also need to pass the original data size (which is available
+ # on packfiles) so inflate can efficiently allocate memory to
+ # hold output
+ [inflated, bytesRead] = _zlib.inflate mixedData, data.length
+ expect(inflated.toString()).to.equal data.toString()
+ expect(bytesRead).to.equal deflated.length
+

0 comments on commit 0305882

Please sign in to comment.