Skip to content

Commit

Permalink
Implemented deserialization of the 4 object types
Browse files Browse the repository at this point in the history
  • Loading branch information
tarruda committed Jan 9, 2013
1 parent aab65df commit fc8054c
Show file tree
Hide file tree
Showing 7 changed files with 339 additions and 54 deletions.
16 changes: 10 additions & 6 deletions src/js/blob.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,24 @@ var util = require('util')
, common = require('./common');


/**
 * Git blob object: wraps raw file contents.
 *
 * @param {string|Buffer} contents  Blob payload. Strings are encoded as
 *   UTF-8 when the blob is serialized.
 */
function Blob(contents) {
  // run the parent constructor installed by util.inherits below
  this.constructor.super_.call(this);
  this.contents = contents;
}
util.inherits(Blob, common.GitObject);

/**
 * Serializes this blob by delegating to the inherited `_serialize`.
 *
 * @param {*} visitor  Passed through unchanged to `_serialize`.
 * @returns {*} Whatever `_serialize` returns.
 */
Blob.prototype.serialize = function(visitor) {
  var contents = this.contents;

  // `_serialize` is handed a Buffer, so encode plain strings first
  if (typeof contents === 'string') contents = new Buffer(contents, 'utf8');

  return this._serialize(contents, visitor);
};

Blob.prototype.typeCode = 3;
/**
 * Builds a Blob from raw object data.
 *
 * @param {Buffer} contents  Blob data handed to
 *   `common.GitObject.getObjectInfo('blob', ...)`.
 * @returns {Array} Two-element array: the new Blob and the hash reported
 *   by `getObjectInfo`.
 */
Blob.deserialize = function(contents) {
  var parsed = common.GitObject.getObjectInfo('blob', contents)
    , blob = new Blob(parsed.contents);

  return [blob, parsed.hash];
};

module.exports = Blob;
46 changes: 45 additions & 1 deletion src/js/commit.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,50 @@ Commit.prototype.serialize = function(visitor) {
return this._serialize(new Buffer(contentArray.join('\n')), visitor);
};

Commit.prototype.typeCode = 1;
/**
 * Builds a Commit from raw object data.
 *
 * Header lines are read in order: one `tree` line, zero or more `parent`
 * lines, one `author` line and one `committer` line; everything after
 * that is taken as the message.
 *
 * @param {Buffer} contents  Commit data handed to
 *   `common.GitObject.getObjectInfo('commit', ...)`.
 * @returns {Array} Two-element array: the new Commit and the hash reported
 *   by `getObjectInfo`.
 * @throws {Error} When the tree, author or committer line is missing.
 */
Commit.deserialize = function(contents) {
  var treeMatch, parentMatch, authorMatch, committerMatch
    , offset, authoredAt, text
    , parentHashes = []
    , parsed = common.GitObject.getObjectInfo('commit', contents)
    , body = parsed.contents;

  // Decodes the bytes from `start` up to the next linefeed.
  function lineAt(start) {
    return body.slice(start, common.findLinefeed(body, start))
      .toString('utf8');
  }

  // tree: 'tree ' (5 bytes) + 40 hex digits = 45 bytes
  treeMatch = /^tree\s([0-9a-f]{40})$/.exec(body.slice(0, 45).toString('utf8'));
  if (!treeMatch)
    throw new Error('commit missing tree');
  offset = 46; // first byte after the linefeed that ends the tree line

  // parents: 'parent ' (7 bytes) + 40 hex digits = 47 bytes, plus linefeed
  for (;;) {
    parentMatch = /^parent\s([0-9a-f]{40})$/.exec(
      body.slice(offset, offset + 47).toString('utf8'));
    if (!parentMatch)
      break;
    parentHashes.push(parentMatch[1]);
    offset += 48;
  }

  // author
  authorMatch = /^author\s(.+\s<.*>)\s(.+)$/.exec(lineAt(offset));
  if (!authorMatch)
    throw new Error('commit missing author');
  authoredAt = common.parseDate(authorMatch[2]);
  offset += Buffer.byteLength(authorMatch[0]) + 1;

  // committer
  // FIXME ignoring commit date
  committerMatch = /^committer\s(.+\s<.*>)\s(?:.+)$/.exec(lineAt(offset));
  if (!committerMatch)
    throw new Error('commit missing committer');
  // NOTE(review): skips the committer line plus three more bytes; confirm
  // this matches the separator written by Commit.prototype.serialize
  offset += Buffer.byteLength(committerMatch[0]) + 3;

  // message
  text = body.slice(offset).toString('utf8');

  return [
    new Commit(treeMatch[1], authorMatch[1], committerMatch[1], authoredAt,
               text, parentHashes)
  , parsed.hash
  ];
};

module.exports = Commit;
98 changes: 85 additions & 13 deletions src/js/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,63 @@ function timestamp(date) {
date = date || new Date();
stamp = Math.ceil(date.getTime() / 1000).toString();
zone = formatTimezone(date);
return stamp + zone;
return stamp + ' ' + zone;
}

/**
 * Formats the timezone offset of `date` as a sign followed by `hhmm`
 * (e.g. `+0100`, `-0330`).
 *
 * `Date#getTimezoneOffset` returns minutes and is positive when local time
 * is behind UTC, so a positive offset maps to a '-' sign. The hour and
 * minute parts are computed from the absolute offset; taking `Math.floor`
 * and `%` of a negative offset would produce strings such as `+-100`.
 *
 * @param {Date} date  Object providing `getTimezoneOffset()`.
 * @returns {string} Sign plus zero-padded hours and minutes, with no
 *   leading space.
 */
function formatTimezone(date) {
  var offset = date.getTimezoneOffset()
    , sign = offset > 0 ? '-' : '+'
    , magnitude = Math.abs(offset)
    , hOffset = padLeft(Math.floor(magnitude / 60), 2, '0')
    , mOffset = padLeft(magnitude % 60, 2, '0');

  return sign + hOffset + mOffset;
}

/**
 * Left-pads the string form of `s` up to length `l`.
 *
 * @param {*} s  Value to pad; converted with `toString()`.
 * @param {number} l  Target length. Values already longer are returned
 *   unchanged.
 * @param {string} [c]  Fill string; a single space when omitted or empty.
 * @returns {string} The padded string.
 */
function padLeft(s, l, c) {
  var text = s.toString()
    , missing = l - text.length
    , fill = c || ' ';

  if (missing < 0)
    return text;

  // joining (missing + 1) empty slots yields `fill` repeated `missing` times
  return Array(missing + 1).join(fill) + text;
}

// FIXME for now this function only supports git internal format
/**
 * Parses a date written as epoch seconds followed by whitespace and a
 * signed four-digit zone (e.g. `1357689600 +0000`). The zone is matched
 * but not used.
 *
 * @param {string} dateStr  Text to parse.
 * @returns {Date} Date built from the epoch seconds.
 * @throws {Error} 'Failed to parse date' when the pattern does not match.
 */
function parseDate(dateStr) {
  var parts = /(\d+)\s(?:\+|-)(?:\d{4})/.exec(dateStr);

  if (parts === null)
    throw new Error('Failed to parse date');

  // the captured value is in seconds; Date expects milliseconds
  return new Date(parseInt(parts[1], 10) * 1000);
}

/**
 * Finds the first index at or after `pos` whose byte equals `b`.
 *
 * The scan stops at the end of the buffer, so a missing byte no longer
 * loops forever.
 *
 * @param {Buffer} buffer  Bytes to search.
 * @param {number} [pos]  Start index; any falsy value means 0.
 * @param {number} b  Byte value to look for.
 * @returns {number} Index of the first match, or `buffer.length` when
 *   there is none (so `buffer.slice(pos, result)` runs to the end). A
 *   `pos` already past the end is returned unchanged.
 */
function findInBuffer(buffer, pos, b) {
  var end = buffer.length;

  if (!pos)
    pos = 0;

  while (pos < end && buffer[pos] !== b) pos++;

  return pos;
}

/**
 * Locates the next linefeed byte (value 10) via `findInBuffer`.
 *
 * @param {Buffer} buffer  Bytes to search.
 * @param {number} [pos]  Start index.
 * @returns {number} Whatever `findInBuffer` returns for byte 10.
 */
function findLinefeed(buffer, pos) {
  var LINEFEED = 10;

  return findInBuffer(buffer, pos, LINEFEED);
}

/**
 * Locates the next NUL byte (value 0) via `findInBuffer`.
 *
 * @param {Buffer} buffer  Bytes to search.
 * @param {number} [pos]  Start index.
 * @returns {number} Whatever `findInBuffer` returns for byte 0.
 */
function findNull(buffer, pos) {
  var NUL = 0;

  return findInBuffer(buffer, pos, NUL);
}

/**
 * Strips everything up to and including the first NUL byte.
 *
 * @param {Buffer} objData  Object data beginning with a NUL-terminated
 *   header.
 * @returns {Buffer} Slice of `objData` starting just after the first NUL.
 * @throws {Error} 'invalid object header' when `objData` has no NUL byte.
 *   The scan is bounded by the buffer length, so this case no longer
 *   loops forever.
 */
function removeObjectHeader(objData) {
  var i = 0
    , len = objData.length;

  while (i < len && objData[i] !== 0)
    i++;

  if (i === len)
    throw new Error('invalid object header');

  return objData.slice(i + 1);
}

function invoke(fn, context, arg) {
Expand All @@ -35,8 +81,7 @@ GitObject.prototype._serialize = function(content, visitor){
var rv, hash, packData
, type = this.constructor.name.toLowerCase()
, header = new Buffer(type + " " + content.length)
, data = Buffer.concat([header, NULL, content])
, _this = this;
, data = Buffer.concat([header, NULL, content]);

rv = {
getHash: function() {
Expand All @@ -60,15 +105,42 @@ GitObject.prototype._serialize = function(content, visitor){
, getType: function() {
return type;
}
, getTypeCode: function() {
return _this.typeCode;
}
};
invoke(visitor, this, rv);
return rv;
};

/**
 * Computes the SHA-1 of an object and returns its contents without the
 * header.
 *
 * When `contents` starts with the text of `type` it is treated as already
 * carrying a `<type> <length>\0` header, which is validated and stripped.
 * Otherwise a header is built so the hash covers header + NUL + contents.
 *
 * @param {string} type  Expected object type name (e.g. 'blob').
 * @param {Buffer} contents  Object data, with or without the header.
 * @returns {{hash: string, contents: Buffer}} Hex SHA-1 of the full data
 *   and the header-less contents.
 * @throws {Error} 'invalid object header' when an existing header does not
 *   match `type` or the length of the remaining contents.
 */
GitObject.getObjectInfo = function(type, contents) {
  var header, headerMatch, sha1
    , body = contents
    , fullContents = contents
    , hasHeader = contents.slice(0, type.length).toString('utf8') === type;

  if (hasHeader) {
    // remove header for return value
    body = removeObjectHeader(contents);
    // the header is everything before the NUL that precedes the body
    header = fullContents.slice(0, fullContents.length - (body.length + 1));
    headerMatch = /^(\w+)\s(\d+)$/.exec(header.toString('utf8'));
    if (!(headerMatch && headerMatch[1] === type &&
          parseInt(headerMatch[2], 10) === body.length))
      throw new Error('invalid object header');
  } else {
    // prepend header so the hash can be calculated
    header = new Buffer(type + " " + contents.length);
    fullContents = Buffer.concat([header, NULL, contents]);
  }

  sha1 = crypto.createHash('sha1');
  sha1.update(fullContents);

  return {hash: sha1.digest('hex'), contents: body};
};


// Names exported by this module: date helpers, the GitObject base, the
// NULL value used when building object headers, a pattern matching 40 hex
// digits, and the buffer search helpers.
exports.timestamp = timestamp;
exports.parseDate = parseDate;
exports.GitObject = GitObject;
exports.NULL = NULL;
exports.SHA1 = /^[0-9a-f]{40}$/i;
exports.findLinefeed = findLinefeed;
exports.findNull = findNull;
62 changes: 57 additions & 5 deletions src/js/pack.js
Original file line number Diff line number Diff line change
@@ -1,17 +1,35 @@
var crypto = require('crypto')
var i, codes, types
, crypto = require('crypto')
, zlib = require('./zlib')
, Commit = require('./commit')
, Tree = require('./tree')
, Blob = require('./blob')
, Tag = require('./tag')
, MAGIC = 'PACK';


// Pack entry type codes keyed by type name. Only the four plain object
// types have a class; the two delta kinds carry a code but no class.
codes = {
  commit: {code: 1, cls: Commit}
, tree: {code: 2, cls: Tree}
, blob: {code: 3, cls: Blob}
, tag: {code: 4, cls: Tag}
, ofsdelta: {code: 6}
, refdelta: {code: 7}
};

// Reverse lookup: numeric type code -> class (undefined for delta codes).
types = Object.keys(codes).reduce(function(byCode, name) {
  var entry = codes[name];

  byCode[entry.code] = entry.cls;
  return byCode;
}, {});

// this implementation is based on the information at
// http://www.kernel.org/pub/software/scm/git/docs/technical/pack-format.txt
/**
 * Container for a set of git objects.
 *
 * @param {Array} [objects]  Initial objects; any falsy value yields a
 *   fresh empty array.
 */
function Pack(objects) {
  if (objects) {
    this.objects = objects;
  } else {
    this.objects = [];
  }
}

// FIXME this function does not currently apply delta compression to
// FIXME this class does not currently apply delta compression to
// similar objects in the pack, so it is mostly useful for sending
// a relatively small amount of git objects to a remote repository
// small amounts of git objects to a remote repository
Pack.prototype.serialize = function() {
var key, object, serialized, header, typeBits, data, encodedHeader
, packContent, encodedHeaderBytes, deflated, checksum
Expand Down Expand Up @@ -41,7 +59,7 @@ Pack.prototype.serialize = function() {
for (key in processed) {
serialized = processed[key];
// calculate the object header
typeBits = serialized.getTypeCode() << 4;
typeBits = codes[serialized.getType()].code << 4;
// the header is only used for loose objects. in packfiles they
// should not be used
data = serialized.getPackData();
Expand All @@ -61,6 +79,40 @@ Pack.prototype.serialize = function() {
return Buffer.concat(contentArray);
}

/**
 * Parses a version 2 packfile into a Pack.
 *
 * Layout read here: 4-byte magic, 32-bit big-endian version, 32-bit
 * big-endian entry count, then `count` entries. Each entry is a header
 * (decoded by `decodePackEntryHeader`) followed by deflated data.
 *
 * Changes from the previous body: uses `readUInt32BE`, the long-standing
 * Buffer method name; looks the class up directly in `types`, which
 * already maps code -> class; declares `cls` and `size` locally; stores
 * each parsed object and returns the pack.
 *
 * TODO: delta entries (codes 6 and 7) have no class and are rejected, and
 * references between the parsed objects are not connected yet.
 *
 * @param {Buffer} buffer  Raw packfile bytes.
 * @returns {Pack} Pack whose `objects` holds the deserialized entries in
 *   file order.
 * @throws {Error} On a bad magic number, a version other than 2, or an
 *   entry whose type code has no class.
 */
Pack.deserialize = function(buffer) {
  var i, count, pos, cls, size, entryHeader, inflatedEntry, inflatedData
    , deserialized
    , rv = new Pack();

  // verify magic number
  if (buffer.slice(0, 4).toString('utf8') !== MAGIC)
    throw new Error('Invalid pack magic number');

  // only accept version 2 packs
  if (buffer.readUInt32BE(4) !== 2)
    throw new Error('Invalid pack version');

  count = buffer.readUInt32BE(8);
  pos = 12;

  // unpack all objects
  for (i = 0; i < count; i++) {
    // bits 4-6 of the first header byte hold the type code
    cls = types[(buffer[pos] & 0x70) >>> 4];
    if (!cls)
      throw new Error('invalid pack entry type code');
    entryHeader = decodePackEntryHeader(buffer, pos);
    size = entryHeader[0];
    pos = entryHeader[1];
    inflatedEntry = zlib.inflate(buffer.slice(pos), size);
    inflatedData = inflatedEntry[0];
    // NOTE(review): the inflater was given a slice starting at `pos`; if
    // inflatedEntry[1] is an offset relative to that slice this should be
    // `pos += inflatedEntry[1]` - confirm against ./zlib
    pos = inflatedEntry[1];
    // each class's deserialize returns [object, hash]
    deserialized = cls.deserialize(inflatedData);
    rv.objects.push(deserialized[0]);
  }

  return rv;
};

function encodePackEntrySize(size) {
// this is an adaptation of LEB128: http://en.wikipedia.org/wiki/LEB128
// with the difference that the first byte will contain type information
Expand All @@ -81,7 +133,7 @@ function encodePackEntrySize(size) {
return bytes;
}

function decodePackEntrySize(buffer, offset) {
function decodePackEntryHeader(buffer, offset) {
var bits = 4
, byte = buffer[offset] & 0xf
, rv = byte;
Expand Down
46 changes: 45 additions & 1 deletion src/js/tag.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,50 @@ Tag.prototype.serialize = function(visitor) {
return this._serialize(new Buffer(contentArray.join('\n')), visitor);
};

Tag.prototype.typeCode = 4;
/**
 * Builds a Tag from raw object data.
 *
 * Header lines are read in order: `object`, `type`, `tag` and `tagger`;
 * everything after that is taken as the message.
 *
 * Fix: after the tagger line the offset is now advanced (`pos +=`) rather
 * than overwritten with the tagger line length, which made the message
 * start at the wrong position.
 *
 * @param {Buffer} contents  Tag data handed to
 *   `common.GitObject.getObjectInfo('tag', ...)`.
 * @returns {Array} Two-element array: the new Tag and the hash reported by
 *   `getObjectInfo`.
 * @throws {Error} When the object, type, name or tagger line is missing.
 */
Tag.deserialize = function(contents) {
  var pos, object, type, tag, tagger, date, message, match
    , info = common.GitObject.getObjectInfo('tag', contents);

  // object: 'object ' (7 bytes) + 40 hex digits = 47 bytes
  match = /^object\s([0-9a-f]{40})$/.exec(
    info.contents.slice(0, 47).toString('utf8'));
  if (!match)
    throw new Error('tag missing object');
  object = match[1];
  pos = 48; // first byte after the linefeed that ends the object line

  // type
  // NOTE(review): 'tag' is not accepted here, so a tag pointing at another
  // tag is rejected - confirm whether that is intended
  match = /^type\s(commit|tree|blob)$/.exec(info.contents.slice(
    pos, common.findLinefeed(info.contents, pos)).toString('utf8'));
  if (!match)
    throw new Error('tag missing type');
  type = match[1];
  pos += match[0].length + 1;

  // tag name
  match = /^tag\s(.+)$/.exec(info.contents.slice(
    pos, common.findLinefeed(info.contents, pos)).toString('utf8'));
  if (!match)
    throw new Error('tag missing name');
  tag = match[1];
  pos += Buffer.byteLength(match[0]) + 1;

  // tagger
  match = /^tagger\s(.+\s<.*>)\s(.+)$/.exec(info.contents.slice(
    pos, common.findLinefeed(info.contents, pos)).toString('utf8'));
  if (!match)
    throw new Error('tag missing tagger');
  tagger = match[1];
  date = common.parseDate(match[2]);
  // NOTE(review): skips the tagger line plus three more bytes; confirm
  // this matches the separator written by Tag.prototype.serialize
  pos += Buffer.byteLength(match[0]) + 3;

  // message
  message = info.contents.slice(pos).toString('utf8');

  return [
    new Tag(object, tag, tagger, date, message, type)
  , info.hash
  ];
};

module.exports = Tag;
28 changes: 27 additions & 1 deletion src/js/tree.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,32 @@ Tree.prototype.serialize = function(visitor) {
return this._serialize(Buffer.concat(contentArray), visitor);
};

Tree.prototype.typeCode = 2;
/**
 * Builds a Tree from raw object data.
 *
 * Each entry is read as `<digits> <name>`, a NUL byte, then 20 bytes that
 * are stored as a hex string keyed by the entry name.
 *
 * @param {Buffer} contents  Tree data handed to
 *   `common.GitObject.getObjectInfo('tree', ...)`.
 * @returns {Array} Two-element array: the new Tree and the hash reported
 *   by `getObjectInfo`.
 * @throws {Error} 'could not parse tree' when an entry does not match or
 *   the entries do not end exactly at the end of the data.
 */
Tree.deserialize = function(contents) {
  var entry, nameEnd, hashOffset
    , cursor = 0
    , entries = {}
    , parsed = common.GitObject.getObjectInfo('tree', contents)
    , body = parsed.contents;

  while (cursor < body.length) {
    // find the blob/tree name/mode
    // FIXME for now this implementation is ignoring file modes
    nameEnd = common.findNull(body, cursor);
    entry = /^\d+\s(.+)$/.exec(body.slice(cursor, nameEnd).toString('utf8'));
    if (!entry)
      throw new Error('could not parse tree');

    // the extra byte skips the NUL between the name and the hash
    hashOffset = cursor + Buffer.byteLength(entry[0]) + 1;
    entries[entry[1]] = body.slice(hashOffset, hashOffset + 20)
      .toString('hex');
    cursor = hashOffset + 20;
  }

  // a short final hash leaves the cursor past the end of the data
  if (cursor !== body.length)
    throw new Error('could not parse tree');

  return [new Tree(entries), parsed.hash];
};

module.exports = Tree;
Loading

0 comments on commit fc8054c

Please sign in to comment.