Implemented deserialization of the 4 object types

tarruda · Jan 9, 2013 · fc8054c · fc8054c
1 parent aab65df
commit fc8054c
Show file tree

Hide file tree

Showing 7 changed files with 339 additions and 54 deletions.
diff --git a/src/js/blob.js b/src/js/blob.js
@@ -2,20 +2,24 @@ var util = require('util')
   , common = require('./common');
 
 
-function Blob(content) {
+// Git blob object wrapping `contents`, which is stored as given (serialize
+// encodes it as UTF-8 when it is a string).
+function Blob(contents) {
   this.constructor.super_.call(this);
-  this.content = content;
+  this.contents = contents;
 }
 util.inherits(Blob, common.GitObject);
 
 Blob.prototype.serialize = function(visitor) {
-  var content = this.content;
+  var contents = this.contents;
 
-  if (typeof content === 'string') content = new Buffer(content, 'utf8');
+  // string contents are encoded as UTF-8; anything else is passed through
+  if (typeof contents === 'string') contents = new Buffer(contents, 'utf8');
 
-  return this._serialize(content, visitor);
+  // header, hashing and the visitor callback are handled by _serialize
+  return this._serialize(contents, visitor);
 };
 
-Blob.prototype.typeCode = 3;
+// Rebuilds a Blob from raw object data. Returns a two-element array: the
+// Blob, then the hex hash computed by GitObject.getObjectInfo.
+Blob.deserialize = function(contents) {
+  var parsed = common.GitObject.getObjectInfo('blob', contents)
+    , blob = new Blob(parsed.contents);
+
+  return [blob, parsed.hash];
+};
 
 module.exports = Blob;
diff --git a/src/js/commit.js b/src/js/commit.js
@@ -40,6 +40,50 @@ Commit.prototype.serialize = function(visitor) {
   return this._serialize(new Buffer(contentArray.join('\n')), visitor);
 };
 
-Commit.prototype.typeCode = 1;
+// Parses raw commit object data into a Commit.
+//
+// Returns a two-element array: the Commit, then the hex hash computed by
+// GitObject.getObjectInfo. Throws an Error when the tree, author or
+// committer line cannot be matched.
+Commit.deserialize = function(contents) {
+  var pos, tree, author, committer, date, message, match
+    , parents = []
+    , info = common.GitObject.getObjectInfo('commit', contents);
+
+  // tree: "tree " + 40 hex digits = 45 bytes (the sliced buffer is coerced
+  // to a string by exec)
+  match = /^tree\s([0-9a-f]{40})$/.exec(info.contents.slice(0, 45));
+  if (!match)
+    throw new Error('commit missing tree');
+  tree = match[1];
+  pos = 46; // linefeed
+
+  // parents: zero or more lines of "parent " + 40 hex digits (47 bytes),
+  // each followed by a linefeed
+  while (match = /^parent\s([0-9a-f]{40})$/.exec(
+    info.contents.slice(pos, pos + 47).toString('utf8'))) {
+    parents.push(match[1]);
+    pos += 48;
+  }
+
+  // author: "author <name> <email> <date>"; the date part is the only date
+  // that is parsed
+  match = /^author\s(.+\s<.*>)\s(.+)$/.exec(info.contents.slice(
+    pos, common.findLinefeed(info.contents, pos)).toString('utf8'));
+  if (!match)
+    throw new Error('commit missing author');
+  author = match[1];
+  date = common.parseDate(match[2]);
+  // byte length (not character count) of the line, plus its linefeed
+  pos += Buffer.byteLength(match[0]) + 1;
+
+  // committer
+  // FIXME ignoring commit date
+  match = /^committer\s(.+\s<.*>)\s(?:.+)$/.exec(info.contents.slice(
+    pos, common.findLinefeed(info.contents, pos)).toString('utf8'));
+  if (!match)
+    throw new Error('commit missing committer');
+  committer = match[1];
+  // NOTE(review): skips 3 bytes after the committer line; confirm this
+  // equals the separator Commit.prototype.serialize writes before the message
+  pos += Buffer.byteLength(match[0]) + 3;
+
+  // message: everything that remains
+  message = info.contents.slice(pos).toString('utf8');
+
+  return [
+      new Commit(tree, author, committer, date, message, parents)
+    , info.hash
+  ];
+};
 
 module.exports = Commit;
diff --git a/src/js/common.js b/src/js/common.js
@@ -8,17 +8,63 @@ function timestamp(date) {
   date = date || new Date();
   stamp = Math.ceil(date.getTime() / 1000).toString();
   zone = formatTimezone(date);
-  return stamp + zone;
+  return stamp + ' ' + zone;
 }
 
 function formatTimezone(date) {
-  var m
-    , str = date.toLocaleString();
-  m = /GMT(.\d{4})/.exec(str);
-  if (m) {
-    return " " + m[1];
-  }
-  return '';
+  // getTimezoneOffset() is minutes *behind* UTC, so a positive value maps to
+  // a '-' zone; the magnitude is formatted separately so that zones ahead of
+  // UTC do not leak a second minus sign into the hour/minute digits.
+  var offset = date.getTimezoneOffset()
+    , sign = offset > 0 ? '-' : '+'
+    , magnitude = Math.abs(offset);
+
+  return sign + padLeft(Math.floor(magnitude / 60), 2, '0') +
+    padLeft(magnitude % 60, 2, '0');
+}
+
+// Left-pads the string form of `s` with `c` (a space when `c` is falsy)
+// until it is `l` characters long; longer values come back unchanged.
+function padLeft(s, l, c) {
+  var text = s.toString()
+    , missing = l - text.length;
+
+  if (missing < 0) return text;
+  return new Array(missing + 1).join(c || ' ') + text;
+}
+
+// FIXME for now this function only supports git internal format
+// ("<epoch seconds> <+|-><4 digit zone>"); the zone is matched but not used.
+// Throws an Error when dateStr does not contain that pattern.
+function parseDate(dateStr) {
+  var parts = /(\d+)\s(?:\+|-)(?:\d{4})/.exec(dateStr)
+    , seconds;
+
+  if (parts === null)
+    throw new Error('Failed to parse date');
+
+  seconds = parseInt(parts[1], 10);
+  return new Date(seconds * 1000);
+}
+
+// Returns the index of the first byte equal to `b` at or after `pos`
+// (0 when `pos` is omitted), or buffer.length when there is no such byte.
+function findInBuffer(buffer, pos, b) {
+  if (!pos)
+    pos = 0;
+
+  // stop at the end of the buffer: reading past it yields undefined, which
+  // never equals b, so an unbounded scan would never terminate
+  while (pos < buffer.length && buffer[pos] !== b) pos++;
+
+  return pos;
+}
+
+// Position of the next linefeed byte in `buffer`, searching from `pos`.
+function findLinefeed(buffer, pos) {
+  var LINEFEED = 10;
+
+  return findInBuffer(buffer, pos, LINEFEED);
+}
+
+// Position of the next NUL byte in `buffer`, searching from `pos`.
+function findNull(buffer, pos) {
+  var NUL = 0;
+
+  return findInBuffer(buffer, pos, NUL);
+}
+
+// Returns the bytes that follow the first NUL byte of `objData`, i.e. the
+// object data without its "<type> <size>" header.
+// Throws an Error when objData contains no NUL byte.
+function removeObjectHeader(objData) {
+  var i = 0;
+
+  // bounded scan: past the end objData[i] is undefined, which never equals 0
+  while (i < objData.length && objData[i] !== 0)
+    i++;
+
+  if (i === objData.length)
+    throw new Error('invalid object header');
+
+  return objData.slice(i + 1);
 }
 
 function invoke(fn, context, arg) {
@@ -35,8 +81,7 @@ GitObject.prototype._serialize = function(content, visitor){
   var rv, hash, packData
     , type = this.constructor.name.toLowerCase()
     , header = new Buffer(type + " " + content.length)
-    , data = Buffer.concat([header, NULL, content])
-    , _this = this;
+    , data = Buffer.concat([header, NULL, content]);
 
     rv = {
         getHash: function() {
@@ -60,15 +105,42 @@ GitObject.prototype._serialize = function(content, visitor){
       , getType: function() {
           return type;
         }
-      , getTypeCode: function() {
-          return _this.typeCode;
-        }
     };
     invoke(visitor, this, rv);
     return rv;
 };
 
+// Computes the SHA-1 of an object and separates its contents from the
+// "<type> <size>" + NUL header.
+//
+// `contents` may be given with or without the header. Returns
+// {hash: <hex sha1 of header + NUL + contents>, contents: <headerless data>}.
+// Throws an Error when a header is present but does not match `type` and
+// the length of the remaining data.
+//
+// NOTE(review): data is treated as already carrying a header whenever its
+// first bytes spell `type`, so headerless data that happens to start with
+// the type name takes the header branch.
+GitObject.getObjectInfo = function(type, contents) {
+  var rv, header, hash, match
+    , fullContents = contents;
+
+  if (contents.slice(0, type.length).toString('utf8') !== type) {
+    // prepend header so the hash can be calculated
+    header = new Buffer(type + " " + contents.length);
+    fullContents = Buffer.concat([header, NULL, contents]);
+  } else {
+    // remove header for return value
+    contents = removeObjectHeader(contents);
+    // assert that the header is valid
+    // (header = everything before the NUL that precedes the contents)
+    header = fullContents.slice(0, fullContents.length -
+                                (contents.length + 1));
+    match = /^(\w+)\s(\d+)$/.exec(header.toString('utf8'));
+    if (!match || match[1] !== type ||
+        parseInt(match[2], 10) !== contents.length)
+      throw new Error('invalid object header');
+  }
+
+  // the hash always covers header + NUL + contents
+  hash = crypto.createHash('sha1');
+  hash.update(fullContents);
+
+  return {hash: hash.digest('hex'), contents: contents};
+};
+
+
 exports.timestamp = timestamp;
+exports.parseDate = parseDate;
 exports.GitObject = GitObject;
 exports.NULL = NULL;
 exports.SHA1 = /^[0-9a-f]{40}$/i;
+exports.findLinefeed = findLinefeed;
+exports.findNull = findNull;
diff --git a/src/js/pack.js b/src/js/pack.js
@@ -1,17 +1,35 @@
-var crypto = require('crypto')
+var i, codes, types
+  , crypto = require('crypto')
   , zlib = require('./zlib')
+  , Commit = require('./commit')
+  , Tree = require('./tree')
+  , Blob = require('./blob')
+  , Tag = require('./tag')
   , MAGIC = 'PACK';
 
 
+// pack entry type codes keyed by object type name; `cls` is the class whose
+// deserialize() handles entries of that type (the delta entries have none)
+codes = {
+    commit: {code: 1, cls: Commit}
+  , tree: {code: 2, cls: Tree}
+  , blob: {code: 3, cls: Blob}
+  , tag: {code: 4, cls: Tag}
+  , ofsdelta: {code: 6}
+  , refdelta: {code: 7}
+};
+// reverse lookup: numeric type code -> class (undefined for the delta codes)
+types = {};
+Object.keys(codes).forEach(function(k) {
+  types[codes[k].code] = codes[k].cls;
+});
+
 // this implementation is based on the information at
 // http://www.kernel.org/pub/software/scm/git/docs/technical/pack-format.txt
+// `objects` is kept on the instance; an empty array is used when it is falsy
 function Pack(objects) {
   this.objects = objects || [];
 }
 
-// FIXME this function does not currently applies delta compression to 
+// FIXME this class does not currently apply delta compression to
 // similar objects in the pack, so it is mostly useful for sending
-// a relatively small amount of git objects to a remote repository
+// small amounts of git objects to a remote repository
 Pack.prototype.serialize = function() {
   var key, object, serialized, header, typeBits, data, encodedHeader
     , packContent, encodedHeaderBytes, deflated, checksum
@@ -41,7 +59,7 @@ Pack.prototype.serialize = function() {
   for (key in processed) {
     serialized = processed[key];
     // calculate the object header
-    typeBits = serialized.getTypeCode() << 4;
+    typeBits = codes[serialized.getType()].code << 4;
     // the header is only used for loose objects. in packfiles they
     // should not be used
     data = serialized.getPackData();
@@ -61,6 +79,40 @@ Pack.prototype.serialize = function() {
   return Buffer.concat(contentArray);
 }
 
+// Walks the entries of a version 2 packfile held in `buffer`, inflating and
+// deserializing each one with the class registered for its type code.
+//
+// Throws an Error when the magic number or version is not accepted, or when
+// an entry has a type code with no registered class (this includes the
+// delta codes 6 and 7).
+// FIXME the deserialized objects are not collected or returned yet.
+Pack.deserialize = function(buffer) {
+  var i, count, pos, cls, size, entryHeader, inflatedEntry, inflatedData
+    , deserialized
+    , objectsById = {} // used after parsing objects to connect references
+    , rv = new Pack();
+
+  // verify magic number
+  if (buffer.slice(0, 4).toString('utf8') !== MAGIC)
+    throw new Error('Invalid pack magic number');
+
+  // only accept version 2 packs
+  // (readUInt32BE is the spelling every Node release provides)
+  if (buffer.readUInt32BE(4) !== 2)
+    throw new Error('Invalid pack version');
+
+  count = buffer.readUInt32BE(8);
+  pos = 12;
+
+  // unpack all objects
+  for (i = 0;i < count;i++) {
+    // bits 4-6 of the first header byte hold the type code; `types` maps
+    // that code straight to the class
+    cls = types[(buffer[pos] & 0x70) >>> 4];
+    if (!cls)
+      throw new Error('invalid pack entry type code');
+    entryHeader = decodePackEntryHeader(buffer, pos);
+    size = entryHeader[0];
+    pos = entryHeader[1];
+    inflatedEntry = zlib.inflate(buffer.slice(pos), size);
+    inflatedData = inflatedEntry[0];
+    // NOTE(review): assumes inflate reports an offset usable as an absolute
+    // position in `buffer` even though it is handed a slice; confirm
+    pos = inflatedEntry[1];
+    deserialized = cls.deserialize(inflatedData);
+  }
+
+
+};
+
 function encodePackEntrySize(size) {
   // this is an adaptation of LEB128: http://en.wikipedia.org/wiki/LEB128
   // with the difference that the first byte will contain type information
@@ -81,7 +133,7 @@ function encodePackEntrySize(size) {
   return bytes;
 }
 
-function decodePackEntrySize(buffer, offset) {
+function decodePackEntryHeader(buffer, offset) {
   var bits = 4
     , byte = buffer[offset] & 0xf
     , rv = byte;

diff --git a/src/js/tag.js b/src/js/tag.js
@@ -39,6 +39,50 @@ Tag.prototype.serialize = function(visitor) {
   return this._serialize(new Buffer(contentArray.join('\n')), visitor);
 };
 
-Tag.prototype.typeCode = 4;
+// Parses raw tag object data into a Tag.
+//
+// Returns a two-element array: the Tag, then the hex hash computed by
+// GitObject.getObjectInfo. Throws an Error when the object, type, tag name
+// or tagger line cannot be matched.
+Tag.deserialize = function(contents) {
+  var pos, object, type, tag, tagger, date, message, match
+    , info = common.GitObject.getObjectInfo('tag', contents);
+
+  // object: "object " + 40 hex digits = 47 bytes, followed by a linefeed
+  match = /^object\s([0-9a-f]{40})$/.exec(
+    info.contents.slice(0, 47).toString('utf8'));
+  if (!match)
+    throw new Error('tag missing object');
+  object = match[1];
+  pos = 48;
+
+  // type
+  match = /^type\s(commit|tree|blob)$/.exec(info.contents.slice(
+    pos, common.findLinefeed(info.contents, pos)).toString('utf8'));
+  if (!match)
+    throw new Error('tag missing type');
+  type = match[1];
+  // byte length of the line plus its linefeed, as for the lines below
+  pos += Buffer.byteLength(match[0]) + 1;
+
+  // tag name
+  match = /^tag\s(.+)$/.exec(info.contents.slice(
+    pos, common.findLinefeed(info.contents, pos)).toString('utf8'));
+  if (!match)
+    throw new Error('tag missing name');
+  tag = match[1];
+  pos += Buffer.byteLength(match[0]) + 1;
+
+  // tagger
+  match = /^tagger\s(.+\s<.*>)\s(.+)$/.exec(info.contents.slice(
+    pos, common.findLinefeed(info.contents, pos)).toString('utf8'));
+  if (!match)
+    throw new Error('tag missing tagger');
+  tagger = match[1];
+  date = common.parseDate(match[2]);
+  // advance past the tagger line; assigning the length instead of adding it
+  // would rewind pos into the header lines and corrupt the message
+  // NOTE(review): the 3-byte skip matches Commit.deserialize; confirm it
+  // equals the separator Tag.prototype.serialize writes before the message
+  pos += Buffer.byteLength(match[0]) + 3;
+
+  // message: everything that remains
+  message = info.contents.slice(pos).toString('utf8');
+
+  return [
+      new Tag(object, tag, tagger, date, message, type)
+    , info.hash
+  ];
+};
 
 module.exports = Tag;
diff --git a/src/js/tree.js b/src/js/tree.js
@@ -31,6 +31,32 @@ Tree.prototype.serialize = function(visitor) {
   return this._serialize(Buffer.concat(contentArray), visitor);
 };
 
-Tree.prototype.typeCode = 2;
+// Parses raw tree object data into a Tree.
+//
+// Each entry is "<mode> <name>", a NUL byte, then a 20-byte binary hash.
+// Returns a two-element array: the Tree (built from a name -> hex hash map),
+// then the hex hash computed by GitObject.getObjectInfo. Throws an Error
+// when an entry cannot be matched or the data ends in the middle of one.
+Tree.deserialize = function(contents) {
+  var childName, hash, hashStart, nameEnd
+    , match
+    , pos = 0
+    , children = {}
+    , info = common.GitObject.getObjectInfo('tree', contents);
+
+  while (pos < info.contents.length) {
+    // find the blob/tree name/mode
+    // FIXME for now this implementation is ignoring file modes
+    nameEnd = common.findNull(info.contents, pos);
+    match = /^\d+\s(.+)$/.exec(
+      info.contents.slice(pos, nameEnd).toString('utf8'));
+    if (!match)
+      throw new Error('could not parse tree');
+    childName = match[1];
+    // take the offset from the NUL itself: re-measuring the decoded name
+    // gives the wrong byte count when the name is not valid UTF-8
+    hashStart = nameEnd + 1; // skip NULL
+    hash = info.contents.slice(hashStart, hashStart + 20);
+    children[childName] = hash.toString('hex');
+    pos = hashStart + 20;
+  }
+
+  // pos should equal the length by now
+  if (pos !== info.contents.length)
+    throw new Error('could not parse tree');
+
+  return [new Tree(children), info.hash];
+};
 
 module.exports = Tree;