Permalink
Browse files

initial commit

  • Loading branch information...
0 parents commit 73d9d64cf6957bf5d72f38b7749a456fc9e78124 @samuel committed Oct 21, 2009
Showing with 331 additions and 0 deletions.
  1. +2 −0 .gitignore
  2. +3 −0 bert/__init__.py
  3. +48 −0 bert/converters.py
  4. +108 −0 bert/erlang.py
  5. +8 −0 test.py
  6. 0 tests/__init__.py
  7. +60 −0 tests/bertdecoder.py
  8. +102 −0 tests/erlangdecoder.py
2 .gitignore
@@ -0,0 +1,2 @@
+*.pyc
+.DS_Store
3 bert/__init__.py
@@ -0,0 +1,3 @@
+
+from bert.erlang import ErlangTermDecoder, Atom, Binary
+from bert.converters import BERTDecoder
48 bert/converters.py
@@ -0,0 +1,48 @@
+
+import datetime
+import re
+
+from bert.erlang import ErlangTermDecoder
+
class BERTDecoder(ErlangTermDecoder):
    """Erlang term decoder that also expands BERT complex types.

    Terms are first decoded by ``ErlangTermDecoder``.  Every tuple (or
    list) of the form ``("bert", <type>, ...)`` found in the result is
    then replaced by a native Python value: ``None``, ``bool``, ``dict``,
    ``datetime.timedelta`` or a compiled regular expression.
    """

    # Regex option names mapped to ``re`` flags.  "caseless" is the option
    # name used by BERT encoders; "ignorecase" is kept because it was the
    # only spelling this class used to recognise.
    _REGEX_FLAGS = (
        ("extended", re.VERBOSE),
        ("caseless", re.IGNORECASE),
        ("ignorecase", re.IGNORECASE),
        ("multiline", re.MULTILINE),
        ("dotall", re.DOTALL),
    )

    def __init__(self):
        super(BERTDecoder, self).__init__()

    def decode(self, bytes, offset=0):
        """Decode one term from ``bytes`` and expand BERT complex types.

        ``bytes`` and ``offset`` are passed unchanged to
        ``ErlangTermDecoder.decode``; the decoded term is then run through
        ``convert``.
        """
        obj = super(BERTDecoder, self).decode(bytes, offset)
        return self.convert(obj)

    def convert(self, item):
        """Recursively replace BERT complex types inside ``item``.

        Tuples and lists are walked element by element (and rebuilt as the
        same container type); any other value is returned unchanged.
        Empty containers are returned as empty containers.
        """
        if isinstance(item, tuple):
            if self._is_bert_term(item):
                return self.convert_bert(item)
            return tuple(self.convert(i) for i in item)
        if isinstance(item, list):
            if self._is_bert_term(item):
                return self.convert_bert(item)
            return [self.convert(i) for i in item]
        return item

    @staticmethod
    def _is_bert_term(item):
        """Return True if ``item`` looks like ``("bert", <type>, ...)``."""
        # The length check keeps empty sequences (e.g. a decoded NIL, which
        # is ``[]``) and a bare ``("bert",)`` from raising IndexError.
        return len(item) >= 2 and item[0] == "bert"

    def convert_bert(self, item):
        """Convert a single ``("bert", <type>, ...)`` term.

        Supported types:

        * ``nil``   -> ``None``
        * ``true``  -> ``True``
        * ``false`` -> ``False``
        * ``dict``  -> ``dict`` built from a list of ``(key, value)`` pairs,
          with keys and values converted recursively
        * ``time``  -> ``datetime.timedelta`` built from
          ``(megaseconds, seconds, microseconds)``
        * ``regex`` -> pattern compiled from ``(source, options)``

        Any other type yields ``None``.
        """
        kind = item[1]

        # ErlangTermDecoder already turns the atoms ``true`` / ``false``
        # into Python booleans, so a decoded {bert, true} arrives here as
        # ("bert", True) rather than ("bert", "true").
        if kind is True or kind is False:
            return kind

        if kind == "nil":
            return None
        if kind == "dict":
            return dict((self.convert(k), self.convert(v)) for k, v in item[2])
        if kind == "true":
            return True
        if kind == "false":
            return False
        if kind == "time":
            return datetime.timedelta(
                seconds=item[2] * 1000000 + item[3], microseconds=item[4])
        if kind == "regex":
            options = item[3]
            flags = 0
            for name, flag in self._REGEX_FLAGS:
                if name in options:
                    flags |= flag
            return re.compile(item[2], flags)
        return None
108 bert/erlang.py
@@ -0,0 +1,108 @@
+
+"""Erlang External Term Format serializer/deserializer"""
+
+import struct
+
+NEW_FLOAT_EXT = 70 # [Float64:IEEE float] 8 bytes, big-endian
+SMALL_INTEGER_EXT = 97 # [UInt8:Int] Unsigned 8 bit integer
+INTEGER_EXT = 98 # [Int32:Int] Signed 32 bit integer in big-endian format
+FLOAT_EXT = 99 # [31:Float String] Float in string format (formatted "%.20e", sscanf "%lf"), NUL padded to 31 bytes. Superseded by NEW_FLOAT_EXT
+ATOM_EXT = 100 # [UInt16:Len, Len:AtomName] max Len is 255
+SMALL_TUPLE_EXT = 104 # [UInt8:Arity, N:Elements]
+LARGE_TUPLE_EXT = 105 # [UInt32:Arity, N:Elements]
+NIL_EXT = 106 # empty list
+STRING_EXT = 107 # [UInt16:Len, Len:Characters] (length is 2 bytes, not 4)
+LIST_EXT = 108 # [UInt32:Len, Elements, Tail]
+BINARY_EXT = 109 # [UInt32:Len, Len:Data]
+SMALL_BIG_EXT = 110 # [UInt8:n, UInt8:Sign, n:Digits] Sign 0 = positive, non-zero = negative; digits are base-256, least significant first
+LARGE_BIG_EXT = 111 # [UInt32:n, UInt8:Sign, n:Digits] same layout as SMALL_BIG_EXT with a 4 byte digit count
+
class Atom(str):
    """String subclass marking a value that was decoded from an Erlang atom."""
+
class Binary(str):
    """String subclass marking a value that was decoded from an Erlang binary."""
+
class ErlangTermDecoder(object):
    """Decoder turning Erlang External Term Format data into Python values.

    Mapping used by this decoder:

    * small/regular/big integers -> ``int``
    * floats (string or IEEE form) -> ``float``
    * atoms ``true`` / ``false`` -> ``True`` / ``False``
    * any other atom -> ``Atom``
    * tuples -> ``tuple``
    * NIL and proper lists -> ``list``
    * strings -> the raw slice of the input
    * binaries -> ``Binary``
    """

    def __init__(self):
        pass

    def decode(self, bytes, offset=0):
        """Decode the term starting at ``offset`` and return its value.

        A version marker (byte 131) found at ``offset`` is skipped; input
        without the marker is decoded from ``offset`` directly.
        """
        start = offset + 1 if bytes[offset] == "\x83" else offset  # Version 131
        value, _ = self._decode(bytes, start)
        return value

    def _decode_sequence(self, bytes, offset, count):
        """Decode ``count`` consecutive terms.

        Returns ``(items, next_offset)`` where ``items`` is a list.
        """
        items = []
        for _ in range(count):
            element, offset = self._decode(bytes, offset)
            items.append(element)
        return items, offset

    def _decode(self, bytes, offset=0):
        """Decode the tagged term at ``offset``.

        Returns ``(value, next_offset)`` where ``next_offset`` is the
        position just past the decoded term.  Raises ``NotImplementedError``
        for unknown tags and for lists whose tail is not the empty list.
        """
        tag = ord(bytes[offset])
        pos = offset + 1

        if tag == SMALL_INTEGER_EXT:
            return ord(bytes[pos]), pos + 1

        if tag == INTEGER_EXT:
            (number,) = struct.unpack(">l", bytes[pos:pos + 4])
            return number, pos + 4

        if tag == FLOAT_EXT:
            # Fixed 31 byte field; the textual float ends at the first NUL.
            text = bytes[pos:pos + 31].split('\x00', 1)[0]
            return float(text), pos + 31

        if tag == NEW_FLOAT_EXT:
            (number,) = struct.unpack(">d", bytes[pos:pos + 8])
            return number, pos + 8

        if tag == ATOM_EXT:
            (name_len,) = struct.unpack(">H", bytes[pos:pos + 2])
            end = pos + 2 + name_len
            name = bytes[pos + 2:end]
            if name == "true":
                return True, end
            if name == "false":
                return False, end
            return Atom(name), end

        if tag == SMALL_TUPLE_EXT:
            arity = ord(bytes[pos])
            elements, end = self._decode_sequence(bytes, pos + 1, arity)
            return tuple(elements), end

        if tag == LARGE_TUPLE_EXT:
            (arity,) = struct.unpack(">L", bytes[pos:pos + 4])
            elements, end = self._decode_sequence(bytes, pos + 4, arity)
            return tuple(elements), end

        if tag == NIL_EXT:
            return [], pos

        if tag == STRING_EXT:
            (length,) = struct.unpack(">H", bytes[pos:pos + 2])
            end = pos + 2 + length
            return bytes[pos + 2:end], end

        if tag == LIST_EXT:
            (length,) = struct.unpack(">L", bytes[pos:pos + 4])
            elements, end = self._decode_sequence(bytes, pos + 4, length)
            tail, end = self._decode(bytes, end)
            if tail != []:
                # TODO: Not sure what to do with the tail
                raise NotImplementedError("Lists with non empty tails are not supported")
            return elements, end

        if tag == BINARY_EXT:
            (length,) = struct.unpack(">L", bytes[pos:pos + 4])
            end = pos + 4 + length
            return Binary(bytes[pos + 4:end]), end

        if tag in (SMALL_BIG_EXT, LARGE_BIG_EXT):
            if tag == SMALL_BIG_EXT:
                digits = ord(bytes[pos])
                pos += 1
            else:
                (digits,) = struct.unpack(">L", bytes[pos:pos + 4])
                pos += 4
            sign = ord(bytes[pos])
            pos += 1
            # Digits are base-256 with the least significant byte first.
            magnitude = sum(
                ord(bytes[pos + i]) << (8 * i) for i in range(digits))
            if sign != 0:
                magnitude = -magnitude
            return magnitude, pos + digits

        raise NotImplementedError("Unsupported tag %d" % tag)
+
8 test.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+
+import unittest
+
+if __name__ == '__main__':
+ from tests.erlangdecoder import * # star import puts the TestCase classes into this module's namespace
+ from tests.bertdecoder import * # same for the BERT decoder tests
+ unittest.main() # with no arguments this loads tests from __main__, i.e. the classes imported above
0 tests/__init__.py
No changes.
60 tests/bertdecoder.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+import datetime
+import re
+import unittest
+
+from bert import BERTDecoder
+
class BERTDecoderTest(unittest.TestCase):
    """Tests for ``BERTDecoder.convert`` on already-decoded terms."""

    def setUp(self):
        self.decoder = BERTDecoder()

    def convert(self, term):
        """Run ``term`` through the decoder's BERT conversion step."""
        return self.decoder.convert(term)

    def testNone(self):
        self.assertEqual(None, self.convert(("bert", "nil")))

    def testNestedNone(self):
        self.assertEqual(
            [None, (None,)],
            self.convert([("bert", "nil"), (("bert", "nil"),)]))

    def testDict(self):
        self.assertEqual(
            {'foo': 'bar'},
            self.convert(('bert', 'dict', [('foo', 'bar')])))

    def testEmptyDict(self):
        self.assertEqual({}, self.convert(('bert', 'dict', [])))

    def testNestedDict(self):
        self.assertEqual(
            {'foo': {'baz': 'bar'}},
            self.convert(
                ('bert', 'dict', [
                    ('foo', ('bert', 'dict', [
                        ('baz', 'bar')]))])))

    def testTrue(self):
        self.assertEqual(True, self.convert(('bert', 'true')))

    def testFalse(self):
        self.assertEqual(False, self.convert(('bert', 'false')))

    def testTime(self):
        self.assertEqual(
            datetime.timedelta(seconds=123 * 1000000 + 456, microseconds=789),
            self.convert(('bert', 'time', 123, 456, 789)))

    def testRegex(self):
        before = ('bert', 'regex', '^c(a)t$', ('caseless', 'extended'))
        compiled = self.convert(before)
        # Compiled patterns do not compare equal by value on every Python
        # version, and their type name is implementation specific, so
        # check the observable attributes instead.
        self.assertEqual('^c(a)t$', compiled.pattern)
        self.assertTrue(compiled.flags & re.VERBOSE)

    def testOther(self):
        """Conversion shouldn't change non-bert values"""
        before = [1, 2.0, ("foo", "bar")]
        self.assertEqual(before, self.convert(before))


if __name__ == '__main__':
    unittest.main()
102 tests/erlangdecoder.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+
+import unittest
+
+from bert import ErlangTermDecoder, Atom, Binary
+
class ErlangDecoderTest(unittest.TestCase):
    """Tests for ``ErlangTermDecoder`` using hand-encoded external terms.

    Each test feeds a list of byte values (starting with the version byte
    131) through ``decode`` and checks the resulting Python value.
    """

    def setUp(self):
        self.decoder = ErlangTermDecoder()

    def decode(self, binaryterm):
        """Decode ``binaryterm``; a list/tuple of byte values is first
        joined into a byte string."""
        if isinstance(binaryterm, (list, tuple)):
            binaryterm = "".join(chr(x) for x in binaryterm)
        return self.decoder.decode(binaryterm)

    def testNil(self):
        decoded = self.decode([131, 106])
        self.assertEqual([], decoded)
        self.assertTrue(isinstance(decoded, list))

    def testBinary(self):
        decoded = self.decode([131,109,0,0,0,3,102,111,111])
        self.assertEqual("foo", decoded)
        self.assertTrue(isinstance(decoded, Binary))

    def testAtom(self):
        decoded = self.decode([131,100,0,3,102,111,111])
        self.assertEqual("foo", decoded)
        self.assertTrue(isinstance(decoded, Atom))

    # This test used to be named testAtomFalse as well, so it was shadowed
    # by the method below and never executed.
    def testAtomTrue(self):
        decoded = self.decode([131,100,0,4,116,114,117,101])
        self.assertEqual(True, decoded)

    def testAtomFalse(self):
        decoded = self.decode([131,100,0,5,102,97,108,115,101])
        self.assertEqual(False, decoded)

    def testString(self):
        decoded = self.decode([131,107,0,3,102,111,111])
        self.assertEqual("foo", decoded)

    def testSmallInteger(self):
        decoded = self.decode([131,97,123])
        self.assertEqual(123, decoded)

    def testInteger(self):
        decoded = self.decode([131,98,0,0,48,57])
        self.assertEqual(12345, decoded)

    def testFloat(self):
        decoded = self.decode([131,99,49,46,50,51,52,52,57,57,57,57,57,57,57,57,57,57,57,57,51,48,55,50,101,43,48,48,0,0,0,0,0])
        self.assertEqual(1.2345, decoded)

    def testNewFloat(self):
        # Tag 70 followed by the big-endian IEEE 754 double for 1.5
        # (0x3FF8000000000000).
        decoded = self.decode([131,70,63,248,0,0,0,0,0,0])
        self.assertEqual(1.5, decoded)

    def testTuple(self):
        decoded = self.decode([131,104,3,100,0,3,102,111,111,107,0,4,116,101,115,116,97,123])
        self.assertTrue(isinstance(decoded, tuple))
        self.assertEqual(("foo", "test", 123), decoded)

    def testList(self):
        decoded = self.decode([131,108,0,0,0,3,98,0,0,4,0,107,0,4,116,101,115,116,99,
                               52,46,48,57,54,48,48,48,48,48,48,48,48,48,48,48,48,48,
                               56,53,50,55,101,43,48,48,0,0,0,0,0,106])
        self.assertTrue(isinstance(decoded, list))
        self.assertEqual([1024, "test", 4.096], decoded)

    def testSmallBig(self):
        decoded = self.decode([131,110,8,0,210,10,31,235,140,169,84,171])
        self.assertEqual(12345678901234567890, decoded)

    def testLargeBig(self):
        decoded = self.decode([131,111,0,0,1,68,0,210,10,63,206,150,241,207,172,75,241,
                               123,239,97,17,61,36,94,147,169,136,23,160,194,1,165,37,
                               183,227,81,27,0,235,231,229,213,80,111,152,189,144,241,
                               195,221,82,131,209,41,252,38,234,72,195,49,119,241,7,
                               243,243,51,143,183,150,131,5,116,236,105,156,89,34,152,
                               152,105,202,17,98,89,61,204,161,180,82,27,108,1,134,24,
                               233,162,51,170,20,239,17,91,125,79,20,82,85,24,36,254,
                               127,150,148,206,114,63,215,139,154,167,118,189,187,43,7,
                               88,148,120,127,73,2,52,46,160,204,222,239,58,167,137,
                               126,164,175,98,228,193,7,29,243,99,108,124,48,201,80,96,
                               191,171,149,122,162,68,81,102,247,202,239,176,196,61,17,
                               6,42,58,89,245,56,175,24,167,129,19,223,189,84,108,52,
                               224,0,238,147,214,131,86,201,60,231,73,223,168,46,245,
                               252,164,36,82,149,239,209,167,210,137,206,117,33,248,8,
                               177,90,118,166,217,122,219,48,136,16,243,127,211,115,99,
                               152,91,26,172,54,86,31,173,48,41,208,151,56,209,2,230,
                               251,72,20,57,220,41,46,181,146,246,145,65,27,205,184,96,
                               66,198,4,131,76,192,184,175,78,43,129,237,236,63,59,31,
                               171,49,193,94,74,255,79,30,1,135,72,15,46,90,68,6,240,
                               186,107,170,103,86,72,93,23,230,73,46,66,20,97,50,193,
                               59,209,43,234,46,228,146,21,147,233,39,69,208,40,205,
                               144,251,16])
        # The expected value is the 780-digit number formed by repeating
        # the digits 1234567890 seventy-eight times (324 base-256 digits).
        expected = int("1234567890" * 78)
        self.assertEqual(expected, decoded)


if __name__ == '__main__':
    unittest.main()

0 comments on commit 73d9d64

Please sign in to comment.