maxmind · autarch · Feb 11, 2014 · Feb 10, 2014 · Feb 10, 2014 · Feb 10, 2014
diff --git a/.pylintrc b/.pylintrc
@@ -0,0 +1,6 @@
+[MESSAGES CONTROL]
+disable=R0201,W0105
+
+[BASIC]
+
+no-docstring-rgx=_.*
diff --git a/.travis.yml b/.travis.yml
@@ -4,6 +4,7 @@ python:
   - 2.6
   - 2.7
   - 3.3
+  - pypy
 
 before_install:
   - git submodule update --init --recursive
@@ -15,10 +16,13 @@ before_install:
   - sudo make install
   - sudo ldconfig
   - cd ..
+  - pip install pylint
   - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install unittest2; fi
 
 script:
   - CFLAGS="-Werror -Wall -Wextra" python setup.py test
+  - MAXMINDDB_PURE_PYTHON=1 python setup.py test
+  - if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pylint --rcfile .pylintrc maxminddb/*.py; fi
 
 notifications:
   email:

diff --git a/README.rst b/README.rst
@@ -1,6 +1,6 @@
-===========================
-MaxMind DB Python Extension
-===========================
+========================
+MaxMind DB Python Module
+========================
 
 Beta Note
 ---------
@@ -10,17 +10,21 @@ release.
 Description
 -----------
 
-This is a Python extension for reading MaxMind DB files. MaxMind DB is a
-binary file format that stores data indexed by IP address subnets (IPv4 or
-IPv6).
+This is a Python module for reading MaxMind DB files. The module includes both
+a pure Python reader and an optional C extension.
+
+MaxMind DB is a binary file format that stores data indexed by IP address
+subnets (IPv4 or IPv6).
 
 Installation
 ------------
 
-You must have the `libmaxminddb <https://github.com/maxmind/libmaxminddb>`_ C
-library installed before installing this extension.
+If you want to use the C extension, you must first install the `libmaxminddb
+<https://github.com/maxmind/libmaxminddb>`_ C library before installing this
+module. If the library is not available, the module will fall back to a pure
+Python implementation.
 
-To install the MaxMind DB extension, type:
+To install maxminddb, type:
 
 .. code-block:: bash
 
@@ -74,13 +78,16 @@ invalid IP address or an IPv6 address in an IPv4 database.
 Requirements
 ------------
 
-This code requires CPython 2.6+ or 3.3+. Older versions are not supported.
+This code requires Python 2.6+ or 3.3+. The C extension requires CPython. The
+pure Python implementation has been tested with PyPy.
+
+On Python 2, the `ipaddr module <https://code.google.com/p/ipaddr-py/>`_ is
+required.
 
 Versioning
 ----------
 
-The MaxMind DB Python extension uses
-`Semantic Versioning <http://semver.org/>`_.
+The MaxMind DB Python module uses `Semantic Versioning <http://semver.org/>`_.
 
 Support
 -------

diff --git a/maxminddb/__init__.py b/maxminddb/__init__.py
@@ -0,0 +1,17 @@
+# pylint:disable=C0111
+import os
+
+# Prefer the C extension; use the pure Python reader when the extension
+# cannot be imported or when the MAXMINDDB_PURE_PYTHON environment variable
+# is set to a non-empty value.
+try:
+    if os.environ.get('MAXMINDDB_PURE_PYTHON'):
+        # Raising ImportError here deliberately reuses the fallback branch
+        # below instead of duplicating the pure Python imports.
+        raise ImportError()
+    from maxminddb.extension import Reader, InvalidDatabaseError
+except ImportError:
+    from maxminddb.decoder import InvalidDatabaseError
+    from maxminddb.reader import Reader
+
+
+__title__ = 'maxminddb'
+__version__ = '0.3.0'
+__author__ = 'Gregory Oschwald'
+__license__ = 'LGPLv2.1+'
+__copyright__ = 'Copyright 2014 Maxmind, Inc.'
diff --git a/maxminddb/compat.py b/maxminddb/compat.py
@@ -0,0 +1,36 @@
+import sys
+
+# pylint: skip-file
+
+# True when running on any Python 2.x interpreter.
+is_py2 = sys.version_info[0] == 2
+
+# Compare as a tuple: checking major and minor separately would report
+# False for a release such as 4.0, whose minor number is below 3.
+is_py3_3_or_better = sys.version_info >= (3, 3)
+
+if is_py2:
+    # Python 2 has no ``ipaddress`` module; use the ``ipaddr`` package under
+    # the same name and give it the ``ip_address`` entry point.
+    import ipaddr as ipaddress  # pylint:disable=F0401
+    ipaddress.ip_address = ipaddress.IPAddress
+else:
+    import ipaddress  # pylint:disable=F0401
+
+
+if is_py2:
+    # Indexing a Python 2 byte string yields a 1-character str.
+    int_from_byte = ord
+
+    FileNotFoundError = IOError
+
+    def int_from_bytes(b):
+        """Return the big-endian unsigned integer stored in ``b``."""
+        if b:
+            return int(b.encode("hex"), 16)
+        # int('', 16) would raise, so empty input is handled explicitly.
+        return 0
+
+    byte_from_int = chr
+
+else:
+    def int_from_byte(x):
+        """Return ``x`` unchanged; indexing bytes already yields an int."""
+        return x
+
+    # Re-bind the builtin so it can be imported from this module by name.
+    FileNotFoundError = FileNotFoundError
+
+    def int_from_bytes(x):
+        """Return the big-endian unsigned integer stored in ``x``."""
+        return int.from_bytes(x, 'big')
+
+    def byte_from_int(x):
+        """Return a single-byte ``bytes`` object with the value ``x``."""
+        return bytes([x])
diff --git a/maxminddb/decoder.py b/maxminddb/decoder.py
@@ -0,0 +1,167 @@
+"""
+maxminddb.decoder
+~~~~~~~~~~~~~~~~~
+
+This module contains code for decoding the MaxMind DB data section.
+
+"""
+from __future__ import unicode_literals
+
+import struct
+
+from maxminddb.compat import byte_from_int, int_from_bytes
+from maxminddb.errors import InvalidDatabaseError
+
+
+class Decoder(object):  # pylint: disable=too-few-public-methods
+
+    """Decoder for the data section of the MaxMind DB
+
+    Every ``_decode_*`` helper takes ``(size, offset)`` -- the size taken
+    from the control byte and the offset of the first payload byte -- and
+    returns a ``(value, new_offset)`` tuple, where ``new_offset`` is the
+    position just past the bytes that were consumed.
+    """
+
+    def __init__(self, database_buffer, pointer_base=0, pointer_test=False):
+        """Create a Decoder for a MaxMind DB
+
+        Arguments:
+        database_buffer -- an mmap'd MaxMind DB file.
+        pointer_base -- the base number to use when decoding a pointer
+        pointer_test -- used for internal unit testing of pointer code
+        """
+        self._pointer_test = pointer_test
+        self._buffer = database_buffer
+        self._pointer_base = pointer_base
+
+    def _decode_array(self, size, offset):
+        # ``size`` is the number of elements, each decoded in sequence.
+        array = []
+        for _ in range(size):
+            (value, offset) = self.decode(offset)
+            array.append(value)
+        return array, offset
+
+    def _decode_boolean(self, size, offset):
+        # The value lives in the size field; no payload bytes are consumed.
+        return size != 0, offset
+
+    def _decode_bytes(self, size, offset):
+        new_offset = offset + size
+        return self._buffer[offset:new_offset], new_offset
+
+    # pylint: disable=no-self-argument
+    # |-> I am open to better ways of doing this as long as it doesn't involve
+    #     lots of code duplication.
+    def _decode_packed_type(type_code, type_size, pad=False):
+        # Factory run at class-definition time: returns a decoder method for
+        # a fixed-width ``struct`` type.
+        #
+        # type_code -- struct format string used to unpack the value
+        # type_size -- number of bytes that format expects
+        # pad -- if true, the stored value may be shorter than type_size and
+        #        is left-padded with zero bytes before unpacking
+        #
+        # pylint: disable=protected-access, missing-docstring
+        def unpack_type(self, size, offset):
+            if pad:
+                # Only ``size`` bytes belong to this value. Reading
+                # type_size bytes would swallow the following data and
+                # return a wrong offset.
+                if size > type_size:
+                    # Always raises here: an oversized value is corrupt.
+                    self._verify_size(size, type_size)
+                stored_size = size
+            else:
+                self._verify_size(size, type_size)
+                stored_size = type_size
+            new_offset = offset + stored_size
+            packed_bytes = self._buffer[offset:new_offset]
+            if pad:
+                packed_bytes = packed_bytes.rjust(type_size, b'\x00')
+            (value,) = struct.unpack(type_code, packed_bytes)
+            return value, new_offset
+        return unpack_type
+
+    def _decode_map(self, size, offset):
+        # ``size`` is the number of key/value pairs.
+        container = {}
+        for _ in range(size):
+            (key, offset) = self.decode(offset)
+            (value, offset) = self.decode(offset)
+            container[key] = value
+        return container, offset
+
+    # Constant added to the decoded pointer value, keyed by pointer size in
+    # bytes.
+    _pointer_value_offset = {
+        1: 0,
+        2: 2048,
+        3: 526336,
+        4: 0,
+    }
+
+    def _decode_pointer(self, size, offset):
+        # Bits 3-4 of the size field give the pointer width (1-4 bytes).
+        pointer_size = ((size >> 3) & 0x3) + 1
+        new_offset = offset + pointer_size
+        pointer_bytes = self._buffer[offset:new_offset]
+        # For 1-3 byte pointers the low three bits of the size field are the
+        # most significant bits of the value; 4-byte pointers ignore them.
+        packed = pointer_bytes if pointer_size == 4 else struct.pack(
+            b'!c', byte_from_int(size & 0x7)) + pointer_bytes
+        unpacked = int_from_bytes(packed)
+        pointer = unpacked + self._pointer_base + \
+            self._pointer_value_offset[pointer_size]
+        if self._pointer_test:
+            return pointer, new_offset
+        # Follow the pointer, but resume after the pointer itself rather
+        # than after the pointed-to value.
+        (value, _) = self.decode(pointer)
+        return value, new_offset
+
+    def _decode_uint(self, size, offset):
+        new_offset = offset + size
+        uint_bytes = self._buffer[offset:new_offset]
+        return int_from_bytes(uint_bytes), new_offset
+
+    def _decode_utf8_string(self, size, offset):
+        new_offset = offset + size
+        return self._buffer[offset:new_offset].decode('utf-8'), new_offset
+
+    # Dispatch table from type number to decoder function. The entries are
+    # plain functions, so ``decode`` passes ``self`` explicitly.
+    _type_decoder = {
+        1: _decode_pointer,
+        2: _decode_utf8_string,
+        3: _decode_packed_type(b'!d', 8),  # double,
+        4: _decode_bytes,
+        5: _decode_uint,  # uint16
+        6: _decode_uint,  # uint32
+        7: _decode_map,
+        8: _decode_packed_type(b'!i', 4, pad=True),  # int32
+        9: _decode_uint,  # uint64
+        10: _decode_uint,  # uint128
+        11: _decode_array,
+        14: _decode_boolean,
+        15: _decode_packed_type(b'!f', 4),  # float,
+    }
+
+    def decode(self, offset):
+        """Decode a section of the data section starting at offset
+
+        Arguments:
+        offset -- the location of the data structure to decode
+
+        Returns a (value, new_offset) tuple, where new_offset is the
+        position just past the decoded structure.
+
+        Raises InvalidDatabaseError if the type number is not recognized.
+        """
+        new_offset = offset + 1
+        (ctrl_byte,) = struct.unpack(b'!B', self._buffer[offset:new_offset])
+        # The top three bits of the control byte hold the type number.
+        type_num = ctrl_byte >> 5
+        # Extended type
+        if not type_num:
+            (type_num, new_offset) = self._read_extended(new_offset)
+
+        try:
+            decoder = self._type_decoder[type_num]
+        except KeyError:
+            # Report corrupt data with the module's own error type instead
+            # of leaking a bare KeyError from the dispatch table.
+            raise InvalidDatabaseError(
+                'Unexpected type number ({type}) '
+                'encountered'.format(type=type_num))
+
+        (size, new_offset) = self._size_from_ctrl_byte(
+            ctrl_byte, new_offset, type_num)
+        return decoder(self, size, new_offset)
+
+    def _read_extended(self, offset):
+        # An extended type stores (type number - 7) in the byte after the
+        # control byte.
+        (next_byte,) = struct.unpack(b'!B', self._buffer[offset:offset + 1])
+        type_num = next_byte + 7
+        # Types below 8 fit in the control byte and must not be encoded as
+        # extended; the byte is unsigned, so only a "< 8" test can fire.
+        if type_num < 8:
+            raise InvalidDatabaseError(
+                'Something went horribly wrong in the decoder. An '
+                'extended type resolved to a type number < 8 '
+                '({type})'.format(type=type_num))
+        return type_num, offset + 1
+
+    def _verify_size(self, expected, actual):
+        if expected != actual:
+            raise InvalidDatabaseError(
+                'The MaxMind DB file\'s data section contains bad data '
+                '(unknown data type or corrupt data)'
+            )
+
+    def _size_from_ctrl_byte(self, ctrl_byte, offset, type_num):
+        # The low five bits of the control byte hold the size field.
+        size = ctrl_byte & 0x1f
+        if type_num == 1:
+            # Pointers interpret the size bits themselves.
+            return size, offset
+        # Size values 29, 30 and 31 mean the real size is stored in the
+        # next 1, 2 or 3 bytes respectively.
+        bytes_to_read = 0 if size < 29 else size - 28
+        size_bytes = self._buffer[offset:offset + bytes_to_read]
+
+        # Using unpack rather than int_from_bytes as it is about 200 lookups
+        # per second faster here.
+        if size == 29:
+            size = 29 + struct.unpack(b'!B', size_bytes)[0]
+        elif size == 30:
+            size = 285 + struct.unpack(b'!H', size_bytes)[0]
+        elif size > 30:
+            # Three bytes are left-padded to four for the '!I' format.
+            size = struct.unpack(
+                b'!I', size_bytes.rjust(4, b'\x00'))[0] + 65821
+
+        return size, offset + bytes_to_read
diff --git a/maxminddb/errors.py b/maxminddb/errors.py
@@ -0,0 +1,11 @@
+"""
+maxminddb.errors
+~~~~~~~~~~~~~~~~
+
+This module contains custom errors for the MaxMind DB reader
+"""
+
+
+class InvalidDatabaseError(RuntimeError):
+
+    """Raised when unexpected data is found in the database."""