From d0b441e8a2917fe78438de2994c910b4d59b75ff Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 07:43:13 -0800 Subject: [PATCH 01/17] Working pure-Python reader. All of the tests pass. On my machine, the reader does about 2000 lookups per second with CPython and about 7000 with PyPy. I am still working on getting the conditional loading of the C extension working. To do that, I need to make some changes to the extension itself. --- .pylintrc | 6 + .travis.yml | 1 + maxminddb/__init__.py | 11 + maxminddb/compat.py | 36 +++ maxminddb/decoder.py | 143 +++++++++++ maxminddb/errors.py | 3 + .../extension/maxminddb.c | 0 maxminddb/reader.py | 154 ++++++++++++ setup.py | 182 +++++++++++--- tests/decoder_test.py | 231 ++++++++++++++++++ tests/maxminddb_test.py | 28 ++- 11 files changed, 746 insertions(+), 49 deletions(-) create mode 100644 .pylintrc create mode 100644 maxminddb/__init__.py create mode 100644 maxminddb/compat.py create mode 100644 maxminddb/decoder.py create mode 100644 maxminddb/errors.py rename maxminddb.c => maxminddb/extension/maxminddb.c (100%) create mode 100644 maxminddb/reader.py create mode 100644 tests/decoder_test.py diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..19353df --- /dev/null +++ b/.pylintrc @@ -0,0 +1,6 @@ +[MESSAGES CONTROL] +disable=R0201,W0105 + +[BASIC] + +no-docstring-rgx=_.* diff --git a/.travis.yml b/.travis.yml index 2bf8496..f41eaed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ python: - 2.6 - 2.7 - 3.3 + - pypy before_install: - git submodule update --init --recursive diff --git a/maxminddb/__init__.py b/maxminddb/__init__.py new file mode 100644 index 0000000..b87e00d --- /dev/null +++ b/maxminddb/__init__.py @@ -0,0 +1,11 @@ +# pylint:disable=C0111 + +from .decoder import InvalidDatabaseError +from .reader import Reader + + +__title__ = 'maxminddb' +__version__ = '0.3.0' +__author__ = 'Gregory Oschwald' +__license__ = 'LGPLv2.1+' +__copyright__ = 'Copyright 2014 
Maxmind, Inc.' diff --git a/maxminddb/compat.py b/maxminddb/compat.py new file mode 100644 index 0000000..c639804 --- /dev/null +++ b/maxminddb/compat.py @@ -0,0 +1,36 @@ +import sys + +is_py2 = sys.version_info[0] == 2 + +is_py3_3_or_better = ( + sys.version_info[0] >= 3 and sys.version_info[1] >= 3) + +if is_py2 and not is_py3_3_or_better: + import ipaddr as ipaddress # pylint:disable=F0401 + ipaddress.ip_address = ipaddress.IPAddress +else: + import ipaddress # pylint:disable=F0401 + + +if is_py2: + int_from_byte = ord + + FileNotFoundError = IOError + + def int_from_bytes(b): + if b: + return int(b.encode("hex"), 16) + return 0 + + byte_from_int = chr + +else: + # XXX -This does apparently slows down the reader by 25 lookups per second + # during the search tree lookup. Figure out alternative + int_from_byte = lambda x: x + + FileNotFoundError = FileNotFoundError + + int_from_bytes = lambda x: int.from_bytes(x, 'big') + + byte_from_int = lambda x: bytes([x]) diff --git a/maxminddb/decoder.py b/maxminddb/decoder.py new file mode 100644 index 0000000..d4d8599 --- /dev/null +++ b/maxminddb/decoder.py @@ -0,0 +1,143 @@ +from __future__ import unicode_literals + +from struct import pack, unpack + +from .compat import byte_from_int, int_from_bytes +from .errors import InvalidDatabaseError + + +class Decoder(object): + + """Decodes the data section of the MaxMind DB""" + + def __init__(self, database_buffer, pointer_base=0, pointer_test=False): + self._pointer_test = pointer_test + self._buffer = database_buffer + self._pointer_base = pointer_base + + def _decode_array(self, size, offset): + array = [] + for _ in range(size): + (value, offset) = self.decode(offset) + array.append(value) + return array, offset + + def _decode_boolean(self, size, offset): + return size != 0, offset + + def _decode_bytes(self, size, offset): + new_offset = offset + size + return self._buffer[offset:new_offset], new_offset + + def _decode_packed_type(type_code, type_size, pad=False): + 
def unpack_type(self, size, offset): + if not pad: + self._verify_size(size, type_size) + new_offset = offset + type_size + packed_bytes = self._buffer[offset:new_offset] + if pad: + packed_bytes = packed_bytes.rjust(type_size, b'\x00') + (value,) = unpack(type_code, packed_bytes) + return value, new_offset + return unpack_type + + def _decode_map(self, size, offset): + container = {} + for _ in range(size): + (key, offset) = self.decode(offset) + (value, offset) = self.decode(offset) + container[key] = value + return container, offset + + _pointer_value_offset = { + 1: 0, + 2: 2048, + 3: 526336, + 4: 0, + } + + def _decode_pointer(self, size, offset): + pointer_size = ((size >> 3) & 0x3) + 1 + new_offset = offset + pointer_size + b = self._buffer[offset:new_offset] + packed = b if pointer_size == 4 else pack( + b'!c', byte_from_int(size & 0x7)) + b + unpacked = int_from_bytes(packed) + pointer = unpacked + self._pointer_base + \ + self._pointer_value_offset[pointer_size] + if self._pointer_test: + return pointer, new_offset + (value, _) = self.decode(pointer) + return value, new_offset + + def _decode_uint(self, size, offset): + new_offset = offset + size + b = self._buffer[offset:new_offset] + return int_from_bytes(b), new_offset + + def _decode_utf8_string(self, size, offset): + new_offset = offset + size + return self._buffer[offset:new_offset].decode('utf-8'), new_offset + + _type_dispatch = { + 1: _decode_pointer, + 2: _decode_utf8_string, + 3: _decode_packed_type(b'!d', 8), # double, + 4: _decode_bytes, + 5: _decode_uint, # uint16 + 6: _decode_uint, # uint32 + 7: _decode_map, + 8: _decode_packed_type(b'!i', 4, pad=True), # int32 + 9: _decode_uint, # uint64 + 10: _decode_uint, # uint128 + 11: _decode_array, + 14: _decode_boolean, + 15: _decode_packed_type(b'!f', 4), # float, + } + + def decode(self, offset): + new_offset = offset + 1 + (ctrl_byte,) = unpack(b'!B', self._buffer[offset:new_offset]) + type_num = ctrl_byte >> 5 + # Extended type + if not 
type_num: + (type_num, new_offset) = self._read_extended(new_offset) + + (size, new_offset) = self._size_from_ctrl_byte( + ctrl_byte, new_offset, type_num) + return self._type_dispatch[type_num](self, size, new_offset) + + def _read_extended(self, offset): + (next_byte,) = unpack(b'!B', self._buffer[offset:offset + 1]) + type_num = next_byte + 7 + if type_num < 7: + raise InvalidDatabaseError( + 'Something went horribly wrong in the decoder. An ' + 'extended type resolved to a type number < 8 ' + '({type})'.format(type=type_num)) + return next_byte + 7, offset + 1 + + def _verify_size(self, expected, actual): + if expected != actual: + raise InvalidDatabaseError( + 'The MaxMind DB file\'s data section contains bad data ' + '(unknown data type or corrupt data)' + ) + + def _size_from_ctrl_byte(self, ctrl_byte, offset, type_num): + size = ctrl_byte & 0x1f + if type_num == 1: + return size, offset + bytes_to_read = 0 if size < 29 else size - 28 + size_bytes = self._buffer[offset:offset + bytes_to_read] + + # Using unpack rather than int_from_bytes as it is about 200 lookups + # per second faster here. 
+ if size == 29: + size = 29 + unpack(b'!B', size_bytes)[0] + elif size == 30: + size = 285 + unpack(b'!H', size_bytes)[0] + elif size > 30: + size = unpack(b'!I', size_bytes.rjust(4, b'\x00'))[0] + 65821 + + return size, offset + bytes_to_read diff --git a/maxminddb/errors.py b/maxminddb/errors.py new file mode 100644 index 0000000..d02a772 --- /dev/null +++ b/maxminddb/errors.py @@ -0,0 +1,3 @@ +class InvalidDatabaseError(RuntimeError): + + """This error is thrown when unexpected data is found in the database.""" diff --git a/maxminddb.c b/maxminddb/extension/maxminddb.c similarity index 100% rename from maxminddb.c rename to maxminddb/extension/maxminddb.c diff --git a/maxminddb/reader.py b/maxminddb/reader.py new file mode 100644 index 0000000..8853fc5 --- /dev/null +++ b/maxminddb/reader.py @@ -0,0 +1,154 @@ +from __future__ import unicode_literals + +import mmap +from struct import unpack + +from .compat import byte_from_int, int_from_byte, ipaddress +from .decoder import Decoder +from .errors import InvalidDatabaseError + + +class Reader(object): + + """ + Instances of this class provide a reader for the MaxMind DB format. IP + addresses can be looked up using the get method. + """ + + _DATA_SECTION_SEPARATOR_SIZE = 16 + _METADATA_START_MARKER = b"\xAB\xCD\xEFMaxMind.com" + + _ipv4_start = None + + def __init__(self, database): + """Reader for the MaxMind DB file format + + The file passed to it must be a valid MaxMind DB file such as a GeoIP2 + database file. + """ + with open(database, 'r+b') as db_file: + self._buffer = mmap.mmap( + db_file.fileno(), 0, access=mmap.ACCESS_READ) + + metadata_start = self._buffer.rfind(self._METADATA_START_MARKER, + self._buffer.size() - 128 * 1024) + + if metadata_start == -1: + raise InvalidDatabaseError('Error opening database file ({0}). ' + 'Is this a valid MaxMind DB file?' 
+ ''.format(database)) + + metadata_start += len(self._METADATA_START_MARKER) + metadata_decoder = Decoder(self._buffer, metadata_start) + (metadata, _) = metadata_decoder.decode(metadata_start) + self._metadata = Metadata(**metadata) + + self._decoder = Decoder(self._buffer, self._metadata.search_tree_size + + self._DATA_SECTION_SEPARATOR_SIZE) + + # XXX - consider making a property + def metadata(self): + return self._metadata + + # XXX - change to lookup? + def get(self, ip_address): + """Look up ip_address in the MaxMind DB""" + ip = ipaddress.ip_address(ip_address) + + if ip.version == 6 and self._metadata.ip_version == 4: + raise ValueError('Error looking up {0}. You attempted to look up ' + 'an IPv6 address in an IPv4-only database.'.format( + ip_address)) + pointer = self._find_address_in_tree(ip) + + return self._resolve_data_pointer(pointer) if pointer else None + + def _find_address_in_tree(self, ip_address): + packed = ip_address.packed + + bit_count = len(packed) * 8 + node = self._start_node(bit_count) + + for i in range(bit_count): + if node >= self._metadata.node_count: + break + bit = 1 & (int_from_byte(packed[i >> 3]) >> 7 - (i % 8)) + node = self._read_node(node, bit) + + if node == self._metadata.node_count: + # Record is empty + return 0 + elif node > self._metadata.node_count: + return node + + raise InvalidDatabaseError('Invalid node in search tree') + + def _start_node(self, length): + if self._metadata.ip_version != 6 or length == 128: + return 0 + + # We are looking up an IPv4 address in an IPv6 tree. Skip over the + # first 96 nodes. 
+ if self._ipv4_start: + return self._ipv4_start + + node = 0 + for i in range(96): + if node >= self._metadata.node_count: + break + node = self._read_node(node, 0) + self._ipv4_start = node + return node + + def _read_node(self, node_number, index): + base_offset = node_number * self._metadata.node_byte_size + + record_size = self._metadata.record_size + if record_size == 24: + offset = base_offset + index * 3 + node_bytes = b'\x00' + self._buffer[offset:offset + 3] + elif record_size == 28: + (middle,) = unpack( + b'!B', self._buffer[base_offset + 3:base_offset + 4]) + if index: + middle &= 0x0F + else: + middle = (0xF0 & middle) >> 4 + offset = base_offset + index * 4 + node_bytes = byte_from_int( + middle) + self._buffer[offset:offset + 3] + elif record_size == 32: + offset = base_offset + index * 4 + node_bytes = self._buffer[offset:offset + 4] + else: + raise InvalidDatabaseError( + 'Unknown record size: {0}'.format(record_size)) + return unpack(b'!I', node_bytes)[0] + + def _resolve_data_pointer(self, pointer): + resolved = pointer - self._metadata.node_count + \ + self._metadata.search_tree_size + + if resolved > self._buffer.size(): + raise InvalidDatabaseError( + "The MaxMind DB file's search tree is corrupt") + + (data, _) = self._decoder.decode(resolved) + return data + + def close(self): + self._buffer.close() + + +class Metadata(object): + + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + @property + def node_byte_size(self): + return self.record_size // 4 + + @property + def search_tree_size(self): + return self.node_count * self.node_byte_size diff --git a/setup.py b/setup.py index b33bcc7..2a8cdc6 100644 --- a/setup.py +++ b/setup.py @@ -1,45 +1,153 @@ +import os +import re +import sys + # This import is apparently needed for Nose on Red Hat's Python import multiprocessing +from distutils.command.build_ext import build_ext +from distutils.errors import (CCompilerError, DistutilsExecError, + DistutilsPlatformError) + try: - from 
setuptools import setup, Extension + from setuptools import setup, Extension, Feature except ImportError: from distutils.core import setup, Extension + Feature = None + +cmdclass = {} +extra = {} +pypy = hasattr(sys, 'pypy_version_info') +jython = sys.platform.startswith('java') + +ext_module = [ + Extension( + 'maxminddb', + libraries=['maxminddb'], + sources=['maxminddb/extension/maxminddb.c'], + extra_compile_args=[ + '-Wall', '-Wextra'], + ) +] + +# Cargo cult code for installing extension with pure Python fallback. +# Taken from SQLAlchemy, but this same basic code exists in many modules. +ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError) +if sys.platform == 'win32': + # 2.6's distutils.msvc9compiler can raise an IOError when failing to + # find the compiler + ext_errors += (IOError,) + + +class BuildFailed(Exception): + + def __init__(self): + self.cause = sys.exc_info()[1] # work around py 2/3 different syntax + + +class ve_build_ext(build_ext): + # This class allows C extension building to fail. 
+ + def run(self): + try: + build_ext.run(self) + except DistutilsPlatformError: + raise BuildFailed() + + def build_extension(self, ext): + try: + build_ext.build_extension(self, ext) + except ext_errors: + raise BuildFailed() + except ValueError: + # this can happen on Windows 64 bit, see Python issue 7511 + if "'path'" in str(sys.exc_info()[1]): # works with both py 2/3 + raise BuildFailed() + raise + +cmdclass['build_ext'] = ve_build_ext + +# + +ROOT = os.path.dirname(__file__) + +with open(os.path.join(ROOT, 'README.rst'), 'rb') as fd: + README = fd.read().decode('utf8') + +with open(os.path.join(ROOT, 'LICENSE'), 'rb') as fd: + LICENSE = fd.read().decode('utf8') + +with open(os.path.join(ROOT, 'maxminddb', '__init__.py'), 'rb') as fd: + VERSION = re.compile( + r".*__version__ = '(.*?)'", re.S).match(fd.read().decode('utf8')).group(1) + + +def run_setup(with_cext): + kwargs = extra.copy() + if with_cext: + if Feature: + kwargs['features'] = {'extension': Feature( + "optional C implementation", + standard=True, + ext_modules=ext_module + )} + else: + kwargs['ext_modules'] = ext_module + + setup( + name='maxminddb', + version=VERSION, + description='Python extension for reading the MaxMind DB format', + long_description=README, + url='http://www.maxmind.com/', + bugtrack_url='https://github.com/maxmind/MaxMind-DB-Reader-python/issues', + packages=['maxminddb'], + package_data={'': ['LICENSE']}, + package_dir={'maxminddb': 'maxminddb'}, + include_package_data=True, + tests_require=['nose'], + test_suite='nose.collector', + license=LICENSE, + cmdclass=cmdclass, + classifiers=( + 'Development Status :: 3 - Alpha', + 'Environment :: Web Environment', + 'Intended Audience :: Developers', + 'Intended Audience :: System Administrators', + 'License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming 
Language :: Python :: 3.3', + 'Programming Language :: Python', + 'Topic :: Internet :: Proxy Servers', + 'Topic :: Internet', + ), + **kwargs + ) + +if pypy or jython: + run_setup(False) + status_msgs( + "WARNING: Disabling C extension due ot Python platform.", + "Plain-Python build succeeded." + ) +else: + try: + run_setup(True) + except BuildFailed as exc: + status_msgs( + exc.cause, + "WARNING: The C extension could not be compiled, " + + "speedups are not enabled.", + "Failure information, if any, is above.", + "Retrying the build without the C extension now." + ) + run_setup(False) -module = Extension( - 'maxminddb', - libraries=['maxminddb'], - sources=['maxminddb.c'], - extra_compile_args=[ - '-Wall', '-Wextra'], -) - -setup( - name='maxminddb', - version='0.2.1', - description='Python extension for reading the MaxMind DB format', - ext_modules=[module], - long_description=open('README.rst').read(), - url='http://www.maxmind.com/', - bugtrack_url='https://github.com/maxmind/MaxMind-DB-Reader-python/issues', - package_data={'': ['LICENSE']}, - include_package_data=True, - tests_require=['nose'], - test_suite='nose.collector', - license=open('LICENSE').read(), - classifiers=( - 'Development Status :: 3 - Alpha', - 'Environment :: Web Environment', - 'Intended Audience :: Developers', - 'Intended Audience :: System Administrators', - 'License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python', - 'Topic :: Internet :: Proxy Servers', - 'Topic :: Internet', - ), -) + status_msgs( + "WARNING: The C extension could not be compiled, " + + "speedups are not enabled.", + "Plain-Python build succeeded." 
+ ) diff --git a/tests/decoder_test.py b/tests/decoder_test.py new file mode 100644 index 0000000..ad1da12 --- /dev/null +++ b/tests/decoder_test.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals + +import mmap +import sys + +from maxminddb.compat import byte_from_int, int_from_byte +from maxminddb.decoder import Decoder + +if sys.version_info[:2] == (2, 6): + import unittest2 as unittest +else: + import unittest + +if sys.version_info[0] == 2: + unittest.TestCase.assertRaisesRegex = unittest.TestCase.assertRaisesRegexp + unittest.TestCase.assertRegex = unittest.TestCase.assertRegexpMatches + + +class TestDecoder(unittest.TestCase): + + def test_arrays(self): + arrays = { + b'\x00\x04': [], + b'\x01\x04\x43\x46\x6f\x6f': ['Foo'], + b'\x02\x04\x43\x46\x6f\x6f\x43\xe4\xba\xba': + ['Foo', '人'], + } + self.validate_type_decoding('arrays', arrays) + + def test_boolean(self): + booleans = { + b"\x00\x07": False, + b"\x01\x07": True, + } + self.validate_type_decoding('booleans', booleans) + + def test_double(self): + doubles = { + b"\x68\x00\x00\x00\x00\x00\x00\x00\x00": 0.0, + b"\x68\x3F\xE0\x00\x00\x00\x00\x00\x00": 0.5, + b"\x68\x40\x09\x21\xFB\x54\x44\x2E\xEA": 3.14159265359, + b"\x68\x40\x5E\xC0\x00\x00\x00\x00\x00": 123.0, + b"\x68\x41\xD0\x00\x00\x00\x07\xF8\xF4": 1073741824.12457, + b"\x68\xBF\xE0\x00\x00\x00\x00\x00\x00": -0.5, + b"\x68\xC0\x09\x21\xFB\x54\x44\x2E\xEA": -3.14159265359, + b"\x68\xC1\xD0\x00\x00\x00\x07\xF8\xF4": -1073741824.12457, + } + self.validate_type_decoding('double', doubles) + + def test_float(self): + floats = { + b"\x04\x08\x00\x00\x00\x00": 0.0, + b"\x04\x08\x3F\x80\x00\x00": 1.0, + b"\x04\x08\x3F\x8C\xCC\xCD": 1.1, + b"\x04\x08\x40\x48\xF5\xC3": 3.14, + b"\x04\x08\x46\x1C\x3F\xF6": 9999.99, + b"\x04\x08\xBF\x80\x00\x00": -1.0, + b"\x04\x08\xBF\x8C\xCC\xCD": -1.1, + b"\x04\x08\xC0\x48\xF5\xC3": -3.14, + b"\x04\x08\xC6\x1C\x3F\xF6": -9999.99} + 
self.validate_type_decoding('float', floats) + + def test_int32(self): + int32 = { + b"\x00\x01": 0, + b"\x04\x01\xff\xff\xff\xff": -1, + b"\x01\x01\xff": 255, + b"\x04\x01\xff\xff\xff\x01": -255, + b"\x02\x01\x01\xf4": 500, + b"\x04\x01\xff\xff\xfe\x0c": -500, + b"\x02\x01\xff\xff": 65535, + b"\x04\x01\xff\xff\x00\x01": -65535, + b"\x03\x01\xff\xff\xff": 16777215, + b"\x04\x01\xff\x00\x00\x01": -16777215, + b"\x04\x01\x7f\xff\xff\xff": 2147483647, + b"\x04\x01\x80\x00\x00\x01": -2147483647, + } + self.validate_type_decoding('int32', int32) + + def test_map(self): + maps = { + b'\xe0': {}, + b'\xe1\x42\x65\x6e\x43\x46\x6f\x6f': {'en': 'Foo'}, + b'\xe2\x42\x65\x6e\x43\x46\x6f\x6f\x42\x7a\x68\x43\xe4\xba\xba': + {'en': 'Foo', 'zh': '人'}, + (b'\xe1\x44\x6e\x61\x6d\x65\xe2\x42\x65\x6e' + b'\x43\x46\x6f\x6f\x42\x7a\x68\x43\xe4\xba\xba'): + {'name': {'en': 'Foo', 'zh': '人'}}, + (b'\xe1\x49\x6c\x61\x6e\x67\x75\x61\x67\x65\x73' + b'\x02\x04\x42\x65\x6e\x42\x7a\x68'): + {'languages': ['en', 'zh']}, + } + self.validate_type_decoding('maps', maps) + + def test_pointer(self): + pointers = { + b'\x20\x00': 0, + b'\x20\x05': 5, + b'\x20\x0a': 10, + b'\x23\xff': 1023, + b'\x28\x03\xc9': 3017, + b'\x2f\xf7\xfb': 524283, + b'\x2f\xff\xff': 526335, + b'\x37\xf7\xf7\xfe': 134217726, + b'\x37\xff\xff\xff': 134744063, + b'\x38\x7f\xff\xff\xff': 2147483647, + b'\x38\xff\xff\xff\xff': 4294967295, + } + self.validate_type_decoding('pointers', pointers) + + strings = { + b"\x40": '', + b"\x41\x31": '1', + b"\x43\xE4\xBA\xBA": '人', + (b"\x5b\x31\x32\x33\x34" + b"\x35\x36\x37\x38\x39\x30\x31\x32\x33\x34\x35" + b"\x36\x37\x38\x39\x30\x31\x32\x33\x34\x35\x36\x37"): + '123456789012345678901234567', + (b"\x5c\x31\x32\x33\x34" + b"\x35\x36\x37\x38\x39\x30\x31\x32\x33\x34\x35" + b"\x36\x37\x38\x39\x30\x31\x32\x33\x34\x35\x36" + b"\x37\x38"): '1234567890123456789012345678', + (b"\x5d\x00\x31\x32\x33" + b"\x34\x35\x36\x37\x38\x39\x30\x31\x32\x33\x34" + b"\x35\x36\x37\x38\x39\x30\x31\x32\x33\x34\x35" 
+ b"\x36\x37\x38\x39"): '12345678901234567890123456789', + (b"\x5d\x01\x31\x32\x33" + b"\x34\x35\x36\x37\x38\x39\x30\x31\x32\x33\x34" + b"\x35\x36\x37\x38\x39\x30\x31\x32\x33\x34\x35" + b"\x36\x37\x38\x39\x30"): '123456789012345678901234567890', + b'\x5e\x00\xd7' + 503 * b'\x78': 'x' * 500, + b'\x5e\x06\xb3' + 2000 * b'\x78': 'x' * 2000, + b'\x5f\x00\x10\x53' + 70000 * b'\x78': 'x' * 70000, + } + + def test_string(self): + self.validate_type_decoding('string', self.strings) + + def test_byte(self): + # Python 2.6 doesn't support dictionary comprehension + b = dict((byte_from_int(0xc0 ^ int_from_byte(k[0])) + k[1:], + v.encode('utf-8')) + for k, v in self.strings.items()) + self.validate_type_decoding('byte', b) + + def test_uint16(self): + uint16 = { + b"\xa0": 0, + b"\xa1\xff": 255, + b"\xa2\x01\xf4": 500, + b"\xa2\x2a\x78": 10872, + b"\xa2\xff\xff": 65535, + } + self.validate_type_decoding('uint16', uint16) + + def test_uint32(self): + uint32 = { + b"\xc0": 0, + b"\xc1\xff": 255, + b"\xc2\x01\xf4": 500, + b"\xc2\x2a\x78": 10872, + b"\xc2\xff\xff": 65535, + b"\xc3\xff\xff\xff": 16777215, + b"\xc4\xff\xff\xff\xff": 4294967295, + } + self.validate_type_decoding('uint32', uint32) + + def generate_large_uint(self, bits): + ctrl_byte = b'\x02' if bits == 64 else b'\x03' + uints = { + b'\x00' + ctrl_byte: 0, + b'\x02' + ctrl_byte + b'\x01\xf4': 500, + b'\x02' + ctrl_byte + b'\x2a\x78': 10872, + } + for power in range(bits // 8 + 1): + expected = 2 ** (8 * power) - 1 + input = byte_from_int(power) + ctrl_byte + (b'\xff' * power) + uints[input] = expected + return uints + + def test_uint64(self): + self.validate_type_decoding('uint64', self.generate_large_uint(64)) + + def test_uint128(self): + self.validate_type_decoding('uint128', self.generate_large_uint(128)) + + def validate_type_decoding(self, type, tests): + for input, expected in tests.items(): + self.check_decoding(type, input, expected) + + def check_decoding(self, type, input, expected, name=None): + + name = 
name or expected + db = mmap.mmap(-1, len(input)) + db.write(input) + + decoder = Decoder(db, pointer_test=True) + (actual, _,) = decoder.decode(0) + + if type in ('float', 'double'): + self.assertAlmostEqual(expected, actual, places=3, msg=type) + else: + self.assertEqual(expected, actual, type) + + def test_real_pointers(self): + with open('tests/data/test-data/maps-with-pointers.raw', 'r+b') as db_file: + mm = mmap.mmap(db_file.fileno(), 0) + decoder = Decoder(mm, 0) + + self.assertEqual(({'long_key': 'long_value1'}, 22), + decoder.decode(0)) + + self.assertEqual(({'long_key': 'long_value2'}, 37), + decoder.decode(22)) + + self.assertEqual(({'long_key2': 'long_value1'}, 50), + decoder.decode(37)) + + self.assertEqual(({'long_key2': 'long_value2'}, 55), + decoder.decode(50)) + + self.assertEqual(({'long_key': 'long_value1'}, 57), + decoder.decode(55)) + + self.assertEqual(({'long_key2': 'long_value2'}, 59), + decoder.decode(57)) + + mm.close() diff --git a/tests/maxminddb_test.py b/tests/maxminddb_test.py index 3f796ab..26b05ed 100644 --- a/tests/maxminddb_test.py +++ b/tests/maxminddb_test.py @@ -4,6 +4,10 @@ from __future__ import unicode_literals import sys + +from maxminddb import Reader, InvalidDatabaseError +from maxminddb.compat import FileNotFoundError + if sys.version_info[:2] == (2, 6): import unittest2 as unittest else: @@ -13,8 +17,6 @@ unittest.TestCase.assertRaisesRegex = unittest.TestCase.assertRaisesRegexp unittest.TestCase.assertRegex = unittest.TestCase.assertRegexpMatches -from maxminddb import Reader, InvalidDatabaseError - class TestReader(unittest.TestCase): @@ -75,8 +77,7 @@ def test_broken_database(self): reader = Reader('tests/data/test-data/' 'GeoIP2-City-Test-Broken-Double-Format.mmdb') with self.assertRaisesRegex(InvalidDatabaseError, - "Error while looking up data for " - "2001:220::. 
The MaxMind DB file's data " + "The MaxMind DB file's data " "section contains bad data \(unknown data " "type or corrupt data\)" ): @@ -85,14 +86,14 @@ def test_broken_database(self): def test_ip_validation(self): reader = Reader('tests/data/test-data/MaxMind-DB-test-decoder.mmdb') self.assertRaisesRegex(ValueError, - 'The value "not_ip" is not a valid IP ' - 'address.', + "'not_ip' does not appear to be an IPv4 or " + "IPv6 address", reader.get, ('not_ip')) def test_missing_database(self): - self.assertRaisesRegex(ValueError, - 'The file "file-does-not-exist.mmdb" does ' - 'not exist or is not readable.', + self.assertRaisesRegex(FileNotFoundError, + "No such file or directory: " + "u?'file-does-not-exist.mmdb'", Reader, ('file-does-not-exist.mmdb')) def test_nondatabase(self): @@ -131,7 +132,7 @@ def test_metadata_unknown_attribute(self): ) metadata = reader.metadata() with self.assertRaisesRegex(AttributeError, - "'maxminddb.Metadata' object has no " + "'Metadata' object has no " "attribute 'blah'"): metadata.blah @@ -141,6 +142,7 @@ def test_close(self): ) reader.close() + @unittest.skip('XXX - not compatible with pure Python reader') def test_double_close(self): reader = Reader( 'tests/data/test-data/MaxMind-DB-test-decoder.mmdb' @@ -155,10 +157,12 @@ def test_closed_get(self): 'tests/data/test-data/MaxMind-DB-test-decoder.mmdb' ) reader.close() - self.assertRaisesRegex(IOError, - 'Attempt to read from a closed MaxMind DB.', + self.assertRaisesRegex(ValueError, + 'Attempt to read from a closed MaxMind DB.' 
+ '|closed or invalid', reader.get, ('1.1.1.1')) + @unittest.skip('XXX - not compatible with pure Python reader') def test_closed_metadata(self): reader = Reader( 'tests/data/test-data/MaxMind-DB-test-decoder.mmdb' From e4a56c40dfcdacacd02d11599e3737b8e6edcc67 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 10:36:06 -0800 Subject: [PATCH 02/17] Allow graceful fallback to the pure Python module Also brought the errors in line with the pure Python module. --- maxminddb/__init__.py | 7 +++-- maxminddb/extension/maxminddb.c | 32 +++++++++++---------- setup.py | 22 ++++++++++++-- tests/{maxminddb_test.py => reader_test.py} | 0 4 files changed, 42 insertions(+), 19 deletions(-) rename tests/{maxminddb_test.py => reader_test.py} (100%) diff --git a/maxminddb/__init__.py b/maxminddb/__init__.py index b87e00d..a3d5a8f 100644 --- a/maxminddb/__init__.py +++ b/maxminddb/__init__.py @@ -1,7 +1,10 @@ # pylint:disable=C0111 -from .decoder import InvalidDatabaseError -from .reader import Reader +try: + from maxminddb.extension import Reader, InvalidDatabaseError +except ImportError: + from maxminddb.decoder import InvalidDatabaseError + from maxminddb.reader import Reader __title__ = 'maxminddb' diff --git a/maxminddb/extension/maxminddb.c b/maxminddb/extension/maxminddb.c index a2c8795..f8bdd5e 100644 --- a/maxminddb/extension/maxminddb.c +++ b/maxminddb/extension/maxminddb.c @@ -35,10 +35,12 @@ static PyObject *from_uint128(const MMDB_entry_data_list_s *entry_data_list); #if PY_MAJOR_VERSION >= 3 #define MOD_INIT(name) PyMODINIT_FUNC PyInit_ ## name(void) #define RETURN_MOD_INIT(m) return (m) + #define FILE_NOT_FOUND_ERROR PyExc_FileNotFoundError #else #define MOD_INIT(name) PyMODINIT_FUNC init ## name(void) #define RETURN_MOD_INIT(m) return #define PyInt_FromLong PyLong_FromLong + #define FILE_NOT_FOUND_ERROR PyExc_IOError #endif #ifdef __GNUC__ @@ -56,8 +58,8 @@ static PyObject *Reader_constructor(PyObject *UNUSED(self), PyObject *args) } if (0 != 
access(filename, R_OK)) { - PyErr_Format(PyExc_ValueError, - "The file \"%s\" does not exist or is not readable.", + PyErr_Format(FILE_NOT_FOUND_ERROR, + "No such file or directory: '%s'", filename); return NULL; } @@ -102,7 +104,7 @@ static PyObject *Reader_get(PyObject *self, PyObject *args) MMDB_s *mmdb = mmdb_obj->mmdb; if (NULL == mmdb) { - PyErr_SetString(PyExc_IOError, + PyErr_SetString(PyExc_ValueError, "Attempt to read from a closed MaxMind DB."); return NULL; } @@ -115,7 +117,7 @@ static PyObject *Reader_get(PyObject *self, PyObject *args) if (0 != gai_error) { PyErr_Format(PyExc_ValueError, - "The value \"%s\" is not a valid IP address.", + "'%s' does not appear to be an IPv4 or IPv6 address.", ip_address); return NULL; } @@ -415,10 +417,10 @@ static PyTypeObject Reader_Type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_basicsize = sizeof(Reader_obj), .tp_dealloc = Reader_dealloc, - .tp_doc = "maxminddb.Reader object", + .tp_doc = "Reader object", .tp_flags = Py_TPFLAGS_DEFAULT, .tp_methods = Reader_methods, - .tp_name = "maxminddb.Reader", + .tp_name = "Reader", }; static PyMethodDef Metadata_methods[] = { @@ -453,24 +455,24 @@ static PyTypeObject Metadata_Type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_basicsize = sizeof(Metadata_obj), .tp_dealloc = Metadata_dealloc, - .tp_doc = "maxminddb.Metadata object", + .tp_doc = "Metadata object", .tp_flags = Py_TPFLAGS_DEFAULT, .tp_members = Metadata_members, .tp_methods = Metadata_methods, - .tp_name = "maxminddb.Metadata", + .tp_name = "Metadata", }; static PyMethodDef MaxMindDB_methods[] = { { "Reader", Reader_constructor, METH_VARARGS, - "Creates a new maxminddb.Reader object" }, + "Creates a new maxminddb.extension.Reader object" }, { NULL, NULL, 0, NULL} }; #if PY_MAJOR_VERSION >= 3 static struct PyModuleDef MaxMindDB_module = { PyModuleDef_HEAD_INIT, - .m_name = "maxminddb", - .m_doc = "This is a module to read MaxMind DB file format", + .m_name = "extension", + .m_doc = "This is a C extension to read MaxMind DB 
file format", .m_methods = MaxMindDB_methods, }; #endif @@ -481,17 +483,17 @@ static void init_type(PyObject *m, PyTypeObject *type) if (PyType_Ready(type) == 0) { Py_INCREF(type); - PyModule_AddObject(m, "maxminddb", (PyObject *)type); + PyModule_AddObject(m, "extension", (PyObject *)type); } } -MOD_INIT(maxminddb){ +MOD_INIT(extension){ PyObject *m; #if PY_MAJOR_VERSION >= 3 m = PyModule_Create(&MaxMindDB_module); #else - m = Py_InitModule("maxminddb", MaxMindDB_methods); + m = Py_InitModule("extension", MaxMindDB_methods); #endif if (!m) { @@ -501,7 +503,7 @@ MOD_INIT(maxminddb){ init_type(m, &Reader_Type); init_type(m, &Metadata_Type); - MaxMindDB_error = PyErr_NewException("maxminddb.InvalidDatabaseError", NULL, + MaxMindDB_error = PyErr_NewException("extension.InvalidDatabaseError", NULL, NULL); if (MaxMindDB_error == NULL) { RETURN_MOD_INIT(NULL); diff --git a/setup.py b/setup.py index 2a8cdc6..8b1b2aa 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ ext_module = [ Extension( - 'maxminddb', + 'maxminddb.extension', libraries=['maxminddb'], sources=['maxminddb/extension/maxminddb.c'], extra_compile_args=[ @@ -82,6 +82,24 @@ def build_extension(self, ext): r".*__version__ = '(.*?)'", re.S).match(fd.read().decode('utf8')).group(1) +def status_msgs(*msgs): + print('*' * 75) + for msg in msgs: + print(msg) + print('*' * 75) + + +def find_packages(location): + packages = [] + for pkg in ['maxminddb']: + for _dir, subdirectories, files in ( + os.walk(os.path.join(location, pkg))): + if '__init__.py' in files: + tokens = _dir.split(os.sep)[len(location.split(os.sep)):] + packages.append(".".join(tokens)) + return packages + + def run_setup(with_cext): kwargs = extra.copy() if with_cext: @@ -101,7 +119,7 @@ def run_setup(with_cext): long_description=README, url='http://www.maxmind.com/', bugtrack_url='https://github.com/maxmind/MaxMind-DB-Reader-python/issues', - packages=['maxminddb'], + packages=find_packages('lib'), package_data={'': ['LICENSE']}, 
package_dir={'maxminddb': 'maxminddb'}, include_package_data=True, diff --git a/tests/maxminddb_test.py b/tests/reader_test.py similarity index 100% rename from tests/maxminddb_test.py rename to tests/reader_test.py From 2bd82af38b507d318a864c5d9d1e6023f1ff2dfd Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 10:40:11 -0800 Subject: [PATCH 03/17] Require ipaddr for older Pythons --- setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/setup.py b/setup.py index 8b1b2aa..fe848ff 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,11 @@ extra = {} pypy = hasattr(sys, 'pypy_version_info') jython = sys.platform.startswith('java') +requirements = [] + +if sys.version_info[0] == 2 or (sys.version_info[0] == 3 + and sys.version_info[1] < 3): + requirements.append('ipaddr') ext_module = [ Extension( From f9a92cb2cc8eec31581b1319481837df90194265 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 10:47:54 -0800 Subject: [PATCH 04/17] Actually require the requirements --- setup.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index fe848ff..364293f 100644 --- a/setup.py +++ b/setup.py @@ -16,9 +16,8 @@ Feature = None cmdclass = {} -extra = {} -pypy = hasattr(sys, 'pypy_version_info') -jython = sys.platform.startswith('java') +PYPY = hasattr(sys, 'pypy_version_info') +JYTHON = sys.platform.startswith('java') requirements = [] if sys.version_info[0] == 2 or (sys.version_info[0] == 3 @@ -106,7 +105,7 @@ def find_packages(location): def run_setup(with_cext): - kwargs = extra.copy() + kwargs = {} if with_cext: if Feature: kwargs['features'] = {'extension': Feature( @@ -128,6 +127,7 @@ def run_setup(with_cext): package_data={'': ['LICENSE']}, package_dir={'maxminddb': 'maxminddb'}, include_package_data=True, + install_requires=requirements, tests_require=['nose'], test_suite='nose.collector', license=LICENSE, @@ -149,10 +149,10 @@ def run_setup(with_cext): **kwargs ) -if pypy or 
jython: +if PYPY or JYTHON: run_setup(False) status_msgs( - "WARNING: Disabling C extension due ot Python platform.", + "WARNING: Disabling C extension due to Python platform.", "Plain-Python build succeeded." ) else: From ef31bc14308d3c9f2cfd7ed47b5a9042b8003c24 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 10:56:20 -0800 Subject: [PATCH 05/17] Test both the pure Python and extension in Travis --- .travis.yml | 1 + maxminddb/__init__.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index f41eaed..b35c0fa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,6 +20,7 @@ before_install: script: - CFLAGS="-Werror -Wall -Wextra" python setup.py test + - MAXMINDDB_PURE_PYTHON=1 python3 setup.py test notifications: email: diff --git a/maxminddb/__init__.py b/maxminddb/__init__.py index a3d5a8f..180f9b9 100644 --- a/maxminddb/__init__.py +++ b/maxminddb/__init__.py @@ -1,6 +1,9 @@ # pylint:disable=C0111 +import os try: + if os.environ.get('MAXMINDDB_PURE_PYTHON'): + raise ImportError() from maxminddb.extension import Reader, InvalidDatabaseError except ImportError: from maxminddb.decoder import InvalidDatabaseError From a32b6914d2263118eeaa68c250fe880ae4910f9e Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 11:13:04 -0800 Subject: [PATCH 06/17] Increased compatibility between extension and PP implementation For closed files. Also temporarily disabled a test. (Didn't use skip as it isn't working on 2.6.) 
--- maxminddb/extension/maxminddb.c | 11 ++++------- tests/reader_test.py | 30 ++++++++++++++++-------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/maxminddb/extension/maxminddb.c b/maxminddb/extension/maxminddb.c index f8bdd5e..22f780a 100644 --- a/maxminddb/extension/maxminddb.c +++ b/maxminddb/extension/maxminddb.c @@ -234,14 +234,11 @@ static PyObject *Reader_close(PyObject *self, PyObject *UNUSED(args)) { Reader_obj *mmdb_obj = (Reader_obj *)self; - if (NULL == mmdb_obj->mmdb) { - PyErr_SetString(PyExc_IOError, - "Attempt to close a closed MaxMind DB."); - return NULL; + if (NULL != mmdb_obj->mmdb) { + MMDB_close(mmdb_obj->mmdb); + free(mmdb_obj->mmdb); + mmdb_obj->mmdb = NULL; } - MMDB_close(mmdb_obj->mmdb); - free(mmdb_obj->mmdb); - mmdb_obj->mmdb = NULL; Py_RETURN_NONE; } diff --git a/tests/reader_test.py b/tests/reader_test.py index 26b05ed..be014da 100644 --- a/tests/reader_test.py +++ b/tests/reader_test.py @@ -142,15 +142,12 @@ def test_close(self): ) reader.close() - @unittest.skip('XXX - not compatible with pure Python reader') def test_double_close(self): reader = Reader( 'tests/data/test-data/MaxMind-DB-test-decoder.mmdb' ) - reader.close() - self.assertRaisesRegex(IOError, - 'Attempt to close a closed MaxMind DB.', - reader.close) + self.assertIsNone( + reader.close(), 'Double close does not throw an exception') def test_closed_get(self): reader = Reader( @@ -162,15 +159,20 @@ def test_closed_get(self): '|closed or invalid', reader.get, ('1.1.1.1')) - @unittest.skip('XXX - not compatible with pure Python reader') - def test_closed_metadata(self): - reader = Reader( - 'tests/data/test-data/MaxMind-DB-test-decoder.mmdb' - ) - reader.close() - self.assertRaisesRegex(IOError, - 'Attempt to read from a closed MaxMind DB.', - reader.metadata) + # XXX - Figure out whether we want to have the same behavior on both the + # extension and the pure Python reader. 
If we do, the pure Python + # reader will need to throw an exception or the extension will need + # to keep the metadata in memory. + # + # def test_closed_metadata(self): + # reader = Reader( + # 'tests/data/test-data/MaxMind-DB-test-decoder.mmdb' + # ) + # reader.close() + + # self.assertRaisesRegex(IOError, + # 'Attempt to read from a closed MaxMind DB.', + # reader.metadata) def _check_metadata(self, reader, ip_version, record_size): metadata = reader.metadata() From 2ec26fb892f66599c0cc071cc5fe48a82ed468e8 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 11:16:08 -0800 Subject: [PATCH 07/17] Don't try to use python3 in Python 2 Travis instances --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b35c0fa..4f9ca39 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,7 +20,7 @@ before_install: script: - CFLAGS="-Werror -Wall -Wextra" python setup.py test - - MAXMINDDB_PURE_PYTHON=1 python3 setup.py test + - MAXMINDDB_PURE_PYTHON=1 python setup.py test notifications: email: From 12c8e7e400a5c3c2700424a73b01eae311a46b4c Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 12:38:21 -0800 Subject: [PATCH 08/17] Update readme for module changes --- README.rst | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/README.rst b/README.rst index d7737ad..3e1013d 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,6 @@ -=========================== -MaxMind DB Python Extension -=========================== +======================== +MaxMind DB Python Module +======================== Beta Note --------- @@ -10,17 +10,21 @@ release. Description ----------- -This is a Python extension for reading MaxMind DB files. MaxMind DB is a -binary file format that stores data indexed by IP address subnets (IPv4 or -IPv6). +This is a Python module for reading MaxMind DB files. 
The module includes both +a pure Python reader and an optional C extension. + +MaxMind DB is a binary file format that stores data indexed by IP address +subnets (IPv4 or IPv6). Installation ------------ -You must have the `libmaxminddb `_ C -library installed before installing this extension. +If you want to use the C extension, you must first install the `libmaxminddb +`_ C library before +installing this extension. If the library is not available, the module will +fall back to a pure Python implementation. -To install the MaxMind DB extension, type: +To install maxminddb, type: .. code-block:: bash @@ -74,13 +78,16 @@ invalid IP address or an IPv6 address in an IPv4 database. Requirements ------------ -This code requires CPython 2.6+ or 3.3+. Older versions are not supported. +This code requires Python 2.6+ or 3.3+. The C extension requires CPython. The +pure Python implementation has been tested with PyPy. + +On Python 2, the `ipaddr module `_ is +required. Versioning ---------- -The MaxMind DB Python extension uses -`Semantic Versioning `_. +The MaxMind DB Python module uses `Semantic Versioning `_.
Support ------- From e12365d1d6e4086ecff21b259076098e3bad458d Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 14:20:26 -0800 Subject: [PATCH 09/17] Misc code cleanup and documentation --- maxminddb/compat.py | 6 ++--- maxminddb/decoder.py | 56 ++++++++++++++++++++++++++++----------- maxminddb/errors.py | 3 +++ maxminddb/reader.py | 63 +++++++++++++++++++++++++++++++++----------- 4 files changed, 94 insertions(+), 34 deletions(-) diff --git a/maxminddb/compat.py b/maxminddb/compat.py index c639804..24e4f0e 100644 --- a/maxminddb/compat.py +++ b/maxminddb/compat.py @@ -1,5 +1,7 @@ import sys +# pylint: skip-file + is_py2 = sys.version_info[0] == 2 is_py3_3_or_better = ( @@ -25,12 +27,10 @@ def int_from_bytes(b): byte_from_int = chr else: - # XXX -This does apparently slows down the reader by 25 lookups per second - # during the search tree lookup. Figure out alternative int_from_byte = lambda x: x FileNotFoundError = FileNotFoundError int_from_bytes = lambda x: int.from_bytes(x, 'big') - byte_from_int = lambda x: bytes([x]) + byte_from_int = lambda x: bytes([x]) diff --git a/maxminddb/decoder.py b/maxminddb/decoder.py index d4d8599..f1e892f 100644 --- a/maxminddb/decoder.py +++ b/maxminddb/decoder.py @@ -1,16 +1,30 @@ +""" +maxminddb.decoder +~~~~~~~~~~~~~~~~~ + +This package contains code for decoding the MaxMind DB data section. 
+ +""" from __future__ import unicode_literals -from struct import pack, unpack +import struct -from .compat import byte_from_int, int_from_bytes -from .errors import InvalidDatabaseError +from maxminddb.compat import byte_from_int, int_from_bytes +from maxminddb.errors import InvalidDatabaseError -class Decoder(object): +class Decoder(object): # pylint: disable=too-few-public-methods - """Decodes the data section of the MaxMind DB""" + """Decoder for the data section of the MaxMind DB""" def __init__(self, database_buffer, pointer_base=0, pointer_test=False): + """Created a Decoder for a MaxMind DB + + Arguments: + database_buffer -- an mmap'd MaxMind DB file. + pointer_base -- the base number to use when decoding a pointer + pointer_test -- used for internal unit testing of pointer code + """ self._pointer_test = pointer_test self._buffer = database_buffer self._pointer_base = pointer_base @@ -29,7 +43,11 @@ def _decode_bytes(self, size, offset): new_offset = offset + size return self._buffer[offset:new_offset], new_offset + # pylint: disable=no-self-argument + # |-> I am open to better ways of doing this as long as it doesn't involve + # lots of code duplication. 
def _decode_packed_type(type_code, type_size, pad=False): + # pylint: disable=protected-access, missing-docstring def unpack_type(self, size, offset): if not pad: self._verify_size(size, type_size) @@ -37,7 +55,7 @@ def unpack_type(self, size, offset): packed_bytes = self._buffer[offset:new_offset] if pad: packed_bytes = packed_bytes.rjust(type_size, b'\x00') - (value,) = unpack(type_code, packed_bytes) + (value,) = struct.unpack(type_code, packed_bytes) return value, new_offset return unpack_type @@ -59,9 +77,9 @@ def _decode_map(self, size, offset): def _decode_pointer(self, size, offset): pointer_size = ((size >> 3) & 0x3) + 1 new_offset = offset + pointer_size - b = self._buffer[offset:new_offset] - packed = b if pointer_size == 4 else pack( - b'!c', byte_from_int(size & 0x7)) + b + pointer_bytes = self._buffer[offset:new_offset] + packed = pointer_bytes if pointer_size == 4 else struct.pack( + b'!c', byte_from_int(size & 0x7)) + pointer_bytes unpacked = int_from_bytes(packed) pointer = unpacked + self._pointer_base + \ self._pointer_value_offset[pointer_size] @@ -72,8 +90,8 @@ def _decode_pointer(self, size, offset): def _decode_uint(self, size, offset): new_offset = offset + size - b = self._buffer[offset:new_offset] - return int_from_bytes(b), new_offset + uint_bytes = self._buffer[offset:new_offset] + return int_from_bytes(uint_bytes), new_offset def _decode_utf8_string(self, size, offset): new_offset = offset + size @@ -96,8 +114,13 @@ def _decode_utf8_string(self, size, offset): } def decode(self, offset): + """Decode a section of the data section starting at offset + + Arguments: + offset -- the location of the data structure to decode + """ new_offset = offset + 1 - (ctrl_byte,) = unpack(b'!B', self._buffer[offset:new_offset]) + (ctrl_byte,) = struct.unpack(b'!B', self._buffer[offset:new_offset]) type_num = ctrl_byte >> 5 # Extended type if not type_num: @@ -108,7 +131,7 @@ def decode(self, offset): return self._type_dispatch[type_num](self, size, 
new_offset) def _read_extended(self, offset): - (next_byte,) = unpack(b'!B', self._buffer[offset:offset + 1]) + (next_byte,) = struct.unpack(b'!B', self._buffer[offset:offset + 1]) type_num = next_byte + 7 if type_num < 7: raise InvalidDatabaseError( @@ -134,10 +157,11 @@ def _size_from_ctrl_byte(self, ctrl_byte, offset, type_num): # Using unpack rather than int_from_bytes as it is about 200 lookups # per second faster here. if size == 29: - size = 29 + unpack(b'!B', size_bytes)[0] + size = 29 + struct.unpack(b'!B', size_bytes)[0] elif size == 30: - size = 285 + unpack(b'!H', size_bytes)[0] + size = 285 + struct.unpack(b'!H', size_bytes)[0] elif size > 30: - size = unpack(b'!I', size_bytes.rjust(4, b'\x00'))[0] + 65821 + size = struct.unpack( + b'!I', size_bytes.rjust(4, b'\x00'))[0] + 65821 return size, offset + bytes_to_read diff --git a/maxminddb/errors.py b/maxminddb/errors.py index d02a772..3e094f5 100644 --- a/maxminddb/errors.py +++ b/maxminddb/errors.py @@ -1,3 +1,6 @@ +"""This module contains custom errors for the MaxMind DB reader""" + + class InvalidDatabaseError(RuntimeError): """This error is thrown when unexpected data is found in the database.""" diff --git a/maxminddb/reader.py b/maxminddb/reader.py index 8853fc5..bf18fbf 100644 --- a/maxminddb/reader.py +++ b/maxminddb/reader.py @@ -1,11 +1,18 @@ +""" +maxminddb.reader +~~~~~~~~~~~~~~~~ + +This module contains the pure Python database reader and related classes. 
+ +""" from __future__ import unicode_literals import mmap -from struct import unpack +import struct -from .compat import byte_from_int, int_from_byte, ipaddress -from .decoder import Decoder -from .errors import InvalidDatabaseError +from maxminddb.compat import byte_from_int, int_from_byte, ipaddress +from maxminddb.decoder import Decoder +from maxminddb.errors import InvalidDatabaseError class Reader(object): @@ -23,8 +30,9 @@ class Reader(object): def __init__(self, database): """Reader for the MaxMind DB file format - The file passed to it must be a valid MaxMind DB file such as a GeoIP2 - database file. + Arguments: + database -- A path to a valid MaxMind DB file such as a GeoIP2 + database file. """ with open(database, 'r+b') as db_file: self._buffer = mmap.mmap( @@ -41,25 +49,31 @@ def __init__(self, database): metadata_start += len(self._METADATA_START_MARKER) metadata_decoder = Decoder(self._buffer, metadata_start) (metadata, _) = metadata_decoder.decode(metadata_start) - self._metadata = Metadata(**metadata) + self._metadata = Metadata(**metadata) # pylint: disable=star-args self._decoder = Decoder(self._buffer, self._metadata.search_tree_size + self._DATA_SECTION_SEPARATOR_SIZE) # XXX - consider making a property def metadata(self): + """Return the metadata associated with the MaxMind DB file""" return self._metadata # XXX - change to lookup? def get(self, ip_address): - """Look up ip_address in the MaxMind DB""" - ip = ipaddress.ip_address(ip_address) + """Return the record for the ip_address in the MaxMind DB + - if ip.version == 6 and self._metadata.ip_version == 4: + Arguments: + ip_address -- an IP address in the standard string notation + """ + address = ipaddress.ip_address(ip_address) + + if address.version == 6 and self._metadata.ip_version == 4: raise ValueError('Error looking up {0}. 
You attempted to look up ' 'an IPv6 address in an IPv4-only database.'.format( ip_address)) - pointer = self._find_address_in_tree(ip) + pointer = self._find_address_in_tree(address) return self._resolve_data_pointer(pointer) if pointer else None @@ -93,7 +107,7 @@ def _start_node(self, length): return self._ipv4_start node = 0 - for i in range(96): + for _ in range(96): if node >= self._metadata.node_count: break node = self._read_node(node, 0) @@ -108,7 +122,7 @@ def _read_node(self, node_number, index): offset = base_offset + index * 3 node_bytes = b'\x00' + self._buffer[offset:offset + 3] elif record_size == 28: - (middle,) = unpack( + (middle,) = struct.unpack( b'!B', self._buffer[base_offset + 3:base_offset + 4]) if index: middle &= 0x0F @@ -123,7 +137,7 @@ def _read_node(self, node_number, index): else: raise InvalidDatabaseError( 'Unknown record size: {0}'.format(record_size)) - return unpack(b'!I', node_bytes)[0] + return struct.unpack(b'!I', node_bytes)[0] def _resolve_data_pointer(self, pointer): resolved = pointer - self._metadata.node_count + \ @@ -137,18 +151,37 @@ def _resolve_data_pointer(self, pointer): return data def close(self): + """Closes the MaxMind DB file and returns the resources to the system""" self._buffer.close() class Metadata(object): + """Metadata for the MaxMind DB reader""" + + # pylint: disable=too-many-instance-attributes def __init__(self, **kwargs): - self.__dict__.update(kwargs) + """Creates new Metadata object. 
kwargs are key/value pairs from spec""" + # Although I could just update __dict__, that is less obvious and it + # doesn't work well with static analysis tools and some IDEs + self.node_count = kwargs['node_count'] + self.record_size = kwargs['record_size'] + self.ip_version = kwargs['ip_version'] + self.database_type = kwargs['database_type'] + self.languages = kwargs['languages'] + self.binary_format_major_version = kwargs[ + 'binary_format_major_version'] + self.binary_format_minor_version = kwargs[ + 'binary_format_minor_version'] + self.build_epoch = kwargs['build_epoch'] + self.description = kwargs['description'] @property def node_byte_size(self): + """The size of a node in bytes""" return self.record_size // 4 @property def search_tree_size(self): + """The size of the search tree""" return self.node_count * self.node_byte_size From 841bea84e9b0d3a037c370fb5173c09ecd2694d0 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 14:22:59 -0800 Subject: [PATCH 10/17] Run pylint from Travis --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 4f9ca39..70ecffd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,6 +21,7 @@ before_install: script: - CFLAGS="-Werror -Wall -Wextra" python setup.py test - MAXMINDDB_PURE_PYTHON=1 python setup.py test + - if [[ $TRAVIS_PYTHON_VERSION != '3.3' ]]; then pylint maxminddb; fi notifications: email: From 9c97336627a4f0f026d82835c20ea000a3ccc8d4 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 14:40:21 -0800 Subject: [PATCH 11/17] Minor cleanup --- maxminddb/decoder.py | 4 ++-- maxminddb/reader.py | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/maxminddb/decoder.py b/maxminddb/decoder.py index f1e892f..eaf849b 100644 --- a/maxminddb/decoder.py +++ b/maxminddb/decoder.py @@ -97,7 +97,7 @@ def _decode_utf8_string(self, size, offset): new_offset = offset + size return self._buffer[offset:new_offset].decode('utf-8'), new_offset 
- _type_dispatch = { + _type_decoder = { 1: _decode_pointer, 2: _decode_utf8_string, 3: _decode_packed_type(b'!d', 8), # double, @@ -128,7 +128,7 @@ def decode(self, offset): (size, new_offset) = self._size_from_ctrl_byte( ctrl_byte, new_offset, type_num) - return self._type_dispatch[type_num](self, size, new_offset) + return self._type_decoder[type_num](self, size, new_offset) def _read_extended(self, offset): (next_byte,) = struct.unpack(b'!B', self._buffer[offset:offset + 1]) diff --git a/maxminddb/reader.py b/maxminddb/reader.py index bf18fbf..ff9afbc 100644 --- a/maxminddb/reader.py +++ b/maxminddb/reader.py @@ -19,7 +19,7 @@ class Reader(object): """ Instances of this class provide a reader for the MaxMind DB format. IP - addresses can be looked up using the get method. + addresses can be looked up using the ``get`` method. """ _DATA_SECTION_SEPARATOR_SIZE = 16 @@ -54,12 +54,10 @@ def __init__(self, database): self._decoder = Decoder(self._buffer, self._metadata.search_tree_size + self._DATA_SECTION_SEPARATOR_SIZE) - # XXX - consider making a property def metadata(self): """Return the metadata associated with the MaxMind DB file""" return self._metadata - # XXX - change to lookup? def get(self, ip_address): """Return the record for the ip_address in the MaxMind DB From dcafedd2430aea8d42866026dcb204cf160697ae Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 15:03:31 -0800 Subject: [PATCH 12/17] Re-enable test --- tests/reader_test.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tests/reader_test.py b/tests/reader_test.py index be014da..c2f1e74 100644 --- a/tests/reader_test.py +++ b/tests/reader_test.py @@ -163,16 +163,22 @@ def test_closed_get(self): # extension and the pure Python reader. If we do, the pure Python # reader will need to throw an exception or the extension will need # to keep the metadata in memory. 
- # - # def test_closed_metadata(self): - # reader = Reader( - # 'tests/data/test-data/MaxMind-DB-test-decoder.mmdb' - # ) - # reader.close() - - # self.assertRaisesRegex(IOError, - # 'Attempt to read from a closed MaxMind DB.', - # reader.metadata) + def test_closed_metadata(self): + reader = Reader( + 'tests/data/test-data/MaxMind-DB-test-decoder.mmdb' + ) + reader.close() + + # The primary purpose of this is to ensure the extension doesn't + # segfault + try: + metadata = reader.metadata() + except IOError as ex: + self.assertEqual('Attempt to read from a closed MaxMind DB.', + str(ex), 'extension throws exception') + else: + self.assertIsNotNone( + metadata, 'pure Python implementation returns value') def _check_metadata(self, reader, ip_version, record_size): metadata = reader.metadata() From a94c11e42c683b0fc85a70e59d6b37dfdcc727ed Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 15:08:02 -0800 Subject: [PATCH 13/17] Install pylint --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 70ecffd..94dd126 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,6 +16,7 @@ before_install: - sudo make install - sudo ldconfig - cd .. + - pip install pylint - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install unittest2; fi script: From 6326c14f41ebb23b53f4195abeeaa64e149c1723 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 15:16:27 -0800 Subject: [PATCH 14/17] Disable pylint tests for now --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 94dd126..d33da94 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,13 +16,13 @@ before_install: - sudo make install - sudo ldconfig - cd .. 
- - pip install pylint +# - pip install pylint - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install unittest2; fi script: - CFLAGS="-Werror -Wall -Wextra" python setup.py test - MAXMINDDB_PURE_PYTHON=1 python setup.py test - - if [[ $TRAVIS_PYTHON_VERSION != '3.3' ]]; then pylint maxminddb; fi +# - if [[ $TRAVIS_PYTHON_VERSION != '3.3' ]]; then pylint maxminddb; fi notifications: email: From b8f33cea83ff41b7dfb55212698f218071940fc9 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 15:23:52 -0800 Subject: [PATCH 15/17] Actually use .pylintrc --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index d33da94..c3264fb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,13 +16,13 @@ before_install: - sudo make install - sudo ldconfig - cd .. -# - pip install pylint + - pip install pylint - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install unittest2; fi script: - CFLAGS="-Werror -Wall -Wextra" python setup.py test - MAXMINDDB_PURE_PYTHON=1 python setup.py test -# - if [[ $TRAVIS_PYTHON_VERSION != '3.3' ]]; then pylint maxminddb; fi + - if [[ $TRAVIS_PYTHON_VERSION != '3.3' ]]; then pylint --rcfile .pylintrc maxminddb/*.py; fi notifications: email: From 13475ef62e78010c81b5918a9df8c082274c1020 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Mon, 10 Feb 2014 15:51:47 -0800 Subject: [PATCH 16/17] Only run pylint on 2.7 for simplicity --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c3264fb..b154a51 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ before_install: script: - CFLAGS="-Werror -Wall -Wextra" python setup.py test - MAXMINDDB_PURE_PYTHON=1 python setup.py test - - if [[ $TRAVIS_PYTHON_VERSION != '3.3' ]]; then pylint --rcfile .pylintrc maxminddb/*.py; fi + - if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pylint --rcfile .pylintrc maxminddb/*.py; fi notifications: email: From 
9eddc72bd61e2ccc5483671efa56d687f8b9e8e8 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Tue, 11 Feb 2014 07:33:36 -0800 Subject: [PATCH 17/17] Updated error docs --- maxminddb/errors.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/maxminddb/errors.py b/maxminddb/errors.py index 3e094f5..f04ff02 100644 --- a/maxminddb/errors.py +++ b/maxminddb/errors.py @@ -1,4 +1,9 @@ -"""This module contains custom errors for the MaxMind DB reader""" +""" +maxminddb.errors +~~~~~~~~~~~~~~~~ + +This module contains custom errors for the MaxMind DB reader +""" class InvalidDatabaseError(RuntimeError):