Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[MESSAGES CONTROL]
disable=R0201,W0105

[BASIC]

no-docstring-rgx=_.*
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ python:
- 2.6
- 2.7
- 3.3
- pypy

before_install:
- git submodule update --init --recursive
Expand All @@ -15,10 +16,13 @@ before_install:
- sudo make install
- sudo ldconfig
- cd ..
- pip install pylint
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install unittest2; fi

script:
- CFLAGS="-Werror -Wall -Wextra" python setup.py test
- MAXMINDDB_PURE_PYTHON=1 python setup.py test
- if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pylint --rcfile .pylintrc maxminddb/*.py; fi

notifications:
email:
Expand Down
31 changes: 19 additions & 12 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
===========================
MaxMind DB Python Extension
===========================
========================
MaxMind DB Python Module
========================

Beta Note
---------
Expand All @@ -10,17 +10,21 @@ release.
Description
-----------

This is a Python extension for reading MaxMind DB files. MaxMind DB is a
binary file format that stores data indexed by IP address subnets (IPv4 or
IPv6).
This is a Python module for reading MaxMind DB files. The module includes both
a pure Python reader and an optional C extension.

MaxMind DB is a binary file format that stores data indexed by IP address
subnets (IPv4 or IPv6).

Installation
------------

You must have the `libmaxminddb <https://github.com/maxmind/libmaxminddb>`_ C
library installed before installing this extension.
If you want to use the C extension, you must first install the `libmaxminddb
<https://github.com/maxmind/libmaxminddb>`_ C library before installing this
extension. If the library is not available, the module will fall back to a
pure Python implementation.

To install the MaxMind DB extension, type:
To install maxminddb, type:

.. code-block:: bash

Expand Down Expand Up @@ -74,13 +78,16 @@ invalid IP address or an IPv6 address in an IPv4 database.
Requirements
------------

This code requires CPython 2.6+ or 3.3+. Older versions are not supported.
This code requires Python 2.6+ or 3.3+. The C extension requires CPython. The
pure Python implementation has been tested with PyPy.

On Python 2, the `ipaddr module <https://code.google.com/p/ipaddr-py/>`_ is
required.

Versioning
----------

The MaxMind DB Python extension uses
`Semantic Versioning <http://semver.org/>`_.
The MaxMind DB Python module uses `Semantic Versioning <http://semver.org/>`_.

Support
-------
Expand Down
17 changes: 17 additions & 0 deletions maxminddb/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# pylint:disable=C0111
import os

# A non-empty MAXMINDDB_PURE_PYTHON environment variable bypasses the C
# extension entirely; otherwise the extension is tried first and the pure
# Python reader is used only when importing it fails.
if os.environ.get('MAXMINDDB_PURE_PYTHON'):
    from maxminddb.decoder import InvalidDatabaseError
    from maxminddb.reader import Reader
else:
    try:
        from maxminddb.extension import Reader, InvalidDatabaseError
    except ImportError:
        from maxminddb.decoder import InvalidDatabaseError
        from maxminddb.reader import Reader


__title__ = 'maxminddb'
__version__ = '0.3.0'
__author__ = 'Gregory Oschwald'
__license__ = 'LGPLv2.1+'
__copyright__ = 'Copyright 2014 Maxmind, Inc.'
36 changes: 36 additions & 0 deletions maxminddb/compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import sys

# pylint: skip-file

# True when running under any Python 2.x interpreter.
is_py2 = sys.version_info[0] == 2

# Compare as a tuple: checking major and minor separately would report
# False for a later major version whose minor number is below 3.
is_py3_3_or_better = sys.version_info >= (3, 3)

if is_py2:
    # Python 2 has no ``ipaddress`` module; the third-party ``ipaddr``
    # package is used instead and given the ``ip_address`` name that callers
    # of the Python 3 module expect.
    import ipaddr as ipaddress  # pylint:disable=F0401
    ipaddress.ip_address = ipaddress.IPAddress

    int_from_byte = ord

    FileNotFoundError = IOError

    def int_from_bytes(b):
        """Return the big-endian integer value of a byte string (0 if empty)."""
        if b:
            return int(b.encode("hex"), 16)
        return 0

    byte_from_int = chr

else:
    import ipaddress  # pylint:disable=F0401

    def int_from_byte(x):
        """Return x unchanged; indexing bytes already yields an int here."""
        return x

    # Binds the builtin as a module-level name so that it can be imported
    # from this module on either Python version.
    FileNotFoundError = FileNotFoundError

    def int_from_bytes(x):
        """Return the big-endian integer value of a bytes object."""
        return int.from_bytes(x, 'big')

    def byte_from_int(x):
        """Return a length-one bytes object holding the integer x."""
        return bytes([x])
167 changes: 167 additions & 0 deletions maxminddb/decoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
"""
maxminddb.decoder
~~~~~~~~~~~~~~~~~

This module contains code for decoding the MaxMind DB data section.

"""
from __future__ import unicode_literals

import struct

from maxminddb.compat import byte_from_int, int_from_bytes
from maxminddb.errors import InvalidDatabaseError


class Decoder(object):  # pylint: disable=too-few-public-methods

    """Decoder for the data section of the MaxMind DB"""

    def __init__(self, database_buffer, pointer_base=0, pointer_test=False):
        """Create a Decoder for a MaxMind DB

        Arguments:
        database_buffer -- an mmap'd MaxMind DB file.
        pointer_base -- the base number to use when decoding a pointer
        pointer_test -- used for internal unit testing of pointer code
        """
        self._pointer_test = pointer_test
        self._buffer = database_buffer
        self._pointer_base = pointer_base

    def _decode_array(self, size, offset):
        """Decode `size` consecutive values starting at `offset` into a list."""
        array = []
        for _ in range(size):
            (value, offset) = self.decode(offset)
            array.append(value)
        return array, offset

    def _decode_boolean(self, size, offset):
        """Return the boolean carried in the size field; no payload is read."""
        return size != 0, offset

    def _decode_bytes(self, size, offset):
        """Return the raw `size` bytes found at `offset`."""
        new_offset = offset + size
        return self._buffer[offset:new_offset], new_offset

    # pylint: disable=no-self-argument
    # |-> I am open to better ways of doing this as long as it doesn't involve
    #     lots of code duplication.
    def _decode_packed_type(type_code, type_size, pad=False):
        """Build a decoder method for a fixed-width struct type.

        This runs while the class body is executed (it takes no `self`) and
        returns the function that is stored in `_type_decoder`.

        Arguments:
        type_code -- the struct format used to unpack the value
        type_size -- the number of bytes the struct format requires
        pad -- if true, values stored in fewer than type_size bytes are
               accepted and left-padded with zero bytes before unpacking
        """
        # pylint: disable=protected-access, missing-docstring
        def unpack_type(self, size, offset):
            # Unpadded types must be stored at exactly type_size bytes; a
            # padded type may be shorter but can never be longer.
            if not pad or size > type_size:
                self._verify_size(type_size, size)
            # Only `size` bytes belong to this value. Reading type_size
            # bytes would consume data belonging to the following field.
            new_offset = offset + size
            packed_bytes = self._buffer[offset:new_offset]
            if pad:
                packed_bytes = packed_bytes.rjust(type_size, b'\x00')
            (value,) = struct.unpack(type_code, packed_bytes)
            return value, new_offset
        return unpack_type

    def _decode_map(self, size, offset):
        """Decode `size` key/value pairs starting at `offset` into a dict."""
        container = {}
        for _ in range(size):
            (key, offset) = self.decode(offset)
            (value, offset) = self.decode(offset)
            container[key] = value
        return container, offset

    # Constant added to the decoded pointer value, keyed by the number of
    # bytes that follow the control byte.
    _pointer_value_offset = {
        1: 0,
        2: 2048,
        3: 526336,
        4: 0,
    }

    def _decode_pointer(self, size, offset):
        """Decode a pointer and return the value it refers to.

        When the decoder was created with pointer_test set, the resolved
        pointer itself is returned instead of the value it points at.
        """
        pointer_size = ((size >> 3) & 0x3) + 1
        new_offset = offset + pointer_size
        pointer_bytes = self._buffer[offset:new_offset]
        # For pointers shorter than four bytes, the low three bits of the
        # size field are the most significant bits of the pointer value.
        packed = pointer_bytes if pointer_size == 4 else struct.pack(
            b'!c', byte_from_int(size & 0x7)) + pointer_bytes
        unpacked = int_from_bytes(packed)
        pointer = unpacked + self._pointer_base + \
            self._pointer_value_offset[pointer_size]
        if self._pointer_test:
            return pointer, new_offset
        # The offset following the pointed-to value is discarded; decoding
        # resumes right after the pointer itself.
        (value, _) = self.decode(pointer)
        return value, new_offset

    def _decode_uint(self, size, offset):
        """Decode a `size`-byte big-endian unsigned integer."""
        new_offset = offset + size
        uint_bytes = self._buffer[offset:new_offset]
        return int_from_bytes(uint_bytes), new_offset

    def _decode_utf8_string(self, size, offset):
        """Decode `size` bytes at `offset` as a UTF-8 string."""
        new_offset = offset + size
        return self._buffer[offset:new_offset].decode('utf-8'), new_offset

    # Maps a type number to the function that decodes that type.
    _type_decoder = {
        1: _decode_pointer,
        2: _decode_utf8_string,
        3: _decode_packed_type(b'!d', 8),  # double,
        4: _decode_bytes,
        5: _decode_uint,  # uint16
        6: _decode_uint,  # uint32
        7: _decode_map,
        8: _decode_packed_type(b'!i', 4, pad=True),  # int32
        9: _decode_uint,  # uint64
        10: _decode_uint,  # uint128
        11: _decode_array,
        14: _decode_boolean,
        15: _decode_packed_type(b'!f', 4),  # float,
    }

    def decode(self, offset):
        """Decode a section of the data section starting at offset

        Arguments:
        offset -- the location of the data structure to decode

        Returns a (value, new_offset) tuple, where new_offset is the
        location immediately after the decoded structure.

        Raises InvalidDatabaseError if the type number found at offset is
        not one this decoder knows how to handle.
        """
        new_offset = offset + 1
        (ctrl_byte,) = struct.unpack(b'!B', self._buffer[offset:new_offset])
        type_num = ctrl_byte >> 5
        # Extended type
        if not type_num:
            (type_num, new_offset) = self._read_extended(new_offset)

        # Report unknown type numbers as a database error rather than
        # leaking a KeyError from the dispatch table.
        try:
            decoder = self._type_decoder[type_num]
        except KeyError:
            raise InvalidDatabaseError(
                'Unexpected type number ({type}) '
                'encountered'.format(type=type_num))

        (size, new_offset) = self._size_from_ctrl_byte(
            ctrl_byte, new_offset, type_num)
        return decoder(self, size, new_offset)

    def _read_extended(self, offset):
        """Read the extended type byte at `offset`.

        Returns a (type_num, new_offset) tuple. Raises InvalidDatabaseError
        if the extended type resolves to a type number below 8.
        """
        (next_byte,) = struct.unpack(b'!B', self._buffer[offset:offset + 1])
        type_num = next_byte + 7
        if type_num < 8:
            raise InvalidDatabaseError(
                'Something went horribly wrong in the decoder. An '
                'extended type resolved to a type number < 8 '
                '({type})'.format(type=type_num))
        return type_num, offset + 1

    def _verify_size(self, expected, actual):
        """Raise InvalidDatabaseError unless `expected` equals `actual`."""
        if expected != actual:
            raise InvalidDatabaseError(
                'The MaxMind DB file\'s data section contains bad data '
                '(unknown data type or corrupt data)'
            )

    def _size_from_ctrl_byte(self, ctrl_byte, offset, type_num):
        """Work out the payload size described by a control byte.

        Returns a (size, new_offset) tuple, where new_offset is the location
        after any additional size bytes that were read.
        """
        size = ctrl_byte & 0x1f
        # Pointers (type 1) interpret these bits themselves.
        if type_num == 1:
            return size, offset
        # Sizes 29, 30 and 31 mean that 1, 2 or 3 further bytes hold the
        # real size.
        bytes_to_read = 0 if size < 29 else size - 28
        size_bytes = self._buffer[offset:offset + bytes_to_read]

        # Using unpack rather than int_from_bytes as it is about 200 lookups
        # per second faster here.
        if size == 29:
            size = 29 + struct.unpack(b'!B', size_bytes)[0]
        elif size == 30:
            size = 285 + struct.unpack(b'!H', size_bytes)[0]
        elif size > 30:
            size = struct.unpack(
                b'!I', size_bytes.rjust(4, b'\x00'))[0] + 65821

        return size, offset + bytes_to_read
11 changes: 11 additions & 0 deletions maxminddb/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
maxminddb.errors
~~~~~~~~~~~~~~~~

This module contains custom errors for the MaxMind DB reader
"""


class InvalidDatabaseError(RuntimeError):

    """This error is raised when unexpected data is found in the database."""
Loading