Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,22 @@
History
-------

1.2.0 (2015-04-XX)
++++++++++++++++++

* Previously, if ``MODE_FILE`` was used and the database was loaded before
  forking, the parent and children would use the same file table entry
  without locking, causing errors when reading the database due to the
  offset being changed by other processes. In ``MODE_FILE``, the reader
  will now use ``os.pread`` when available and a lock when ``os.pread``
  is not available (e.g., Python 2). If you are using ``MODE_FILE`` on a
  Python without ``os.pread``, it is recommended that you open the
  database after forking to reduce resource contention.
* The ``Metadata`` class now overloads ``__repr__`` to provide a useful
representation of the contents when debugging.
* An ``InvalidDatabaseError`` will now be thrown if the data type read from
the database is invalid. Previously a ``KeyError`` was thrown.

1.1.1 (2014-12-10)
++++++++++++++++++

Expand Down
14 changes: 3 additions & 11 deletions maxminddb/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,10 @@

# pylint: skip-file

is_py2 = sys.version_info[0] == 2

is_py3_3_or_better = (
sys.version_info[0] >= 3 and sys.version_info[1] >= 3)

if is_py2 and not is_py3_3_or_better:
if sys.version_info[0] == 2:
import ipaddr as ipaddress # pylint:disable=F0401
ipaddress.ip_address = ipaddress.IPAddress
else:
import ipaddress # pylint:disable=F0401


if is_py2:
int_from_byte = ord

FileNotFoundError = IOError
Expand All @@ -25,8 +16,9 @@ def int_from_bytes(b):
return 0

byte_from_int = chr

else:
import ipaddress # pylint:disable=F0401

int_from_byte = lambda x: x

FileNotFoundError = FileNotFoundError
Expand Down
4 changes: 4 additions & 0 deletions maxminddb/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ def decode(self, offset):
if not type_num:
(type_num, new_offset) = self._read_extended(new_offset)

if not type_num in self._type_decoder:
raise InvalidDatabaseError('Unexpected type number ({type}) '
'encountered'.format(type=type_num))

(size, new_offset) = self._size_from_ctrl_byte(
ctrl_byte, new_offset, type_num)
return self._type_decoder[type_num](self, size, new_offset)
Expand Down
41 changes: 34 additions & 7 deletions maxminddb/file.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
"""This is intended for internal use only"""
"""For internal use only. It provides a slice-like file reader."""

import os

try:
from multiprocessing import Lock
except ImportError:
from threading import Lock


class FileBuffer(object):

Expand All @@ -10,21 +15,20 @@ class FileBuffer(object):
def __init__(self, database):
    """Open *database* for binary reading and record its total size.

    :param database: path to the MaxMind DB file on disk.
    :raises IOError: if the file cannot be opened.
    """
    self._handle = open(database, 'rb')
    self._size = os.fstat(self._handle.fileno()).st_size
    if not hasattr(os, 'pread'):
        # Without os.pread (e.g., Python 2) reads must seek the shared
        # file offset, so they are serialized with a lock; see _read.
        self._lock = Lock()

def __getitem__(self, key):
    """Return the byte(s) at *key*.

    An int index yields a single byte; a slice yields the contiguous
    range from ``key.start`` to ``key.stop`` (the step is ignored).
    """
    if isinstance(key, int):
        return self._read(1, key)
    if isinstance(key, slice):
        return self._read(key.stop - key.start, key.start)
    raise TypeError("Invalid argument type.")

def rfind(self, needle, start):
    """Search backwards for *needle* in the tail beginning at *start*.

    Returns the absolute offset of the last occurrence, or -1 when the
    needle is not found.
    """
    haystack = self._read(self._size - start - 1, start)
    relative = haystack.rfind(needle)
    return relative if relative == -1 else start + relative
Expand All @@ -36,3 +40,26 @@ def size(self):
def close(self):
    """Close the underlying file handle; further reads will fail."""
    self._handle.close()

# Choose the _read implementation once, at class-creation time:
# os.pread reads at an absolute offset without moving the shared file
# position, so no locking is needed when it is available.
if hasattr(os, 'pread'):

    def _read(self, buffersize, offset):
        """Read *buffersize* bytes at *offset* using ``os.pread``.

        pread leaves the file offset untouched, so concurrent readers
        (including forked processes sharing the file table entry) do
        not interfere with one another.
        """
        # pylint: disable=no-member
        return os.pread(self._handle.fileno(), buffersize, offset)

else:

    def _read(self, buffersize, offset):
        """Read *buffersize* bytes at *offset* with seek+read under a lock.

        This lock is necessary as after a fork, the different processes
        will share the same file table entry, even if we dup the fd, and
        as such the same offsets. There does not appear to be a way to
        duplicate the file table entry and we cannot re-open based on the
        original path as that file may have been replaced with another or
        unlinked.
        """
        with self._lock:
            self._handle.seek(offset)
            return self._handle.read(buffersize)
11 changes: 10 additions & 1 deletion maxminddb/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ def __init__(self, database, mode=MODE_AUTO):
metadata_start += len(self._METADATA_START_MARKER)
metadata_decoder = Decoder(self._buffer, metadata_start)
(metadata, _) = metadata_decoder.decode(metadata_start)
self._metadata = Metadata(**metadata) # pylint: disable=star-args
self._metadata = Metadata(
**metadata) # pylint: disable=bad-option-value

self._decoder = Decoder(self._buffer, self._metadata.search_tree_size
+ self._DATA_SECTION_SEPARATOR_SIZE)
Expand Down Expand Up @@ -176,6 +177,7 @@ def _resolve_data_pointer(self, pointer):

def close(self):
    """Closes the MaxMind DB file and returns the resources to the system"""
    # pylint: disable=unidiomatic-typecheck
    # NOTE(review): an exact type check (not isinstance) appears
    # deliberate here — self._buffer may be a plain str/bytes object
    # holding the whole database in memory, which has no close();
    # anything else is presumably a file-backed buffer that must be
    # closed. TODO: confirm against the modes that set self._buffer.
    if type(self._buffer) not in (str, bytes):
        self._buffer.close()

Expand Down Expand Up @@ -210,3 +212,10 @@ def node_byte_size(self):
def search_tree_size(self):
    """The size of the search tree in bytes.

    Derived from ``node_count`` and ``node_byte_size``.
    """
    return self.node_count * self.node_byte_size

def __repr__(self):
    """Return a constructor-style representation for debugging."""
    pairs = ('%s=%r' % (name, value)
             for name, value in self.__dict__.items())
    return '%s.%s(%s)' % (
        self.__module__, self.__class__.__name__, ', '.join(pairs))
2 changes: 1 addition & 1 deletion tests/data
Submodule data updated 37 files
+1 −0 .gitignore
+11 −0 .perltidyallrc
+7 −0 .tidyallrc
+52 −20 MaxMind-DB-spec.md
+0 −46 bin/regen-github-pages
+32 −0 source-data/GeoIP2-Anonymous-IP-Test.json
+12,376 −239 source-data/GeoIP2-City-Test.json
+102 −0 source-data/GeoIP2-Connection-Type-Test.json
+10,975 −0 source-data/GeoIP2-Country-Test.json
+452 −0 source-data/GeoIP2-Domain-Test.json
+12,585 −0 source-data/GeoIP2-ISP-Test.json
+12,454 −0 source-data/GeoIP2-Precision-City-Test.json
+8 −0 source-data/README
+ test-data/GeoIP2-Anonymous-IP-Test.mmdb
+ test-data/GeoIP2-City-Test-Broken-Double-Format.mmdb
+ test-data/GeoIP2-City-Test.mmdb
+ test-data/GeoIP2-Connection-Type-Test.mmdb
+ test-data/GeoIP2-Country-Test.mmdb
+ test-data/GeoIP2-Domain-Test.mmdb
+ test-data/GeoIP2-ISP-Test.mmdb
+ test-data/GeoIP2-Precision-City-Test.mmdb
+ test-data/MaxMind-DB-no-ipv4-search-tree.mmdb
+ test-data/MaxMind-DB-string-value-entries.mmdb
+ test-data/MaxMind-DB-test-broken-pointers-24.mmdb
+ test-data/MaxMind-DB-test-broken-search-tree-24.mmdb
+ test-data/MaxMind-DB-test-decoder.mmdb
+ test-data/MaxMind-DB-test-ipv4-24.mmdb
+ test-data/MaxMind-DB-test-ipv4-28.mmdb
+ test-data/MaxMind-DB-test-ipv4-32.mmdb
+ test-data/MaxMind-DB-test-ipv6-24.mmdb
+ test-data/MaxMind-DB-test-ipv6-28.mmdb
+ test-data/MaxMind-DB-test-ipv6-32.mmdb
+ test-data/MaxMind-DB-test-mixed-24.mmdb
+ test-data/MaxMind-DB-test-mixed-28.mmdb
+ test-data/MaxMind-DB-test-mixed-32.mmdb
+ test-data/MaxMind-DB-test-nested.mmdb
+113 −45 test-data/write-test-data.pl
41 changes: 41 additions & 0 deletions tests/reader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@

from __future__ import unicode_literals

import logging
import os
import sys
import threading

from multiprocessing import Process, Pipe

import maxminddb

Expand Down Expand Up @@ -233,6 +237,43 @@ def test_closed_metadata(self):
self.assertIsNotNone(
metadata, 'pure Python implementation returns value')

def test_multiprocessing(self):
    # Forked processes share the parent's open file table entry — the
    # exact condition the pread/lock handling in FileBuffer targets.
    self._check_concurrency(Process)

def test_threading(self):
    # Threads share the same reader object; lookups must still succeed.
    self._check_concurrency(threading.Thread)

def _check_concurrency(self, worker_class):
    """Run 32 concurrent workers against one shared reader and assert
    that every batch of lookups succeeds.

    :param worker_class: ``threading.Thread`` or
        ``multiprocessing.Process`` — both expose the same
        start()/join() interface.
    """
    reader = open_database(
        'tests/data/test-data/GeoIP2-Domain-Test.mmdb',
        self.mode
    )

    def lookup(pipe):
        try:
            for i in range(32):
                reader.get('65.115.240.{i}'.format(i=i))
            pipe.send(1)
        # A bare ``except:`` would also swallow SystemExit and
        # KeyboardInterrupt; catch only real errors and report failure
        # to the parent instead of crashing the worker.
        except Exception:  # pylint: disable=broad-except
            pipe.send(0)
        finally:
            if worker_class is Process:
                # Forked children hold their own copy of the reader;
                # close it so each child releases its file resources.
                reader.close()
            pipe.close()

    pipes = [Pipe() for _ in range(32)]
    procs = [worker_class(target=lookup, args=(c,)) for (p, c) in pipes]
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()

    reader.close()

    # A generator expression avoids building an intermediate list.
    count = sum(p.recv() for (p, c) in pipes)

    self.assertEqual(count, 32, 'expected number of successful lookups')

def _check_metadata(self, reader, ip_version, record_size):
metadata = reader.metadata()

Expand Down