aec685b Mar 25, 2015
@behackett @ajdavis @shearic @rozza @tompko @llvtt @zewt
293 lines (234 sloc) 9.06 KB
# Copyright 2009-2015 MongoDB, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for working with MongoDB `ObjectIds
import binascii
import calendar
import datetime
import hashlib
import os
import random
import socket
import struct
import threading
import time
from bson.errors import InvalidId
from bson.py3compat import PY3, bytes_from_hex, string_type, text_type
from bson.tz_util import utc
def _machine_bytes():
"""Get the machine portion of an ObjectId.
machine_hash = hashlib.md5()
if PY3:
# gethostname() returns a unicode string in python 3.x
# while update() requires a byte string.
# Calling encode() here will fail with non-ascii hostnames
return machine_hash.digest()[0:3]
def _raise_invalid_id(oid):
raise InvalidId(
"%r is not a valid ObjectId, it must be a 12-byte input"
" or a 24-character hex string" % oid)
class ObjectId(object):
"""A MongoDB ObjectId.
_inc = random.randint(0, 0xFFFFFF)
_inc_lock = threading.Lock()
_machine_bytes = _machine_bytes()
__slots__ = ('__id')
_type_marker = 7
def __init__(self, oid=None):
"""Initialize a new ObjectId.
An ObjectId is a 12-byte unique identifier consisting of:
- a 4-byte value representing the seconds since the Unix epoch,
- a 3-byte machine identifier,
- a 2-byte process id, and
- a 3-byte counter, starting with a random value.
By default, ``ObjectId()`` creates a new unique identifier. The
optional parameter `oid` can be an :class:`ObjectId`, or any 12
:class:`bytes` or, in Python 2, any 12-character :class:`str`.
For example, the 12 bytes b'foo-bar-quux' do not follow the ObjectId
specification but they are acceptable input::
>>> ObjectId(b'foo-bar-quux')
`oid` can also be a :class:`unicode` or :class:`str` of 24 hex digits::
>>> ObjectId('0123456789ab0123456789ab')
>>> # A u-prefixed unicode literal:
>>> ObjectId(u'0123456789ab0123456789ab')
Raises :class:`~bson.errors.InvalidId` if `oid` is not 12 bytes nor
24 hex digits, or :class:`TypeError` if `oid` is not an accepted type.
- `oid` (optional): a valid ObjectId.
.. mongodoc:: objectids
if oid is None:
elif isinstance(oid, bytes) and len(oid) == 12:
self.__id = oid
def from_datetime(cls, generation_time):
"""Create a dummy ObjectId instance with a specific generation time.
This method is useful for doing range queries on a field
containing :class:`ObjectId` instances.
.. warning::
It is not safe to insert a document containing an ObjectId
generated using this method. This method deliberately
eliminates the uniqueness guarantee that ObjectIds
generally provide. ObjectIds generated with this method
should be used exclusively in queries.
`generation_time` will be converted to UTC. Naive datetime
instances will be treated as though they already contain UTC.
An example using this helper to get documents where ``"_id"``
was generated before January 1, 2010 would be:
>>> gen_time = datetime.datetime(2010, 1, 1)
>>> dummy_id = ObjectId.from_datetime(gen_time)
>>> result = collection.find({"_id": {"$lt": dummy_id}})
- `generation_time`: :class:`~datetime.datetime` to be used
as the generation time for the resulting ObjectId.
if generation_time.utcoffset() is not None:
generation_time = generation_time - generation_time.utcoffset()
timestamp = calendar.timegm(generation_time.timetuple())
oid = struct.pack(
">i", int(timestamp)) + b"\x00\x00\x00\x00\x00\x00\x00\x00"
return cls(oid)
def is_valid(cls, oid):
"""Checks if a `oid` string is valid or not.
- `oid`: the object id to validate
.. versionadded:: 2.3
if not oid:
return False
return True
except (InvalidId, TypeError):
return False
def __generate(self):
"""Generate a new value for this ObjectId.
# 4 bytes current time
oid = struct.pack(">i", int(time.time()))
# 3 bytes machine
oid += ObjectId._machine_bytes
# 2 bytes pid
oid += struct.pack(">H", os.getpid() % 0xFFFF)
# 3 bytes inc
with ObjectId._inc_lock:
oid += struct.pack(">i", ObjectId._inc)[1:4]
ObjectId._inc = (ObjectId._inc + 1) % 0xFFFFFF
self.__id = oid
def __validate(self, oid):
"""Validate and use the given id for this ObjectId.
Raises TypeError if id is not an instance of
(:class:`basestring` (:class:`str` or :class:`bytes`
in python 3), ObjectId) and InvalidId if it is not a
valid ObjectId.
- `oid`: a valid ObjectId
if isinstance(oid, ObjectId):
self.__id = oid.binary
# bytes or unicode in python 2, str in python 3
elif isinstance(oid, string_type):
if len(oid) == 24:
self.__id = bytes_from_hex(oid)
except (TypeError, ValueError):
raise TypeError("id must be an instance of (bytes, %s, ObjectId), "
"not %s" % (text_type.__name__, type(oid)))
def binary(self):
"""12-byte binary representation of this ObjectId.
return self.__id
def generation_time(self):
"""A :class:`datetime.datetime` instance representing the time of
generation for this :class:`ObjectId`.
The :class:`datetime.datetime` is timezone aware, and
represents the generation time in UTC. It is precise to the
timestamp = struct.unpack(">i", self.__id[0:4])[0]
return datetime.datetime.fromtimestamp(timestamp, utc)
def __getstate__(self):
"""return value of object for pickling.
needed explicitly because __slots__() defined.
return self.__id
def __setstate__(self, value):
"""explicit state set from pickling
# Provide backwards compatability with OIDs
# pickled with pymongo-1.9 or older.
if isinstance(value, dict):
oid = value["_ObjectId__id"]
oid = value
# ObjectIds pickled in python 2.x used `str` for __id.
# In python 3.x this has to be converted to `bytes`
# by encoding latin-1.
if PY3 and isinstance(oid, text_type):
self.__id = oid.encode('latin-1')
self.__id = oid
def __str__(self):
if PY3:
return binascii.hexlify(self.__id).decode()
return binascii.hexlify(self.__id)
def __repr__(self):
return "ObjectId('%s')" % (str(self),)
def __eq__(self, other):
if isinstance(other, ObjectId):
return self.__id == other.binary
return NotImplemented
def __ne__(self, other):
if isinstance(other, ObjectId):
return self.__id != other.binary
return NotImplemented
def __lt__(self, other):
if isinstance(other, ObjectId):
return self.__id < other.binary
return NotImplemented
def __le__(self, other):
if isinstance(other, ObjectId):
return self.__id <= other.binary
return NotImplemented
def __gt__(self, other):
if isinstance(other, ObjectId):
return self.__id > other.binary
return NotImplemented
def __ge__(self, other):
if isinstance(other, ObjectId):
return self.__id >= other.binary
return NotImplemented
def __hash__(self):
"""Get a hash value for this :class:`ObjectId`."""
return hash(self.__id)