Permalink
507f954 Dec 2, 2017
@behackett @ajdavis @ShaneHarvey @rozza @puentesarrin
858 lines (715 sloc) 32 KB
# Copyright 2009-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for using Python's :mod:`json` module with BSON documents.
This module provides two helper methods `dumps` and `loads` that wrap the
native :mod:`json` methods and provide explicit BSON conversion to and from
JSON. :class:`~bson.json_util.JSONOptions` provides a way to control how JSON
is emitted and parsed, with the default being the legacy PyMongo format.
:mod:`~bson.json_util` can also generate Canonical or Relaxed `Extended JSON`_
when :const:`CANONICAL_JSON_OPTIONS` or :const:`RELAXED_JSON_OPTIONS` is
provided, respectively.
.. _Extended JSON: https://github.com/mongodb/specifications/blob/master/source/extended-json.rst
Example usage (deserialization):
.. doctest::
>>> from bson.json_util import loads
>>> loads('[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$scope": {}, "$code": "function x() { return 1; }"}}, {"bin": {"$type": "80", "$binary": "AQIDBA=="}}]')
[{u'foo': [1, 2]}, {u'bar': {u'hello': u'world'}}, {u'code': Code('function x() { return 1; }', {})}, {u'bin': Binary('...', 128)}]
Example usage (serialization):
.. doctest::
>>> from bson import Binary, Code
>>> from bson.json_util import dumps
>>> dumps([{'foo': [1, 2]},
... {'bar': {'hello': 'world'}},
... {'code': Code("function x() { return 1; }", {})},
... {'bin': Binary(b"\x01\x02\x03\x04")}])
'[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }", "$scope": {}}}, {"bin": {"$binary": "AQIDBA==", "$type": "00"}}]'
Example usage (with :const:`CANONICAL_JSON_OPTIONS`):
.. doctest::
>>> from bson import Binary, Code
>>> from bson.json_util import dumps, CANONICAL_JSON_OPTIONS
>>> dumps([{'foo': [1, 2]},
... {'bar': {'hello': 'world'}},
... {'code': Code("function x() { return 1; }")},
... {'bin': Binary(b"\x01\x02\x03\x04")}],
... json_options=CANONICAL_JSON_OPTIONS)
'[{"foo": [{"$numberInt": "1"}, {"$numberInt": "2"}]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }"}}, {"bin": {"$binary": {"base64": "AQIDBA==", "subType": "00"}}}]'
Example usage (with :const:`RELAXED_JSON_OPTIONS`):
.. doctest::
>>> from bson import Binary, Code
>>> from bson.json_util import dumps, RELAXED_JSON_OPTIONS
>>> dumps([{'foo': [1, 2]},
... {'bar': {'hello': 'world'}},
... {'code': Code("function x() { return 1; }")},
... {'bin': Binary(b"\x01\x02\x03\x04")}],
... json_options=RELAXED_JSON_OPTIONS)
'[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }"}}, {"bin": {"$binary": {"base64": "AQIDBA==", "subType": "00"}}}]'
Alternatively, you can manually pass the `default` to :func:`json.dumps`.
It won't handle :class:`~bson.binary.Binary` and :class:`~bson.code.Code`
instances (as they are extended strings you can't provide custom defaults),
but it will be faster as there is less recursion.
.. note::
If your application does not need the flexibility offered by
:class:`JSONOptions` and spends a large amount of time in the `json_util`
module, look to
`python-bsonjs <https://pypi.python.org/pypi/python-bsonjs>`_ for a nice
performance improvement. `python-bsonjs` is a fast BSON to MongoDB
Extended JSON converter for Python built on top of
`libbson <https://github.com/mongodb/libbson>`_. `python-bsonjs` works best
with PyMongo when using :class:`~bson.raw_bson.RawBSONDocument`.
.. versionchanged:: 2.8
The output format for :class:`~bson.timestamp.Timestamp` has changed from
'{"t": <int>, "i": <int>}' to '{"$timestamp": {"t": <int>, "i": <int>}}'.
This new format will be decoded to an instance of
:class:`~bson.timestamp.Timestamp`. The old format will continue to be
decoded to a python dict as before. Encoding to the old format is no longer
supported as it was never correct and loses type information.
Added support for $numberLong and $undefined - new in MongoDB 2.6 - and
parsing $date in ISO-8601 format.
.. versionchanged:: 2.7
Preserves order when rendering SON, Timestamp, Code, Binary, and DBRef
instances.
.. versionchanged:: 2.3
Added dumps and loads helpers to automatically handle conversion to and
from json and supports :class:`~bson.binary.Binary` and
:class:`~bson.code.Code`
"""
import base64
import datetime
import math
import re
import sys
import uuid
if sys.version_info[:2] == (2, 6):
# In Python 2.6, json does not include object_pairs_hook. Use simplejson
# instead.
try:
import simplejson as json
except ImportError:
import json
else:
import json
from pymongo.errors import ConfigurationError
import bson
from bson import EPOCH_AWARE, EPOCH_NAIVE, RE_TYPE, SON
from bson.binary import (Binary, JAVA_LEGACY, CSHARP_LEGACY, OLD_UUID_SUBTYPE,
UUID_SUBTYPE)
from bson.code import Code
from bson.codec_options import CodecOptions
from bson.dbref import DBRef
from bson.decimal128 import Decimal128
from bson.int64 import Int64
from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.py3compat import (PY3, iteritems, integer_types, string_type,
text_type)
from bson.regex import Regex
from bson.timestamp import Timestamp
from bson.tz_util import utc
try:
json.loads("{}", object_pairs_hook=dict)
_HAS_OBJECT_PAIRS_HOOK = True
except TypeError:
_HAS_OBJECT_PAIRS_HOOK = False
_RE_OPT_TABLE = {
"i": re.I,
"l": re.L,
"m": re.M,
"s": re.S,
"u": re.U,
"x": re.X,
}
# Dollar-prefixed keys which may appear in DBRefs.
_DBREF_KEYS = frozenset(['$id', '$ref', '$db'])
class DatetimeRepresentation:
LEGACY = 0
"""Legacy MongoDB Extended JSON datetime representation.
:class:`datetime.datetime` instances will be encoded to JSON in the
format `{"$date": <dateAsMilliseconds>}`, where `dateAsMilliseconds` is
a 64-bit signed integer giving the number of milliseconds since the Unix
epoch UTC. This was the default encoding before PyMongo version 3.4.
.. versionadded:: 3.4
"""
NUMBERLONG = 1
"""NumberLong datetime representation.
:class:`datetime.datetime` instances will be encoded to JSON in the
format `{"$date": {"$numberLong": "<dateAsMilliseconds>"}}`,
where `dateAsMilliseconds` is the string representation of a 64-bit signed
integer giving the number of milliseconds since the Unix epoch UTC.
.. versionadded:: 3.4
"""
ISO8601 = 2
"""ISO-8601 datetime representation.
:class:`datetime.datetime` instances greater than or equal to the Unix
epoch UTC will be encoded to JSON in the format `{"$date": "<ISO-8601>"}`.
:class:`datetime.datetime` instances before the Unix epoch UTC will be
encoded as if the datetime representation is
:const:`~DatetimeRepresentation.NUMBERLONG`.
.. versionadded:: 3.4
"""
class JSONMode:
LEGACY = 0
"""Legacy Extended JSON representation.
In this mode, :func:`~bson.json_util.dumps` produces PyMongo's legacy
non-standard JSON output. Consider using
:const:`~bson.json_util.JSONMode.RELAXED` or
:const:`~bson.json_util.JSONMode.CANONICAL` instead.
.. versionadded:: 3.5
"""
RELAXED = 1
"""Relaxed Extended JSON representation.
In this mode, :func:`~bson.json_util.dumps` produces Relaxed Extended JSON,
a mostly JSON-like format. Consider using this for things like a web API,
where one is sending a document (or a projection of a document) that only
uses ordinary JSON type primitives. In particular, the ``int``,
:class:`~bson.int64.Int64`, and ``float`` numeric types are represented in
the native JSON number format. This output is also the most human readable
and is useful for debugging and documentation.
.. seealso:: The specification for Relaxed `Extended JSON`_.
.. versionadded:: 3.5
"""
CANONICAL = 2
"""Canonical Extended JSON representation.
In this mode, :func:`~bson.json_util.dumps` produces Canonical Extended
JSON, a type preserving format. Consider using this for things like
testing, where one has to precisely specify expected types in JSON. In
particular, the ``int``, :class:`~bson.int64.Int64`, and ``float`` numeric
types are encoded with type wrappers.
.. seealso:: The specification for Canonical `Extended JSON`_.
.. versionadded:: 3.5
"""
class JSONOptions(CodecOptions):
"""Encapsulates JSON options for :func:`dumps` and :func:`loads`.
Raises :exc:`~pymongo.errors.ConfigurationError` on Python 2.6 if
`simplejson >= 2.1.0 <https://pypi.python.org/pypi/simplejson>`_ is not
installed and document_class is not the default (:class:`dict`).
:Parameters:
- `strict_number_long`: If ``True``, :class:`~bson.int64.Int64` objects
are encoded to MongoDB Extended JSON's *Strict mode* type
`NumberLong`, ie ``'{"$numberLong": "<number>" }'``. Otherwise they
will be encoded as an `int`. Defaults to ``False``.
- `datetime_representation`: The representation to use when encoding
instances of :class:`datetime.datetime`. Defaults to
:const:`~DatetimeRepresentation.LEGACY`.
- `strict_uuid`: If ``True``, :class:`uuid.UUID` object are encoded to
MongoDB Extended JSON's *Strict mode* type `Binary`. Otherwise it
will be encoded as ``'{"$uuid": "<hex>" }'``. Defaults to ``False``.
- `json_mode`: The :class:`JSONMode` to use when encoding BSON types to
Extended JSON. Defaults to :const:`~JSONMode.LEGACY`.
- `document_class`: BSON documents returned by :func:`loads` will be
decoded to an instance of this class. Must be a subclass of
:class:`collections.MutableMapping`. Defaults to :class:`dict`.
- `uuid_representation`: The BSON representation to use when encoding
and decoding instances of :class:`uuid.UUID`. Defaults to
:const:`~bson.binary.PYTHON_LEGACY`.
- `tz_aware`: If ``True``, MongoDB Extended JSON's *Strict mode* type
`Date` will be decoded to timezone aware instances of
:class:`datetime.datetime`. Otherwise they will be naive. Defaults
to ``True``.
- `tzinfo`: A :class:`datetime.tzinfo` subclass that specifies the
timezone from which :class:`~datetime.datetime` objects should be
decoded. Defaults to :const:`~bson.tz_util.utc`.
- `args`: arguments to :class:`~bson.codec_options.CodecOptions`
- `kwargs`: arguments to :class:`~bson.codec_options.CodecOptions`
.. seealso:: The specification for Relaxed and Canonical `Extended JSON`_.
.. versionadded:: 3.4
.. versionchanged:: 3.5
Accepts the optional parameter `json_mode`.
"""
def __new__(cls, strict_number_long=False,
datetime_representation=DatetimeRepresentation.LEGACY,
strict_uuid=False, json_mode=JSONMode.LEGACY,
*args, **kwargs):
kwargs["tz_aware"] = kwargs.get("tz_aware", True)
if kwargs["tz_aware"]:
kwargs["tzinfo"] = kwargs.get("tzinfo", utc)
if datetime_representation not in (DatetimeRepresentation.LEGACY,
DatetimeRepresentation.NUMBERLONG,
DatetimeRepresentation.ISO8601):
raise ConfigurationError(
"JSONOptions.datetime_representation must be one of LEGACY, "
"NUMBERLONG, or ISO8601 from DatetimeRepresentation.")
self = super(JSONOptions, cls).__new__(cls, *args, **kwargs)
if not _HAS_OBJECT_PAIRS_HOOK and self.document_class != dict:
raise ConfigurationError(
"Support for JSONOptions.document_class on Python 2.6 "
"requires simplejson >= 2.1.0"
"(https://pypi.python.org/pypi/simplejson) to be installed.")
if json_mode not in (JSONMode.LEGACY,
JSONMode.RELAXED,
JSONMode.CANONICAL):
raise ConfigurationError(
"JSONOptions.json_mode must be one of LEGACY, RELAXED, "
"or CANONICAL from JSONMode.")
self.json_mode = json_mode
if self.json_mode == JSONMode.RELAXED:
self.strict_number_long = False
self.datetime_representation = DatetimeRepresentation.ISO8601
self.strict_uuid = True
elif self.json_mode == JSONMode.CANONICAL:
self.strict_number_long = True
self.datetime_representation = DatetimeRepresentation.NUMBERLONG
self.strict_uuid = True
else:
self.strict_number_long = strict_number_long
self.datetime_representation = datetime_representation
self.strict_uuid = strict_uuid
return self
def _arguments_repr(self):
return ('strict_number_long=%r, '
'datetime_representation=%r, '
'strict_uuid=%r, json_mode=%r, %s' % (
self.strict_number_long,
self.datetime_representation,
self.strict_uuid,
self.json_mode,
super(JSONOptions, self)._arguments_repr()))
LEGACY_JSON_OPTIONS = JSONOptions(json_mode=JSONMode.LEGACY)
""":class:`JSONOptions` for encoding to PyMongo's legacy JSON format.
.. seealso:: The documentation for :const:`bson.json_util.JSONMode.LEGACY`.
.. versionadded:: 3.5
"""
DEFAULT_JSON_OPTIONS = LEGACY_JSON_OPTIONS
"""The default :class:`JSONOptions` for JSON encoding/decoding.
The same as :const:`LEGACY_JSON_OPTIONS`. This will change to
:const:`RELAXED_JSON_OPTIONS` in a future release.
.. versionadded:: 3.4
"""
CANONICAL_JSON_OPTIONS = JSONOptions(json_mode=JSONMode.CANONICAL)
""":class:`JSONOptions` for Canonical Extended JSON.
.. seealso:: The documentation for :const:`bson.json_util.JSONMode.CANONICAL`.
.. versionadded:: 3.5
"""
RELAXED_JSON_OPTIONS = JSONOptions(json_mode=JSONMode.RELAXED)
""":class:`JSONOptions` for Relaxed Extended JSON.
.. seealso:: The documentation for :const:`bson.json_util.JSONMode.RELAXED`.
.. versionadded:: 3.5
"""
STRICT_JSON_OPTIONS = JSONOptions(
strict_number_long=True,
datetime_representation=DatetimeRepresentation.ISO8601,
strict_uuid=True)
"""**DEPRECATED** - :class:`JSONOptions` for MongoDB Extended JSON's *Strict
mode* encoding.
.. versionadded:: 3.4
.. versionchanged:: 3.5
Deprecated. Use :const:`RELAXED_JSON_OPTIONS` or
:const:`CANONICAL_JSON_OPTIONS` instead.
"""
def dumps(obj, *args, **kwargs):
"""Helper function that wraps :func:`json.dumps`.
Recursive function that handles all BSON types including
:class:`~bson.binary.Binary` and :class:`~bson.code.Code`.
:Parameters:
- `json_options`: A :class:`JSONOptions` instance used to modify the
encoding of MongoDB Extended JSON types. Defaults to
:const:`DEFAULT_JSON_OPTIONS`.
.. versionchanged:: 3.4
Accepts optional parameter `json_options`. See :class:`JSONOptions`.
.. versionchanged:: 2.7
Preserves order when rendering SON, Timestamp, Code, Binary, and DBRef
instances.
"""
json_options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)
return json.dumps(_json_convert(obj, json_options), *args, **kwargs)
def loads(s, *args, **kwargs):
"""Helper function that wraps :func:`json.loads`.
Automatically passes the object_hook for BSON type conversion.
Raises ``TypeError``, ``ValueError``, ``KeyError``, or
:exc:`~bson.errors.InvalidId` on invalid MongoDB Extended JSON.
:Parameters:
- `json_options`: A :class:`JSONOptions` instance used to modify the
decoding of MongoDB Extended JSON types. Defaults to
:const:`DEFAULT_JSON_OPTIONS`.
.. versionchanged:: 3.5
Parses Relaxed and Canonical Extended JSON as well as PyMongo's legacy
format. Now raises ``TypeError`` or ``ValueError`` when parsing JSON
type wrappers with values of the wrong type or any extra keys.
.. versionchanged:: 3.4
Accepts optional parameter `json_options`. See :class:`JSONOptions`.
"""
json_options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)
if _HAS_OBJECT_PAIRS_HOOK:
kwargs["object_pairs_hook"] = lambda pairs: object_pairs_hook(
pairs, json_options)
else:
kwargs["object_hook"] = lambda obj: object_hook(obj, json_options)
return json.loads(s, *args, **kwargs)
def _json_convert(obj, json_options=DEFAULT_JSON_OPTIONS):
"""Recursive helper method that converts BSON types so they can be
converted into json.
"""
if hasattr(obj, 'iteritems') or hasattr(obj, 'items'): # PY3 support
return SON(((k, _json_convert(v, json_options))
for k, v in iteritems(obj)))
elif hasattr(obj, '__iter__') and not isinstance(obj, (text_type, bytes)):
return list((_json_convert(v, json_options) for v in obj))
try:
return default(obj, json_options)
except TypeError:
return obj
def object_pairs_hook(pairs, json_options=DEFAULT_JSON_OPTIONS):
return object_hook(json_options.document_class(pairs), json_options)
def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS):
if "$oid" in dct:
return _parse_canonical_oid(dct)
if "$ref" in dct:
return _parse_canonical_dbref(dct)
if "$date" in dct:
return _parse_canonical_datetime(dct, json_options)
if "$regex" in dct:
return _parse_legacy_regex(dct)
if "$minKey" in dct:
return _parse_canonical_minkey(dct)
if "$maxKey" in dct:
return _parse_canonical_maxkey(dct)
if "$binary" in dct:
if "$type" in dct:
return _parse_legacy_binary(dct, json_options)
else:
return _parse_canonical_binary(dct, json_options)
if "$code" in dct:
return _parse_canonical_code(dct)
if "$uuid" in dct:
return _parse_legacy_uuid(dct)
if "$undefined" in dct:
return None
if "$numberLong" in dct:
return _parse_canonical_int64(dct)
if "$timestamp" in dct:
tsp = dct["$timestamp"]
return Timestamp(tsp["t"], tsp["i"])
if "$numberDecimal" in dct:
return _parse_canonical_decimal128(dct)
if "$dbPointer" in dct:
return _parse_canonical_dbpointer(dct)
if "$regularExpression" in dct:
return _parse_canonical_regex(dct)
if "$symbol" in dct:
return _parse_canonical_symbol(dct)
if "$numberInt" in dct:
return _parse_canonical_int32(dct)
if "$numberDouble" in dct:
return _parse_canonical_double(dct)
return dct
def _parse_legacy_regex(doc):
pattern = doc["$regex"]
# Check if this is the $regex query operator.
if isinstance(pattern, Regex):
return doc
flags = 0
# PyMongo always adds $options but some other tools may not.
for opt in doc.get("$options", ""):
flags |= _RE_OPT_TABLE.get(opt, 0)
return Regex(pattern, flags)
def _parse_legacy_uuid(doc):
"""Decode a JSON legacy $uuid to Python UUID."""
if len(doc) != 1:
raise TypeError('Bad $uuid, extra field(s): %s' % (doc,))
return uuid.UUID(doc["$uuid"])
def _binary_or_uuid(data, subtype, json_options):
# special handling for UUID
if subtype == OLD_UUID_SUBTYPE:
if json_options.uuid_representation == CSHARP_LEGACY:
return uuid.UUID(bytes_le=data)
if json_options.uuid_representation == JAVA_LEGACY:
data = data[7::-1] + data[:7:-1]
return uuid.UUID(bytes=data)
if subtype == UUID_SUBTYPE:
return uuid.UUID(bytes=data)
if PY3 and subtype == 0:
return data
return Binary(data, subtype)
def _parse_legacy_binary(doc, json_options):
if isinstance(doc["$type"], int):
doc["$type"] = "%02x" % doc["$type"]
subtype = int(doc["$type"], 16)
if subtype >= 0xffffff80: # Handle mongoexport values
subtype = int(doc["$type"][6:], 16)
data = base64.b64decode(doc["$binary"].encode())
return _binary_or_uuid(data, subtype, json_options)
def _parse_canonical_binary(doc, json_options):
binary = doc["$binary"]
b64 = binary["base64"]
subtype = binary["subType"]
if not isinstance(b64, string_type):
raise TypeError('$binary base64 must be a string: %s' % (doc,))
if not isinstance(subtype, string_type) or len(subtype) > 2:
raise TypeError('$binary subType must be a string at most 2 '
'characters: %s' % (doc,))
if len(binary) != 2:
raise TypeError('$binary must include only "base64" and "subType" '
'components: %s' % (doc,))
data = base64.b64decode(b64.encode())
return _binary_or_uuid(data, int(subtype, 16), json_options)
def _parse_canonical_datetime(doc, json_options):
"""Decode a JSON datetime to python datetime.datetime."""
dtm = doc["$date"]
if len(doc) != 1:
raise TypeError('Bad $date, extra field(s): %s' % (doc,))
# mongoexport 2.6 and newer
if isinstance(dtm, string_type):
# Parse offset
if dtm[-1] == 'Z':
dt = dtm[:-1]
offset = 'Z'
elif dtm[-3] == ':':
# (+|-)HH:MM
dt = dtm[:-6]
offset = dtm[-6:]
elif dtm[-5] in ('+', '-'):
# (+|-)HHMM
dt = dtm[:-5]
offset = dtm[-5:]
elif dtm[-3] in ('+', '-'):
# (+|-)HH
dt = dtm[:-3]
offset = dtm[-3:]
else:
dt = dtm
offset = ''
# Parse the optional factional seconds portion.
dot_index = dt.rfind('.')
microsecond = 0
if dot_index != -1:
microsecond = int(float(dt[dot_index:]) * 1000000)
dt = dt[:dot_index]
aware = datetime.datetime.strptime(
dt, "%Y-%m-%dT%H:%M:%S").replace(microsecond=microsecond,
tzinfo=utc)
if offset and offset != 'Z':
if len(offset) == 6:
hours, minutes = offset[1:].split(':')
secs = (int(hours) * 3600 + int(minutes) * 60)
elif len(offset) == 5:
secs = (int(offset[1:3]) * 3600 + int(offset[3:]) * 60)
elif len(offset) == 3:
secs = int(offset[1:3]) * 3600
if offset[0] == "-":
secs *= -1
aware = aware - datetime.timedelta(seconds=secs)
if json_options.tz_aware:
if json_options.tzinfo:
aware = aware.astimezone(json_options.tzinfo)
return aware
else:
return aware.replace(tzinfo=None)
return bson._millis_to_datetime(int(dtm), json_options)
def _parse_canonical_oid(doc):
"""Decode a JSON ObjectId to bson.objectid.ObjectId."""
if len(doc) != 1:
raise TypeError('Bad $oid, extra field(s): %s' % (doc,))
return ObjectId(doc['$oid'])
def _parse_canonical_symbol(doc):
"""Decode a JSON symbol to Python string."""
symbol = doc['$symbol']
if len(doc) != 1:
raise TypeError('Bad $symbol, extra field(s): %s' % (doc,))
return text_type(symbol)
def _parse_canonical_code(doc):
"""Decode a JSON code to bson.code.Code."""
for key in doc:
if key not in ('$code', '$scope'):
raise TypeError('Bad $code, extra field(s): %s' % (doc,))
return Code(doc['$code'], scope=doc.get('$scope'))
def _parse_canonical_regex(doc):
"""Decode a JSON regex to bson.regex.Regex."""
regex = doc['$regularExpression']
if len(doc) != 1:
raise TypeError('Bad $regularExpression, extra field(s): %s' % (doc,))
if len(regex) != 2:
raise TypeError('Bad $regularExpression must include only "pattern"'
'and "options" components: %s' % (doc,))
return Regex(regex['pattern'], regex['options'])
def _parse_canonical_dbref(doc):
"""Decode a JSON DBRef to bson.dbref.DBRef."""
for key in doc:
if key.startswith('$') and key not in _DBREF_KEYS:
# Other keys start with $, so dct cannot be parsed as a DBRef.
return doc
return DBRef(doc.pop('$ref'), doc.pop('$id'),
database=doc.pop('$db', None), **doc)
def _parse_canonical_dbpointer(doc):
"""Decode a JSON (deprecated) DBPointer to bson.dbref.DBRef."""
dbref = doc['$dbPointer']
if len(doc) != 1:
raise TypeError('Bad $dbPointer, extra field(s): %s' % (doc,))
if isinstance(dbref, DBRef):
dbref_doc = dbref.as_doc()
# DBPointer must not contain $db in its value.
if dbref.database is not None:
raise TypeError(
'Bad $dbPointer, extra field $db: %s' % (dbref_doc,))
if not isinstance(dbref.id, ObjectId):
raise TypeError(
'Bad $dbPointer, $id must be an ObjectId: %s' % (dbref_doc,))
if len(dbref_doc) != 2:
raise TypeError(
'Bad $dbPointer, extra field(s) in DBRef: %s' % (dbref_doc,))
return dbref
else:
raise TypeError('Bad $dbPointer, expected a DBRef: %s' % (doc,))
def _parse_canonical_int32(doc):
"""Decode a JSON int32 to python int."""
i_str = doc['$numberInt']
if len(doc) != 1:
raise TypeError('Bad $numberInt, extra field(s): %s' % (doc,))
if not isinstance(i_str, string_type):
raise TypeError('$numberInt must be string: %s' % (doc,))
return int(i_str)
def _parse_canonical_int64(doc):
"""Decode a JSON int64 to bson.int64.Int64."""
l_str = doc['$numberLong']
if len(doc) != 1:
raise TypeError('Bad $numberLong, extra field(s): %s' % (doc,))
return Int64(l_str)
def _parse_canonical_double(doc):
"""Decode a JSON double to python float."""
d_str = doc['$numberDouble']
if len(doc) != 1:
raise TypeError('Bad $numberDouble, extra field(s): %s' % (doc,))
if not isinstance(d_str, string_type):
raise TypeError('$numberDouble must be string: %s' % (doc,))
return float(d_str)
def _parse_canonical_decimal128(doc):
"""Decode a JSON decimal128 to bson.decimal128.Decimal128."""
d_str = doc['$numberDecimal']
if len(doc) != 1:
raise TypeError('Bad $numberDecimal, extra field(s): %s' % (doc,))
if not isinstance(d_str, string_type):
raise TypeError('$numberDecimal must be string: %s' % (doc,))
return Decimal128(d_str)
def _parse_canonical_minkey(doc):
"""Decode a JSON MinKey to bson.min_key.MinKey."""
if doc['$minKey'] is not 1:
raise TypeError('$minKey value must be 1: %s' % (doc,))
if len(doc) != 1:
raise TypeError('Bad $minKey, extra field(s): %s' % (doc,))
return MinKey()
def _parse_canonical_maxkey(doc):
"""Decode a JSON MaxKey to bson.max_key.MaxKey."""
if doc['$maxKey'] is not 1:
raise TypeError('$maxKey value must be 1: %s', (doc,))
if len(doc) != 1:
raise TypeError('Bad $minKey, extra field(s): %s' % (doc,))
return MaxKey()
def _encode_binary(data, subtype, json_options):
if json_options.json_mode == JSONMode.LEGACY:
return SON([
('$binary', base64.b64encode(data).decode()),
('$type', "%02x" % subtype)])
return {'$binary': SON([
('base64', base64.b64encode(data).decode()),
('subType', "%02x" % subtype)])}
def default(obj, json_options=DEFAULT_JSON_OPTIONS):
# We preserve key order when rendering SON, DBRef, etc. as JSON by
# returning a SON for those types instead of a dict.
if isinstance(obj, ObjectId):
return {"$oid": str(obj)}
if isinstance(obj, DBRef):
return _json_convert(obj.as_doc(), json_options=json_options)
if isinstance(obj, datetime.datetime):
if (json_options.datetime_representation ==
DatetimeRepresentation.ISO8601):
if not obj.tzinfo:
obj = obj.replace(tzinfo=utc)
if obj >= EPOCH_AWARE:
off = obj.tzinfo.utcoffset(obj)
if (off.days, off.seconds, off.microseconds) == (0, 0, 0):
tz_string = 'Z'
else:
tz_string = obj.strftime('%z')
millis = int(obj.microsecond / 1000)
fracsecs = ".%03d" % (millis,) if millis else ""
return {"$date": "%s%s%s" % (
obj.strftime("%Y-%m-%dT%H:%M:%S"), fracsecs, tz_string)}
millis = bson._datetime_to_millis(obj)
if (json_options.datetime_representation ==
DatetimeRepresentation.LEGACY):
return {"$date": millis}
return {"$date": {"$numberLong": str(millis)}}
if json_options.strict_number_long and isinstance(obj, Int64):
return {"$numberLong": str(obj)}
if isinstance(obj, (RE_TYPE, Regex)):
flags = ""
if obj.flags & re.IGNORECASE:
flags += "i"
if obj.flags & re.LOCALE:
flags += "l"
if obj.flags & re.MULTILINE:
flags += "m"
if obj.flags & re.DOTALL:
flags += "s"
if obj.flags & re.UNICODE:
flags += "u"
if obj.flags & re.VERBOSE:
flags += "x"
if isinstance(obj.pattern, text_type):
pattern = obj.pattern
else:
pattern = obj.pattern.decode('utf-8')
if json_options.json_mode == JSONMode.LEGACY:
return SON([("$regex", pattern), ("$options", flags)])
return {'$regularExpression': SON([("pattern", pattern),
("options", flags)])}
if isinstance(obj, MinKey):
return {"$minKey": 1}
if isinstance(obj, MaxKey):
return {"$maxKey": 1}
if isinstance(obj, Timestamp):
return {"$timestamp": SON([("t", obj.time), ("i", obj.inc)])}
if isinstance(obj, Code):
if obj.scope is None:
return {'$code': str(obj)}
return SON([
('$code', str(obj)),
('$scope', _json_convert(obj.scope, json_options))])
if isinstance(obj, Binary):
return _encode_binary(obj, obj.subtype, json_options)
if PY3 and isinstance(obj, bytes):
return _encode_binary(obj, 0, json_options)
if isinstance(obj, uuid.UUID):
if json_options.strict_uuid:
data = obj.bytes
subtype = OLD_UUID_SUBTYPE
if json_options.uuid_representation == CSHARP_LEGACY:
data = obj.bytes_le
elif json_options.uuid_representation == JAVA_LEGACY:
data = data[7::-1] + data[:7:-1]
elif json_options.uuid_representation == UUID_SUBTYPE:
subtype = UUID_SUBTYPE
return _encode_binary(data, subtype, json_options)
else:
return {"$uuid": obj.hex}
if isinstance(obj, Decimal128):
return {"$numberDecimal": str(obj)}
if isinstance(obj, bool):
return obj
if (json_options.json_mode == JSONMode.CANONICAL and
isinstance(obj, integer_types)):
if -2 ** 31 <= obj < 2 ** 31:
return {'$numberInt': text_type(obj)}
return {'$numberLong': text_type(obj)}
if json_options.json_mode != JSONMode.LEGACY and isinstance(obj, float):
if math.isnan(obj):
return {'$numberDouble': 'NaN'}
elif math.isinf(obj):
representation = 'Infinity' if obj > 0 else '-Infinity'
return {'$numberDouble': representation}
elif json_options.json_mode == JSONMode.CANONICAL:
# repr() will return the shortest string guaranteed to produce the
# original value, when float() is called on it. str produces a
# shorter string in Python 2.
return {'$numberDouble': text_type(repr(obj))}
raise TypeError("%r is not JSON serializable" % obj)