From b13a2136fc383c25ac5e08f66c01101427d5375d Mon Sep 17 00:00:00 2001 From: Wil Clouser Date: Wed, 22 Sep 2010 10:16:56 -0700 Subject: [PATCH] Adding mongoengine --- lib/python/gridfs/__init__.py | 236 ++++++ lib/python/gridfs/errors.py | 56 ++ lib/python/gridfs/grid_file.py | 457 +++++++++++ lib/python/mongoengine/__init__.py | 24 + lib/python/mongoengine/base.py | 419 ++++++++++ lib/python/mongoengine/connection.py | 62 ++ lib/python/mongoengine/django/__init__.py | 0 lib/python/mongoengine/django/auth.py | 118 +++ lib/python/mongoengine/django/sessions.py | 63 ++ lib/python/mongoengine/document.py | 157 ++++ lib/python/mongoengine/fields.py | 464 +++++++++++ lib/python/mongoengine/queryset.py | 941 ++++++++++++++++++++++ 12 files changed, 2997 insertions(+) create mode 100644 lib/python/gridfs/__init__.py create mode 100644 lib/python/gridfs/errors.py create mode 100644 lib/python/gridfs/grid_file.py create mode 100644 lib/python/mongoengine/__init__.py create mode 100644 lib/python/mongoengine/base.py create mode 100644 lib/python/mongoengine/connection.py create mode 100644 lib/python/mongoengine/django/__init__.py create mode 100644 lib/python/mongoengine/django/auth.py create mode 100644 lib/python/mongoengine/django/sessions.py create mode 100644 lib/python/mongoengine/document.py create mode 100644 lib/python/mongoengine/fields.py create mode 100644 lib/python/mongoengine/queryset.py diff --git a/lib/python/gridfs/__init__.py b/lib/python/gridfs/__init__.py new file mode 100644 index 0000000..3d8df02 --- /dev/null +++ b/lib/python/gridfs/__init__.py @@ -0,0 +1,236 @@ +# Copyright 2009-2010 10gen, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""GridFS is a specification for storing large objects in Mongo. + +The :mod:`gridfs` package is an implementation of GridFS on top of +:mod:`pymongo`, exposing a file-like interface. + +.. mongodoc:: gridfs +""" + +from gridfs.errors import (NoFile, + UnsupportedAPI) +from gridfs.grid_file import (GridIn, + GridOut) +from pymongo import (ASCENDING, + DESCENDING) +from pymongo.database import Database + + +class GridFS(object): + """An instance of GridFS on top of a single Database. + """ + def __init__(self, database, collection="fs"): + """Create a new instance of :class:`GridFS`. + + Raises :class:`TypeError` if `database` is not an instance of + :class:`~pymongo.database.Database`. + + :Parameters: + - `database`: database to use + - `collection` (optional): root collection to use + + .. versionadded:: 1.6 + The `collection` parameter. + + .. mongodoc:: gridfs + """ + if not isinstance(database, Database): + raise TypeError("database must be an instance of Database") + + self.__database = database + self.__collection = database[collection] + self.__files = self.__collection.files + self.__chunks = self.__collection.chunks + self.__chunks.ensure_index([("files_id", ASCENDING), ("n", ASCENDING)], + unique=True) + + def new_file(self, **kwargs): + """Create a new file in GridFS. 
+ + Returns a new :class:`~gridfs.grid_file.GridIn` instance to + which data can be written. Any keyword arguments will be + passed through to :meth:`~gridfs.grid_file.GridIn`. + + If the ``"_id"`` of the file is manually specified, it must + not already exist in GridFS. Otherwise + :class:`~gridfs.errors.FileExists` is raised. + + :Parameters: + - `**kwargs` (optional): keyword arguments for file creation + + .. versionadded:: 1.6 + """ + return GridIn(self.__collection, **kwargs) + + def put(self, data, **kwargs): + """Put data in GridFS as a new file. + + Equivalent to doing: + + >>> f = new_file(**kwargs) + >>> try: + >>> f.write(data) + >>> finally: + >>> f.close() + + `data` can be either an instance of :class:`str` or a + file-like object providing a :meth:`read` method. Any keyword + arguments will be passed through to the created file - see + :meth:`~gridfs.grid_file.GridIn` for possible + arguments. Returns the ``"_id"`` of the created file. + + If the ``"_id"`` of the file is manually specified, it must + not already exist in GridFS. Otherwise + :class:`~gridfs.errors.FileExists` is raised. + + :Parameters: + - `data`: data to be written as a file. + - `**kwargs` (optional): keyword arguments for file creation + + .. versionadded:: 1.6 + """ + grid_file = GridIn(self.__collection, **kwargs) + try: + grid_file.write(data) + finally: + grid_file.close() + return grid_file._id + + def get(self, file_id): + """Get a file from GridFS by ``"_id"``. + + Returns an instance of :class:`~gridfs.grid_file.GridOut`, + which provides a file-like interface for reading. + + :Parameters: + - `file_id`: ``"_id"`` of the file to get + + .. versionadded:: 1.6 + """ + return GridOut(self.__collection, file_id) + + def get_last_version(self, filename): + """Get a file from GridFS by ``"filename"``. + + Returns the most recently uploaded file in GridFS with the + name `filename` as an instance of + :class:`~gridfs.grid_file.GridOut`. Raises + :class:`~gridfs.errors.NoFile` if no such file exists. + + An index on ``{filename: 1, uploadDate: -1}`` will + automatically be created when this method is called the first + time. + + :Parameters: + - `filename`: ``"filename"`` of the file to get + + .. versionadded:: 1.6 + """ + self.__files.ensure_index([("filename", ASCENDING), + ("uploadDate", DESCENDING)]) + + cursor = self.__files.find({"filename": filename}) + cursor.limit(-1).sort("uploadDate", DESCENDING) + try: + grid_file = cursor.next() + return GridOut(self.__collection, grid_file["_id"]) + except StopIteration: + raise NoFile("no file in gridfs with filename %r" % filename) + + # TODO add optional safe mode for chunk removal? + def delete(self, file_id): + """Delete a file from GridFS by ``"_id"``. + + Removes all data belonging to the file with ``"_id"``: + `file_id`. + + .. warning:: Any processes/threads reading from the file while + this method is executing will likely see an invalid/corrupt + file. Care should be taken to avoid concurrent reads to a file + while it is being deleted. + + :Parameters: + - `file_id`: ``"_id"`` of the file to delete + + .. versionadded:: 1.6 + """ + self.__files.remove({"_id": file_id}, safe=True) + self.__chunks.remove({"files_id": file_id}) + + def list(self): + """List the names of all files stored in this instance of + :class:`GridFS`. + + .. versionchanged:: 1.6 + Removed the `collection` argument. 
+ """ + return self.__files.distinct("filename") + + def exists(self, document_or_id=None, **kwargs): + """Check if a file exists in this instance of :class:`GridFS`. + + The file to check for can be specified by the value of it's + ``_id`` key, or by passing in a query document. A query + document can be passed in as dictionary, or by using keyword + arguments. Thus, the following three calls are equivalent: + + >>> fs.exists(file_id) + >>> fs.exists({"_id": file_id}) + >>> fs.exists(_id=file_id) + + As are the following two calls: + + >>> fs.exists({"filename": "mike.txt"}) + >>> fs.exists(filename="mike.txt") + + And the following two: + + >>> fs.exists({"foo": {"$gt": 12}}) + >>> fs.exists(foo={"$gt": 12}) + + Returns ``True`` if a matching file exists, ``False`` + otherwise. Calls to :meth:`exists` will not automatically + create appropriate indexes; application developers should be + sure to create indexes if needed and as appropriate. + + :Parameters: + - `document_or_id` (optional): query document, or _id of the + document to check for + - `**kwargs` (optional): keyword arguments are used as a + query document, if they're present. + + .. versionadded:: 1.8 + """ + if kwargs: + return self.__files.find_one(kwargs) is not None + return self.__files.find_one(document_or_id) is not None + + def open(self, *args, **kwargs): + """No longer supported. + + .. versionchanged:: 1.6 + The open method is no longer supported. + """ + raise UnsupportedAPI("The open method is no longer supported.") + + def remove(self, *args, **kwargs): + """No longer supported. + + .. versionchanged:: 1.6 + The remove method is no longer supported. + """ + raise UnsupportedAPI("The remove method is no longer supported. " + "Please use the delete method instead.") diff --git a/lib/python/gridfs/errors.py b/lib/python/gridfs/errors.py new file mode 100644 index 0000000..8c787ba --- /dev/null +++ b/lib/python/gridfs/errors.py @@ -0,0 +1,56 @@ +# Copyright 2009-2010 10gen, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Exceptions raised by the :mod:`gridfs` package""" + +from pymongo.errors import PyMongoError + + +class GridFSError(PyMongoError): + """Base class for all GridFS exceptions. + + .. versionadded:: 1.5 + """ + + +class CorruptGridFile(GridFSError): + """Raised when a file in :class:`~gridfs.GridFS` is malformed. + """ + + +class NoFile(GridFSError): + """Raised when trying to read from a non-existent file. + + .. versionadded:: 1.6 + """ + + +class FileExists(GridFSError): + """Raised when trying to create a file that already exists. + + .. versionadded:: 1.7 + """ + + +class UnsupportedAPI(GridFSError): + """Raised when trying to use the old GridFS API. + + In version 1.6 of the PyMongo distribution there were backwards + incompatible changes to the GridFS API. Upgrading shouldn't be + difficult, but the old API is no longer supported (with no + deprecation period). This exception will be raised when attempting + to use unsupported constructs from the old API. + + .. 
versionadded:: 1.6 + """ diff --git a/lib/python/gridfs/grid_file.py b/lib/python/gridfs/grid_file.py new file mode 100644 index 0000000..f2a7649 --- /dev/null +++ b/lib/python/gridfs/grid_file.py @@ -0,0 +1,457 @@ +# Copyright 2009-2010 10gen, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tools for representing files stored in GridFS.""" + +import datetime +import math +import os +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +from gridfs.errors import (CorruptGridFile, + FileExists, + NoFile, + UnsupportedAPI) +from pymongo import ASCENDING +from pymongo.binary import Binary +from pymongo.collection import Collection +from pymongo.errors import DuplicateKeyError +from pymongo.objectid import ObjectId + +try: + _SEEK_SET = os.SEEK_SET + _SEEK_CUR = os.SEEK_CUR + _SEEK_END = os.SEEK_END +# before 2.5 +except AttributeError: + _SEEK_SET = 0 + _SEEK_CUR = 1 + _SEEK_END = 2 + + +"""Default chunk size, in bytes.""" +DEFAULT_CHUNK_SIZE = 256 * 1024 + + +def _create_property(field_name, docstring, + read_only=False, closed_only=False): + """Helper for creating properties to read/write to files. + """ + def getter(self): + if closed_only and not self._closed: + raise AttributeError("can only get %r on a closed file" % + field_name) + return self._file.get(field_name, None) + + def setter(self, value): + if self._closed: + raise AttributeError("cannot set %r on a closed file" % + field_name) + self._file[field_name] = value + + if read_only: + docstring = docstring + "\n\nThis attribute is read-only." + elif not closed_only: + docstring = "%s\n\n%s" % (docstring, "This attribute can only be " + "set before :meth:`close` has been called.") + else: + docstring = "%s\n\n%s" % (docstring, "This attribute is read-only and " + "can only be read after :meth:`close` " + "has been called.") + + if not read_only and not closed_only: + return property(getter, setter, doc=docstring) + return property(getter, doc=docstring) + + +class GridIn(object): + """Class to write data to GridFS. + """ + def __init__(self, root_collection, **kwargs): + """Write a file to GridFS + + Application developers should generally not need to + instantiate this class directly - instead see the methods + provided by :class:`~gridfs.GridFS`. + + Raises :class:`TypeError` if `root_collection` is not an + instance of :class:`~pymongo.collection.Collection`. + + Any of the file level options specified in the `GridFS Spec + `_ may be passed as + keyword arguments. Any additional keyword arguments will be + set as additional fields on the file document. 
Valid keyword + arguments include: + + - ``"_id"``: unique ID for this file (default: + :class:`~pymongo.objectid.ObjectId`) - this ``"_id"`` must + not have already been used for another file + + - ``"filename"``: human name for the file + + - ``"contentType"`` or ``"content_type"``: valid mime-type + for the file + + - ``"chunkSize"`` or ``"chunk_size"``: size of each of the + chunks, in bytes (default: 256 kb) + + :Parameters: + - `root_collection`: root collection to write to + - `**kwargs` (optional): file level options (see above) + """ + if not isinstance(root_collection, Collection): + raise TypeError("root_collection must be an " + "instance of Collection") + + # Handle alternative naming + if "content_type" in kwargs: + kwargs["contentType"] = kwargs.pop("content_type") + if "chunk_size" in kwargs: + kwargs["chunkSize"] = kwargs.pop("chunk_size") + + # Defaults + kwargs["_id"] = kwargs.get("_id", ObjectId()) + kwargs["chunkSize"] = kwargs.get("chunkSize", DEFAULT_CHUNK_SIZE) + + root_collection.chunks.ensure_index([("files_id", ASCENDING), + ("n", ASCENDING)], + unique=True) + object.__setattr__(self, "_coll", root_collection) + object.__setattr__(self, "_chunks", root_collection.chunks) + object.__setattr__(self, "_file", kwargs) + object.__setattr__(self, "_buffer", StringIO()) + object.__setattr__(self, "_position", 0) + object.__setattr__(self, "_chunk_number", 0) + object.__setattr__(self, "_closed", False) + + @property + def closed(self): + """Is this file closed? + """ + return self._closed + + _id = _create_property("_id", "The ``'_id'`` value for this file.", + read_only=True) + filename = _create_property("filename", "Name of this file.") + content_type = _create_property("contentType", "Mime-type for this file.") + length = _create_property("length", "Length (in bytes) of this file.", + closed_only=True) + chunk_size = _create_property("chunkSize", "Chunk size for this file.", + read_only=True) + upload_date = _create_property("uploadDate", + "Date that this file was uploaded.", + closed_only=True) + md5 = _create_property("md5", "MD5 of the contents of this file " + "(generated on the server).", + closed_only=True) + + def __getattr__(self, name): + if name in self._file: + return self._file[name] + raise AttributeError("GridIn object has no attribute '%s'" % name) + + def __setattr__(self, name, value): + if self._closed: + raise AttributeError("cannot set %r on a closed file" % name) + object.__setattr__(self, name, value) + + def __flush_data(self, data): + """Flush `data` to a chunk. + """ + if not data: + return + assert(len(data) <= self.chunk_size) + + chunk = {"files_id": self._file["_id"], + "n": self._chunk_number, + "data": Binary(data)} + + self._chunks.insert(chunk) + self._chunk_number += 1 + self._position += len(data) + + def __flush_buffer(self): + """Flush the buffer contents out to a chunk. + """ + self.__flush_data(self._buffer.getvalue()) + self._buffer.close() + self._buffer = StringIO() + + def __flush(self): + """Flush the file to the database. + """ + self.__flush_buffer() + + md5 = self._coll.database.command("filemd5", self._id, + root=self._coll.name)["md5"] + + self._file["md5"] = md5 + self._file["length"] = self._position + self._file["uploadDate"] = datetime.datetime.utcnow() + + try: + return self._coll.files.insert(self._file, safe=True) + except DuplicateKeyError: + raise FileExists("file with _id %r already exists" % self._id) + + def close(self): + """Flush the file and close it. + + A closed file cannot be written any more. 
Calling
+        :meth:`close` more than once is allowed.
+        """
+        if not self._closed:
+            self.__flush()
+            self._closed = True
+
+    # TODO should support writing unicode to a file. this means that files will
+    # need to have an encoding attribute.
+    def write(self, data):
+        """Write data to the file. There is no return value.
+
+        `data` can be either a string of bytes or a file-like object
+        (implementing :meth:`read`).
+
+        Due to buffering, the data may not actually be written to the
+        database until the :meth:`close` method is called. Raises
+        :class:`ValueError` if this file is already closed. Raises
+        :class:`TypeError` if `data` is not an instance of
+        :class:`str` or a file-like object.
+
+        :Parameters:
+          - `data`: string of bytes or file-like object to be written
+            to the file
+        """
+        if self._closed:
+            raise ValueError("cannot write to a closed file")
+
+        # file-like
+        try:
+            if self._buffer.tell() > 0:
+                space = self.chunk_size - self._buffer.tell()
+                self._buffer.write(data.read(space))
+                self.__flush_buffer()
+            to_write = data.read(self.chunk_size)
+            while to_write and len(to_write) == self.chunk_size:
+                self.__flush_data(to_write)
+                to_write = data.read(self.chunk_size)
+            self._buffer.write(to_write)
+        # string
+        except AttributeError:
+            if not isinstance(data, str):
+                raise TypeError("can only write strings or file-like objects")
+
+            while data:
+                space = self.chunk_size - self._buffer.tell()
+
+                if len(data) <= space:
+                    self._buffer.write(data)
+                    break
+                else:
+                    self._buffer.write(data[:space])
+                    self.__flush_buffer()
+                    data = data[space:]
+
+    def writelines(self, sequence):
+        """Write a sequence of strings to the file.
+
+        Does not add separators.
+        """
+        for line in sequence:
+            self.write(line)
+
+    def __enter__(self):
+        """Support for the context manager protocol.
+        """
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Support for the context manager protocol.
+
+        Close the file and allow exceptions to propagate.
+        """
+        self.close()
+
+        # propagate exceptions
+        return False
+
+
+class GridOut(object):
+    """Class to read data out of GridFS.
+    """
+    def __init__(self, root_collection, file_id):
+        """Read a file from GridFS
+
+        Application developers should generally not need to
+        instantiate this class directly - instead see the methods
+        provided by :class:`~gridfs.GridFS`.
+
+        Raises :class:`TypeError` if `root_collection` is not an instance of
+        :class:`~pymongo.collection.Collection`.
+ + :Parameters: + - `root_collection`: root collection to read from + - `file_id`: value of ``"_id"`` for the file to read + """ + if not isinstance(root_collection, Collection): + raise TypeError("root_collection must be an " + "instance of Collection") + + self.__chunks = root_collection.chunks + self._file = root_collection.files.find_one({"_id": file_id}) + + if not self._file: + raise NoFile("no file in gridfs collection %r with _id %r" % + (root_collection, file_id)) + + self.__buffer = "" + self.__position = 0 + + _id = _create_property("_id", "The ``'_id'`` value for this file.", True) + name = _create_property("filename", "Name of this file.", True) + content_type = _create_property("contentType", "Mime-type for this file.", + True) + length = _create_property("length", "Length (in bytes) of this file.", + True) + chunk_size = _create_property("chunkSize", "Chunk size for this file.", + True) + upload_date = _create_property("uploadDate", + "Date that this file was first uploaded.", + True) + aliases = _create_property("aliases", "List of aliases for this file.", + True) + metadata = _create_property("metadata", "Metadata attached to this file.", + True) + md5 = _create_property("md5", "MD5 of the contents of this file " + "(generated on the server).", True) + + def __getattr__(self, name): + if name in self._file: + return self._file[name] + raise AttributeError("GridIn object has no attribute '%s'" % name) + + def read(self, size=-1): + """Read at most `size` bytes from the file (less if there + isn't enough data). + + The bytes are returned as an instance of :class:`str`. If + `size` is negative or omitted all data is read. + + :Parameters: + - `size` (optional): the number of bytes to read + """ + if size == 0: + return "" + + remainder = int(self.length) - self.__position + if size < 0 or size > remainder: + size = remainder + + data = self.__buffer + chunk_number = (len(data) + self.__position) / self.chunk_size + + while len(data) < size: + chunk = self.__chunks.find_one({"files_id": self._id, + "n": chunk_number}) + if not chunk: + raise CorruptGridFile("no chunk #%d" % chunk_number) + + if not data: + data += chunk["data"][self.__position % self.chunk_size:] + else: + data += chunk["data"] + + chunk_number += 1 + + self.__position += size + to_return = data[:size] + self.__buffer = data[size:] + return to_return + + def tell(self): + """Return the current position of this file. + """ + return self.__position + + def seek(self, pos, whence=_SEEK_SET): + """Set the current position of this file. + + :Parameters: + - `pos`: the position (or offset if using relative + positioning) to seek to + - `whence` (optional): where to seek + from. :attr:`os.SEEK_SET` (``0``) for absolute file + positioning, :attr:`os.SEEK_CUR` (``1``) to seek relative + to the current position, :attr:`os.SEEK_END` (``2``) to + seek relative to the file's end. + """ + if whence == _SEEK_SET: + new_pos = pos + elif whence == _SEEK_CUR: + new_pos = self.__position + pos + elif whence == _SEEK_END: + new_pos = int(self.length) + pos + else: + raise IOError(22, "Invalid value for `whence`") + + if new_pos < 0: + raise IOError(22, "Invalid value for `pos` - must be positive") + + self.__position = new_pos + self.__buffer = "" + + def __iter__(self): + """Return an iterator over all of this file's data. + + The iterator will return chunk-sized instances of + :class:`str`. This can be useful when serving files using a + webserver that handles such an iterator efficiently. 
+ """ + return GridOutIterator(self, self.__chunks) + + +class GridOutIterator(object): + def __init__(self, grid_out, chunks): + self.__id = grid_out._id + self.__chunks = chunks + self.__current_chunk = 0 + self.__max_chunk = math.ceil(float(grid_out.length) / + grid_out.chunk_size) + + def __iter__(self): + return self + + def next(self): + if self.__current_chunk >= self.__max_chunk: + raise StopIteration + chunk = self.__chunks.find_one({"files_id": self.__id, + "n": self.__current_chunk}) + if not chunk: + raise CorruptGridFile("no chunk #%d" % self.__current_chunk) + self.__current_chunk += 1 + return str(chunk["data"]) + + +class GridFile(object): + """No longer supported. + + .. versionchanged:: 1.6 + The GridFile class is no longer supported. + """ + def __init__(self, *args, **kwargs): + raise UnsupportedAPI("The GridFile class is no longer supported. " + "Please use GridIn or GridOut instead.") diff --git a/lib/python/mongoengine/__init__.py b/lib/python/mongoengine/__init__.py new file mode 100644 index 0000000..e01d31a --- /dev/null +++ b/lib/python/mongoengine/__init__.py @@ -0,0 +1,24 @@ +import document +from document import * +import fields +from fields import * +import connection +from connection import * +import queryset +from queryset import * + +__all__ = (document.__all__ + fields.__all__ + connection.__all__ + + queryset.__all__) + +__author__ = 'Harry Marr' + +VERSION = (0, 3, 0) + +def get_version(): + version = '%s.%s' % (VERSION[0], VERSION[1]) + if VERSION[2]: + version = '%s.%s' % (version, VERSION[2]) + return version + +__version__ = get_version() + diff --git a/lib/python/mongoengine/base.py b/lib/python/mongoengine/base.py new file mode 100644 index 0000000..0a90446 --- /dev/null +++ b/lib/python/mongoengine/base.py @@ -0,0 +1,419 @@ +from queryset import QuerySet, QuerySetManager + +import pymongo + + +_document_registry = {} + +def get_document(name): + return _document_registry[name] + + +class ValidationError(Exception): + pass + + +class BaseField(object): + """A base class for fields in a MongoDB document. Instances of this class + may be added to subclasses of `Document` to define a document's schema. + """ + + # Fields may have _types inserted into indexes by default + _index_with_types = True + + def __init__(self, db_field=None, name=None, required=False, default=None, + unique=False, unique_with=None, primary_key=False): + self.db_field = (db_field or name) if not primary_key else '_id' + if name: + import warnings + msg = "Fields' 'name' attribute deprecated in favour of 'db_field'" + warnings.warn(msg, DeprecationWarning) + self.name = None + self.required = required or primary_key + self.default = default + self.unique = bool(unique or unique_with) + self.unique_with = unique_with + self.primary_key = primary_key + + def __get__(self, instance, owner): + """Descriptor for retrieving a value from a field in a document. Do + any necessary conversion between Python and MongoDB types. + """ + if instance is None: + # Document class being used rather than a document object + return self + + # Get value from document instance if available, if not use default + value = instance._data.get(self.name) + if value is None: + value = self.default + # Allow callable default values + if callable(value): + value = value() + return value + + def __set__(self, instance, value): + """Descriptor for assigning a value to a field in a document. 
+ """ + instance._data[self.name] = value + + def to_python(self, value): + """Convert a MongoDB-compatible type to a Python type. + """ + return value + + def to_mongo(self, value): + """Convert a Python type to a MongoDB-compatible type. + """ + return self.to_python(value) + + def prepare_query_value(self, op, value): + """Prepare a value that is being used in a query for PyMongo. + """ + return value + + def validate(self, value): + """Perform validation on a value. + """ + pass + + +class ObjectIdField(BaseField): + """An field wrapper around MongoDB's ObjectIds. + """ + + def to_python(self, value): + return value + # return unicode(value) + + def to_mongo(self, value): + if not isinstance(value, pymongo.objectid.ObjectId): + try: + return pymongo.objectid.ObjectId(str(value)) + except Exception, e: + #e.message attribute has been deprecated since Python 2.6 + raise ValidationError(str(e)) + return value + + def prepare_query_value(self, op, value): + return self.to_mongo(value) + + def validate(self, value): + try: + pymongo.objectid.ObjectId(str(value)) + except: + raise ValidationError('Invalid Object ID') + + +class DocumentMetaclass(type): + """Metaclass for all documents. + """ + + def __new__(cls, name, bases, attrs): + metaclass = attrs.get('__metaclass__') + super_new = super(DocumentMetaclass, cls).__new__ + if metaclass and issubclass(metaclass, DocumentMetaclass): + return super_new(cls, name, bases, attrs) + + doc_fields = {} + class_name = [name] + superclasses = {} + simple_class = True + for base in bases: + # Include all fields present in superclasses + if hasattr(base, '_fields'): + doc_fields.update(base._fields) + class_name.append(base._class_name) + # Get superclasses from superclass + superclasses[base._class_name] = base + superclasses.update(base._superclasses) + + if hasattr(base, '_meta'): + # Ensure that the Document class may be subclassed - + # inheritance may be disabled to remove dependency on + # additional fields _cls and _types + if base._meta.get('allow_inheritance', True) == False: + raise ValueError('Document %s may not be subclassed' % + base.__name__) + else: + simple_class = False + + meta = attrs.get('_meta', attrs.get('meta', {})) + + if 'allow_inheritance' not in meta: + meta['allow_inheritance'] = True + + # Only simple classes - direct subclasses of Document - may set + # allow_inheritance to False + if not simple_class and not meta['allow_inheritance']: + raise ValueError('Only direct subclasses of Document may set ' + '"allow_inheritance" to False') + attrs['_meta'] = meta + + attrs['_class_name'] = '.'.join(reversed(class_name)) + attrs['_superclasses'] = superclasses + + # Add the document's fields to the _fields attribute + for attr_name, attr_value in attrs.items(): + if hasattr(attr_value, "__class__") and \ + issubclass(attr_value.__class__, BaseField): + attr_value.name = attr_name + if not attr_value.db_field: + attr_value.db_field = attr_name + doc_fields[attr_name] = attr_value + attrs['_fields'] = doc_fields + + new_class = super_new(cls, name, bases, attrs) + for field in new_class._fields.values(): + field.owner_document = new_class + + return new_class + + +class TopLevelDocumentMetaclass(DocumentMetaclass): + """Metaclass for top-level documents (i.e. documents that have their own + collection in the database. 
+ """ + + def __new__(cls, name, bases, attrs): + global _document_registry + + super_new = super(TopLevelDocumentMetaclass, cls).__new__ + # Classes defined in this package are abstract and should not have + # their own metadata with DB collection, etc. + # __metaclass__ is only set on the class with the __metaclass__ + # attribute (i.e. it is not set on subclasses). This differentiates + # 'real' documents from the 'Document' class + if attrs.get('__metaclass__') == TopLevelDocumentMetaclass: + return super_new(cls, name, bases, attrs) + + collection = name.lower() + + id_field = None + base_indexes = [] + + # Subclassed documents inherit collection from superclass + for base in bases: + if hasattr(base, '_meta') and 'collection' in base._meta: + collection = base._meta['collection'] + + id_field = id_field or base._meta.get('id_field') + base_indexes += base._meta.get('indexes', []) + + meta = { + 'collection': collection, + 'max_documents': None, + 'max_size': None, + 'ordering': [], # default ordering applied at runtime + 'indexes': [], # indexes to be ensured at runtime + 'id_field': id_field, + } + + # Apply document-defined meta options + meta.update(attrs.get('meta', {})) + attrs['_meta'] = meta + + # Set up collection manager, needs the class to have fields so use + # DocumentMetaclass before instantiating CollectionManager object + new_class = super_new(cls, name, bases, attrs) + new_class.objects = QuerySetManager() + + user_indexes = [QuerySet._build_index_spec(new_class, spec) + for spec in meta['indexes']] + base_indexes + new_class._meta['indexes'] = user_indexes + + unique_indexes = [] + for field_name, field in new_class._fields.items(): + # Generate a list of indexes needed by uniqueness constraints + if field.unique: + field.required = True + unique_fields = [field_name] + + # Add any unique_with fields to the back of the index spec + if field.unique_with: + if isinstance(field.unique_with, basestring): + field.unique_with = [field.unique_with] + + # Convert unique_with field names to real field names + unique_with = [] + for other_name in field.unique_with: + parts = other_name.split('.') + # Lookup real name + parts = QuerySet._lookup_field(new_class, parts) + name_parts = [part.db_field for part in parts] + unique_with.append('.'.join(name_parts)) + # Unique field should be required + parts[-1].required = True + unique_fields += unique_with + + # Add the new index to the list + index = [(f, pymongo.ASCENDING) for f in unique_fields] + unique_indexes.append(index) + + # Check for custom primary key + if field.primary_key: + if not new_class._meta['id_field']: + new_class._meta['id_field'] = field_name + # Make 'Document.id' an alias to the real primary key field + new_class.id = field + #new_class._fields['id'] = field + else: + raise ValueError('Cannot override primary key field') + + new_class._meta['unique_indexes'] = unique_indexes + + if not new_class._meta['id_field']: + new_class._meta['id_field'] = 'id' + new_class._fields['id'] = ObjectIdField(db_field='_id') + new_class.id = new_class._fields['id'] + + _document_registry[name] = new_class + + return new_class + + +class BaseDocument(object): + + def __init__(self, **values): + self._data = {} + # Assign initial values to instance + for attr_name, attr_value in self._fields.items(): + if attr_name in values: + setattr(self, attr_name, values.pop(attr_name)) + else: + # Use default value if present + value = getattr(self, attr_name, None) + setattr(self, attr_name, value) + + def validate(self): + """Ensure 
that all fields' values are valid and that required fields + are present. + """ + # Get a list of tuples of field names and their current values + fields = [(field, getattr(self, name)) + for name, field in self._fields.items()] + + # Ensure that each field is matched to a valid value + for field, value in fields: + if value is not None: + try: + field.validate(value) + except (ValueError, AttributeError, AssertionError), e: + raise ValidationError('Invalid value for field of type "' + + field.__class__.__name__ + '"') + elif field.required: + raise ValidationError('Field "%s" is required' % field.name) + + @classmethod + def _get_subclasses(cls): + """Return a dictionary of all subclasses (found recursively). + """ + try: + subclasses = cls.__subclasses__() + except: + subclasses = cls.__subclasses__(cls) + + all_subclasses = {} + for subclass in subclasses: + all_subclasses[subclass._class_name] = subclass + all_subclasses.update(subclass._get_subclasses()) + return all_subclasses + + def __iter__(self): + return iter(self._fields) + + def __getitem__(self, name): + """Dictionary-style field access, return a field's value if present. + """ + try: + if name in self._fields: + return getattr(self, name) + except AttributeError: + pass + raise KeyError(name) + + def __setitem__(self, name, value): + """Dictionary-style field access, set a field's value. + """ + # Ensure that the field exists before settings its value + if name not in self._fields: + raise KeyError(name) + return setattr(self, name, value) + + def __contains__(self, name): + try: + val = getattr(self, name) + return val is not None + except AttributeError: + return False + + def __len__(self): + return len(self._data) + + def __repr__(self): + try: + u = unicode(self) + except (UnicodeEncodeError, UnicodeDecodeError): + u = '[Bad Unicode data]' + return u'<%s: %s>' % (self.__class__.__name__, u) + + def __str__(self): + if hasattr(self, '__unicode__'): + return unicode(self).encode('utf-8') + return '%s object' % self.__class__.__name__ + + def to_mongo(self): + """Return data dictionary ready for use with MongoDB. + """ + data = {} + for field_name, field in self._fields.items(): + value = getattr(self, field_name, None) + if value is not None: + data[field.db_field] = field.to_mongo(value) + # Only add _cls and _types if allow_inheritance is not False + if not (hasattr(self, '_meta') and + self._meta.get('allow_inheritance', True) == False): + data['_cls'] = self._class_name + data['_types'] = self._superclasses.keys() + [self._class_name] + return data + + @classmethod + def _from_son(cls, son): + """Create an instance of a Document (subclass) from a PyMongo SON. 
+ """ + # get the class name from the document, falling back to the given + # class if unavailable + class_name = son.get(u'_cls', cls._class_name) + + data = dict((str(key), value) for key, value in son.items()) + + if '_types' in data: + del data['_types'] + + if '_cls' in data: + del data['_cls'] + + # Return correct subclass for document type + if class_name != cls._class_name: + subclasses = cls._get_subclasses() + if class_name not in subclasses: + # Type of document is probably more generic than the class + # that has been queried to return this SON + return None + cls = subclasses[class_name] + + present_fields = data.keys() + + for field_name, field in cls._fields.items(): + if field.db_field in data: + data[field_name] = field.to_python(data[field.db_field]) + + obj = cls(**data) + obj._present_fields = present_fields + return obj + + def __eq__(self, other): + if isinstance(other, self.__class__) and hasattr(other, 'id'): + if self.id == other.id: + return True + return False diff --git a/lib/python/mongoengine/connection.py b/lib/python/mongoengine/connection.py new file mode 100644 index 0000000..ec3bf78 --- /dev/null +++ b/lib/python/mongoengine/connection.py @@ -0,0 +1,62 @@ +from pymongo import Connection + + +__all__ = ['ConnectionError', 'connect'] + + +_connection_settings = { + 'host': 'localhost', + 'port': 27017, +} +_connection = None + +_db_name = None +_db_username = None +_db_password = None +_db = None + + +class ConnectionError(Exception): + pass + + +def _get_connection(): + global _connection + # Connect to the database if not already connected + if _connection is None: + try: + _connection = Connection(**_connection_settings) + except: + raise ConnectionError('Cannot connect to the database') + return _connection + +def _get_db(): + global _db, _connection + # Connect if not already connected + if _connection is None: + _connection = _get_connection() + + if _db is None: + # _db_name will be None if the user hasn't called connect() + if _db_name is None: + raise ConnectionError('Not connected to the database') + + # Get DB from current connection and authenticate if necessary + _db = _connection[_db_name] + if _db_username and _db_password: + _db.authenticate(_db_username, _db_password) + + return _db + +def connect(db, username=None, password=None, **kwargs): + """Connect to the database specified by the 'db' argument. Connection + settings may be provided here as well if the database is not running on + the default port on localhost. If authentication is needed, provide + username and password arguments as well. 
+ """ + global _connection_settings, _db_name, _db_username, _db_password + _connection_settings.update(kwargs) + _db_name = db + _db_username = username + _db_password = password + return _get_db() \ No newline at end of file diff --git a/lib/python/mongoengine/django/__init__.py b/lib/python/mongoengine/django/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/python/mongoengine/django/auth.py b/lib/python/mongoengine/django/auth.py new file mode 100644 index 0000000..d4b0ff0 --- /dev/null +++ b/lib/python/mongoengine/django/auth.py @@ -0,0 +1,118 @@ +from mongoengine import * + +from django.utils.hashcompat import md5_constructor, sha_constructor +from django.utils.encoding import smart_str +from django.contrib.auth.models import AnonymousUser + +import datetime + +REDIRECT_FIELD_NAME = 'next' + +def get_hexdigest(algorithm, salt, raw_password): + raw_password, salt = smart_str(raw_password), smart_str(salt) + if algorithm == 'md5': + return md5_constructor(salt + raw_password).hexdigest() + elif algorithm == 'sha1': + return sha_constructor(salt + raw_password).hexdigest() + raise ValueError('Got unknown password algorithm type in password') + + +class User(Document): + """A User document that aims to mirror most of the API specified by Django + at http://docs.djangoproject.com/en/dev/topics/auth/#users + """ + username = StringField(max_length=30, required=True) + first_name = StringField(max_length=30) + last_name = StringField(max_length=30) + email = StringField() + password = StringField(max_length=128) + is_staff = BooleanField(default=False) + is_active = BooleanField(default=True) + is_superuser = BooleanField(default=False) + last_login = DateTimeField(default=datetime.datetime.now) + date_joined = DateTimeField(default=datetime.datetime.now) + + def get_full_name(self): + """Returns the users first and last names, separated by a space. + """ + full_name = u'%s %s' % (self.first_name or '', self.last_name or '') + return full_name.strip() + + def is_anonymous(self): + return False + + def is_authenticated(self): + return True + + def set_password(self, raw_password): + """Sets the user's password - always use this rather than directly + assigning to :attr:`~mongoengine.django.auth.User.password` as the + password is hashed before storage. + """ + from random import random + algo = 'sha1' + salt = get_hexdigest(algo, str(random()), str(random()))[:5] + hash = get_hexdigest(algo, salt, raw_password) + self.password = '%s$%s$%s' % (algo, salt, hash) + self.save() + return self + + def check_password(self, raw_password): + """Checks the user's password against a provided password - always use + this rather than directly comparing to + :attr:`~mongoengine.django.auth.User.password` as the password is + hashed before storage. + """ + algo, salt, hash = self.password.split('$') + return hash == get_hexdigest(algo, salt, raw_password) + + @classmethod + def create_user(cls, username, password, email=None): + """Create (and save) a new user with the given username, password and + email address. + """ + now = datetime.datetime.now() + + # Normalize the address by lowercasing the domain part of the email + # address. 
+ # Not sure why we'r allowing null email when its not allowed in django + if email is not None: + try: + email_name, domain_part = email.strip().split('@', 1) + except ValueError: + pass + else: + email = '@'.join([email_name, domain_part.lower()]) + + user = User(username=username, email=email, date_joined=now) + user.set_password(password) + user.save() + return user + + def get_and_delete_messages(self): + return [] + + +class MongoEngineBackend(object): + """Authenticate using MongoEngine and mongoengine.django.auth.User. + """ + + def authenticate(self, username=None, password=None): + user = User.objects(username=username).first() + if user: + if password and user.check_password(password): + return user + return None + + def get_user(self, user_id): + return User.objects.with_id(user_id) + + +def get_user(userid): + """Returns a User object from an id (User.id). Django's equivalent takes + request, but taking an id instead leaves it up to the developer to store + the id in any way they want (session, signed cookie, etc.) + """ + if not userid: + return AnonymousUser() + return MongoEngineBackend().get_user(userid) or AnonymousUser() diff --git a/lib/python/mongoengine/django/sessions.py b/lib/python/mongoengine/django/sessions.py new file mode 100644 index 0000000..7405c85 --- /dev/null +++ b/lib/python/mongoengine/django/sessions.py @@ -0,0 +1,63 @@ +from django.contrib.sessions.backends.base import SessionBase, CreateError +from django.core.exceptions import SuspiciousOperation +from django.utils.encoding import force_unicode + +from mongoengine.document import Document +from mongoengine import fields +from mongoengine.queryset import OperationError + +from datetime import datetime + + +class MongoSession(Document): + session_key = fields.StringField(primary_key=True, max_length=40) + session_data = fields.StringField() + expire_date = fields.DateTimeField() + + meta = {'collection': 'django_session', 'allow_inheritance': False} + + +class SessionStore(SessionBase): + """A MongoEngine-based session store for Django. 
+ """ + + def load(self): + try: + s = MongoSession.objects(session_key=self.session_key, + expire_date__gt=datetime.now())[0] + return self.decode(force_unicode(s.session_data)) + except (IndexError, SuspiciousOperation): + self.create() + return {} + + def exists(self, session_key): + return bool(MongoSession.objects(session_key=session_key).first()) + + def create(self): + while True: + self.session_key = self._get_new_session_key() + try: + self.save(must_create=True) + except CreateError: + continue + self.modified = True + self._session_cache = {} + return + + def save(self, must_create=False): + s = MongoSession(session_key=self.session_key) + s.session_data = self.encode(self._get_session(no_load=must_create)) + s.expire_date = self.get_expiry_date() + try: + s.save(force_insert=must_create, safe=True) + except OperationError: + if must_create: + raise CreateError + raise + + def delete(self, session_key=None): + if session_key is None: + if self.session_key is None: + return + session_key = self.session_key + MongoSession.objects(session_key=session_key).delete() diff --git a/lib/python/mongoengine/document.py b/lib/python/mongoengine/document.py new file mode 100644 index 0000000..e5dec14 --- /dev/null +++ b/lib/python/mongoengine/document.py @@ -0,0 +1,157 @@ +from base import (DocumentMetaclass, TopLevelDocumentMetaclass, BaseDocument, + ValidationError) +from queryset import OperationError +from connection import _get_db + +import pymongo + + +__all__ = ['Document', 'EmbeddedDocument', 'ValidationError', 'OperationError'] + + +class EmbeddedDocument(BaseDocument): + """A :class:`~mongoengine.Document` that isn't stored in its own + collection. :class:`~mongoengine.EmbeddedDocument`\ s should be used as + fields on :class:`~mongoengine.Document`\ s through the + :class:`~mongoengine.EmbeddedDocumentField` field type. + """ + + __metaclass__ = DocumentMetaclass + + +class Document(BaseDocument): + """The base class used for defining the structure and properties of + collections of documents stored in MongoDB. Inherit from this class, and + add fields as class attributes to define a document's structure. + Individual documents may then be created by making instances of the + :class:`~mongoengine.Document` subclass. + + By default, the MongoDB collection used to store documents created using a + :class:`~mongoengine.Document` subclass will be the name of the subclass + converted to lowercase. A different collection may be specified by + providing :attr:`collection` to the :attr:`meta` dictionary in the class + definition. + + A :class:`~mongoengine.Document` subclass may be itself subclassed, to + create a specialised version of the document that will be stored in the + same collection. To facilitate this behaviour, `_cls` and `_types` + fields are added to documents (hidden though the MongoEngine interface + though). To disable this behaviour and remove the dependence on the + presence of `_cls` and `_types`, set :attr:`allow_inheritance` to + ``False`` in the :attr:`meta` dictionary. + + A :class:`~mongoengine.Document` may use a **Capped Collection** by + specifying :attr:`max_documents` and :attr:`max_size` in the :attr:`meta` + dictionary. :attr:`max_documents` is the maximum number of documents that + is allowed to be stored in the collection, and :attr:`max_size` is the + maximum size of the collection in bytes. If :attr:`max_size` is not + specified and :attr:`max_documents` is, :attr:`max_size` defaults to + 10000000 bytes (10MB). 
+ + Indexes may be created by specifying :attr:`indexes` in the :attr:`meta` + dictionary. The value should be a list of field names or tuples of field + names. Index direction may be specified by prefixing the field names with + a **+** or **-** sign. + """ + + __metaclass__ = TopLevelDocumentMetaclass + + def save(self, safe=True, force_insert=False): + """Save the :class:`~mongoengine.Document` to the database. If the + document already exists, it will be updated, otherwise it will be + created. + + If ``safe=True`` and the operation is unsuccessful, an + :class:`~mongoengine.OperationError` will be raised. + + :param safe: check if the operation succeeded before returning + :param force_insert: only try to create a new document, don't allow + updates of existing documents + """ + self.validate() + doc = self.to_mongo() + try: + collection = self.__class__.objects._collection + if force_insert: + object_id = collection.insert(doc, safe=safe) + else: + object_id = collection.save(doc, safe=safe) + except pymongo.errors.OperationFailure, err: + message = 'Could not save document (%s)' + if u'duplicate key' in unicode(err): + message = u'Tried to save duplicate unique keys (%s)' + raise OperationError(message % unicode(err)) + id_field = self._meta['id_field'] + self[id_field] = self._fields[id_field].to_python(object_id) + + def delete(self, safe=False): + """Delete the :class:`~mongoengine.Document` from the database. This + will only take effect if the document has been previously saved. + + :param safe: check if the operation succeeded before returning + """ + id_field = self._meta['id_field'] + object_id = self._fields[id_field].to_mongo(self[id_field]) + try: + self.__class__.objects(**{id_field: object_id}).delete(safe=safe) + except pymongo.errors.OperationFailure, err: + message = u'Could not delete document (%s)' % err.message + raise OperationError(message) + + def reload(self): + """Reloads all attributes from the database. + + .. versionadded:: 0.1.2 + """ + id_field = self._meta['id_field'] + obj = self.__class__.objects(**{id_field: self[id_field]}).first() + for field in self._fields: + setattr(self, field, obj[field]) + + @classmethod + def drop_collection(cls): + """Drops the entire collection associated with this + :class:`~mongoengine.Document` type from the database. + """ + db = _get_db() + db.drop_collection(cls._meta['collection']) + + +class MapReduceDocument(object): + """A document returned from a map/reduce query. + + :param collection: An instance of :class:`~pymongo.Collection` + :param key: Document/result key, often an instance of + :class:`~pymongo.objectid.ObjectId`. If supplied as + an ``ObjectId`` found in the given ``collection``, + the object can be accessed via the ``object`` property. + :param value: The result(s) for this key. + + .. versionadded:: 0.3 + """ + + def __init__(self, document, collection, key, value): + self._document = document + self._collection = collection + self.key = key + self.value = value + + @property + def object(self): + """Lazy-load the object referenced by ``self.key``. ``self.key`` + should be the ``primary_key``. 
+ """ + id_field = self._document()._meta['id_field'] + id_field_type = type(id_field) + + if not isinstance(self.key, id_field_type): + try: + self.key = id_field_type(self.key) + except: + raise Exception("Could not cast key as %s" % \ + id_field_type.__name__) + + if not hasattr(self, "_key_object"): + self._key_object = self._document.objects.with_id(self.key) + return self._key_object + return self._key_object diff --git a/lib/python/mongoengine/fields.py b/lib/python/mongoengine/fields.py new file mode 100644 index 0000000..9a9f4e0 --- /dev/null +++ b/lib/python/mongoengine/fields.py @@ -0,0 +1,464 @@ +from base import BaseField, ObjectIdField, ValidationError, get_document +from document import Document, EmbeddedDocument +from connection import _get_db + +import re +import pymongo +import datetime +import decimal + + +__all__ = ['StringField', 'IntField', 'FloatField', 'BooleanField', + 'DateTimeField', 'EmbeddedDocumentField', 'ListField', 'DictField', + 'ObjectIdField', 'ReferenceField', 'ValidationError', + 'DecimalField', 'URLField', 'GenericReferenceField', + 'BinaryField'] + +RECURSIVE_REFERENCE_CONSTANT = 'self' + + +class StringField(BaseField): + """A unicode string field. + """ + + def __init__(self, regex=None, max_length=None, **kwargs): + self.regex = re.compile(regex) if regex else None + self.max_length = max_length + super(StringField, self).__init__(**kwargs) + + def to_python(self, value): + return unicode(value) + + def validate(self, value): + assert isinstance(value, (str, unicode)) + + if self.max_length is not None and len(value) > self.max_length: + raise ValidationError('String value is too long') + + if self.regex is not None and self.regex.match(value) is None: + message = 'String value did not match validation regex' + raise ValidationError(message) + + def lookup_member(self, member_name): + return None + + def prepare_query_value(self, op, value): + if not isinstance(op, basestring): + return value + + if op.lstrip('i') in ('startswith', 'endswith', 'contains'): + flags = 0 + if op.startswith('i'): + flags = re.IGNORECASE + op = op.lstrip('i') + + regex = r'%s' + if op == 'startswith': + regex = r'^%s' + elif op == 'endswith': + regex = r'%s$' + value = re.compile(regex % value, flags) + return value + + +class URLField(StringField): + """A field that validates input as a URL. + + .. versionadded:: 0.3 + """ + + URL_REGEX = re.compile( + r'^https?://' + r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|' + r'localhost|' + r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' + r'(?::\d+)?' + r'(?:/?|[/?]\S+)$', re.IGNORECASE + ) + + def __init__(self, verify_exists=False, **kwargs): + self.verify_exists = verify_exists + super(URLField, self).__init__(**kwargs) + + def validate(self, value): + if not URLField.URL_REGEX.match(value): + raise ValidationError('Invalid URL: %s' % value) + + if self.verify_exists: + import urllib2 + try: + request = urllib2.Request(value) + response = urllib2.urlopen(request) + except Exception, e: + message = 'This URL appears to be a broken link: %s' % e + raise ValidationError(message) + + +class IntField(BaseField): + """An integer field. 
+ """ + + def __init__(self, min_value=None, max_value=None, **kwargs): + self.min_value, self.max_value = min_value, max_value + super(IntField, self).__init__(**kwargs) + + def to_python(self, value): + return int(value) + + def validate(self, value): + try: + value = int(value) + except: + raise ValidationError('%s could not be converted to int' % value) + + if self.min_value is not None and value < self.min_value: + raise ValidationError('Integer value is too small') + + if self.max_value is not None and value > self.max_value: + raise ValidationError('Integer value is too large') + + +class FloatField(BaseField): + """An floating point number field. + """ + + def __init__(self, min_value=None, max_value=None, **kwargs): + self.min_value, self.max_value = min_value, max_value + super(FloatField, self).__init__(**kwargs) + + def to_python(self, value): + return float(value) + + def validate(self, value): + if isinstance(value, int): + value = float(value) + assert isinstance(value, float) + + if self.min_value is not None and value < self.min_value: + raise ValidationError('Float value is too small') + + if self.max_value is not None and value > self.max_value: + raise ValidationError('Float value is too large') + + +class DecimalField(BaseField): + """A fixed-point decimal number field. + + .. versionadded:: 0.3 + """ + + def __init__(self, min_value=None, max_value=None, **kwargs): + self.min_value, self.max_value = min_value, max_value + super(DecimalField, self).__init__(**kwargs) + + def to_python(self, value): + if not isinstance(value, basestring): + value = unicode(value) + return decimal.Decimal(value) + + def validate(self, value): + if not isinstance(value, decimal.Decimal): + if not isinstance(value, basestring): + value = str(value) + try: + value = decimal.Decimal(value) + except Exception, exc: + raise ValidationError('Could not convert to decimal: %s' % exc) + + if self.min_value is not None and value < self.min_value: + raise ValidationError('Decimal value is too small') + + if self.max_value is not None and value > self.max_value: + raise ValidationError('Decimal value is too large') + + +class BooleanField(BaseField): + """A boolean field type. + + .. versionadded:: 0.1.2 + """ + + def to_python(self, value): + return bool(value) + + def validate(self, value): + assert isinstance(value, bool) + + +class DateTimeField(BaseField): + """A datetime field. + """ + + def validate(self, value): + assert isinstance(value, datetime.datetime) + + +class EmbeddedDocumentField(BaseField): + """An embedded document field. Only valid values are subclasses of + :class:`~mongoengine.EmbeddedDocument`. + """ + + def __init__(self, document, **kwargs): + if not issubclass(document, EmbeddedDocument): + raise ValidationError('Invalid embedded document class provided ' + 'to an EmbeddedDocumentField') + self.document = document + super(EmbeddedDocumentField, self).__init__(**kwargs) + + def to_python(self, value): + if not isinstance(value, self.document): + return self.document._from_son(value) + return value + + def to_mongo(self, value): + return self.document.to_mongo(value) + + def validate(self, value): + """Make sure that the document instance is an instance of the + EmbeddedDocument subclass provided when the document was defined. 
+ """ + # Using isinstance also works for subclasses of self.document + if not isinstance(value, self.document): + raise ValidationError('Invalid embedded document instance ' + 'provided to an EmbeddedDocumentField') + self.document.validate(value) + + def lookup_member(self, member_name): + return self.document._fields.get(member_name) + + def prepare_query_value(self, op, value): + return self.to_mongo(value) + + +class ListField(BaseField): + """A list field that wraps a standard field, allowing multiple instances + of the field to be used as a list in the database. + """ + + # ListFields cannot be indexed with _types - MongoDB doesn't support this + _index_with_types = False + + def __init__(self, field, **kwargs): + if not isinstance(field, BaseField): + raise ValidationError('Argument to ListField constructor must be ' + 'a valid field') + self.field = field + super(ListField, self).__init__(**kwargs) + + def __get__(self, instance, owner): + """Descriptor to automatically dereference references. + """ + if instance is None: + # Document class being used rather than a document object + return self + + if isinstance(self.field, ReferenceField): + referenced_type = self.field.document_type + # Get value from document instance if available + value_list = instance._data.get(self.name) + if value_list: + deref_list = [] + for value in value_list: + # Dereference DBRefs + if isinstance(value, (pymongo.dbref.DBRef)): + value = _get_db().dereference(value) + deref_list.append(referenced_type._from_son(value)) + else: + deref_list.append(value) + instance._data[self.name] = deref_list + + if isinstance(self.field, GenericReferenceField): + value_list = instance._data.get(self.name) + if value_list: + deref_list = [] + for value in value_list: + # Dereference DBRefs + if isinstance(value, (dict, pymongo.son.SON)): + deref_list.append(self.field.dereference(value)) + else: + deref_list.append(value) + instance._data[self.name] = deref_list + + return super(ListField, self).__get__(instance, owner) + + def to_python(self, value): + return [self.field.to_python(item) for item in value] + + def to_mongo(self, value): + return [self.field.to_mongo(item) for item in value] + + def validate(self, value): + """Make sure that a list of valid fields is being used. + """ + if not isinstance(value, (list, tuple)): + raise ValidationError('Only lists and tuples may be used in a ' + 'list field') + + try: + [self.field.validate(item) for item in value] + except Exception, err: + raise ValidationError('Invalid ListField item (%s)' % str(err)) + + def prepare_query_value(self, op, value): + if op in ('set', 'unset'): + return [self.field.to_mongo(v) for v in value] + return self.field.to_mongo(value) + + def lookup_member(self, member_name): + return self.field.lookup_member(member_name) + + +class DictField(BaseField): + """A dictionary field that wraps a standard Python dictionary. This is + similar to an embedded document, but the structure is not defined. + + .. versionadded:: 0.3 + """ + + def validate(self, value): + """Make sure that a list of valid fields is being used. + """ + if not isinstance(value, dict): + raise ValidationError('Only dictionaries may be used in a ' + 'DictField') + + if any(('.' in k or '$' in k) for k in value): + raise ValidationError('Invalid dictionary key name - keys may not ' + 'contain "." 
or "$" characters') + + def lookup_member(self, member_name): + return BaseField(db_field=member_name) + + +class ReferenceField(BaseField): + """A reference to a document that will be automatically dereferenced on + access (lazily). + """ + + def __init__(self, document_type, **kwargs): + if not isinstance(document_type, basestring): + if not issubclass(document_type, (Document, basestring)): + raise ValidationError('Argument to ReferenceField constructor ' + 'must be a document class or a string') + self.document_type_obj = document_type + self.document_obj = None + super(ReferenceField, self).__init__(**kwargs) + + @property + def document_type(self): + if isinstance(self.document_type_obj, basestring): + if self.document_type_obj == RECURSIVE_REFERENCE_CONSTANT: + self.document_type_obj = self.owner_document + else: + self.document_type_obj = get_document(self.document_type_obj) + return self.document_type_obj + + def __get__(self, instance, owner): + """Descriptor to allow lazy dereferencing. + """ + if instance is None: + # Document class being used rather than a document object + return self + + # Get value from document instance if available + value = instance._data.get(self.name) + # Dereference DBRefs + if isinstance(value, (pymongo.dbref.DBRef)): + value = _get_db().dereference(value) + if value is not None: + instance._data[self.name] = self.document_type._from_son(value) + + return super(ReferenceField, self).__get__(instance, owner) + + def to_mongo(self, document): + id_field_name = self.document_type._meta['id_field'] + id_field = self.document_type._fields[id_field_name] + + if isinstance(document, Document): + # We need the id from the saved object to create the DBRef + id_ = document.id + if id_ is None: + raise ValidationError('You can only reference documents once ' + 'they have been saved to the database') + else: + id_ = document + + id_ = id_field.to_mongo(id_) + collection = self.document_type._meta['collection'] + return pymongo.dbref.DBRef(collection, id_) + + def prepare_query_value(self, op, value): + return self.to_mongo(value) + + def validate(self, value): + assert isinstance(value, (self.document_type, pymongo.dbref.DBRef)) + + def lookup_member(self, member_name): + return self.document_type._fields.get(member_name) + + +class GenericReferenceField(BaseField): + """A reference to *any* :class:`~mongoengine.document.Document` subclass + that will be automatically dereferenced on access (lazily). + + .. 
versionadded:: 0.3
+    """
+
+    def __get__(self, instance, owner):
+        if instance is None:
+            return self
+
+        value = instance._data.get(self.name)
+        if isinstance(value, (dict, pymongo.son.SON)):
+            instance._data[self.name] = self.dereference(value)
+
+        return super(GenericReferenceField, self).__get__(instance, owner)
+
+    def dereference(self, value):
+        doc_cls = get_document(value['_cls'])
+        reference = value['_ref']
+        doc = _get_db().dereference(reference)
+        if doc is not None:
+            doc = doc_cls._from_son(doc)
+        return doc
+
+    def to_mongo(self, document):
+        id_field_name = document.__class__._meta['id_field']
+        id_field = document.__class__._fields[id_field_name]
+
+        if isinstance(document, Document):
+            # We need the id from the saved object to create the DBRef
+            id_ = document.id
+            if id_ is None:
+                raise ValidationError('You can only reference documents once '
+                                      'they have been saved to the database')
+        else:
+            id_ = document
+
+        id_ = id_field.to_mongo(id_)
+        collection = document._meta['collection']
+        ref = pymongo.dbref.DBRef(collection, id_)
+        return {'_cls': document.__class__.__name__, '_ref': ref}
+
+    def prepare_query_value(self, op, value):
+        return self.to_mongo(value)['_ref']
+
+
+class BinaryField(BaseField):
+    """A binary data field.
+    """
+
+    def __init__(self, max_bytes=None, **kwargs):
+        self.max_bytes = max_bytes
+        super(BinaryField, self).__init__(**kwargs)
+
+    def to_mongo(self, value):
+        return pymongo.binary.Binary(value)
+
+    def to_python(self, value):
+        return str(value)
+
+    def validate(self, value):
+        assert isinstance(value, str)
+
+        if self.max_bytes is not None and len(value) > self.max_bytes:
+            raise ValidationError('Binary value is too long')
diff --git a/lib/python/mongoengine/queryset.py b/lib/python/mongoengine/queryset.py
new file mode 100644
index 0000000..11dc2bc
--- /dev/null
+++ b/lib/python/mongoengine/queryset.py
@@ -0,0 +1,941 @@
+from connection import _get_db
+
+import pymongo
+import re
+import copy
+
+
+__all__ = ['queryset_manager', 'Q', 'InvalidQueryError',
+           'InvalidCollectionError']
+
+# The maximum number of items to display in a QuerySet.__repr__
+REPR_OUTPUT_SIZE = 20
+
+
+class DoesNotExist(Exception):
+    pass
+
+
+class MultipleObjectsReturned(Exception):
+    pass
+
+
+class InvalidQueryError(Exception):
+    pass
+
+
+class OperationError(Exception):
+    pass
+
+
+RE_TYPE = type(re.compile(''))
+
+
+class Q(object):
+
+    OR = '||'
+    AND = '&&'
+    OPERATORS = {
+        'eq': 'this.%(field)s == %(value)s',
+        'ne': 'this.%(field)s != %(value)s',
+        'gt': 'this.%(field)s > %(value)s',
+        'gte': 'this.%(field)s >= %(value)s',
+        'lt': 'this.%(field)s < %(value)s',
+        'lte': 'this.%(field)s <= %(value)s',
+        'in': '%(value)s.indexOf(this.%(field)s) != -1',
+        'nin': '%(value)s.indexOf(this.%(field)s) == -1',
+        'mod': '%(field)s %% %(value)s',
+        'all': ('%(value)s.every(function(a){'
+                'return this.%(field)s.indexOf(a) != -1 })'),
+        'size': 'this.%(field)s.length == %(value)s',
+        'exists': 'this.%(field)s != null',
+        'regex_eq': '%(value)s.test(this.%(field)s)',
+        'regex_ne': '!%(value)s.test(this.%(field)s)',
+    }
+
+    def __init__(self, **query):
+        self.query = [query]
+
+    def _combine(self, other, op):
+        obj = Q()
+        obj.query = ['('] + copy.deepcopy(self.query) + [op]
+        obj.query += copy.deepcopy(other.query) + [')']
+        return obj
+
+    def __or__(self, other):
+        return self._combine(other, self.OR)
+
+    def __and__(self, other):
+        return self._combine(other, self.AND)
+
+    def as_js(self, document):
+        js = []
+        js_scope = {}
+        
for i, item in enumerate(self.query): + if isinstance(item, dict): + item_query = QuerySet._transform_query(document, **item) + # item_query will values will either be a value or a dict + js.append(self._item_query_as_js(item_query, js_scope, i)) + else: + js.append(item) + return pymongo.code.Code(' '.join(js), js_scope) + + def _item_query_as_js(self, item_query, js_scope, item_num): + # item_query will be in one of the following forms + # {'age': 25, 'name': 'Test'} + # {'age': {'$lt': 25}, 'name': {'$in': ['Test', 'Example']} + # {'age': {'$lt': 25, '$gt': 18}} + js = [] + for i, (key, value) in enumerate(item_query.items()): + op = 'eq' + # Construct a variable name for the value in the JS + value_name = 'i%sf%s' % (item_num, i) + if isinstance(value, dict): + # Multiple operators for this field + for j, (op, value) in enumerate(value.items()): + # Create a custom variable name for this operator + op_value_name = '%so%s' % (value_name, j) + # Construct the JS that uses this op + value, operation_js = self._build_op_js(op, key, value, + op_value_name) + # Update the js scope with the value for this op + js_scope[op_value_name] = value + js.append(operation_js) + else: + # Construct the JS for this field + value, field_js = self._build_op_js(op, key, value, value_name) + js_scope[value_name] = value + js.append(field_js) + return ' && '.join(js) + + def _build_op_js(self, op, key, value, value_name): + """Substitute the values in to the correct chunk of Javascript. + """ + if isinstance(value, RE_TYPE): + # Regexes are handled specially + if op.strip('$') == 'ne': + op_js = Q.OPERATORS['regex_ne'] + else: + op_js = Q.OPERATORS['regex_eq'] + else: + op_js = Q.OPERATORS[op.strip('$')] + + # Comparing two ObjectIds in Javascript doesn't work.. + if isinstance(value, pymongo.objectid.ObjectId): + value = str(value) + + # Perform the substitution + operation_js = op_js % { + 'field': key, + 'value': value_name + } + return value, operation_js + +class QuerySet(object): + """A set of results returned from a query. Wraps a MongoDB cursor, + providing :class:`~mongoengine.Document` objects as the results. + """ + + def __init__(self, document, collection): + self._document = document + self._collection_obj = collection + self._accessed_collection = False + self._query = {} + self._where_clause = None + self._loaded_fields = [] + self._ordering = [] + + # If inheritance is allowed, only return instances and instances of + # subclasses of the class being used + if document._meta.get('allow_inheritance'): + self._query = {'_types': self._document._class_name} + self._cursor_obj = None + self._limit = None + self._skip = None + + def ensure_index(self, key_or_list): + """Ensure that the given indexes are in place. + + :param key_or_list: a single index key or a list of index keys (to + construct a multi-field index); keys may be prefixed with a **+** + or a **-** to determine the index ordering + """ + index_list = QuerySet._build_index_spec(self._document, key_or_list) + self._collection.ensure_index(index_list) + return self + + @classmethod + def _build_index_spec(cls, doc_cls, key_or_list): + """Build a PyMongo index spec from a MongoEngine index spec. 
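+
+        For example (editorial illustration; assumes a hypothetical
+        ``BlogPost`` document whose attribute names match its database field
+        names, which allows inheritance, and which uses no ``ListField``):
+
+        >>> QuerySet._build_index_spec(BlogPost, ['category', '-date'])
+        [('_types', 1), ('category', 1), ('date', -1)]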
+ """ + if isinstance(key_or_list, basestring): + key_or_list = [key_or_list] + + index_list = [] + use_types = doc_cls._meta.get('allow_inheritance', True) + for key in key_or_list: + # Get direction from + or - + direction = pymongo.ASCENDING + if key.startswith("-"): + direction = pymongo.DESCENDING + if key.startswith(("+", "-")): + key = key[1:] + + # Use real field name, do it manually because we need field + # objects for the next part (list field checking) + parts = key.split('.') + fields = QuerySet._lookup_field(doc_cls, parts) + parts = [field.db_field for field in fields] + key = '.'.join(parts) + index_list.append((key, direction)) + + # Check if a list field is being used, don't use _types if it is + if use_types and not all(f._index_with_types for f in fields): + use_types = False + + # If _types is being used, prepend it to every specified index + if doc_cls._meta.get('allow_inheritance') and use_types: + index_list.insert(0, ('_types', 1)) + + return index_list + + def __call__(self, q_obj=None, **query): + """Filter the selected documents by calling the + :class:`~mongoengine.queryset.QuerySet` with a query. + + :param q_obj: a :class:`~mongoengine.queryset.Q` object to be used in + the query; the :class:`~mongoengine.queryset.QuerySet` is filtered + multiple times with different :class:`~mongoengine.queryset.Q` + objects, only the last one will be used + :param query: Django-style query keyword arguments + """ + if q_obj: + self._where_clause = q_obj.as_js(self._document) + query = QuerySet._transform_query(_doc_cls=self._document, **query) + self._query.update(query) + return self + + def filter(self, *q_objs, **query): + """An alias of :meth:`~mongoengine.queryset.QuerySet.__call__` + """ + return self.__call__(*q_objs, **query) + + @property + def _collection(self): + """Property that returns the collection object. This allows us to + perform operations only if the collection is accessed. + """ + if not self._accessed_collection: + self._accessed_collection = True + + # Ensure document-defined indexes are created + if self._document._meta['indexes']: + for key_or_list in self._document._meta['indexes']: + #self.ensure_index(key_or_list) + self._collection.ensure_index(key_or_list) + + # Ensure indexes created by uniqueness constraints + for index in self._document._meta['unique_indexes']: + self._collection.ensure_index(index, unique=True) + + # If _types is being used (for polymorphism), it needs an index + if '_types' in self._query: + self._collection.ensure_index('_types') + return self._collection_obj + + @property + def _cursor(self): + if self._cursor_obj is None: + cursor_args = {} + if self._loaded_fields: + cursor_args = {'fields': self._loaded_fields} + self._cursor_obj = self._collection.find(self._query, + **cursor_args) + # Apply where clauses to cursor + if self._where_clause: + self._cursor_obj.where(self._where_clause) + + # apply default ordering + if self._document._meta['ordering']: + self.order_by(*self._document._meta['ordering']) + + return self._cursor_obj + + @classmethod + def _lookup_field(cls, document, parts): + """Lookup a field based on its attribute and return a list containing + the field's parents and the field. 
+ """ + if not isinstance(parts, (list, tuple)): + parts = [parts] + fields = [] + field = None + for field_name in parts: + if field is None: + # Look up first field from the document + field = document._fields[field_name] + else: + # Look up subfield on the previous field + field = field.lookup_member(field_name) + if field is None: + raise InvalidQueryError('Cannot resolve field "%s"' + % field_name) + fields.append(field) + return fields + + @classmethod + def _translate_field_name(cls, doc_cls, field, sep='.'): + """Translate a field attribute name to a database field name. + """ + parts = field.split(sep) + parts = [f.db_field for f in QuerySet._lookup_field(doc_cls, parts)] + return '.'.join(parts) + + @classmethod + def _transform_query(cls, _doc_cls=None, **query): + """Transform a query from Django-style format to Mongo format. + """ + operators = ['ne', 'gt', 'gte', 'lt', 'lte', 'in', 'nin', 'mod', + 'all', 'size', 'exists'] + match_operators = ['contains', 'icontains', 'startswith', + 'istartswith', 'endswith', 'iendswith'] + + mongo_query = {} + for key, value in query.items(): + parts = key.split('__') + # Check for an operator and transform to mongo-style if there is + op = None + if parts[-1] in operators + match_operators: + op = parts.pop() + + if _doc_cls: + # Switch field names to proper names [set in Field(name='foo')] + fields = QuerySet._lookup_field(_doc_cls, parts) + parts = [field.db_field for field in fields] + + # Convert value to proper value + field = fields[-1] + singular_ops = [None, 'ne', 'gt', 'gte', 'lt', 'lte'] + singular_ops += match_operators + if op in singular_ops: + value = field.prepare_query_value(op, value) + elif op in ('in', 'nin', 'all'): + # 'in', 'nin' and 'all' require a list of values + value = [field.prepare_query_value(op, v) for v in value] + + if field.__class__.__name__ == 'GenericReferenceField': + parts.append('_ref') + + if op and op not in match_operators: + value = {'$' + op: value} + + key = '.'.join(parts) + if op is None or key not in mongo_query: + mongo_query[key] = value + elif key in mongo_query and isinstance(mongo_query[key], dict): + mongo_query[key].update(value) + + return mongo_query + + def get(self, *q_objs, **query): + """Retrieve the the matching object raising + :class:`~mongoengine.queryset.MultipleObjectsReturned` or + :class:`~mongoengine.queryset.DoesNotExist` exceptions if multiple or + no results are found. + + .. versionadded:: 0.3 + """ + self.__call__(*q_objs, **query) + count = self.count() + if count == 1: + return self[0] + elif count > 1: + message = u'%d items returned, instead of 1' % count + raise MultipleObjectsReturned(message) + else: + raise DoesNotExist('Document not found') + + def get_or_create(self, *q_objs, **query): + """Retreive unique object or create, if it doesn't exist. Raises + :class:`~mongoengine.queryset.MultipleObjectsReturned` if multiple + results are found. A new document will be created if the document + doesn't exists; a dictionary of default values for the new document + may be provided as a keyword argument called :attr:`defaults`. + + .. 
+        """
+        defaults = query.get('defaults', {})
+        if 'defaults' in query:
+            del query['defaults']
+
+        self.__call__(*q_objs, **query)
+        count = self.count()
+        if count == 0:
+            query.update(defaults)
+            doc = self._document(**query)
+            doc.save()
+            return doc
+        elif count == 1:
+            return self.first()
+        else:
+            message = u'%d items returned, instead of 1' % count
+            raise MultipleObjectsReturned(message)
+
+    def first(self):
+        """Retrieve the first object matching the query.
+        """
+        try:
+            result = self[0]
+        except IndexError:
+            result = None
+        return result
+
+    def with_id(self, object_id):
+        """Retrieve the object matching the id provided.
+
+        :param object_id: the value for the id of the document to look up
+        """
+        id_field = self._document._meta['id_field']
+        object_id = self._document._fields[id_field].to_mongo(object_id)
+
+        result = self._collection.find_one({'_id': object_id})
+        if result is not None:
+            result = self._document._from_son(result)
+        return result
+
+    def in_bulk(self, object_ids):
+        """Retrieve a set of documents by their ids.
+
+        :param object_ids: a list or tuple of ``ObjectId``\ s
+        :rtype: dict of ObjectIds as keys and collection-specific
+            Document subclasses as values.
+
+        .. versionadded:: 0.3
+        """
+        doc_map = {}
+
+        docs = self._collection.find({'_id': {'$in': object_ids}})
+        for doc in docs:
+            doc_map[doc['_id']] = self._document._from_son(doc)
+
+        return doc_map
+
+    def next(self):
+        """Wrap the result in a :class:`~mongoengine.Document` object.
+        """
+        try:
+            if self._limit == 0:
+                raise StopIteration
+            return self._document._from_son(self._cursor.next())
+        except StopIteration, e:
+            self.rewind()
+            raise e
+
+    def rewind(self):
+        """Rewind the cursor to its unevaluated state.
+
+        .. versionadded:: 0.3
+        """
+        self._cursor.rewind()
+
+    def count(self):
+        """Count the selected elements in the query.
+        """
+        if self._limit == 0:
+            return 0
+        return self._cursor.count(with_limit_and_skip=True)
+
+    def __len__(self):
+        return self.count()
+
+    def map_reduce(self, map_f, reduce_f, finalize_f=None, limit=None,
+                   scope=None, keep_temp=False):
+        """Perform a map/reduce query using the current query spec
+        and ordering. While ``map_reduce`` respects ``QuerySet`` chaining,
+        it must be the last call made, as it does not return a malleable
+        ``QuerySet``.
+
+        See the :meth:`~mongoengine.tests.QuerySetTest.test_map_reduce`
+        and :meth:`~mongoengine.tests.QuerySetTest.test_map_advanced`
+        tests in ``tests.queryset.QuerySetTest`` for usage examples.
+
+        :param map_f: map function, as :class:`~pymongo.code.Code` or string
+        :param reduce_f: reduce function, as
+            :class:`~pymongo.code.Code` or string
+        :param finalize_f: finalize function, an optional function that
+            performs any post-reduction processing.
+        :param scope: values to insert into map/reduce global scope. Optional.
+        :param limit: number of objects from current query to provide
+            to map/reduce method
+        :param keep_temp: keep temporary table (boolean, default ``False``)
+
+        Returns an iterator yielding
+        :class:`~mongoengine.document.MapReduceDocument`.
+
+        .. note:: Map/Reduce requires server version **>= 1.1.1**. The PyMongo
+            :meth:`~pymongo.collection.Collection.map_reduce` helper requires
+            PyMongo version **>= 1.2**.
+
+        .. 
versionadded:: 0.3 + """ + from document import MapReduceDocument + + if not hasattr(self._collection, "map_reduce"): + raise NotImplementedError("Requires MongoDB >= 1.1.1") + + map_f_scope = {} + if isinstance(map_f, pymongo.code.Code): + map_f_scope = map_f.scope + map_f = str(map_f) + map_f = pymongo.code.Code(self._sub_js_fields(map_f), map_f_scope) + + reduce_f_scope = {} + if isinstance(reduce_f, pymongo.code.Code): + reduce_f_scope = reduce_f.scope + reduce_f = str(reduce_f) + reduce_f_code = self._sub_js_fields(reduce_f) + reduce_f = pymongo.code.Code(reduce_f_code, reduce_f_scope) + + mr_args = {'query': self._query, 'keeptemp': keep_temp} + + if finalize_f: + finalize_f_scope = {} + if isinstance(finalize_f, pymongo.code.Code): + finalize_f_scope = finalize_f.scope + finalize_f = str(finalize_f) + finalize_f_code = self._sub_js_fields(finalize_f) + finalize_f = pymongo.code.Code(finalize_f_code, finalize_f_scope) + mr_args['finalize'] = finalize_f + + if scope: + mr_args['scope'] = scope + + if limit: + mr_args['limit'] = limit + + results = self._collection.map_reduce(map_f, reduce_f, **mr_args) + results = results.find() + + if self._ordering: + results = results.sort(self._ordering) + + for doc in results: + yield MapReduceDocument(self._document, self._collection, + doc['_id'], doc['value']) + + def limit(self, n): + """Limit the number of returned documents to `n`. This may also be + achieved using array-slicing syntax (e.g. ``User.objects[:5]``). + + :param n: the maximum number of objects to return + """ + if n == 0: + self._cursor.limit(1) + else: + self._cursor.limit(n) + self._limit = n + + # Return self to allow chaining + return self + + def skip(self, n): + """Skip `n` documents before returning the results. This may also be + achieved using array-slicing syntax (e.g. ``User.objects[5:]``). + + :param n: the number of objects to skip before returning results + """ + self._cursor.skip(n) + self._skip = n + return self + + def __getitem__(self, key): + """Support skip and limit using getitem and slicing syntax. + """ + # Slice provided + if isinstance(key, slice): + try: + self._cursor_obj = self._cursor[key] + self._skip, self._limit = key.start, key.stop + except IndexError, err: + # PyMongo raises an error if key.start == key.stop, catch it, + # bin it, kill it. + start = key.start or 0 + if start >= 0 and key.stop >= 0 and key.step is None: + if start == key.stop: + self.limit(0) + self._skip, self._limit = key.start, key.stop - start + return self + raise err + # Allow further QuerySet modifications to be performed + return self + # Integer index provided + elif isinstance(key, int): + return self._document._from_son(self._cursor[key]) + + def only(self, *fields): + """Load only a subset of this document's fields. :: + + post = BlogPost.objects(...).only("title") + + :param fields: fields to include + + .. versionadded:: 0.3 + """ + self._loaded_fields = [] + for field in fields: + if '.' in field: + raise InvalidQueryError('Subfields cannot be used as ' + 'arguments to QuerySet.only') + # Translate field name + field = QuerySet._lookup_field(self._document, field)[-1].db_field + self._loaded_fields.append(field) + + # _cls is needed for polymorphism + if self._document._meta.get('allow_inheritance'): + self._loaded_fields += ['_cls'] + return self + + def order_by(self, *keys): + """Order the :class:`~mongoengine.queryset.QuerySet` by the keys. The + order may be specified by prepending each of the keys by a + or a -. + Ascending order is assumed. 
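+
+        For example (editorial illustration; ``BlogPost`` and its fields are
+        hypothetical)::
+
+            BlogPost.objects.order_by('-published_date', 'title')
+
+        sorts by ``published_date`` descending, then by ``title`` ascending.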
+
+        :param keys: fields to order the query results by; keys may be
+            prefixed with **+** or **-** to determine the ordering direction
+        """
+        key_list = []
+        for key in keys:
+            direction = pymongo.ASCENDING
+            if key[0] == '-':
+                direction = pymongo.DESCENDING
+            if key[0] in ('-', '+'):
+                key = key[1:]
+            key_list.append((key, direction))
+
+        self._ordering = key_list
+        self._cursor.sort(key_list)
+        return self
+
+    def explain(self, format=False):
+        """Return an explain plan record for the
+        :class:`~mongoengine.queryset.QuerySet`\ 's cursor.
+
+        :param format: format the plan before returning it
+        """
+        plan = self._cursor.explain()
+        if format:
+            import pprint
+            plan = pprint.pformat(plan)
+        return plan
+
+    def delete(self, safe=False):
+        """Delete the documents matched by the query.
+
+        :param safe: check if the operation succeeded before returning
+        """
+        self._collection.remove(self._query, safe=safe)
+
+    @classmethod
+    def _transform_update(cls, _doc_cls=None, **update):
+        """Transform an update spec from Django-style format to Mongo format.
+        """
+        operators = ['set', 'unset', 'inc', 'dec', 'push', 'push_all', 'pull',
+                     'pull_all']
+
+        mongo_update = {}
+        for key, value in update.items():
+            parts = key.split('__')
+            # Check for an operator and transform to mongo-style if there is
+            # one
+            op = None
+            if parts[0] in operators:
+                op = parts.pop(0)
+                # Convert Pythonic names to Mongo equivalents
+                if op in ('push_all', 'pull_all'):
+                    op = op.replace('_all', 'All')
+                elif op == 'dec':
+                    # Support decrement by flipping a positive value's sign
+                    # and using 'inc'
+                    op = 'inc'
+                    if value > 0:
+                        value = -value
+
+            if _doc_cls:
+                # Switch field names to proper names [set in Field(name='foo')]
+                fields = QuerySet._lookup_field(_doc_cls, parts)
+                parts = [field.db_field for field in fields]
+
+                # Convert value to proper value
+                field = fields[-1]
+                if op in (None, 'set', 'unset', 'push', 'pull'):
+                    value = field.prepare_query_value(op, value)
+                elif op in ('pushAll', 'pullAll'):
+                    value = [field.prepare_query_value(op, v) for v in value]
+
+            key = '.'.join(parts)
+
+            if op:
+                value = {key: value}
+                key = '$' + op
+
+            if op is None or key not in mongo_update:
+                mongo_update[key] = value
+            elif key in mongo_update and isinstance(mongo_update[key], dict):
+                mongo_update[key].update(value)
+
+        return mongo_update
+
+    def update(self, safe_update=True, upsert=False, **update):
+        """Perform an atomic update on the fields matched by the query.
+
+        :param safe_update: check if the operation succeeded before returning
+        :param update: Django-style update keyword arguments
+
+        .. versionadded:: 0.2
+        """
+        if pymongo.version < '1.1.1':
+            raise OperationError('update() method requires PyMongo 1.1.1+')
+
+        update = QuerySet._transform_update(self._document, **update)
+        try:
+            self._collection.update(self._query, update, safe=safe_update,
+                                    upsert=upsert, multi=True)
+        except pymongo.errors.OperationFailure, err:
+            if unicode(err) == u'multi not coded yet':
+                message = u'update() method requires MongoDB 1.1.3+'
+                raise OperationError(message)
+            raise OperationError(u'Update failed (%s)' % unicode(err))
+
+    def update_one(self, safe_update=True, upsert=False, **update):
+        """Perform an atomic update on the first document matched by the
+        query.
+
+        :param safe_update: check if the operation succeeded before returning
+        :param update: Django-style update keyword arguments
+
+        .. 
versionadded:: 0.2 + """ + update = QuerySet._transform_update(self._document, **update) + try: + # Explicitly provide 'multi=False' to newer versions of PyMongo + # as the default may change to 'True' + if pymongo.version >= '1.1.1': + self._collection.update(self._query, update, safe=safe_update, + upsert=upsert, multi=False) + else: + # Older versions of PyMongo don't support 'multi' + self._collection.update(self._query, update, safe=safe_update) + except pymongo.errors.OperationFailure, e: + raise OperationError('Update failed [%s]' % str(e)) + + def __iter__(self): + return self + + def _sub_js_fields(self, code): + """When fields are specified with [~fieldname] syntax, where + *fieldname* is the Python name of a field, *fieldname* will be + substituted for the MongoDB name of the field (specified using the + :attr:`name` keyword argument in a field's constructor). + """ + def field_sub(match): + # Extract just the field name, and look up the field objects + field_name = match.group(1).split('.') + fields = QuerySet._lookup_field(self._document, field_name) + # Substitute the correct name for the field into the javascript + return '["%s"]' % fields[-1].db_field + + return re.sub('\[\s*~([A-z_][A-z_0-9.]+?)\s*\]', field_sub, code) + + def exec_js(self, code, *fields, **options): + """Execute a Javascript function on the server. A list of fields may be + provided, which will be translated to their correct names and supplied + as the arguments to the function. A few extra variables are added to + the function's scope: ``collection``, which is the name of the + collection in use; ``query``, which is an object representing the + current query; and ``options``, which is an object containing any + options specified as keyword arguments. + + As fields in MongoEngine may use different names in the database (set + using the :attr:`db_field` keyword argument to a :class:`Field` + constructor), a mechanism exists for replacing MongoEngine field names + with the database field names in Javascript code. When accessing a + field, use square-bracket notation, and prefix the MongoEngine field + name with a tilde (~). + + :param code: a string of Javascript code to execute + :param fields: fields that you will be using in your function, which + will be passed in to your function as arguments + :param options: options that you want available to the function + (accessed in Javascript through the ``options`` object) + """ + code = self._sub_js_fields(code) + + fields = [QuerySet._translate_field_name(self._document, f) + for f in fields] + collection = self._document._meta['collection'] + + scope = { + 'collection': collection, + 'options': options or {}, + } + + query = self._query + if self._where_clause: + query['$where'] = self._where_clause + + scope['query'] = query + code = pymongo.code.Code(code, scope=scope) + + db = _get_db() + return db.eval(code, *fields) + + def sum(self, field): + """Sum over the values of the specified field. + + :param field: the field to sum over; use dot-notation to refer to + embedded document fields + """ + sum_func = """ + function(sumField) { + var total = 0.0; + db[collection].find(query).forEach(function(doc) { + total += (doc[sumField] || 0.0); + }); + return total; + } + """ + return self.exec_js(sum_func, field) + + def average(self, field): + """Average over the values of the specified field. 
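+
+        For example (editorial illustration; ``Person`` is hypothetical),
+        ``Person.objects.average('age')`` returns the mean ``age`` across the
+        matched documents. Note that the underlying Javascript skips falsy
+        values, so documents where the field is missing, and values stored
+        as 0, are excluded from both the total and the count.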
+ + :param field: the field to average over; use dot-notation to refer to + embedded document fields + """ + average_func = """ + function(averageField) { + var total = 0.0; + var num = 0; + db[collection].find(query).forEach(function(doc) { + if (doc[averageField]) { + total += doc[averageField]; + num += 1; + } + }); + return total / num; + } + """ + return self.exec_js(average_func, field) + + def item_frequencies(self, list_field, normalize=False): + """Returns a dictionary of all items present in a list field across + the whole queried set of documents, and their corresponding frequency. + This is useful for generating tag clouds, or searching documents. + + :param list_field: the list field to use + :param normalize: normalize the results so they add to 1.0 + """ + freq_func = """ + function(listField) { + if (options.normalize) { + var total = 0.0; + db[collection].find(query).forEach(function(doc) { + total += doc[listField].length; + }); + } + + var frequencies = {}; + var inc = 1.0; + if (options.normalize) { + inc /= total; + } + db[collection].find(query).forEach(function(doc) { + doc[listField].forEach(function(item) { + frequencies[item] = inc + (frequencies[item] || 0); + }); + }); + return frequencies; + } + """ + return self.exec_js(freq_func, list_field, normalize=normalize) + + def __repr__(self): + limit = REPR_OUTPUT_SIZE + 1 + if self._limit is not None and self._limit < limit: + limit = self._limit + data = list(self[self._skip:limit]) + if len(data) > REPR_OUTPUT_SIZE: + data[-1] = "...(remaining elements truncated)..." + return repr(data) + + +class InvalidCollectionError(Exception): + pass + + +class QuerySetManager(object): + + def __init__(self, manager_func=None): + self._manager_func = manager_func + self._collection = None + + def __get__(self, instance, owner): + """Descriptor for instantiating a new QuerySet object when + Document.objects is accessed. + """ + if instance is not None: + # Document class being used rather than a document object + return self + + if self._collection is None: + db = _get_db() + collection = owner._meta['collection'] + + # Create collection as a capped collection if specified + if owner._meta['max_size'] or owner._meta['max_documents']: + # Get max document limit and max byte size from meta + max_size = owner._meta['max_size'] or 10000000 # 10MB default + max_documents = owner._meta['max_documents'] + + if collection in db.collection_names(): + self._collection = db[collection] + # The collection already exists, check if its capped + # options match the specified capped options + options = self._collection.options() + if options.get('max') != max_documents or \ + options.get('size') != max_size: + msg = ('Cannot create collection "%s" as a capped ' + 'collection as it already exists') % collection + raise InvalidCollectionError(msg) + else: + # Create the collection as a capped collection + opts = {'capped': True, 'size': max_size} + if max_documents: + opts['max'] = max_documents + self._collection = db.create_collection(collection, opts) + else: + self._collection = db[collection] + + # owner is the document that contains the QuerySetManager + queryset = QuerySet(owner, self._collection) + if self._manager_func: + if self._manager_func.func_code.co_argcount == 1: + queryset = self._manager_func(queryset) + else: + queryset = self._manager_func(owner, queryset) + return queryset + + +def queryset_manager(func): + """Decorator that allows you to define custom QuerySet managers on + :class:`~mongoengine.Document` classes. 
The manager must be a function that
+    accepts a :class:`~mongoengine.Document` class as its first argument, and
+    a :class:`~mongoengine.queryset.QuerySet` as its second argument. The
+    function should return a :class:`~mongoengine.queryset.QuerySet`,
+    typically the one that was passed in, modified in some way.
+    """
+    if func.func_code.co_argcount == 1:
+        import warnings
+        msg = 'Methods decorated with queryset_manager should take 2 arguments'
+        warnings.warn(msg, DeprecationWarning)
+    return QuerySetManager(func)
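
For illustration, a minimal sketch of how the queryset_manager decorator
defined above is used (the BlogPost model and its field names are
hypothetical, not part of this patch):

    from mongoengine import Document, StringField, BooleanField
    from mongoengine.queryset import queryset_manager

    class BlogPost(Document):
        title = StringField()
        published = BooleanField(default=False)

        @queryset_manager
        def live_posts(doc_cls, queryset):
            # Receives the Document class and the default QuerySet, and
            # returns the QuerySet narrowed to published posts only.
            return queryset(published=True)

    # Accessed on the class, the manager yields the filtered QuerySet:
    # BlogPost.live_posts.order_by('title')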