diff --git a/marklogic/documents.py b/marklogic/documents.py
index 918c107..c00d1c8 100644
--- a/marklogic/documents.py
+++ b/marklogic/documents.py
@@ -1,21 +1,78 @@
+"""
+Defines classes to simplify usage of the documents REST endpoint defined at
+https://docs.marklogic.com/REST/client/management.
+"""
import json
+from typing import Union
-from requests import Session
+from requests import Response, Session
from urllib3.fields import RequestField
from urllib3.filepost import encode_multipart_formdata
-class Document:
+
+class Metadata:
"""
- :param uri: the URI of the document; can be None when relying on MarkLogic to
- generate a URI.
- :param content: the content of the document.
+ Defines the metadata properties that can be associated with a document and also
+ used for specifying default metadata when writing many documents. One benefit
+ of this class - besides encapsulating each bit of what MarkLogic defines as metadata
+ for a document - is to provide a simpler mechanism for defining permissions via a
+ dictionary as opposed to an array of dictionaries.
+
:param collections: array of collection URIs.
:param permissions: dict with keys of role names and values of arrays of
capabilities such as "read", "update", and "execute".
:param quality: document quality, used for scoring in searches.
:param metadata_values: dict with string keys and string values.
:param properties: dict with string keys and values of any type.
+ """
+
+ def __init__(
+ self,
+ collections: list[str] = None,
+ permissions: dict = None,
+ quality: int = None,
+ metadata_values: dict = None,
+ properties: dict = None,
+ ):
+ self.collections = collections
+ self.permissions = permissions
+ self.quality = quality
+ self.metadata_values = metadata_values
+ self.properties = properties
+
+
+def metadata_to_dict(metadata: Metadata) -> dict:
+    """
+    Returns a dictionary with a structure matching what the /v1/documents endpoint
+    requires; unset (None or empty) values are omitted, but a quality of 0 is kept.
+    """
+    md = {}
+    if metadata.permissions:
+        md["permissions"] = [
+            {"role-name": k, "capabilities": v} for k, v in metadata.permissions.items()
+        ]
+    if metadata.collections:
+        md["collections"] = metadata.collections
+    if metadata.quality is not None:
+        md["quality"] = metadata.quality
+    if metadata.properties:
+        md["properties"] = metadata.properties
+    if metadata.metadata_values:
+        md["metadataValues"] = metadata.metadata_values
+    return md
+
+
+class Document(Metadata):
+ """
+ :param uri: the URI of the document; can be None when relying on MarkLogic to
+ generate a URI.
+ :param content: the content of the document.
+ :param collections: see definition in parent class.
+ :param permissions: see definition in parent class.
+ :param quality: see definition in parent class.
+ :param metadata_values: see definition in parent class.
+ :param properties: see definition in parent class.
:param content_type: the MIME type of the document; use when MarkLogic cannot
determine the MIME type based on the URI.
:param extension: specifies a suffix for a URI generated by MarkLogic.
@@ -46,13 +103,9 @@ def __init__(
version_id: str = None,
temporal_document: str = None,
):
+ super().__init__(collections, permissions, quality, metadata_values, properties)
self.uri = uri
self.content = content
- self.collections = collections
- self.permissions = permissions
- self.quality = quality
- self.metadata_values = metadata_values
- self.properties = properties
# The following are all specific to writing a document.
self.content_type = content_type
@@ -64,38 +117,25 @@ def __init__(
self.temporal_document = temporal_document
def to_request_field(self) -> RequestField:
+ """
+ Returns a multipart request field representing the document to be written.
+ """
data = self.content
if type(data) is dict:
data = json.dumps(data)
field = RequestField(name=self.uri, data=data, filename=self.uri)
field.make_multipart(
- content_disposition=self._make_disposition(),
+ content_disposition=self._make_content_disposition(),
content_type=self.content_type,
)
return field
- def to_metadata_dict(self) -> dict:
+ def to_metadata_request_field(self) -> RequestField:
"""
- Returns a dictionary with a data structure matching what the /v1/documents
- endpoint requires.
+ Returns a multipart request field if any metadata has been set on this
+ document; returns None otherwise.
"""
- metadata = {}
- if self.permissions:
- metadata["permissions"] = [
- {"role-name": k, "capabilities": v} for k, v in self.permissions.items()
- ]
- if self.collections:
- metadata["collections"] = self.collections
- if self.quality:
- metadata["quality"] = self.quality
- if self.properties:
- metadata["properties"] = self.properties
- if self.metadata_values:
- metadata["metadataValues"] = self.metadata_values
- return metadata
-
- def to_metadata_request_field(self) -> RequestField:
- metadata = self.to_metadata_dict()
+ metadata = metadata_to_dict(self)
if len(metadata.keys()) == 0:
return None
@@ -108,7 +148,12 @@ def to_metadata_request_field(self) -> RequestField:
)
return field
- def _make_disposition(self) -> str:
+ def _make_content_disposition(self) -> str:
+ """
+ Returns a content disposition suitable for use when writing documents via
+ https://docs.marklogic.com/REST/POST/v1/documents . See that page for more
+ information on each part of the disposition.
+ """
disposition = "attachment"
if not self.uri:
@@ -133,17 +178,68 @@ def _make_disposition(self) -> str:
return disposition
+class DefaultMetadata(Metadata):
+    """
+    Metadata intended as the default when many documents are written at once.
+    """
+
+    def __init__(
+        self,
+        collections: list[str] = None,
+        permissions: dict = None,
+        quality: int = None,
+        metadata_values: dict = None,
+        properties: dict = None,
+    ):
+        super().__init__(collections, permissions, quality, metadata_values, properties)
+
+    def to_metadata_request_field(self) -> RequestField:
+        """
+        Builds the multipart field carrying this metadata; None if nothing is set.
+        """
+        payload = metadata_to_dict(self)
+        if not payload:
+            return None
+        part = RequestField(name=None, data=json.dumps(payload), filename=None)
+        part.make_multipart(
+            content_disposition="inline; category=metadata",
+            content_type="application/json",
+        )
+        return part
+
+
class DocumentManager:
+ """
+ Provides methods to simplify interacting with the /v1/documents REST endpoint
+ defined at https://docs.marklogic.com/REST/client/management.
+ """
+
def __init__(self, session: Session):
self._session = session
- def write(self, documents: list[Document], **kwargs):
+ def write(
+ self, parts: list[Union[DefaultMetadata, Document]], **kwargs
+ ) -> Response:
+ """
+ Write one or many documents at a time via a POST to the endpoint defined at
+ https://docs.marklogic.com/REST/POST/v1/documents .
+
+ :param parts: a part can define either a document to be written, which can
+ include metadata, or a set of default metadata to be applied to each document
+ after it that does not define its own metadata. See
+ https://docs.marklogic.com/guide/rest-dev/bulk#id_16015 for more information on
+ how the REST endpoint uses metadata.
+ """
fields = []
- for doc in documents:
- metadata_field = doc.to_metadata_request_field()
- if metadata_field:
- fields.append(metadata_field)
- fields.append(doc.to_request_field())
+
+        for part in parts:
+            # Both part types return None when no metadata is set; skip it so
+            # that encode_multipart_formdata never receives a None field.
+            metadata_field = part.to_metadata_request_field()
+            if metadata_field:
+                fields.append(metadata_field)
+            if not isinstance(part, DefaultMetadata):
+                fields.append(part.to_request_field())
data, content_type = encode_multipart_formdata(fields)
diff --git a/tests/test_write_documents_with_metadata.py b/tests/test_write_documents_with_metadata.py
index 0d9f06a..7e750aa 100644
--- a/tests/test_write_documents_with_metadata.py
+++ b/tests/test_write_documents_with_metadata.py
@@ -1,65 +1,36 @@
from marklogic import Client
-from marklogic.documents import Document
+from marklogic.documents import Document, DefaultMetadata
+
+TEST_METADATA = {
+ "collections": ["c1", "c2"],
+ "permissions": {
+ "python-tester": ["read", "update"],
+ "qconsole-user": "execute",
+ },
+ "quality": 1,
+ "metadata_values": {"key1": "value1", "key2": "value2"},
+ "properties": {
+ "hello": "world",
+ "xml": "be embedded",
+ "number": 1,
+ },
+}
def test_all_metadata(client: Client):
- uri = "/temp/doc1.json"
-
response = client.documents.write(
[
Document(
- uri,
+ "/temp/doc1.json",
{"content": "original"},
- collections=["c1", "c2"],
- permissions={
- "python-tester": ["read", "update"],
- "qconsole-user": "execute",
- },
- quality=1,
- properties={
- "hello": "world",
- "xml": "be embedded",
- "number": 1,
- },
- metadata_values={"key1": "value1", "key2": "value2"},
+ *TEST_METADATA.values(),
),
]
)
-
assert 200 == response.status_code
- # Get and verify all the metadata.
- metadata = client.get(
- "v1/documents?uri=/temp/doc1.json&category=metadata&format=json"
- ).json()
-
- perms = metadata["permissions"]
- assert 2 == len(perms)
- perm = next(perm for perm in perms if perm["role-name"] == "python-tester")
- assert 2 == len(perm["capabilities"])
- assert "read" in perm["capabilities"]
- assert "update" in perm["capabilities"]
- perm = next(perm for perm in perms if perm["role-name"] == "qconsole-user")
- assert 1 == len(perm["capabilities"])
- assert "execute" == perm["capabilities"][0]
-
- collections = metadata["collections"]
- assert 2 == len(collections)
- assert "c1" in collections
- assert "c2" in collections
-
- props = metadata["properties"]
- assert 3 == len(props.keys())
- assert "world" == props["hello"]
- assert "be embedded" == props["xml"]
- assert 1 == props["number"]
-
- assert 1 == metadata["quality"]
-
- values = metadata["metadataValues"]
- assert 2 == len(values.keys())
- assert "value1" == values["key1"]
- assert "value2" == values["key2"]
+ metadata = _get_metadata(client, "/temp/doc1.json")
+ _verify_test_metadata_exists(metadata)
def test_only_quality_and_permissions(client: Client):
@@ -79,10 +50,7 @@ def test_only_quality_and_permissions(client: Client):
assert 200 == response.status_code
- metadata = client.get(
- "v1/documents?uri=/temp/doc1.json&category=metadata&format=json"
- ).json()
-
+ metadata = _get_metadata(client, "/temp/doc1.json")
assert 2 == metadata["quality"]
assert 0 == len(metadata["collections"])
assert 0 == len(metadata["properties"].keys())
@@ -105,3 +73,101 @@ def test_only_quality(client: Client):
), "The response should be sent without permissions and thus fail because a \
non-admin user requires at least one update permission."
assert "XDMP-MUSTHAVEUPDATE" in response.text
+
+
+def test_default_metadata(client: Client):
+    """
+    The REST endpoint allows for default metadata to be provided at any point in the
+    multipart body, and it is expected to be applied to any document after it that does
+    not have any metadata itself.
+    """
+    response = client.documents.write(
+        [
+            DefaultMetadata(**TEST_METADATA),
+            Document("/temp/doc1.json", {"doc": 1}),
+            Document(
+                "/temp/doc2.json",
+                {"doc": 2},
+                permissions={"python-tester": "update", "rest-extension-user": "read"},
+            ),
+            DefaultMetadata(
+                permissions={"python-tester": "update", "qconsole-user": "read"}
+            ),
+            Document("/temp/doc3.json", {"doc": 3}),
+        ],
+    )
+
+    assert 200 == response.status_code
+
+    # doc1 should use the first set of default metadata
+    metadata = _get_metadata(client, "/temp/doc1.json")
+    _verify_test_metadata_exists(metadata)
+
+    # doc2 should use its own metadata
+    metadata = _get_metadata(client, "/temp/doc2.json")
+    assert 0 == metadata["quality"]
+    assert 0 == len(metadata["collections"])
+    assert 0 == len(metadata["properties"].keys())
+    assert 0 == len(metadata["metadataValues"].keys())
+    perms = metadata["permissions"]
+    assert 2 == len(perms)
+    perm = next(perm for perm in perms if perm["role-name"] == "python-tester")
+    assert 1 == len(perm["capabilities"])
+    assert "update" in perm["capabilities"]
+    perm = next(perm for perm in perms if perm["role-name"] == "rest-extension-user")
+    assert 1 == len(perm["capabilities"])
+    assert "read" in perm["capabilities"]
+
+    # doc3 should use the second set of default metadata
+    metadata = _get_metadata(client, "/temp/doc3.json")
+    assert 0 == metadata["quality"]
+    assert 0 == len(metadata["collections"])
+    assert 0 == len(metadata["properties"].keys())
+    assert 0 == len(metadata["metadataValues"].keys())
+    perms = metadata["permissions"]
+    assert 2 == len(perms)
+    perm = next(perm for perm in perms if perm["role-name"] == "python-tester")
+    assert 1 == len(perm["capabilities"])
+    assert "update" in perm["capabilities"]
+    perm = next(perm for perm in perms if perm["role-name"] == "qconsole-user")
+    assert 1 == len(perm["capabilities"])
+    assert "read" in perm["capabilities"]
+
+
+
+def _get_metadata(client: Client, uri: str) -> dict:
+ return client.get(f"v1/documents?uri={uri}&category=metadata&format=json").json()
+
+
+def _verify_test_metadata_exists(metadata: dict):
+    """
+    Convenience assertion helper: checks that the given document metadata contains
+    the permissions, collections, properties, quality and values of TEST_METADATA.
+    """
+    permissions = metadata["permissions"]
+    assert len(permissions) == 2
+    tester = next(p for p in permissions if p["role-name"] == "python-tester")
+    assert len(tester["capabilities"]) == 2
+    assert "read" in tester["capabilities"]
+    assert "update" in tester["capabilities"]
+    qconsole = next(p for p in permissions if p["role-name"] == "qconsole-user")
+    assert len(qconsole["capabilities"]) == 1
+    assert qconsole["capabilities"][0] == "execute"
+
+    uris = metadata["collections"]
+    assert len(uris) == 2
+    assert "c1" in uris
+    assert "c2" in uris
+
+    properties = metadata["properties"]
+    assert len(properties.keys()) == 3
+    assert properties["hello"] == "world"
+    assert properties["xml"] == "be embedded"
+    assert properties["number"] == 1
+
+    assert metadata["quality"] == 1
+
+    metadata_values = metadata["metadataValues"]
+    assert len(metadata_values.keys()) == 2
+    assert metadata_values["key1"] == "value1"
+    assert metadata_values["key2"] == "value2"