Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 134 additions & 38 deletions marklogic/documents.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,78 @@
import json
from typing import Union

from requests import Session
from requests import Response, Session
from urllib3.fields import RequestField
from urllib3.filepost import encode_multipart_formdata

"""
Defines classes to simplify usage of the documents REST endpoint defined at
https://docs.marklogic.com/REST/client/management.
"""

class Document:

class Metadata:
    """
    Defines the metadata properties that can be associated with a document and also
    used for specifying default metadata when writing many documents. One benefit
    of this class - besides encapsulating each bit of what MarkLogic defines as
    metadata for a document - is to provide a simpler mechanism for defining
    permissions via a dictionary as opposed to an array of dictionaries.

    Every parameter is optional and defaults to None, meaning "not specified".

    :param collections: array of collection URIs.
    :param permissions: dict with keys of role names and values of arrays of
    capabilities such as "read", "update", and "execute".
    :param quality: document quality, used for scoring in searches.
    :param metadata_values: dict with string keys and string values.
    :param properties: dict with string keys and values of any type.
    """

    def __init__(
        self,
        collections: Union[list[str], None] = None,
        permissions: Union[dict, None] = None,
        quality: Union[int, None] = None,
        metadata_values: Union[dict, None] = None,
        properties: Union[dict, None] = None,
    ):
        # Values are stored as given; no copying or validation is performed here.
        self.collections = collections
        self.permissions = permissions
        self.quality = quality
        self.metadata_values = metadata_values
        self.properties = properties


def metadata_to_dict(metadata: "Metadata") -> dict:
    """
    Returns a dictionary with a structure matching what the /v1/documents endpoint
    requires.

    Only metadata that has been set is included, so an object with no metadata
    produces an empty dictionary. Permissions are converted from the simple
    role-name -> capabilities dictionary into the array of
    {"role-name": ..., "capabilities": ...} dictionaries.

    :param metadata: object exposing the permissions, collections, quality,
    properties, and metadata_values attributes.
    """
    md = {}
    if metadata.permissions:
        md["permissions"] = [
            {"role-name": role, "capabilities": capabilities}
            for role, capabilities in metadata.permissions.items()
        ]
    if metadata.collections:
        md["collections"] = metadata.collections
    # Compare against None rather than relying on truthiness: a quality of 0 is a
    # legitimate, explicitly chosen value and must not be silently dropped.
    if metadata.quality is not None:
        md["quality"] = metadata.quality
    if metadata.properties:
        md["properties"] = metadata.properties
    if metadata.metadata_values:
        md["metadataValues"] = metadata.metadata_values
    return md


class Document(Metadata):
"""
:param uri: the URI of the document; can be None when relying on MarkLogic to
generate a URI.
:param content: the content of the document.
:param collections: see definition in parent class.
:param permissions: see definition in parent class.
:param quality: see definition in parent class.
:param metadata_values: see definition in parent class.
:param properties: see definition in parent class.
:param content_type: the MIME type of the document; use when MarkLogic cannot
determine the MIME type based on the URI.
:param extension: specifies a suffix for a URI generated by MarkLogic.
Expand Down Expand Up @@ -46,13 +103,9 @@ def __init__(
version_id: str = None,
temporal_document: str = None,
):
super().__init__(collections, permissions, quality, metadata_values, properties)
self.uri = uri
self.content = content
self.collections = collections
self.permissions = permissions
self.quality = quality
self.metadata_values = metadata_values
self.properties = properties

# The following are all specific to writing a document.
self.content_type = content_type
Expand All @@ -64,38 +117,25 @@ def __init__(
self.temporal_document = temporal_document

def to_request_field(self) -> RequestField:
    """
    Returns a multipart request field representing the document to be written.

    Content given as a dict or list is serialized to a JSON string first; any
    other content is passed through to the request field unchanged. The URI is
    used as both the field name and the filename.
    """
    data = self.content
    # isinstance also covers dict subclasses (e.g. OrderedDict), and lists are
    # included so that JSON array documents are serialized as well.
    if isinstance(data, (dict, list)):
        data = json.dumps(data)
    field = RequestField(name=self.uri, data=data, filename=self.uri)
    field.make_multipart(
        content_disposition=self._make_content_disposition(),
        content_type=self.content_type,
    )
    return field

def to_metadata_dict(self) -> dict:
def to_metadata_request_field(self) -> RequestField:
"""
Returns a dictionary with a data structure matching what the /v1/documents
endpoint requires.
Returns a multipart request field if any metadata has been set on this
document; returns None otherwise.
"""
metadata = {}
if self.permissions:
metadata["permissions"] = [
{"role-name": k, "capabilities": v} for k, v in self.permissions.items()
]
if self.collections:
metadata["collections"] = self.collections
if self.quality:
metadata["quality"] = self.quality
if self.properties:
metadata["properties"] = self.properties
if self.metadata_values:
metadata["metadataValues"] = self.metadata_values
return metadata

def to_metadata_request_field(self) -> RequestField:
metadata = self.to_metadata_dict()
metadata = metadata_to_dict(self)
if len(metadata.keys()) == 0:
return None

Expand All @@ -108,7 +148,12 @@ def to_metadata_request_field(self) -> RequestField:
)
return field

def _make_disposition(self) -> str:
def _make_content_disposition(self) -> str:
"""
Returns a content disposition suitable for use when writing documents via
https://docs.marklogic.com/REST/POST/v1/documents . See that page for more
information on each part of the disposition.
"""
disposition = "attachment"

if not self.uri:
Expand All @@ -133,17 +178,68 @@ def _make_disposition(self) -> str:
return disposition


class DefaultMetadata(Metadata):
    """
    Defines default metadata for use when writing many documents at one time.

    The constructor accepts the same parameters as the parent class and passes
    them through unchanged.
    """

    def __init__(
        self,
        collections: Union[list[str], None] = None,
        permissions: Union[dict, None] = None,
        quality: Union[int, None] = None,
        metadata_values: Union[dict, None] = None,
        properties: Union[dict, None] = None,
    ):
        super().__init__(collections, permissions, quality, metadata_values, properties)

    def to_metadata_request_field(self) -> Union[RequestField, None]:
        """
        Returns a multipart request field suitable for use when writing many
        documents; returns None when no metadata has been set.

        The field carries the metadata as JSON with a content disposition of
        "inline; category=metadata" and no name or filename.

        NOTE(review): DocumentManager.write appears to append this result without
        checking for None - confirm that an empty DefaultMetadata is handled there.
        """
        metadata = metadata_to_dict(self)
        if not metadata:
            return None
        field = RequestField(name=None, data=json.dumps(metadata), filename=None)
        field.make_multipart(
            content_disposition="inline; category=metadata",
            content_type="application/json",
        )
        return field


class DocumentManager:
"""
Provides methods to simplify interacting with the /v1/documents REST endpoint
defined at https://docs.marklogic.com/REST/client/management.
"""

def __init__(self, session: Session):
    """
    :param session: the requests Session stored on this manager as _session;
    presumably used by write() to call /v1/documents - the request itself is
    outside the visible source, so confirm against the rest of the class.
    """
    self._session = session

def write(self, documents: list[Document], **kwargs):
def write(
self, parts: list[Union[DefaultMetadata, Document]], **kwargs
) -> Response:
"""
Write one or many documents at a time via a POST to the endpoint defined at
https://docs.marklogic.com/REST/POST/v1/documents .

:param parts: a part can define either a document to be written, which can
include metadata, or a set of default metadata to be applied to each document
after it that does not define its own metadata. See
https://docs.marklogic.com/guide/rest-dev/bulk#id_16015 for more information on
how the REST endpoint uses metadata.
"""
fields = []
for doc in documents:
metadata_field = doc.to_metadata_request_field()
if metadata_field:
fields.append(metadata_field)
fields.append(doc.to_request_field())

for part in parts:
if isinstance(part, DefaultMetadata):
fields.append(part.to_metadata_request_field())
else:
metadata_field = part.to_metadata_request_field()
if metadata_field:
fields.append(metadata_field)
fields.append(part.to_request_field())

data, content_type = encode_multipart_formdata(fields)

Expand Down
Loading