Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 24 additions & 16 deletions marklogic/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ class Document(Metadata):

def __init__(
self,
uri: str,
content,
uri: str = None,
content=None,
collections: list[str] = None,
permissions: dict = None,
quality: int = None,
Expand Down Expand Up @@ -148,6 +148,8 @@ def to_request_field(self) -> RequestField:
"""
Returns a multipart request field representing the document to be written.
"""
if self.content is None:
return None
data = self.content
if type(data) is dict:
data = json.dumps(data)
Expand Down Expand Up @@ -282,13 +284,14 @@ def multipart_response_to_documents(response: Response) -> list[Document]:
header_values = _extract_values_from_header(part)
uri = header_values["uri"]
if header_values["category"] == "content":
content = (
json.loads(part.content)
if header_values["content_type"] == "application/json"
else part.content
)
content_type = header_values["content_type"]
version_id = header_values["version_id"]
content = part.content
content_type = header_values.get("content_type")
if content_type == "application/json":
content = json.loads(content)
elif content_type in ["application/xml", "text/xml", "text/plain"]:
content = content.decode(part.encoding)

version_id = header_values.get("version_id")
if uris_to_documents.get(uri):
doc: Document = uris_to_documents[uri]
doc.content = content
Expand Down Expand Up @@ -323,7 +326,7 @@ def __init__(self, session: Session):
self._session = session

def write(
self, parts: list[Union[DefaultMetadata, Document]], **kwargs
self, parts: Union[Document, list[Union[DefaultMetadata, Document]]], **kwargs
) -> Response:
"""
Write one or many documents at a time via a POST to the endpoint defined at
Expand All @@ -337,14 +340,19 @@ def write(
"""
fields = []

if isinstance(parts, Document):
parts = [parts]

for part in parts:
if isinstance(part, DefaultMetadata):
fields.append(part.to_metadata_request_field())
else:
metadata_field = part.to_metadata_request_field()
if metadata_field:
fields.append(metadata_field)
fields.append(part.to_request_field())
content_field = part.to_request_field()
if content_field:
fields.append(content_field)

data, content_type = encode_multipart_formdata(fields)

Expand All @@ -358,20 +366,20 @@ def write(
return self._session.post("/v1/documents", data=data, headers=headers, **kwargs)

def read(
self, uris: list[str], categories: list[str] = None, **kwargs
self, uris: Union[str, list[str]], categories: list[str] = None, **kwargs
) -> Union[list[Document], Response]:
"""
Read one or many documents via a GET to the endpoint defined at
https://docs.marklogic.com/REST/GET/v1/documents . If a 200 is not returned
by that endpoint, then the Response is returned instead.

:param uris: list of URIs to read.
:param uris: list of URIs or a single URI to read.
:param categories: optional list of the categories of data to return for each
URI. By default, only content will be returned for each URI. See the endpoint
documentation for further information.
"""
params = kwargs.pop("params", {})
params["uri"] = uris
params["uri"] = uris if isinstance(uris, list) else [uris]
params["format"] = "json" # This refers to the metadata format.
if categories:
params["category"] = categories
Expand Down Expand Up @@ -405,8 +413,8 @@ def search(
documents instead of a search response. Parameters that are commonly used for
that endpoint are included as arguments to this method for ease of use.

:param query: JSON or XML query matching one of the types supported by the
search endpoint. The "Content-type" header will be set based on whether this
:param query: JSON or XML query matching one of the types supported by the
search endpoint. The "Content-type" header will be set based on whether this
is a dict, a string of JSON, or a string of XML.
:param categories: optional list of the categories of data to return for each
URI. By default, only content will be returned for each URI. See the endpoint
Expand Down
52 changes: 38 additions & 14 deletions tests/test_read_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,38 +9,62 @@
def test_write_and_read_binary(client: Client):
content = "MarkLogic and Python".encode("ascii")
response = client.documents.write(
[
Document(
"/temp/doc1.bin",
content,
permissions=DEFAULT_PERMS,
)
]
Document(
"/temp/doc1.bin",
content,
permissions=DEFAULT_PERMS,
)
)
assert 200 == response.status_code

docs = client.documents.read(["/temp/doc1.bin"])
docs = client.documents.read("/temp/doc1.bin")
assert len(docs) == 1
doc = docs[0]
assert doc.uri == "/temp/doc1.bin"
content = doc.content.decode("ascii")
assert content == "MarkLogic and Python"


def test_write_and_read_xml_document(client: Client):
    """
    Write a single XML document, read it back by URI, and check that the
    returned content contains the original markup as a string.
    """
    xml_doc = Document("/doc1.xml", "<hello>world</hello>", permissions=DEFAULT_PERMS)
    write_response = client.documents.write(xml_doc)
    assert write_response.status_code == 200

    docs = client.documents.read("/doc1.xml")
    read_back = docs[0]
    # Verify content was turned into a string
    assert "<hello>world</hello>" in read_back.content


def test_write_and_read_text_document(client: Client):
    """
    Write a single plain-text document with an explicit "text/plain" content
    type, read it back by URI, and check the content equals the original text.
    """
    text_doc = Document(
        "/doc1.txt",
        "hello world!",
        permissions=DEFAULT_PERMS,
        content_type="text/plain",
    )
    write_response = client.documents.write(text_doc)
    assert write_response.status_code == 200

    docs = client.documents.read("/doc1.txt")
    read_back = docs[0]
    assert read_back.content == "hello world!"


def test_read_uri_with_double_quotes(client: Client):
uri = '/this/"works.json'
response = client.documents.write(
[Document(uri, {"hello": "world"}, permissions=DEFAULT_PERMS)]
Document(uri, {"hello": "world"}, permissions=DEFAULT_PERMS)
)
assert response.status_code == 200

docs = client.documents.read(["/this/%22works.json"])
docs = client.documents.read("/this/%22works.json")
assert len(docs) == 1
assert "/this/%22works.json" == docs[0].uri


def test_uri_not_found(client: Client):
docs = client.documents.read(["/doesnt-exist.json"])
docs = client.documents.read("/doesnt-exist.json")
assert docs is not None
assert len(docs) == 0

Expand All @@ -51,7 +75,7 @@ def test_read_with_transform(client: Client):
parameters, along with the ones added by the client.
"""
docs = client.documents.read(
["/doc1.json"],
"/doc1.json",
categories=["content", "metadata"],
params={"transform": "envelope"},
)
Expand Down Expand Up @@ -92,7 +116,7 @@ def test_with_accept_header(client: Client):
expected to be set to multipart/mixed by the client.
"""
docs = client.documents.read(
["/doc1.json"],
"/doc1.json",
headers={"Accept": "something/invalid"},
categories=["content", "quality"],
)
Expand All @@ -107,7 +131,7 @@ def test_with_accept_header(client: Client):

def test_read_with_basic_client(basic_client: Client):
# Just verifies that basic auth works as expected.
doc = basic_client.documents.read(["/doc1.json"])[0]
doc = basic_client.documents.read("/doc1.json")[0]
assert {"hello": "world"} == doc.content


Expand Down
121 changes: 59 additions & 62 deletions tests/test_write_documents.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from marklogic import Client
from marklogic.documents import Document
from marklogic.documents import DefaultMetadata, Document

DEFAULT_PERMS = {"python-tester": ["read", "update"]}

Expand Down Expand Up @@ -94,48 +94,41 @@ def test_content_types(client: Client):

def test_single_doc(client):
response = client.documents.write(
[Document("/temp/doc1.json", {"doc": 1}, permissions=DEFAULT_PERMS)]
Document("/temp/doc1.json", {"doc": 1}, permissions=DEFAULT_PERMS)
)
assert 200 == response.status_code

doc1 = client.get("v1/documents?uri=/temp/doc1.json").json()
assert 1 == doc1["doc"]


@pytest.mark.skip("Will get this working when supporting batch-level metadata")
def test_server_generated_uri(client):
response = client.documents.write(
[
DefaultMetadata(permissions=DEFAULT_PERMS),
Document(
None,
{"doc": "serveruri"},
extension=".json",
directory="/temp/",
permissions=DEFAULT_PERMS,
)
content={"doc": "serveruri"}, extension=".json", directory="/temp/"
),
]
)
assert 200 == response.status_code

# Do a search to find the URI.
data = client.get("/v1/search?q=serveruri&format=json").json()
assert 1 == data["total"]
uri = data["results"][0]["uri"]

doc1 = client.get(f"v1/documents?uri={uri}").json()
assert "serveruri" == doc1["doc"]
# Do a search to verify the document was created.
docs = client.documents.search(q="serveruri")
assert len(docs) == 1
doc = docs[0]
assert doc.uri.startswith("/temp/")
assert doc.uri.endswith(".json")


def test_repair_xml(client):
response = client.documents.write(
[
Document(
"/temp/doc1.xml",
"<doc>needs <b>closing tag</doc>",
repair="full",
permissions=DEFAULT_PERMS,
)
]
Document(
"/temp/doc1.xml",
"<doc>needs <b>closing tag</doc>",
repair="full",
permissions=DEFAULT_PERMS,
)
)
assert 200 == response.status_code

Expand All @@ -147,25 +140,19 @@ def test_repair_xml(client):
def test_extract_binary(client):
content = "MarkLogic and Python".encode("ascii")
response = client.documents.write(
[
Document(
"/temp/doc1.bin",
content,
extract="properties",
permissions=DEFAULT_PERMS,
)
]
Document(
"/temp/doc1.bin",
content,
extract="properties",
permissions=DEFAULT_PERMS,
)
)
assert 200 == response.status_code


def test_optimistic_locking(client):
response = client.documents.write(
[
Document(
"/temp/doc1.json", {"content": "original"}, permissions=DEFAULT_PERMS
)
]
Document("/temp/doc1.json", {"content": "original"}, permissions=DEFAULT_PERMS)
)
assert 200 == response.status_code

Expand All @@ -174,14 +161,12 @@ def test_optimistic_locking(client):

# Update the document, passing in the current version_id based on the ETag.
response = client.documents.write(
[
Document(
"/temp/doc1.json",
{"content": "updated!"},
version_id=etag,
permissions=DEFAULT_PERMS,
)
]
Document(
"/temp/doc1.json",
{"content": "updated!"},
version_id=etag,
permissions=DEFAULT_PERMS,
)
)
assert 200 == response.status_code

Expand All @@ -191,14 +176,12 @@ def test_optimistic_locking(client):

# Next update should fail since the ETag is no longer the current version.
response = client.documents.write(
[
Document(
"/temp/doc1.json",
{"this": "should fail"},
version_id=etag,
permissions=DEFAULT_PERMS,
)
]
Document(
"/temp/doc1.json",
{"this": "should fail"},
version_id=etag,
permissions=DEFAULT_PERMS,
)
)
assert 412 == response.status_code, "412 is returned when the versionId is invalid."
assert response.text.__contains__("RESTAPI-CONTENTWRONGVERSION")
Expand All @@ -214,14 +197,12 @@ def test_temporal_doc(client):
}

response = client.documents.write(
[
Document(
"/temp/doc1.json",
content,
temporal_document="custom1",
permissions=DEFAULT_PERMS,
)
],
Document(
"/temp/doc1.json",
content,
temporal_document="custom1",
permissions=DEFAULT_PERMS,
),
params={"temporal-collection": "temporal-collection"},
)
assert 200 == response.status_code
Expand All @@ -236,4 +217,20 @@ def test_temporal_doc(client):


def test_metadata_no_content(client: Client):
print("TODO!")
uri = "/temp/doc1.json"
response = client.documents.write(
Document(uri, {"doc": 1}, permissions=DEFAULT_PERMS),
)
assert response.status_code == 200

doc = client.documents.read(uri, categories=["metadata"])[0]
# Collections is not None since MarkLogic returns [] for it.
assert len(doc.collections) == 0

response = client.documents.write(Document(uri, collections=["c1", "c2"]))
assert response.status_code == 200

doc = client.documents.read(uri, categories=["metadata"])[0]
assert "c1" in doc.collections
assert "c2" in doc.collections
assert len(doc.collections) == 2
Loading