From bbcf2b98351cd2c1039c2405d0f4b102aefdfb53 Mon Sep 17 00:00:00 2001 From: Rob Rudin Date: Wed, 12 Jul 2023 12:09:45 -0400 Subject: [PATCH] DEVEXP-496 Can now write a batch of documents I'm going to do docs in a separate PR, after all the metadata stuff is supported too. There are only a lot of new files here because I needed them in the test app to test out things like optimistic locking and temporal writes. --- marklogic/client.py | 7 + marklogic/documents.py | 125 ++++++++++++ test-app/.gitignore | 1 + .../security/roles/python-tester.json | 23 +++ .../security/users/python-test-admin.json | 7 + .../security/users/python-test-user.json | 4 +- .../temporal/axes/temporal-system-axis.json | 15 ++ .../temporal/axes/temporal-valid-axis.json | 15 ++ .../collections/temporal-collection.json | 8 + .../src/main/ml-data/permissions.properties | 2 +- .../src/main/ml-modules/rest-properties.json | 3 + tests/conftest.py | 5 + tests/test_write_documents.py | 182 ++++++++++++++++++ 13 files changed, 393 insertions(+), 4 deletions(-) create mode 100644 marklogic/documents.py create mode 100644 test-app/src/main/ml-config/security/roles/python-tester.json create mode 100644 test-app/src/main/ml-config/security/users/python-test-admin.json create mode 100644 test-app/src/main/ml-config/temporal/axes/temporal-system-axis.json create mode 100644 test-app/src/main/ml-config/temporal/axes/temporal-valid-axis.json create mode 100644 test-app/src/main/ml-config/temporal/collections/temporal-collection.json create mode 100644 test-app/src/main/ml-modules/rest-properties.json create mode 100644 tests/test_write_documents.py diff --git a/marklogic/client.py b/marklogic/client.py index ed96507..3ba4a10 100644 --- a/marklogic/client.py +++ b/marklogic/client.py @@ -1,5 +1,6 @@ import requests from marklogic.cloud_auth import MarkLogicCloudAuth +from marklogic.documents import DocumentManager from requests.auth import HTTPDigestAuth from urllib.parse import urljoin @@ -63,3 +64,9 @@ def prepare_request(self, request, *args, **kwargs): """ request.url = urljoin(self.base_url, request.url) return super(Client, self).prepare_request(request, *args, **kwargs) + + @property + def documents(self): + if not hasattr(self, "_documents"): + self._documents = DocumentManager(self) + return self._documents diff --git a/marklogic/documents.py b/marklogic/documents.py new file mode 100644 index 0000000..61c7836 --- /dev/null +++ b/marklogic/documents.py @@ -0,0 +1,125 @@ +import json +from requests import Session +from urllib3.fields import RequestField +from urllib3.filepost import encode_multipart_formdata + + +class Document: + """ + :param uri: the URI of the document; can be None when relying on MarkLogic to + generate a URI. + :param content: the content of the document. + :param content_type: the MIME type of the document; use when MarkLogic cannot + determine the MIME type based on the URI. + :param extension: specifies a suffix for a URI generated by MarkLogic. + :param directory: specifies a prefix for a URI generated by MarkLogic. + :param repair: for an XML document, the level of XML repair to perform; can be + "full" or "none", with "none" being the default. + :param version_id: affects updates when optimistic locking is enabled; see + https://docs.marklogic.com/REST/POST/v1/documents for more information. + :param temporal_document: the logical document URI for a document written to a + temporal collection; requires that a "temporal-collection" parameter be included in + the request. + """ + + def __init__( + self, + uri: str, + content, + content_type: str = None, + extension: str = None, + directory: str = None, + repair: str = None, + extract: str = None, + version_id: str = None, + temporal_document: str = None, + ): + self.uri = uri + self.content = content + self.content_type = content_type + self.extension = extension + self.directory = directory + self.repair = repair + self.extract = extract + self.version_id = version_id + self.temporal_document = temporal_document + + def to_request_field(self) -> RequestField: + data = self.content + if type(data) is dict: + data = json.dumps(data) + field = RequestField(name=self.uri, data=data, filename=self.uri) + field.make_multipart( + content_disposition=self._make_disposition(), + content_type=self.content_type, + ) + return field + + def _make_disposition(self) -> str: + disposition = "attachment" + + if not self.uri: + disposition = "inline" + if self.extension: + disposition = f"{disposition};extension={self.extension}" + if self.directory: + disposition = f"{disposition};directory={self.directory}" + + if self.repair: + disposition = f"{disposition};repair={self.repair}" + + if self.extract: + disposition = f"{disposition};extract={self.extract}" + + if self.version_id: + disposition = f"{disposition};versionId={self.version_id}" + + if self.temporal_document: + disposition = f"{disposition};temporal-document={self.temporal_document}" + + return disposition + + +class DocumentManager: + def __init__(self, session: Session): + self._session = session + + def write(self, documents: list[Document], **kwargs): + fields = [self._make_default_metadata_field()] + for doc in documents: + fields.append(doc.to_request_field()) + + data, content_type = encode_multipart_formdata(fields) + + headers = kwargs.pop("headers", {}) + headers["Content-Type"] = "".join( + ("multipart/mixed",) + content_type.partition(";")[1:] + ) + if not headers.get("Accept"): + headers["Accept"] = "application/json" + + return self._session.post("/v1/documents", data=data, headers=headers, **kwargs) + + def _make_default_metadata_field(self): + """ + Temporary method to ensure the test user can see written documents. Will be + removed when this feature is implemented for real. + """ + metadata_field = RequestField( + name="request-metadata", + data=json.dumps( + { + "permissions": [ + { + "role-name": "python-tester", + "capabilities": ["read", "update"], + } + ] + } + ), + ) + metadata_field.make_multipart( + content_disposition="inline; category=metadata", + content_type="application/json", + ) + return metadata_field diff --git a/test-app/.gitignore b/test-app/.gitignore index 7f6511a..3efdcd5 100644 --- a/test-app/.gitignore +++ b/test-app/.gitignore @@ -1,2 +1,3 @@ .gradle gradle-local.properties +build diff --git a/test-app/src/main/ml-config/security/roles/python-tester.json b/test-app/src/main/ml-config/security/roles/python-tester.json new file mode 100644 index 0000000..616bb5d --- /dev/null +++ b/test-app/src/main/ml-config/security/roles/python-tester.json @@ -0,0 +1,23 @@ +{ + "role-name": "python-tester", + "role": [ + "rest-extension-user" + ], + "privilege": [ + { + "privilege-name": "rest-reader", + "action": "http://marklogic.com/xdmp/privileges/rest-reader", + "kind": "execute" + }, + { + "privilege-name": "rest-writer", + "action": "http://marklogic.com/xdmp/privileges/rest-writer", + "kind": "execute" + }, + { + "privilege-name": "xdbc:eval", + "action": "http://marklogic.com/xdmp/privileges/xdbc-eval", + "kind": "execute" + } + ] +} diff --git a/test-app/src/main/ml-config/security/users/python-test-admin.json b/test-app/src/main/ml-config/security/users/python-test-admin.json new file mode 100644 index 0000000..ff988f3 --- /dev/null +++ b/test-app/src/main/ml-config/security/users/python-test-admin.json @@ -0,0 +1,7 @@ +{ + "user-name": "python-test-admin", + "password": "password", + "role": [ + "admin" + ] +} \ No newline at end of file diff --git a/test-app/src/main/ml-config/security/users/python-test-user.json b/test-app/src/main/ml-config/security/users/python-test-user.json index fcacb51..f033fbb 100644 --- a/test-app/src/main/ml-config/security/users/python-test-user.json +++ b/test-app/src/main/ml-config/security/users/python-test-user.json @@ -2,9 +2,7 @@ "user-name": "python-test-user", "password": "password", "role": [ - "rest-evaluator", - "rest-reader", - "rest-writer", + "python-tester", "qconsole-user" ] } \ No newline at end of file diff --git a/test-app/src/main/ml-config/temporal/axes/temporal-system-axis.json b/test-app/src/main/ml-config/temporal/axes/temporal-system-axis.json new file mode 100644 index 0000000..7c87e06 --- /dev/null +++ b/test-app/src/main/ml-config/temporal/axes/temporal-system-axis.json @@ -0,0 +1,15 @@ +{ + "axis-name": "system", + "axis-start": { + "element-reference": { + "namespace-uri": "", + "localname": "systemStart" + } + }, + "axis-end": { + "element-reference": { + "namespace-uri": "", + "localname": "systemEnd" + } + } +} diff --git a/test-app/src/main/ml-config/temporal/axes/temporal-valid-axis.json b/test-app/src/main/ml-config/temporal/axes/temporal-valid-axis.json new file mode 100644 index 0000000..4781821 --- /dev/null +++ b/test-app/src/main/ml-config/temporal/axes/temporal-valid-axis.json @@ -0,0 +1,15 @@ +{ + "axis-name": "valid", + "axis-start": { + "element-reference": { + "namespace-uri": "", + "localname": "validStart" + } + }, + "axis-end": { + "element-reference": { + "namespace-uri": "", + "localname": "validEnd" + } + } +} diff --git a/test-app/src/main/ml-config/temporal/collections/temporal-collection.json b/test-app/src/main/ml-config/temporal/collections/temporal-collection.json new file mode 100644 index 0000000..436f6eb --- /dev/null +++ b/test-app/src/main/ml-config/temporal/collections/temporal-collection.json @@ -0,0 +1,8 @@ +{ + "collection-name": "temporal-collection", + "system-axis": "system", + "valid-axis": "valid", + "option": [ + "updates-admin-override" + ] +} diff --git a/test-app/src/main/ml-data/permissions.properties b/test-app/src/main/ml-data/permissions.properties index c977854..a181f8b 100644 --- a/test-app/src/main/ml-data/permissions.properties +++ b/test-app/src/main/ml-data/permissions.properties @@ -1 +1 @@ -*=rest-reader,read,rest-writer,update +*=python-tester,read,python-tester,update diff --git a/test-app/src/main/ml-modules/rest-properties.json b/test-app/src/main/ml-modules/rest-properties.json new file mode 100644 index 0000000..a0e9758 --- /dev/null +++ b/test-app/src/main/ml-modules/rest-properties.json @@ -0,0 +1,3 @@ +{ + "update-policy": "VERSION_OPTIONAL" + } \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 8639a17..570fd9e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,11 @@ def client(): return Client("http://localhost:8030", digest=("python-test-user", "password")) +@pytest.fixture +def admin_client(): + return Client("http://localhost:8030", digest=("python-test-admin", "password")) + + @pytest.fixture def basic_client(): # requests allows a tuple to be passed when doing basic authentication. diff --git a/tests/test_write_documents.py b/tests/test_write_documents.py new file mode 100644 index 0000000..b1cd57d --- /dev/null +++ b/tests/test_write_documents.py @@ -0,0 +1,182 @@ +import pytest + +from marklogic import Client +from marklogic.documents import Document + + +@pytest.fixture(autouse=True) +def prepare_test_database(admin_client: Client): + """ + Deletes any documents created by other tests to ensure a 'clean' database before a + test runs. Does not delete documents in the 'test-data' collection which is intended + to contain all the documents loaded by the test-app. A user with the 'admin' role + is used so that temporal documents can be deleted. + """ + query = "cts:uris((), (), cts:not-query(cts:collection-query('test-data'))) \ + ! xdmp:document-delete(.)" + response = admin_client.post( + "v1/eval", + headers={"Content-type": "application/x-www-form-urlencoded"}, + data={"xquery": query}, + ) + assert 200 == response.status_code + + +def test_write_json(client: Client): + # Verifies that JSON can be either a dict or a string. + response = client.documents.write( + [ + Document("/temp/doc1.json", {"doc": 1}), + Document("/temp/doc2.json", '{"doc": 2}'), + ] + ) + + assert 200 == response.status_code + assert response.headers["Content-type"].startswith("application/json") + data = response.json() + assert len(data["documents"]) == 2 + + doc1 = client.get("v1/documents?uri=/temp/doc1.json").json() + assert 1 == doc1["doc"] + doc2 = client.get("v1/documents?uri=/temp/doc2.json").json() + assert 2 == doc2["doc"] + + +def test_return_xml(client: Client): + """ + Verifies that the headers passed in by a user aren't lost when the client sets + the Content-type to multipart/mixed. + """ + docs = [ + Document("/temp/doc1.json", {"doc": 1}), + Document("/temp/doc2.json", {"doc": 2}), + ] + response = client.documents.write(docs, headers={"Accept": "application/xml"}) + + assert response.headers["Content-type"].startswith("application/xml") + assert response.text.startswith("2"), + ] + ) + assert 200 == response.status_code + + doc1 = client.get("v1/documents?uri=/temp/doc1.json").json() + assert 1 == doc1["doc"] + doc2_text = client.get("v1/documents?uri=/temp/doc2.xml").text + assert doc2_text.__contains__("2") + + +def test_content_types(client: Client): + """ + Verifies a user can specify a content type for each document where MarkLogic is not + able to determine a type based on the URI. + """ + response = client.documents.write( + [ + Document("/temp/doc1", {"doc": 1}, content_type="application/json"), + Document("/temp/doc2", "2", content_type="application/xml"), + ] + ) + assert 200 == response.status_code + + doc1 = client.get("v1/documents?uri=/temp/doc1").json() + assert 1 == doc1["doc"] + doc2_text = client.get("v1/documents?uri=/temp/doc2").text + assert doc2_text.__contains__("2") + + +def test_single_doc(client): + response = client.documents.write([Document("/temp/doc1.json", {"doc": 1})]) + assert 200 == response.status_code + + doc1 = client.get("v1/documents?uri=/temp/doc1.json").json() + assert 1 == doc1["doc"] + + +def test_server_generated_uri(client): + response = client.documents.write( + [Document(None, {"doc": "serveruri"}, extension=".json", directory="/temp/")] + ) + assert 200 == response.status_code + + # Do a search to find the URI. + data = client.get("/v1/search?q=serveruri&format=json").json() + assert 1 == data["total"] + uri = data["results"][0]["uri"] + + doc1 = client.get(f"v1/documents?uri={uri}").json() + assert "serveruri" == doc1["doc"] + + +def test_repair_xml(client): + response = client.documents.write( + [Document("/temp/doc1.xml", "needs closing tag", repair="full")] + ) + assert 200 == response.status_code + + xml = client.get("v1/documents?uri=/temp/doc1.xml").text + assert xml.__contains__("needs closing tag") + + +@pytest.mark.skip("Will succeed only if MarkLogic converters are installed.") +def test_extract_binary(client): + content = "MarkLogic and Python".encode("ascii") + response = client.documents.write( + [Document("/temp/doc1.bin", content, extract="properties")] + ) + assert 200 == response.status_code + + +def test_optimistic_locking(client): + response = client.documents.write( + [Document("/temp/doc1.json", {"content": "original"})] + ) + assert 200 == response.status_code + + # The ETag defines the version of the document. + etag = client.get("v1/documents?uri=/temp/doc1.json").headers["ETag"] + + # Update the document, passing in the current version_id based on the ETag. + response = client.documents.write( + [Document("/temp/doc1.json", {"content": "updated!"}, version_id=etag)] + ) + assert 200 == response.status_code + + # Verify the doc was updated. + doc = client.get("v1/documents?uri=/temp/doc1.json").json() + assert "updated!" == doc["content"] + + # Next update should fail since the ETag is no longer the current version. + response = client.documents.write( + [Document("/temp/doc1.json", {"this": "should fail"}, version_id=etag)] + ) + assert 412 == response.status_code, "412 is returned when the versionId is invalid." + assert response.text.__contains__("RESTAPI-CONTENTWRONGVERSION") + + +def test_temporal_doc(client): + content = { + "text": "hello world", + "systemStart": "2014-04-03T11:00:00", + "systemEnd": "2014-04-03T16:00:00", + "validStart": "2014-04-03T11:00:00", + "validEnd": "2014-04-03T16:00:00", + } + + response = client.documents.write( + [Document("/temp/doc1.json", content, temporal_document="custom1")], + params={"temporal-collection": "temporal-collection"}, + ) + assert 200 == response.status_code + + # Verify that the temporal doc was written to the "custom1" collection. This will be + # easier to do once we have support for reading documents and their metadata. + data = client.get("/v1/search?collection=custom1&format=json").json() + assert 1 == data["total"] + assert "/temp/doc1.json" == data["results"][0]["uri"]