From 305de78e3d44a8775b6d88b56f22d54cfd0425c0 Mon Sep 17 00:00:00 2001
From: Rob Rudin <rob.rudin@marklogic.com>
Date: Mon, 17 Jul 2023 09:24:25 -0400
Subject: [PATCH] DEVEXP-503 Can now read a batch of documents with metadata

Reworked some of the existing "write" tests to now use `client.documents.read` to verify data that was written.
---
 marklogic/documents.py                        | 174 +++++++++++++++---
 .../security/users/python-not-rest-user.json  |   8 +
 .../main/ml-modules/transforms/envelope.sjs   |   6 +
 tests/conftest.py                             |  14 +-
 tests/test_get_documents.py                   |  54 ------
 tests/test_read_documents.py                  | 122 ++++++++++++
 tests/test_search.py                          |   5 -
 tests/test_write_documents.py                 |  23 ++-
 tests/test_write_documents_with_metadata.py   | 130 ++++++-------
 9 files changed, 383 insertions(+), 153 deletions(-)
 create mode 100644 test-app/src/main/ml-config/security/users/python-not-rest-user.json
 create mode 100644 test-app/src/main/ml-modules/transforms/envelope.sjs
 delete mode 100644 tests/test_get_documents.py
 create mode 100644 tests/test_read_documents.py
 delete mode 100644 tests/test_search.py

diff --git a/marklogic/documents.py b/marklogic/documents.py
index c00d1c8..74ac8c7 100644
--- a/marklogic/documents.py
+++ b/marklogic/documents.py
@@ -1,7 +1,9 @@
 import json
+from collections import OrderedDict
 from typing import Union
 
 from requests import Response, Session
+from requests_toolbelt.multipart.decoder import MultipartDecoder
 from urllib3.fields import RequestField
 from urllib3.filepost import encode_multipart_formdata
 
@@ -63,27 +65,29 @@ def metadata_to_dict(metadata: Metadata) -> dict:
     return md
 
 
+def dict_to_metadata(metadata: dict, target_metadata: Metadata) -> None:
+    """
+    Populates the given Metadata instance based on the metadata dictionary as returned
+    by the /v1/documents REST endpoint; anything missing from the dict is set to None.
+    """
+    target_metadata.collections = metadata.get("collections")
+    target_metadata.quality = metadata.get("quality")
+    target_metadata.metadata_values = metadata.get("metadataValues")
+    target_metadata.properties = metadata.get("properties")
+    # Convert the list of role-name/capabilities objects to a role -> capabilities dict.
+    perms = metadata.get("permissions")
+    if perms:
+        target_metadata.permissions = {
+            perm["role-name"]: perm["capabilities"] for perm in perms
+        }
+    else:
+        target_metadata.permissions = None
+
+
 class Document(Metadata):
     """
-    :param uri: the URI of the document; can be None when relying on MarkLogic to
-    generate a URI.
-    :param content: the content of the document.
-    :param collections: see definition in parent class.
-    :param permissions: see definition in parent class.
-    :param quality: see definition in parent class.
-    :param metadata_values: see definition in parent class.
-    :param properties: see definition in parent class.
-    :param content_type: the MIME type of the document; use when MarkLogic cannot
-    determine the MIME type based on the URI.
-    :param extension: specifies a suffix for a URI generated by MarkLogic.
-    :param directory: specifies a prefix for a URI generated by MarkLogic.
-    :param repair: for an XML document, the level of XML repair to perform; can be
-    "full" or "none", with "none" being the default.
-    :param version_id: affects updates when optimistic locking is enabled; see
-    https://docs.marklogic.com/REST/POST/v1/documents for more information.
-    :param temporal_document: the logical document URI for a document written to a
-    temporal collection; requires that a "temporal-collection" parameter be included in
-    the request.
+    Represents a document, either as read from MarkLogic or as a document to be
+    written to MarkLogic.
     """
 
     def __init__(
@@ -96,24 +100,48 @@ def __init__(
         metadata_values: dict = None,
         properties: dict = None,
         content_type: str = None,
+        version_id: str = None,
         extension: str = None,
         directory: str = None,
         repair: str = None,
         extract: str = None,
-        version_id: str = None,
         temporal_document: str = None,
     ):
+        """
+        :param uri: the URI of the document; can be None when relying on MarkLogic to
+        generate a URI.
+        :param content: the content of the document.
+        :param collections: see definition in parent class.
+        :param permissions: see definition in parent class.
+        :param quality: see definition in parent class.
+        :param metadata_values: see definition in parent class.
+        :param properties: see definition in parent class.
+        :param content_type: the MIME type of the document; use when MarkLogic cannot
+        determine the MIME type based on the URI.
+        :param version_id: affects updates when optimistic locking is enabled; see
+        https://docs.marklogic.com/REST/POST/v1/documents for more information.
+        :param extension: specifies a suffix for a URI generated by MarkLogic; only used
+        when writing a document.
+        :param directory: specifies a prefix for a URI generated by MarkLogic; only used
+        when writing a document.
+        :param repair: for an XML document, the level of XML repair to perform; can be
+        "full" or "none", with "none" being the default; only used when writing a
+        document.
+        :param temporal_document: the logical document URI for a document written to a
+        temporal collection; requires that a "temporal-collection" parameter be
+        included in the request; only used when writing a document.
+        """
         super().__init__(collections, permissions, quality, metadata_values, properties)
         self.uri = uri
         self.content = content
+        self.content_type = content_type
+        self.version_id = version_id
 
         # The following are all specific to writing a document.
-        self.content_type = content_type
         self.extension = extension
         self.directory = directory
         self.repair = repair
         self.extract = extract
-        self.version_id = version_id
         self.temporal_document = temporal_document
 
     def to_request_field(self) -> RequestField:
@@ -208,6 +236,37 @@ def to_metadata_request_field(self) -> RequestField:
         return field
 
 
+def _extract_values_from_header(part) -> dict:
+    """
+    Returns a dict with the uri, category, content_type, and version_id of a
+    multipart body part, as read from its Content-Disposition/Content-Type headers.
+    """
+    encoding = part.encoding
+    disposition = part.headers["Content-Disposition".encode(encoding)].decode(encoding)
+    disposition_values = {}
+    for item in disposition.split(";"):
+        # Split on the first "=" only; items without "=" ("attachment") are skipped.
+        name, separator, value = item.partition("=")
+        if separator:
+            disposition_values[name.strip()] = value
+
+    raw_content_type = part.headers.get("Content-Type".encode(encoding))
+    content_type = raw_content_type.decode(encoding) if raw_content_type else None
+
+    uri = disposition_values["filename"]
+    if uri.startswith('"'):
+        uri = uri[1:]
+    if uri.endswith('"'):
+        uri = uri[:-1]
+
+    return {
+        "uri": uri,
+        "category": disposition_values["category"],
+        "content_type": content_type,
+        "version_id": disposition_values.get("versionId"),
+    }
+
+
 class DocumentManager:
     """
     Provides methods to simplify interacting with the /v1/documents REST endpoint
@@ -251,3 +310,74 @@ def write(
             headers["Accept"] = "application/json"
 
         return self._session.post("/v1/documents", data=data, headers=headers, **kwargs)
+
+    def _get_multipart_documents_response(
+        self, uris: list[str], categories: list[str], **kwargs
+    ) -> Response:
+        """
+        Constructs and sends a multipart/mixed request to the v1/documents endpoint.
+        """
+        # Copy the user's params/headers so that the caller's dicts are not mutated.
+        params = dict(kwargs.pop("params", None) or {})
+        params["uri"] = uris
+        params["format"] = "json"  # This refers to the metadata format.
+        if categories:
+            params["category"] = categories
+        headers = dict(kwargs.pop("headers", None) or {})
+        headers["Accept"] = "multipart/mixed"
+        return self._session.get(
+            "/v1/documents", params=params, headers=headers, **kwargs
+        )
+
+    def read(
+        self, uris: list[str], categories: list[str] = None, **kwargs
+    ) -> Union[list[Document], Response]:
+        """
+        Read one or many documents via a GET to the endpoint defined at
+        https://docs.marklogic.com/REST/GET/v1/documents . If a 200 is not returned
+        by that endpoint, then the Response is returned instead.
+
+        :param uris: list of URIs to read.
+        :param categories: optional list of the categories of data to return for each
+        URI. By default, only content will be returned for each URI. See the endpoint
+        documentation for further information.
+        :param kwargs: passed on to the GET request; any "params" and "headers" are
+        kept, except that the "uri", "format", and (when categories is given)
+        "category" parameters and the "Accept" header are set by the client.
+        :return: a list with one Document per URI found in the response, or the
+        Response when the status code is not 200.
+        """
+        response = self._get_multipart_documents_response(uris, categories, **kwargs)
+        if response.status_code != 200:
+            return response
+
+        # Content and metadata for a URI arrive as separate parts, and no order is
+        # assumed, so Documents are keyed by URI and filled in part by part. The
+        # OrderedDict keeps the URIs in the order the endpoint returned them, which
+        # is not guaranteed to be the order provided by the user.
+        docs = OrderedDict()
+        for part in MultipartDecoder.from_response(response).parts:
+            header_values = _extract_values_from_header(part)
+            uri = header_values["uri"]
+            # Reuse the Document created by an earlier part for this URI, if any.
+            doc = docs.get(uri)
+            if not doc:
+                doc = Document(uri, None)
+                docs[uri] = doc
+
+            if header_values["category"] != "content":
+                # Every non-content part holds metadata, which is requested as JSON.
+                dict_to_metadata(json.loads(part.content), doc)
+                continue
+
+            # A content part: JSON content is parsed into Python data, and any other
+            # content is kept exactly as received.
+            content_type = header_values["content_type"]
+            if content_type == "application/json":
+                doc.content = json.loads(part.content)
+            else:
+                doc.content = part.content
+            doc.content_type = content_type
+            doc.version_id = header_values["version_id"]
+
+        return list(docs.values())
diff --git a/test-app/src/main/ml-config/security/users/python-not-rest-user.json b/test-app/src/main/ml-config/security/users/python-not-rest-user.json
new file mode 100644
index 0000000..71fba98
--- /dev/null
+++ b/test-app/src/main/ml-config/security/users/python-not-rest-user.json
@@ -0,0 +1,8 @@
+{
+    "user-name": "python-not-rest-user",
+    "description": "For tests where the user does not have the privileges required by the REST API.",
+    "password": "password",
+    "role": [
+        "qconsole-user"
+    ]
+}
\ No newline at end of file
diff --git a/test-app/src/main/ml-modules/transforms/envelope.sjs b/test-app/src/main/ml-modules/transforms/envelope.sjs
new file mode 100644
index 0000000..3b6f6be
--- /dev/null
+++ b/test-app/src/main/ml-modules/transforms/envelope.sjs
@@ -0,0 +1,6 @@
+// REST transform that wraps the document content in an "envelope" object.
+function transform(context, params, content) {
+    const wrapped = {"envelope": content};
+    return wrapped;
+}
+exports.transform = transform;
diff --git a/tests/conftest.py b/tests/conftest.py
index 8e796b1..bec236e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,21 +1,29 @@
 import pytest
+
 from marklogic import Client
 
+BASE_URL = "http://localhost:8030"
+
 
 @pytest.fixture
 def client():
-    return Client("http://localhost:8030", digest=("python-test-user", "password"))
+    return Client(BASE_URL, digest=("python-test-user", "password"))
 
 
 @pytest.fixture
 def admin_client():
-    return Client("http://localhost:8030", digest=("python-test-admin", "password"))
+    return Client(BASE_URL, digest=("python-test-admin", "password"))
 
 
 @pytest.fixture
 def basic_client():
     # requests allows a tuple to be passed when doing basic authentication.
-    return Client("http://localhost:8030", auth=("python-test-user", "password"))
+    return Client(BASE_URL, auth=("python-test-user", "password"))
+
+
+@pytest.fixture
+def not_rest_user_client():
+    return Client(BASE_URL, digest=("python-not-rest-user", "password"))
 
 
 @pytest.fixture
diff --git a/tests/test_get_documents.py b/tests/test_get_documents.py
deleted file mode 100644
index 0848560..0000000
--- a/tests/test_get_documents.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from requests_toolbelt.multipart.decoder import MultipartDecoder
-
-
-def test_get_docs(client):
-    """
-    Possible future client interface:
-    array_of_documents = client.documents.get(uri=[], metadata=True)
-
-    Where each Document in the array would have fields of:
-    uri/content/collections/permissions/quality/properties/metadata_values.
-    """
-    response = client.get(
-        "/v1/documents",
-        params={
-            "uri": ["/doc1.json", "/doc2.xml"],
-            "category": ["content", "metadata"],
-            "format": "json",  # Applies only to metadata
-        },
-        headers={"Accept": "multipart/mixed"},
-    )
-
-    assert 200 == response.status_code
-
-    # Could provide a class for converting a multipart/mixed response into an array
-    # of documents too:
-    # from marklogic import DocumentDecoder
-    # array_of_documents = DocumentDecoder.from_response(response)
-    decoder = MultipartDecoder.from_response(response)
-    for part in decoder.parts:
-        print(part.headers)
-        print(part.text)
-
-
-def test_search_docs(client_with_props):
-    response = client_with_props.get(
-        "v1/search",
-        params={
-            "collection": "test-data",
-            "category": ["content", "metadata"],
-            "format": "json",  # Applies only to metadata
-        },
-        headers={"Accept": "multipart/mixed"},  # Indicates we want documents back.
-    )
-
-    for part in MultipartDecoder.from_response(response).parts:
-        print(part.headers)
-        print(part.text)
-
-
-def test_get_docs_basic_auth(basic_client):
-    # Just verifies that basic auth works as expected.
-    response = basic_client.get("/v1/documents", params={"uri": "/doc1.json"})
-    assert 200 == response.status_code
-    assert "world" == response.json()["hello"]
diff --git a/tests/test_read_documents.py b/tests/test_read_documents.py
new file mode 100644
index 0000000..a8b104c
--- /dev/null
+++ b/tests/test_read_documents.py
@@ -0,0 +1,122 @@
+from requests import Response
+
+from marklogic import Client
+from marklogic.documents import Document
+
+DEFAULT_PERMS = {"python-tester": ["read", "update"]}
+
+
+def test_write_and_read_binary(client: Client):
+    """
+    Verifies that binary content can be written and then read back as bytes that
+    decode to the original text.
+    """
+    uri = "/temp/doc1.bin"
+    text = "MarkLogic and Python"
+    binary_doc = Document(uri, text.encode("ascii"), permissions=DEFAULT_PERMS)
+
+    response = client.documents.write([binary_doc])
+    assert 200 == response.status_code
+
+    docs = client.documents.read([uri])
+    assert len(docs) == 1
+
+    doc = docs[0]
+    assert doc.uri == uri
+    decoded = doc.content.decode("ascii")
+    assert decoded == text
+
+
+def test_read_uri_with_double_quotes(client: Client):
+    # The double quote is written literally but read via its %22 encoding.
+    encoded_uri = "/this/%22works.json"
+    doc = Document('/this/"works.json', {"hello": "world"}, permissions=DEFAULT_PERMS)
+    response = client.documents.write([doc])
+    assert response.status_code == 200
+
+    docs = client.documents.read([encoded_uri])
+    assert len(docs) == 1
+    assert encoded_uri == docs[0].uri
+
+
+def test_uri_not_found(client: Client):
+    # A URI that matches no document yields an empty list rather than None.
+    missing = client.documents.read(["/doesnt-exist.json"])
+    assert missing is not None and len(missing) == 0
+
+
+def test_read_with_transform(client: Client):
+    """
+    Verifies that "params" passed by the user, here a REST transform, are sent along
+    with the request parameters added by the client.
+    """
+    user_params = {"transform": "envelope"}
+    requested = ["content", "metadata"]
+    docs = client.documents.read(
+        ["/doc1.json"], categories=requested, params=user_params
+    )
+    assert 1 == len(docs)
+    assert docs[0].content == {"envelope": {"hello": "world"}}
+
+
+def test_read_only_collections(client: Client):
+    """
+    Verifies that only the collections of each document are returned when
+    "collections" is the only category requested.
+    """
+
+    def assert_only_collections(doc: Document, expected_uri: str):
+        assert doc.uri == expected_uri
+        assert len(doc.collections) == 1
+        assert doc.collections[0] == "test-data"
+        # Nothing besides the collections was requested.
+        assert doc.content is None
+        assert doc.permissions is None
+        assert doc.quality is None
+        assert doc.metadata_values is None
+        assert doc.properties is None
+
+    docs = client.documents.read(
+        ["/doc1.json", "/doc2.xml"], categories=["collections"]
+    )
+    assert 2 == len(docs)
+
+    # The test expects the Documents in the same order as the requested URIs.
+    assert_only_collections(docs[0], "/doc1.json")
+    assert_only_collections(docs[1], "/doc2.xml")
+
+
+def test_with_accept_header(client: Client):
+    """
+    Verifies that an Accept header supplied by the user is overridden, since the
+    client always sets it to multipart/mixed.
+    """
+    uri = "/doc1.json"
+    bad_headers = {"Accept": "something/invalid"}
+    docs = client.documents.read(
+        [uri], headers=bad_headers, categories=["content", "quality"]
+    )
+    assert len(docs) == 1
+
+    doc = docs[0]
+    assert doc.uri == uri
+    assert doc.content == {"hello": "world"}
+    assert doc.quality == 0
+    assert doc.collections is None
+
+
+def test_read_with_basic_client(basic_client: Client):
+    # Basic auth only needs a smoke test: one known document is read successfully.
+    docs = basic_client.documents.read(["/doc1.json"])
+    assert {"hello": "world"} == docs[0].content
+
+
+def test_not_rest_user(not_rest_user_client: Client):
+    # Without a 200, read returns the Response itself rather than a list of
+    # Documents, so the status code can be checked directly.
+    uris = ["/doc1.json", "/doc2.xml"]
+    response: Response = not_rest_user_client.documents.read(uris)
+    why = """The user does not have the rest-reader privilege, so MarkLogic is expected
+    to return a 403. And the documents.read method is then expected to return the
+    Response so that the user has access to everything in it."""
+    assert response.status_code == 403, why
diff --git a/tests/test_search.py b/tests/test_search.py
deleted file mode 100644
index 19316da..0000000
--- a/tests/test_search.py
+++ /dev/null
@@ -1,5 +0,0 @@
-def test_search(client):
-    response = client.get("v1/search")
-    assert 200 == response.status_code
-    assert "application/xml; charset=utf-8" == response.headers["Content-type"]
-    assert response.text.startswith("<search:response")
diff --git a/tests/test_write_documents.py b/tests/test_write_documents.py
index daaca0c..1b5e68c 100644
--- a/tests/test_write_documents.py
+++ b/tests/test_write_documents.py
@@ -20,10 +20,17 @@ def test_write_json(client: Client):
     data = response.json()
     assert len(data["documents"]) == 2
 
-    doc1 = client.get("v1/documents?uri=/temp/doc1.json").json()
-    assert 1 == doc1["doc"]
-    doc2 = client.get("v1/documents?uri=/temp/doc2.json").json()
-    assert 2 == doc2["doc"]
+    docs = client.documents.read(["/temp/doc1.json", "/temp/doc2.json"])
+
+    doc1 = next(doc for doc in docs if doc.uri == "/temp/doc1.json")
+    assert "application/json" == doc1.content_type
+    assert doc1.version_id is not None
+    assert {"doc": 1} == doc1.content
+
+    doc2 = next(doc for doc in docs if doc.uri == "/temp/doc2.json")
+    assert "application/json" == doc2.content_type
+    assert doc2.version_id is not None
+    assert {"doc": 2} == doc2.content
 
 
 def test_return_xml(client: Client):
@@ -221,9 +228,11 @@ def test_temporal_doc(client):
 
     # Verify that the temporal doc was written to the "custom1" collection. This will be
     # easier to do once we have support for reading documents and their metadata.
-    data = client.get("/v1/search?collection=custom1&format=json").json()
-    assert 1 == data["total"]
-    assert "/temp/doc1.json" == data["results"][0]["uri"]
+    doc = client.documents.read("/temp/doc1.json", categories=["collections"])[0]
+    assert len(doc.collections) == 3
+    assert "custom1" in doc.collections
+    assert "latest" in doc.collections
+    assert "temporal-collection" in doc.collections
 
 
 def test_metadata_no_content(client: Client):
diff --git a/tests/test_write_documents_with_metadata.py b/tests/test_write_documents_with_metadata.py
index 7e750aa..1bb6e3b 100644
--- a/tests/test_write_documents_with_metadata.py
+++ b/tests/test_write_documents_with_metadata.py
@@ -29,8 +29,11 @@ def test_all_metadata(client: Client):
     )
     assert 200 == response.status_code
 
-    metadata = _get_metadata(client, "/temp/doc1.json")
-    _verify_test_metadata_exists(metadata)
+    docs = client.documents.read(
+        ["/temp/doc1.json"], categories=["content", "metadata"]
+    )
+
+    _verify_test_metadata_exists(docs[0])
 
 
 def test_only_quality_and_permissions(client: Client):
@@ -50,11 +53,13 @@ def test_only_quality_and_permissions(client: Client):
 
     assert 200 == response.status_code
 
-    metadata = _get_metadata(client, "/temp/doc1.json")
-    assert 2 == metadata["quality"]
-    assert 0 == len(metadata["collections"])
-    assert 0 == len(metadata["properties"].keys())
-    assert 0 == len(metadata["metadataValues"].keys())
+    doc = client.documents.read(
+        ["/temp/doc1.json"], categories=["content", "metadata"]
+    )[0]
+    assert 2 == doc.quality
+    assert 0 == len(doc.collections)
+    assert 0 == len(doc.properties.keys())
+    assert 0 == len(doc.metadata_values.keys())
 
 
 def test_only_quality(client: Client):
@@ -88,7 +93,7 @@ def test_default_metadata(client: Client):
             Document(
                 "/temp/doc2.json",
                 {"doc": 2},
-                permissions={"python-tester": "update", "rest-extension-user": "read"}
+                permissions={"python-tester": "update", "rest-extension-user": "read"},
             ),
             DefaultMetadata(
                 permissions={"python-tester": "update", "qconsole-user": "read"}
@@ -100,74 +105,75 @@ def test_default_metadata(client: Client):
     assert 200 == response.status_code
 
     # doc1 should use the first set of default metadata
-    metadata = _get_metadata(client, "/temp/doc1.json")
-    _verify_test_metadata_exists(metadata)
-
-    # doc2 should use its own metadata
-    metadata = _get_metadata(client, "/temp/doc2.json")
-    assert 0 == metadata["quality"]
-    assert 0 == len(metadata["collections"])
-    assert 0 == len(metadata["properties"].keys())
-    assert 0 == len(metadata["metadataValues"].keys())
-    perms = metadata["permissions"]
-    assert 2 == len(perms)
-    perm = next(perm for perm in perms if perm["role-name"] == "python-tester")
-    assert 1 == len(perm["capabilities"])
-    assert "update" in perm["capabilities"]
-    perm = next(perm for perm in perms if perm["role-name"] == "rest-extension-user")
-    assert 1 == len(perm["capabilities"])
-    assert "read" in perm["capabilities"]
-
-    # doc3 should use the second set of default metadata
-    metadata = _get_metadata(client, "/temp/doc3.json")
-    assert 0 == metadata["quality"]
-    assert 0 == len(metadata["collections"])
-    assert 0 == len(metadata["properties"].keys())
-    assert 0 == len(metadata["metadataValues"].keys())
-    perms = metadata["permissions"]
-    assert 2 == len(perms)
-    perm = next(perm for perm in perms if perm["role-name"] == "python-tester")
-    assert 1 == len(perm["capabilities"])
-    assert "update" in perm["capabilities"]
-    perm = next(perm for perm in perms if perm["role-name"] == "qconsole-user")
-    assert 1 == len(perm["capabilities"])
-    assert "read" in perm["capabilities"]
-
-
-
-def _get_metadata(client: Client, uri: str):
-    return client.get(f"v1/documents?uri={uri}&category=metadata&format=json").json()
-
-
-def _verify_test_metadata_exists(metadata: dict):
+    docs = client.documents.read(
+        ["/temp/doc1.json", "/temp/doc2.json", "/temp/doc3.json"],
+        categories=["content", "metadata"],
+    )
+
+    doc1 = next(doc for doc in docs if doc.uri == "/temp/doc1.json")
+    doc2 = next(doc for doc in docs if doc.uri == "/temp/doc2.json")
+    doc3 = next(doc for doc in docs if doc.uri == "/temp/doc3.json")
+
+    _verify_test_metadata_exists(doc1)
+
+    # Verify doc2 uses its own metadata.
+    assert 0 == doc2.quality
+    assert 0 == len(doc2.collections)
+    assert 0 == len(doc2.properties.keys())
+    assert 0 == len(doc2.metadata_values.keys())
+    perms = doc2.permissions
+    assert 2 == len(perms.keys())
+    capabilities = perms["python-tester"]
+    assert 1 == len(capabilities)
+    assert "update" == capabilities[0]
+    capabilities = perms["rest-extension-user"]
+    assert 1 == len(capabilities)
+    assert "read" == capabilities[0]
+
+    # Verify doc3 uses the second set of default metadata.
+    assert 0 == doc3.quality
+    assert 0 == len(doc3.collections)
+    assert 0 == len(doc3.properties.keys())
+    assert 0 == len(doc3.metadata_values.keys())
+    perms = doc3.permissions
+    assert 2 == len(perms.keys())
+    capabilities = perms["python-tester"]
+    assert 1 == len(capabilities)
+    assert "update" == capabilities[0]
+    capabilities = perms["qconsole-user"]
+    assert 1 == len(capabilities)
+    assert "read" == capabilities[0]
+
+
+def _verify_test_metadata_exists(doc: Document):
     """
     Convenience function for verifying that document metadata contains the metadata
     defined by TEST_METADATA.
     """
-    perms = metadata["permissions"]
-    assert 2 == len(perms)
-    perm = next(perm for perm in perms if perm["role-name"] == "python-tester")
-    assert 2 == len(perm["capabilities"])
-    assert "read" in perm["capabilities"]
-    assert "update" in perm["capabilities"]
-    perm = next(perm for perm in perms if perm["role-name"] == "qconsole-user")
-    assert 1 == len(perm["capabilities"])
-    assert "execute" == perm["capabilities"][0]
-
-    collections = metadata["collections"]
+    perms = doc.permissions
+    assert 2 == len(perms.keys())
+    capabilities = perms["python-tester"]
+    assert 2 == len(capabilities)
+    assert "read" in capabilities
+    assert "update" in capabilities
+    capabilities = perms["qconsole-user"]
+    assert 1 == len(capabilities)
+    assert "execute" == capabilities[0]
+
+    collections = doc.collections
     assert 2 == len(collections)
     assert "c1" in collections
     assert "c2" in collections
 
-    props = metadata["properties"]
+    props = doc.properties
     assert 3 == len(props.keys())
     assert "world" == props["hello"]
     assert "<can>be embedded</can>" == props["xml"]
     assert 1 == props["number"]
 
-    assert 1 == metadata["quality"]
+    assert 1 == doc.quality
 
-    values = metadata["metadataValues"]
+    values = doc.metadata_values
     assert 2 == len(values.keys())
     assert "value1" == values["key1"]
     assert "value2" == values["key2"]