From 8ff44774bf1f583c3577c181befd9753c2dd3634 Mon Sep 17 00:00:00 2001
From: Phil Barber
Date: Fri, 22 Sep 2023 14:44:42 -0400
Subject: [PATCH] DEVEXP-574: Submit Optic plan with Python client

---
 .gitignore                                    |   2 +
 marklogic/rows.py                             | 128 +++++++++++++++++-
 .../ml-config/databases/modules-database.json |   3 -
 ...{test-server.json => rest-api-server.json} |   0
 tests/test_query.py                           | 110 +++++++++++++++
 5 files changed, 236 insertions(+), 7 deletions(-)
 delete mode 100644 test-app/src/main/ml-config/databases/modules-database.json
 rename test-app/src/main/ml-config/servers/{test-server.json => rest-api-server.json} (100%)
 create mode 100644 tests/test_query.py

diff --git a/.gitignore b/.gitignore
index b1ee5a4..10d869e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ __pycache__
 .venv
 venv
 .idea
+
+.DS_Store
diff --git a/marklogic/rows.py b/marklogic/rows.py
index da47252..1eca7e3 100644
--- a/marklogic/rows.py
+++ b/marklogic/rows.py
@@ -1,5 +1,5 @@
 import json
-from requests import Session
+from requests import Session, Response
 
 """
 Defines a RowManager class to simplify usage of the "/v1/rows" & "/v1/rows/graphql" REST
@@ -11,10 +11,11 @@ class RowManager:
     """
     Provides a method to simplify sending a GraphQL request to the GraphQL rows endpoint.
     """
+
     def __init__(self, session: Session):
         self._session = session
-    
-    def graphql(self, graphql_query, return_response=False, *args, **kwargs):
+
+    def graphql(self, graphql_query: str, return_response: bool = False, *args, **kwargs):
         """
         Send a GraphQL query to MarkLogic via a POST to the endpoint defined at
         https://docs.marklogic.com/REST/POST/v1/rows/graphql
@@ -40,4 +41,123 @@ def graphql(self, graphql_query, return_response=False, *args, **kwargs):
             response.json()
             if response.status_code == 200 and not return_response
             else response
-        )
\ No newline at end of file
+        )
+
+    __accept_switch = {
+        "json": "application/json",
+        "xml": "application/xml",
+        "csv": "text/csv",
+        "json-seq": "application/json-seq",
+        "mixed": "application/xml, multipart/mixed"
+    }
+
+    __query_format_switch = {
+        "json": lambda response: response.json(),
+        "xml": lambda response: response.text,
+        "csv": lambda response: response.text,
+        "json-seq": lambda response: response.text,
+        "mixed": lambda response: response
+    }
+
+    def query(
+        self,
+        dsl: str = None,
+        plan: dict = None,
+        sql: str = None,
+        sparql: str = None,
+        format: str = "json",
+        return_response: bool = False,
+        *args,
+        **kwargs
+    ):
+        """
+        Send a query to MarkLogic via a POST to the endpoint defined at
+        https://docs.marklogic.com/REST/POST/v1/rows
+        Just like that endpoint, this function can be used for four different types of
+        queries: Optic DSL, Serialized Optic, SQL, and SPARQL. The type of query
+        processed by the function is dependent upon the parameter used in the call to
+        the function.
+        For more information about Optic and using the Optic DSL, SQL, and SPARQL,
+        see https://docs.marklogic.com/guide/app-dev/OpticAPI
+        If multiple query parameters are passed into the call, the function uses the
+        query parameter that is first in the list: dsl, plan, sql, sparql.
+
+        :param dsl: an Optic DSL query
+        :param plan: a serialized Optic query; a dict is converted to a JSON string
+        before it is sent, while a string is sent as-is
+        :param sql: an SQL query
+        :param sparql: a SPARQL query
+        :param format: the format of the returned rows; one of "json", "xml", "csv",
+        "json-seq", or "mixed"
+        :param return_response: boolean specifying if the entire original response
+        object should be returned (True) or if only the data should be returned (False)
+        upon a 200 response. Note that if the status code of the response is not 200,
+        then the entire response is always returned.
+        :param args: accepted but not used by this function
+        :param kwargs: passed on to the POST call; a "headers" entry is copied and
+        extended with the Content-Type and Accept headers
+        :return: the parsed JSON for "json"; the response text for "xml", "csv", and
+        "json-seq"; the response object itself for "mixed"
+        :raises ValueError: if no query parameter is given or format is not supported
+        """
+        request_info = self.__get_request_info(dsl, plan, sql, sparql)
+        # Validate the format up front so that an unsupported value fails before
+        # any request is sent to the server.
+        if format not in RowManager.__accept_switch:
+            raise ValueError(
+                "Unsupported format: {}; must be one of: {}".format(
+                    format, ", ".join(RowManager.__accept_switch)
+                )
+            )
+        # Copy the headers so the dict supplied by the caller is left untouched.
+        headers = dict(kwargs.pop("headers", None) or {})
+        headers["Content-Type"] = request_info["content-type"]
+        headers["Accept"] = RowManager.__accept_switch[format]
+        response = self._session.post(
+            "v1/rows",
+            headers=headers,
+            data=request_info["data"],
+            **kwargs
+        )
+        return (
+            RowManager.__query_format_switch[format](response)
+            if response.status_code == 200 and not return_response
+            else response
+        )
+
+    def __get_request_info(self, dsl: str, plan: dict, sql: str, sparql: str):
+        """
+        Examine the parameters passed into the query function to determine what value
+        should be passed to the endpoint and what the content-type header should be.
+
+        :param dsl: an Optic DSL query
+        :param plan: a serialized Optic query
+        :param sql: an SQL query
+        :param sparql: a SPARQL query
+        :return: a dict containing the two values required to make the POST request,
+        under the keys "content-type" and "data"
+        :raises ValueError: if dsl, plan, sql, and sparql are all None
+        """
+        if dsl is not None:
+            return {
+                "content-type": "application/vnd.marklogic.querydsl+javascript",
+                "data": dsl
+            }
+        if plan is not None:
+            return {
+                "content-type": "application/json",
+                # requests would form-encode a dict passed as data, so serialize it;
+                # a string is sent unchanged.
+                "data": json.dumps(plan) if isinstance(plan, dict) else plan
+            }
+        if sql is not None:
+            return {
+                "content-type": "application/sql",
+                "data": sql
+            }
+        if sparql is not None:
+            return {
+                "content-type": "application/sparql-query",
+                "data": sparql
+            }
+        raise ValueError("No query found; must specify one of: dsl, plan, sql, or sparql")
diff --git a/test-app/src/main/ml-config/databases/modules-database.json b/test-app/src/main/ml-config/databases/modules-database.json
deleted file mode 100644
index f0112d1..0000000
--- a/test-app/src/main/ml-config/databases/modules-database.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "database-name": "%%MODULES_DATABASE%%"
-}
\ No newline at end of file
diff --git a/test-app/src/main/ml-config/servers/test-server.json b/test-app/src/main/ml-config/servers/rest-api-server.json
similarity index 100%
rename from test-app/src/main/ml-config/servers/test-server.json
rename to test-app/src/main/ml-config/servers/rest-api-server.json
diff --git a/tests/test_query.py b/tests/test_query.py
new file mode 100644
index 0000000..0a23827
--- /dev/null
+++ b/tests/test_query.py
@@ -0,0 +1,110 @@
+import json
+
+from pytest import raises
+
+dsl_query = 'op.fromView("test","musician").orderBy(op.col("lastName"))'
+serialized_query = '{"$optic":{"ns":"op", "fn":"operators", "args":[{"ns":"op", "fn":"from-view", "args":["test", "musician"]}, {"ns":"op", "fn":"order-by", "args":[{"ns":"op", "fn":"col", "args":["lastName"]}]}]}}'
+sql_query = 'select * from musician order by lastName'
+# NOTE(review): the IRI that should follow "PREFIX musician:" is missing from this
+# query (it looks like angle-bracketed text was stripped); restore it before use.
+sparql_query = 'PREFIX musician: SELECT * WHERE {?s musician:lastName ?lastName} ORDER BY ?lastName'
+
+
+def test_dsl_default(client):
+    data = client.rows.query(dsl_query)
+    verify_four_musicians_are_returned_in_json(data, "test.musician.lastName")
+
+
+def test_dsl_default_return_response(client):
+    response = client.rows.query(dsl_query, return_response=True)
+    assert 200 == response.status_code
+    verify_four_musicians_are_returned_in_json(response.json(), "test.musician.lastName")
+
+
+def test_query_bad_user(not_rest_user_client):
+    response = not_rest_user_client.rows.query(dsl_query)
+    assert 403 == response.status_code
+
+
+def test_dsl_json(client):
+    data = client.rows.query(dsl_query, format="json")
+    verify_four_musicians_are_returned_in_json(data, "test.musician.lastName")
+
+
+def test_dsl_xml(client):
+    data = client.rows.query(dsl_query, format="xml")
+    verify_four_musicians_are_returned_in_xml_string(data)
+
+
+def test_dsl_csv(client):
+    data = client.rows.query(dsl_query, format="csv")
+    verify_four_musicians_are_returned_in_csv(data)
+
+
+def test_dsl_json_seq(client):
+    data = client.rows.query(dsl_query, format="json-seq")
+    verify_four_musicians_are_returned_in_json_seq(data)
+
+
+def test_dsl_mixed(client):
+    response = client.rows.query(dsl_query, format="mixed")
+    verify_four_musicians_are_returned_in_json(response.json(), "test.musician.lastName")
+
+
+def test_serialized_default(client):
+    data = client.rows.query(plan=serialized_query)
+    verify_four_musicians_are_returned_in_json(data, "test.musician.lastName")
+
+
+def test_serialized_dict(client):
+    data = client.rows.query(plan=json.loads(serialized_query))
+    verify_four_musicians_are_returned_in_json(data, "test.musician.lastName")
+
+
+def test_sql_default(client):
+    data = client.rows.query(sql=sql_query)
+    verify_four_musicians_are_returned_in_json(data, "test.musician.lastName")
+
+
+def test_sparql_default(client):
+    data = client.rows.query(sparql=sparql_query)
+    verify_four_musicians_are_returned_in_json(data, "lastName")
+
+
+def test_no_query_parameter_provided(client):
+    with raises(ValueError, match="No query found; must specify one of: dsl, plan, sql, or sparql"):
+        client.rows.query()
+
+
+def test_unsupported_format(client):
+    with raises(ValueError, match="Unsupported format"):
+        client.rows.query(dsl_query, format="yaml")
+
+
+def verify_four_musicians_are_returned_in_json(data, column_name):
+    assert isinstance(data, dict)
+    assert 4 == len(data["rows"])
+    for index, musician in enumerate(["Armstrong", "Byron", "Coltrane", "Davis"]):
+        assert {'type': 'xs:string', 'value': musician} == data["rows"][index][column_name]
+
+
+def verify_four_musicians_are_returned_in_xml_string(data):
+    assert isinstance(data, str)
+    assert 4 == data.count('lastName" type="xs:string">')
+    for musician in ["Armstrong", "Byron", "Coltrane", "Davis"]:
+        assert 'lastName" type="xs:string">' + musician in data
+
+
+def verify_four_musicians_are_returned_in_csv(data):
+    assert isinstance(data, str)
+    assert 5 == len(data.split("\n"))
+    for musician in ['Armstrong,Louis,1901-08-04', 'Byron,Don,1958-11-08', 'Coltrane,John,1926-09-23', 'Davis,Miles,1926-05-26']:
+        assert musician in data
+
+
+def verify_four_musicians_are_returned_in_json_seq(data):
+    assert isinstance(data, str)
+    rows = data.split("\n")
+    assert 6 == len(rows)
+    for musician in ["Armstrong", "Byron", "Coltrane", "Davis"]:
+        assert 'lastName":{"type":"xs:string","value":"' + musician in data