From 82bc07ba79da1763440efa807651fddb296d487a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20H=C3=B6ij?= Date: Sat, 13 Dec 2025 12:16:09 +0100 Subject: [PATCH 1/2] Support for set WOQL operators --- .gitignore | 1 + CONTRIBUTING.md | 53 ++++- .../test_woql_set_operations.py | 183 ++++++++++++++++++ terminusdb_client/woqlquery/woql_query.py | 129 ++++++++++++ 4 files changed, 359 insertions(+), 7 deletions(-) create mode 100644 terminusdb_client/tests/integration_tests/test_woql_set_operations.py diff --git a/.gitignore b/.gitignore index b93b4039..84145042 100755 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ terminusdb_client_coverage/ *~ venv/ +.venv/ # due to using tox and pytest .tox diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 745d3adf..f8062882 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,19 +4,58 @@ Thanks for interested to contribute to TerminusDB Client, to get started, fork t ## Setting up dev environment 💻 -Make sure you have Python>=3.9 installed. We use [pipenv](https://pipenv-fork.readthedocs.io/en/latest/) for dev environment, to install pipenv: +Make sure you have Python>=3.9 and <3.13 installed. -`pip3 install pipenv --upgrade` +[Fork and clone](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) this repo, then set up your development environment using one of the methods below. -[Fork and clone](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) this repo, then in your local repo: +### Option 1: Using venv (recommended) -`pipenv install --dev --pre` or `make init` +Create and activate a virtual environment: -To “editable” install the local Terminus Client Python: +```bash +# Create venv with Python 3.12 (or any version 3.9-3.12) +python3.12 -m venv .venv -`pip3 install -e .` +# Activate the virtual environment +source .venv/bin/activate # On macOS/Linux +# .venv\Scripts\activate # On Windows -**to be able to run integration tests, local installation of docker is required** +# Install the package in editable mode with dev dependencies +pip install -e ".[dev]" + +# Install pytest for running tests +pip install pytest +``` + +### Option 2: Using pipenv + +We also support [pipenv](https://pipenv-fork.readthedocs.io/en/latest/) for dev environment: + +```bash +pip install pipenv --upgrade +pipenv install --dev --pre +``` + +Or simply run `make init`. + +To "editable" install the local Terminus Client Python: + +`pip install -e .` + +### Running a local TerminusDB server + +**To run integration tests, you need either Docker or a local TerminusDB server.** + +For integration tests, you can either: + +1. **Use Docker** (automatic): Tests will automatically start a Docker container if no server is detected +2. **Use a local server**: Start the TerminusDB test server from the main terminusdb repository: + ```bash + cd /path/to/terminusdb + ./tests/terminusdb-test-server.sh start + ``` + +The test configuration will automatically detect and use an available server. We use [shed](https://pypi.org/project/shed/) to lint our code. Although you can do it manually by running `shed`, we highly recommend setting up the pre-commit hook to do the linting automatically. diff --git a/terminusdb_client/tests/integration_tests/test_woql_set_operations.py b/terminusdb_client/tests/integration_tests/test_woql_set_operations.py new file mode 100644 index 00000000..2aaba7cf --- /dev/null +++ b/terminusdb_client/tests/integration_tests/test_woql_set_operations.py @@ -0,0 +1,183 @@ +""" +Integration tests for WOQL set operations. + +These tests verify the new set operations: +- set_difference +- set_intersection +- set_union +- set_member +- list_to_set +""" +import time + +import pytest + +from terminusdb_client import Client +from terminusdb_client.woqlquery.woql_query import WOQLQuery + +test_user_agent = "terminusdb-client-python-tests" + + +def extract_values(result_list): + """Extract raw values from a list of typed literals.""" + if not result_list: + return [] + return [item["@value"] if isinstance(item, dict) and "@value" in item else item + for item in result_list] + + +class TestWOQLSetOperations: + """Tests for WOQL set operations.""" + + @pytest.fixture(autouse=True) + def setup_teardown(self, docker_url): + """Setup and teardown for each test.""" + self.client = Client(docker_url, user_agent=test_user_agent) + self.client.connect() + self.db_name = "test_woql_set_operations" + + # Create database for tests + if self.db_name in self.client.list_databases(): + self.client.delete_database(self.db_name) + self.client.create_database(self.db_name) + + yield + + # Cleanup + self.client.delete_database(self.db_name) + + def test_set_difference_basic(self): + """Test set_difference computes difference between two lists.""" + query = WOQLQuery().woql_and( + WOQLQuery().eq("v:ListA", [1, 2, 3, 4]), + WOQLQuery().eq("v:ListB", [2, 4]), + WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff") + ) + + result = self.client.query(query) + assert len(result["bindings"]) == 1 + assert extract_values(result["bindings"][0]["Diff"]) == [1, 3] + + def test_set_difference_subset(self): + """Test set_difference returns empty when first list is subset.""" + query = WOQLQuery().woql_and( + WOQLQuery().eq("v:ListA", [1, 2]), + WOQLQuery().eq("v:ListB", [1, 2, 3]), + WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff") + ) + + result = self.client.query(query) + assert len(result["bindings"]) == 1 + assert result["bindings"][0]["Diff"] == [] + + def test_set_difference_empty_list(self): + """Test set_difference handles empty lists.""" + query = WOQLQuery().woql_and( + WOQLQuery().eq("v:ListA", []), + WOQLQuery().eq("v:ListB", [1]), + WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff") + ) + + result = self.client.query(query) + assert len(result["bindings"]) == 1 + assert result["bindings"][0]["Diff"] == [] + + def test_set_intersection_basic(self): + """Test set_intersection computes intersection of two lists.""" + query = WOQLQuery().woql_and( + WOQLQuery().eq("v:ListA", [1, 2, 3]), + WOQLQuery().eq("v:ListB", [2, 3, 4]), + WOQLQuery().set_intersection("v:ListA", "v:ListB", "v:Common") + ) + + result = self.client.query(query) + assert len(result["bindings"]) == 1 + assert extract_values(result["bindings"][0]["Common"]) == [2, 3] + + def test_set_intersection_no_common(self): + """Test set_intersection returns empty when no common elements.""" + query = WOQLQuery().woql_and( + WOQLQuery().eq("v:ListA", [1, 2]), + WOQLQuery().eq("v:ListB", [3, 4]), + WOQLQuery().set_intersection("v:ListA", "v:ListB", "v:Common") + ) + + result = self.client.query(query) + assert len(result["bindings"]) == 1 + assert result["bindings"][0]["Common"] == [] + + def test_set_union_basic(self): + """Test set_union computes union of two lists.""" + query = WOQLQuery().woql_and( + WOQLQuery().eq("v:ListA", [1, 2]), + WOQLQuery().eq("v:ListB", [2, 3]), + WOQLQuery().set_union("v:ListA", "v:ListB", "v:All") + ) + + result = self.client.query(query) + assert len(result["bindings"]) == 1 + assert extract_values(result["bindings"][0]["All"]) == [1, 2, 3] + + def test_set_union_removes_duplicates(self): + """Test set_union removes duplicates.""" + query = WOQLQuery().woql_and( + WOQLQuery().eq("v:ListA", [1, 1, 2]), + WOQLQuery().eq("v:ListB", [2, 2]), + WOQLQuery().set_union("v:ListA", "v:ListB", "v:All") + ) + + result = self.client.query(query) + assert len(result["bindings"]) == 1 + assert extract_values(result["bindings"][0]["All"]) == [1, 2] + + def test_set_member_success(self): + """Test set_member succeeds for element in set.""" + query = WOQLQuery().woql_and( + WOQLQuery().eq("v:MySet", [1, 2, 3]), + WOQLQuery().set_member(2, "v:MySet") + ) + + result = self.client.query(query) + assert len(result["bindings"]) == 1 + + def test_set_member_failure(self): + """Test set_member fails for element not in set.""" + query = WOQLQuery().woql_and( + WOQLQuery().eq("v:MySet", [1, 2, 3]), + WOQLQuery().set_member(5, "v:MySet") + ) + + result = self.client.query(query) + assert len(result["bindings"]) == 0 + + def test_list_to_set(self): + """Test list_to_set removes duplicates and sorts.""" + query = WOQLQuery().woql_and( + WOQLQuery().eq("v:MyList", [3, 1, 2, 1]), + WOQLQuery().list_to_set("v:MyList", "v:MySet") + ) + + result = self.client.query(query) + assert len(result["bindings"]) == 1 + assert extract_values(result["bindings"][0]["MySet"]) == [1, 2, 3] + + def test_performance_large_sets(self): + """Test set operations handle large sets efficiently.""" + list_a = list(range(1000)) + list_b = list(range(500, 1500)) + + query = WOQLQuery().woql_and( + WOQLQuery().eq("v:ListA", list_a), + WOQLQuery().eq("v:ListB", list_b), + WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff") + ) + + start_time = time.time() + result = self.client.query(query) + elapsed = time.time() - start_time + + assert len(result["bindings"]) == 1 + assert len(result["bindings"][0]["Diff"]) == 500 + + # Should complete in under 1 second with O(n log n) algorithm + assert elapsed < 1.0 diff --git a/terminusdb_client/woqlquery/woql_query.py b/terminusdb_client/woqlquery/woql_query.py index d54ea363..588ed197 100644 --- a/terminusdb_client/woqlquery/woql_query.py +++ b/terminusdb_client/woqlquery/woql_query.py @@ -2426,6 +2426,135 @@ def member(self, member, mem_list): self._cursor["list"] = self._value_list(mem_list) return self + def set_difference(self, list_a, list_b, result): + """Computes the set difference between two lists (elements in list_a but not in list_b) + + Parameters + ---------- + list_a : str or list + First list or variable + list_b : str or list + Second list or variable + result : str + Variable to store the result + + Returns + ------- + WOQLQuery object + query object that can be chained and/or execute + """ + if list_a and list_a == "args": + return ["list_a", "list_b", "result"] + if self._cursor.get("@type"): + self._wrap_cursor_with_and() + self._cursor["@type"] = "SetDifference" + self._cursor["list_a"] = self._value_list(list_a) + self._cursor["list_b"] = self._value_list(list_b) + self._cursor["result"] = self._value_list(result) + return self + + def set_intersection(self, list_a, list_b, result): + """Computes the set intersection of two lists (elements in both list_a and list_b) + + Parameters + ---------- + list_a : str or list + First list or variable + list_b : str or list + Second list or variable + result : str + Variable to store the result + + Returns + ------- + WOQLQuery object + query object that can be chained and/or execute + """ + if list_a and list_a == "args": + return ["list_a", "list_b", "result"] + if self._cursor.get("@type"): + self._wrap_cursor_with_and() + self._cursor["@type"] = "SetIntersection" + self._cursor["list_a"] = self._value_list(list_a) + self._cursor["list_b"] = self._value_list(list_b) + self._cursor["result"] = self._value_list(result) + return self + + def set_union(self, list_a, list_b, result): + """Computes the set union of two lists (all unique elements from both lists) + + Parameters + ---------- + list_a : str or list + First list or variable + list_b : str or list + Second list or variable + result : str + Variable to store the result + + Returns + ------- + WOQLQuery object + query object that can be chained and/or execute + """ + if list_a and list_a == "args": + return ["list_a", "list_b", "result"] + if self._cursor.get("@type"): + self._wrap_cursor_with_and() + self._cursor["@type"] = "SetUnion" + self._cursor["list_a"] = self._value_list(list_a) + self._cursor["list_b"] = self._value_list(list_b) + self._cursor["result"] = self._value_list(result) + return self + + def set_member(self, element, set_list): + """Checks if an element is a member of a set (efficient O(log n) lookup) + + Parameters + ---------- + element : any + Element to check + set_list : str or list + Set (list) to check membership in + + Returns + ------- + WOQLQuery object + query object that can be chained and/or execute + """ + if element and element == "args": + return ["element", "set"] + if self._cursor.get("@type"): + self._wrap_cursor_with_and() + self._cursor["@type"] = "SetMember" + self._cursor["element"] = self._clean_object(element) + self._cursor["set"] = self._value_list(set_list) + return self + + def list_to_set(self, input_list, result_set): + """Converts a list to a set (removes duplicates and sorts) + + Parameters + ---------- + input_list : str or list + Input list or variable + result_set : str + Variable to store the resulting set + + Returns + ------- + WOQLQuery object + query object that can be chained and/or execute + """ + if input_list and input_list == "args": + return ["list", "set"] + if self._cursor.get("@type"): + self._wrap_cursor_with_and() + self._cursor["@type"] = "ListToSet" + self._cursor["list"] = self._value_list(input_list) + self._cursor["set"] = self._value_list(result_set) + return self + def concat(self, concat_list, result): """Concatenates the list of variables into a string and saves the result in v From 93986a719b223f881b8b7b610c470b06be2a14af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20H=C3=B6ij?= Date: Sat, 13 Dec 2025 12:29:39 +0100 Subject: [PATCH 2/2] Fix linting --- .../test_woql_set_operations.py | 65 ++++++++++--------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/terminusdb_client/tests/integration_tests/test_woql_set_operations.py b/terminusdb_client/tests/integration_tests/test_woql_set_operations.py index 2aaba7cf..8ed36528 100644 --- a/terminusdb_client/tests/integration_tests/test_woql_set_operations.py +++ b/terminusdb_client/tests/integration_tests/test_woql_set_operations.py @@ -8,6 +8,7 @@ - set_member - list_to_set """ + import time import pytest @@ -22,8 +23,10 @@ def extract_values(result_list): """Extract raw values from a list of typed literals.""" if not result_list: return [] - return [item["@value"] if isinstance(item, dict) and "@value" in item else item - for item in result_list] + return [ + item["@value"] if isinstance(item, dict) and "@value" in item else item + for item in result_list + ] class TestWOQLSetOperations: @@ -35,14 +38,14 @@ def setup_teardown(self, docker_url): self.client = Client(docker_url, user_agent=test_user_agent) self.client.connect() self.db_name = "test_woql_set_operations" - + # Create database for tests if self.db_name in self.client.list_databases(): self.client.delete_database(self.db_name) self.client.create_database(self.db_name) - + yield - + # Cleanup self.client.delete_database(self.db_name) @@ -51,9 +54,9 @@ def test_set_difference_basic(self): query = WOQLQuery().woql_and( WOQLQuery().eq("v:ListA", [1, 2, 3, 4]), WOQLQuery().eq("v:ListB", [2, 4]), - WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff") + WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"), ) - + result = self.client.query(query) assert len(result["bindings"]) == 1 assert extract_values(result["bindings"][0]["Diff"]) == [1, 3] @@ -63,9 +66,9 @@ def test_set_difference_subset(self): query = WOQLQuery().woql_and( WOQLQuery().eq("v:ListA", [1, 2]), WOQLQuery().eq("v:ListB", [1, 2, 3]), - WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff") + WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"), ) - + result = self.client.query(query) assert len(result["bindings"]) == 1 assert result["bindings"][0]["Diff"] == [] @@ -75,9 +78,9 @@ def test_set_difference_empty_list(self): query = WOQLQuery().woql_and( WOQLQuery().eq("v:ListA", []), WOQLQuery().eq("v:ListB", [1]), - WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff") + WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"), ) - + result = self.client.query(query) assert len(result["bindings"]) == 1 assert result["bindings"][0]["Diff"] == [] @@ -87,9 +90,9 @@ def test_set_intersection_basic(self): query = WOQLQuery().woql_and( WOQLQuery().eq("v:ListA", [1, 2, 3]), WOQLQuery().eq("v:ListB", [2, 3, 4]), - WOQLQuery().set_intersection("v:ListA", "v:ListB", "v:Common") + WOQLQuery().set_intersection("v:ListA", "v:ListB", "v:Common"), ) - + result = self.client.query(query) assert len(result["bindings"]) == 1 assert extract_values(result["bindings"][0]["Common"]) == [2, 3] @@ -99,9 +102,9 @@ def test_set_intersection_no_common(self): query = WOQLQuery().woql_and( WOQLQuery().eq("v:ListA", [1, 2]), WOQLQuery().eq("v:ListB", [3, 4]), - WOQLQuery().set_intersection("v:ListA", "v:ListB", "v:Common") + WOQLQuery().set_intersection("v:ListA", "v:ListB", "v:Common"), ) - + result = self.client.query(query) assert len(result["bindings"]) == 1 assert result["bindings"][0]["Common"] == [] @@ -111,9 +114,9 @@ def test_set_union_basic(self): query = WOQLQuery().woql_and( WOQLQuery().eq("v:ListA", [1, 2]), WOQLQuery().eq("v:ListB", [2, 3]), - WOQLQuery().set_union("v:ListA", "v:ListB", "v:All") + WOQLQuery().set_union("v:ListA", "v:ListB", "v:All"), ) - + result = self.client.query(query) assert len(result["bindings"]) == 1 assert extract_values(result["bindings"][0]["All"]) == [1, 2, 3] @@ -123,9 +126,9 @@ def test_set_union_removes_duplicates(self): query = WOQLQuery().woql_and( WOQLQuery().eq("v:ListA", [1, 1, 2]), WOQLQuery().eq("v:ListB", [2, 2]), - WOQLQuery().set_union("v:ListA", "v:ListB", "v:All") + WOQLQuery().set_union("v:ListA", "v:ListB", "v:All"), ) - + result = self.client.query(query) assert len(result["bindings"]) == 1 assert extract_values(result["bindings"][0]["All"]) == [1, 2] @@ -133,20 +136,18 @@ def test_set_union_removes_duplicates(self): def test_set_member_success(self): """Test set_member succeeds for element in set.""" query = WOQLQuery().woql_and( - WOQLQuery().eq("v:MySet", [1, 2, 3]), - WOQLQuery().set_member(2, "v:MySet") + WOQLQuery().eq("v:MySet", [1, 2, 3]), WOQLQuery().set_member(2, "v:MySet") ) - + result = self.client.query(query) assert len(result["bindings"]) == 1 def test_set_member_failure(self): """Test set_member fails for element not in set.""" query = WOQLQuery().woql_and( - WOQLQuery().eq("v:MySet", [1, 2, 3]), - WOQLQuery().set_member(5, "v:MySet") + WOQLQuery().eq("v:MySet", [1, 2, 3]), WOQLQuery().set_member(5, "v:MySet") ) - + result = self.client.query(query) assert len(result["bindings"]) == 0 @@ -154,9 +155,9 @@ def test_list_to_set(self): """Test list_to_set removes duplicates and sorts.""" query = WOQLQuery().woql_and( WOQLQuery().eq("v:MyList", [3, 1, 2, 1]), - WOQLQuery().list_to_set("v:MyList", "v:MySet") + WOQLQuery().list_to_set("v:MyList", "v:MySet"), ) - + result = self.client.query(query) assert len(result["bindings"]) == 1 assert extract_values(result["bindings"][0]["MySet"]) == [1, 2, 3] @@ -165,19 +166,19 @@ def test_performance_large_sets(self): """Test set operations handle large sets efficiently.""" list_a = list(range(1000)) list_b = list(range(500, 1500)) - + query = WOQLQuery().woql_and( WOQLQuery().eq("v:ListA", list_a), WOQLQuery().eq("v:ListB", list_b), - WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff") + WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"), ) - + start_time = time.time() result = self.client.query(query) elapsed = time.time() - start_time - + assert len(result["bindings"]) == 1 assert len(result["bindings"][0]["Diff"]) == 500 - + # Should complete in under 1 second with O(n log n) algorithm assert elapsed < 1.0