Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ terminusdb_client_coverage/
*~

venv/
.venv/

# due to using tox and pytest
.tox
Expand Down
53 changes: 46 additions & 7 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,58 @@ Thanks for interested to contribute to TerminusDB Client, to get started, fork t

## Setting up dev environment 💻

Make sure you have Python>=3.9 installed. We use [pipenv](https://pipenv-fork.readthedocs.io/en/latest/) for dev environment, to install pipenv:
Make sure you have Python>=3.9 and <3.13 installed.

`pip3 install pipenv --upgrade`
[Fork and clone](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) this repo, then set up your development environment using one of the methods below.

[Fork and clone](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) this repo, then in your local repo:
### Option 1: Using venv (recommended)

`pipenv install --dev --pre` or `make init`
Create and activate a virtual environment:

To “editable” install the local Terminus Client Python:
```bash
# Create venv with Python 3.12 (or any version 3.9-3.12)
python3.12 -m venv .venv

`pip3 install -e .`
# Activate the virtual environment
source .venv/bin/activate # On macOS/Linux
# .venv\Scripts\activate # On Windows

**to be able to run integration tests, local installation of docker is required**
# Install the package in editable mode with dev dependencies
pip install -e ".[dev]"

# Install pytest for running tests
pip install pytest
```

### Option 2: Using pipenv

We also support [pipenv](https://pipenv-fork.readthedocs.io/en/latest/) for the dev environment:

```bash
pip install pipenv --upgrade
pipenv install --dev --pre
```

Or simply run `make init`.

To install the local TerminusDB Python client in editable mode:

`pip install -e .`

### Running a local TerminusDB server

**To run integration tests, you need either Docker or a local TerminusDB server.**

For integration tests, you can either:

1. **Use Docker** (automatic): Tests will automatically start a Docker container if no server is detected
2. **Use a local server**: Start the TerminusDB test server from the main terminusdb repository:
```bash
cd /path/to/terminusdb
./tests/terminusdb-test-server.sh start
```

The test configuration will automatically detect and use an available server.

We use [shed](https://pypi.org/project/shed/) to lint our code. Although you can do it manually by running `shed`, we highly recommend setting up the pre-commit hook to do the linting automatically.

Expand Down
184 changes: 184 additions & 0 deletions terminusdb_client/tests/integration_tests/test_woql_set_operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
"""
Integration tests for WOQL set operations.

These tests verify the new set operations:
- set_difference
- set_intersection
- set_union
- set_member
- list_to_set
"""

import time

import pytest

from terminusdb_client import Client
from terminusdb_client.woqlquery.woql_query import WOQLQuery

test_user_agent = "terminusdb-client-python-tests"


def extract_values(result_list):
    """Return the plain values held in a list of (possibly typed) literals.

    Dict entries carrying an "@value" key are replaced by that value; every
    other entry is passed through unchanged. A falsy input (``None``, an
    empty list, ...) yields an empty list.
    """
    values = []
    # ``or ()`` makes a falsy input iterate zero times, giving [].
    for entry in result_list or ():
        if isinstance(entry, dict) and "@value" in entry:
            values.append(entry["@value"])
        else:
            values.append(entry)
    return values


class TestWOQLSetOperations:
    """Integration tests for the WOQL set operations.

    Each test runs against a freshly created database on the server named by
    the ``docker_url`` fixture (defined outside this file — presumably in the
    suite's conftest; confirm there).
    """

    @pytest.fixture(autouse=True)
    def setup_teardown(self, docker_url):
        """Create a clean test database before each test and drop it afterwards."""
        self.client = Client(docker_url, user_agent=test_user_agent)
        self.client.connect()
        self.db_name = "test_woql_set_operations"

        # Drop any database left behind by an earlier run, then start fresh.
        if self.db_name in self.client.list_databases():
            self.client.delete_database(self.db_name)
        self.client.create_database(self.db_name)

        yield

        # Cleanup
        self.client.delete_database(self.db_name)

    def test_set_difference_basic(self):
        """Test set_difference computes difference between two lists."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2, 3, 4]),
            WOQLQuery().eq("v:ListB", [2, 4]),
            WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["Diff"]) == [1, 3]

    def test_set_difference_subset(self):
        """Test set_difference returns empty when first list is subset."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2]),
            WOQLQuery().eq("v:ListB", [1, 2, 3]),
            WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert result["bindings"][0]["Diff"] == []

    def test_set_difference_empty_list(self):
        """Test set_difference handles empty lists."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", []),
            WOQLQuery().eq("v:ListB", [1]),
            WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert result["bindings"][0]["Diff"] == []

    def test_set_intersection_basic(self):
        """Test set_intersection computes intersection of two lists."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2, 3]),
            WOQLQuery().eq("v:ListB", [2, 3, 4]),
            WOQLQuery().set_intersection("v:ListA", "v:ListB", "v:Common"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["Common"]) == [2, 3]

    def test_set_intersection_no_common(self):
        """Test set_intersection returns empty when no common elements."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2]),
            WOQLQuery().eq("v:ListB", [3, 4]),
            WOQLQuery().set_intersection("v:ListA", "v:ListB", "v:Common"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert result["bindings"][0]["Common"] == []

    def test_set_union_basic(self):
        """Test set_union computes union of two lists."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2]),
            WOQLQuery().eq("v:ListB", [2, 3]),
            WOQLQuery().set_union("v:ListA", "v:ListB", "v:All"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["All"]) == [1, 2, 3]

    def test_set_union_removes_duplicates(self):
        """Test set_union removes duplicates."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 1, 2]),
            WOQLQuery().eq("v:ListB", [2, 2]),
            WOQLQuery().set_union("v:ListA", "v:ListB", "v:All"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["All"]) == [1, 2]

    def test_set_member_success(self):
        """Test set_member succeeds for element in set."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:MySet", [1, 2, 3]), WOQLQuery().set_member(2, "v:MySet")
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1

    def test_set_member_failure(self):
        """Test set_member fails for element not in set."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:MySet", [1, 2, 3]), WOQLQuery().set_member(5, "v:MySet")
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 0

    def test_list_to_set(self):
        """Test list_to_set removes duplicates and sorts."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:MyList", [3, 1, 2, 1]),
            WOQLQuery().list_to_set("v:MyList", "v:MySet"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["MySet"]) == [1, 2, 3]

    def test_performance_large_sets(self):
        """Test set operations handle large sets efficiently."""
        list_a = list(range(1000))
        list_b = list(range(500, 1500))

        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", list_a),
            WOQLQuery().eq("v:ListB", list_b),
            WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"),
        )

        # perf_counter() is monotonic, so the measurement cannot be skewed by
        # wall-clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = self.client.query(query)
        elapsed = time.perf_counter() - start_time

        assert len(result["bindings"]) == 1
        assert len(result["bindings"][0]["Diff"]) == 500

        # Should complete in under 1 second with O(n log n) algorithm
        assert elapsed < 1.0, f"set_difference query took {elapsed:.3f}s"
129 changes: 129 additions & 0 deletions terminusdb_client/woqlquery/woql_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2426,6 +2426,135 @@ def member(self, member, mem_list):
self._cursor["list"] = self._value_list(mem_list)
return self

def set_difference(self, list_a, list_b, result):
    """Builds a SetDifference query node (elements of list_a that are not in list_b)

    Parameters
    ----------
    list_a : str or list
        First list or variable
    list_b : str or list
        Second list or variable
    result : str
        Variable to store the result

    Returns
    -------
    WOQLQuery object
        query object that can be chained and/or execute
    """
    # Passing the literal "args" returns the node's field names instead.
    if list_a and list_a == "args":
        return ["list_a", "list_b", "result"]
    # The cursor already has an "@type": call the wrap helper before writing.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()
    self._cursor["@type"] = "SetDifference"
    operands = (("list_a", list_a), ("list_b", list_b), ("result", result))
    for field, operand in operands:
        self._cursor[field] = self._value_list(operand)
    return self

def set_intersection(self, list_a, list_b, result):
    """Builds a SetIntersection query node (elements present in both list_a and list_b)

    Parameters
    ----------
    list_a : str or list
        First list or variable
    list_b : str or list
        Second list or variable
    result : str
        Variable to store the result

    Returns
    -------
    WOQLQuery object
        query object that can be chained and/or execute
    """
    # Passing the literal "args" returns the node's field names instead.
    if list_a and list_a == "args":
        return ["list_a", "list_b", "result"]
    # The cursor already has an "@type": call the wrap helper before writing.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()
    self._cursor["@type"] = "SetIntersection"
    operands = (("list_a", list_a), ("list_b", list_b), ("result", result))
    for field, operand in operands:
        self._cursor[field] = self._value_list(operand)
    return self

def set_union(self, list_a, list_b, result):
    """Builds a SetUnion query node (all unique elements from both lists)

    Parameters
    ----------
    list_a : str or list
        First list or variable
    list_b : str or list
        Second list or variable
    result : str
        Variable to store the result

    Returns
    -------
    WOQLQuery object
        query object that can be chained and/or execute
    """
    # Passing the literal "args" returns the node's field names instead.
    if list_a and list_a == "args":
        return ["list_a", "list_b", "result"]
    # The cursor already has an "@type": call the wrap helper before writing.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()
    self._cursor["@type"] = "SetUnion"
    operands = (("list_a", list_a), ("list_b", list_b), ("result", result))
    for field, operand in operands:
        self._cursor[field] = self._value_list(operand)
    return self

def set_member(self, element, set_list):
    """Builds a SetMember query node checking that an element belongs to a set

    NOTE(review): described upstream as an efficient O(log n) lookup; that is a
    server-side property not visible in this client code — confirm against the
    TerminusDB documentation.

    Parameters
    ----------
    element : any
        Element to check
    set_list : str or list
        Set (list) to check membership in

    Returns
    -------
    WOQLQuery object
        query object that can be chained and/or execute
    """
    # Passing the literal "args" returns the node's field names instead.
    asked_for_arg_names = element and element == "args"
    if asked_for_arg_names:
        return ["element", "set"]
    # The cursor already has an "@type": call the wrap helper before writing.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()
    self._cursor["@type"] = "SetMember"
    # The element goes through _clean_object, the set through _value_list.
    cleaned_element = self._clean_object(element)
    self._cursor["element"] = cleaned_element
    self._cursor["set"] = self._value_list(set_list)
    return self

def list_to_set(self, input_list, result_set):
    """Builds a ListToSet query node converting a list into a set

    NOTE(review): the de-duplication and sorting are performed by the server,
    not by this client code — confirm against the TerminusDB documentation.

    Parameters
    ----------
    input_list : str or list
        Input list or variable
    result_set : str
        Variable to store the resulting set

    Returns
    -------
    WOQLQuery object
        query object that can be chained and/or execute
    """
    # Passing the literal "args" returns the node's field names instead.
    if input_list and input_list == "args":
        return ["list", "set"]
    # The cursor already has an "@type": call the wrap helper before writing.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()
    self._cursor["@type"] = "ListToSet"
    for field, operand in (("list", input_list), ("set", result_set)):
        self._cursor[field] = self._value_list(operand)
    return self

def concat(self, concat_list, result):
"""Concatenates the list of variables into a string and saves the result in v

Expand Down