diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d69095ee..b54851d0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -41,3 +41,17 @@ jobs: working-directory: ./js - run: npm test working-directory: ./js + + python: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - run: rustup update + - uses: actions/setup-python@v2 + with: + python-version: 3.5 + - run: python -m pip install --upgrade pip maturin + - run: maturin develop + working-directory: ./python + - run: python -m unittest + working-directory: ./python/tests diff --git a/Cargo.toml b/Cargo.toml index 2a11dfba..128242e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ members = [ "js", "lib", + "python", "server", "testsuite", "wikibase" diff --git a/README.md b/README.md index edc7282a..2380fc8d 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ It is written in Rust. It is split into multiple parts: * The `lib` directory contains the database written as a Rust library. +* The `python` directory contains bindings to use Oxigraph in Python. See [its README](https://github.com/oxigraph/oxigraph/blob/master/python/README.md) for the Python bindings documentation. * The `js` directory contains bindings to use Oxigraph in JavaScript with the help of WebAssembly. See [its README](https://github.com/oxigraph/oxigraph/blob/master/js/README.md) for the JS bindings documentation. * The `server` directory contains a stand-alone binary of a web server implementing the [SPARQL 1.1 Protocol](https://www.w3.org/TR/sparql11-protocol/). * The `wikibase` directory contains a stand-alone binary of a web server able to synchronize with a [Wikibase instance](https://wikiba.se/). 
diff --git a/js/Cargo.toml b/js/Cargo.toml index bc83374b..48106693 100644 --- a/js/Cargo.toml +++ b/js/Cargo.toml @@ -3,7 +3,7 @@ name = "oxigraph_js" version = "0.0.2" authors = ["Tpt "] license = "MIT/Apache-2.0" -readme = "../README.md" +readme = "README.md" keywords = ["RDF", "N-Triples", "Turtle", "RDF/XML", "SPARQL"] repository = "https://github.com/oxigraph/oxigraph/tree/master/js" description = "JavaScript bindings of Oxigraph" diff --git a/js/README.md b/js/README.md index 95263f70..714c27ba 100644 --- a/js/README.md +++ b/js/README.md @@ -23,6 +23,21 @@ npm install oxigraph const oxigraph = require('oxigraph'); ``` +## Example + +Insert the triple `<http://example/> <http://schema.org/name> "example"` and log the name of `<http://example/>` in SPARQL: +```js +const { MemoryStore } = require('oxigraph'); +const store = new MemoryStore(); +const dataFactory = store.dataFactory; +const ex = dataFactory.namedNode("http://example/"); +const schemaName = dataFactory.namedNode("http://schema.org/name"); +store.add(dataFactory.triple(ex, schemaName, dataFactory.literal("example"))); +for (binding of store.query("SELECT ?name WHERE { <http://example/> <http://schema.org/name> ?name }")) { + console.log(binding.get("name").value); +} +``` + ## API Oxigraph currently provides a simple JS API. 
@@ -140,21 +155,6 @@ Example of loading a Turtle file into the named graph ` <> .", "text/turtle", "http://example.com", store.dataFactory.namedNode("http://example.com/graph")); ``` -## Example - -Insert the triple ` "example"` and log the name of `` in SPARQL: -```js -const { MemoryStore } = require('oxigraph'); -const store = new MemoryStore(); -const dataFactory = store.dataFactory; -const ex = dataFactory.namedNode("http://example/"); -const schemaName = dataFactory.namedNode("http://schema.org/name"); -store.add(dataFactory.triple(ex, schemaName, dataFactory.literal("example"))); -for (binding of store.query("SELECT ?name WHERE { ?name }")) { - console.log(binding.get("name").value); -} -``` - ## How to contribute diff --git a/lib/src/sparql/json_results.rs b/lib/src/sparql/json_results.rs index c4041485..96d62a20 100644 --- a/lib/src/sparql/json_results.rs +++ b/lib/src/sparql/json_results.rs @@ -1,4 +1,4 @@ -//! Implementation of [SPARQL Query Results XML Format](https://www.w3.org/TR/sparql11-results-json/) +//! Implementation of [SPARQL Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) use crate::model::*; use crate::sparql::model::*; diff --git a/lib/src/store/rocksdb.rs b/lib/src/store/rocksdb.rs index b5632f99..a580b908 100644 --- a/lib/src/store/rocksdb.rs +++ b/lib/src/store/rocksdb.rs @@ -9,8 +9,8 @@ use rocksdb::*; use std::io::BufRead; use std::mem::take; use std::path::Path; -use std::str; use std::sync::Arc; +use std::{fmt, str}; /// Store based on the [RocksDB](https://rocksdb.org/) key-value database. /// It encodes a [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) and allows to query and update it using SPARQL. 
@@ -155,6 +155,21 @@ impl RocksDbStore { self.handle().contains(&quad) } + /// Returns the number of quads in the store + pub fn len(&self) -> usize { + self.db + .full_iterator_cf(self.handle().spog_cf, IteratorMode::Start) + .count() + } + + /// Returns if the store is empty + pub fn is_empty(&self) -> bool { + self.db + .full_iterator_cf(self.handle().spog_cf, IteratorMode::Start) + .next() + .is_none() + } + /// Executes a transaction. /// /// The transaction is executed if the given closure returns `Ok`. @@ -235,6 +250,15 @@ impl RocksDbStore { } } +impl fmt::Display for RocksDbStore { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for t in self.quads_for_pattern(None, None, None, None) { + writeln!(f, "{}", t.map_err(|_| fmt::Error)?)?; + } + Ok(()) + } +} + impl StrLookup for RocksDbStore { fn get_str(&self, id: StrHash) -> Result> { Ok(self diff --git a/lib/src/store/sled.rs b/lib/src/store/sled.rs index 6c6a493f..8799531a 100644 --- a/lib/src/store/sled.rs +++ b/lib/src/store/sled.rs @@ -8,7 +8,7 @@ use crate::{DatasetSyntax, GraphSyntax, Result}; use sled::{Config, Iter, Tree}; use std::io::BufRead; use std::path::Path; -use std::str; +use std::{fmt, str}; /// Store based on the [Sled](https://sled.rs/) key-value database. /// It encodes a [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) and allows to query and update it using SPARQL. @@ -138,6 +138,16 @@ impl SledStore { self.contains_encoded(&quad) } + /// Returns the number of quads in the store + pub fn len(&self) -> usize { + self.spog.len() + } + + /// Returns if the store is empty + pub fn is_empty(&self) -> bool { + self.spog.is_empty() + } + /// Loads a graph file (i.e. triples) into the store /// /// Warning: This functions saves the triples in batch. 
If the parsing fails in the middle of the file, @@ -394,6 +404,15 @@ impl SledStore { } } +impl fmt::Display for SledStore { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for t in self.quads_for_pattern(None, None, None, None) { + writeln!(f, "{}", t.map_err(|_| fmt::Error)?)?; + } + Ok(()) + } +} + impl StrLookup for SledStore { fn get_str(&self, id: StrHash) -> Result> { Ok(self diff --git a/python/Cargo.toml b/python/Cargo.toml new file mode 100644 index 00000000..df308c5e --- /dev/null +++ b/python/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "oxigraph_python" +version = "0.1.0" +authors = ["Tpt "] +license = "MIT/Apache-2.0" +readme = "README.md" +keywords = ["RDF", "N-Triples", "Turtle", "RDF/XML", "SPARQL"] +repository = "https://github.com/oxigraph/oxigraph/tree/master/python" +description = """ +Python bindings of Oxigraph +""" +edition = "2018" + +[lib] +crate-type = ["cdylib"] +name = "oxigraph" + +[dependencies] +oxigraph = {path = "../lib", features=["sled"]} +pyo3 = {version="0.11", features = ["extension-module"]} + +[package.metadata.maturin] +classifier = [ + "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Rust", + "Topic :: Database :: Database Engines/Servers" +] \ No newline at end of file diff --git a/python/README.md b/python/README.md new file mode 100644 index 00000000..8996f170 --- /dev/null +++ b/python/README.md @@ -0,0 +1,233 @@ +Oxigraph for Python +=================== + +[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) + +This package provides a Python API on top of Oxigraph. + +Oxigraph is a work in progress graph database written in Rust implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. 
+ +It offers two stores with [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/) capabilities. +One of the stores is in-memory, and the other one is disk-based. + +The stores are also able to load RDF serialized in [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/). + +## Install + +To install the development version of Oxigraph you first need to install the build tool [Maturin](https://github.com/PyO3/maturin). +This can be done using the usual `pip install maturin`. + +Then you just need to run `maturin develop` to install Oxigraph in the current Python environment. + + +## Example + +Insert the triple `<http://example/> <http://schema.org/name> "example"` and print the name of `<http://example/>` in SPARQL: +```python +from oxigraph import * + +store = MemoryStore() +ex = NamedNode('http://example/') +schemaName = NamedNode('http://schema.org/name') +store.add((ex, schemaName, Literal('example'))) +for binding in store.query('SELECT ?name WHERE { <http://example/> <http://schema.org/name> ?name }'): + print(binding['name'].value) +``` + +## API + +### Model + +Oxigraph provides Python classes for the basic RDF model elements. + +#### `NamedNode` + +An RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri). +```python +from oxigraph import NamedNode + +assert NamedNode('http://example.com/foo').value == 'http://example.com/foo' +assert str(NamedNode('http://example.com/foo')) == '<http://example.com/foo>' +``` + +#### `BlankNode` + +An RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). +```python +from oxigraph import BlankNode + +assert BlankNode('foo').value == 'foo' +assert str(BlankNode('foo')) == '_:foo' +``` + +#### `Literal` + +An RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal). 
+```python +from oxigraph import NamedNode, Literal + +assert Literal('foo').value == 'foo' +assert str(Literal('foo')) == '"foo"' + +assert Literal('foo', language='en').language == 'en' +assert str(Literal('foo', language='en')) == '"foo"@en' + +assert Literal('11', datatype=NamedNode('http://www.w3.org/2001/XMLSchema#integer')).datatype == NamedNode('http://www.w3.org/2001/XMLSchema#integer') +assert str(Literal('11', datatype=NamedNode('http://www.w3.org/2001/XMLSchema#integer'))) == '"11"^^<http://www.w3.org/2001/XMLSchema#integer>' +``` + +#### `DefaultGraph` + +The RDF [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph). +```python +from oxigraph import DefaultGraph + +DefaultGraph() +``` + +### Stores + +Oxigraph provides two stores: + +* `MemoryStore` that stores the RDF quads in memory +* `SledStore` that stores the RDF quads on disk using [Sled](https://github.com/spacejam/sled). + +Both stores provide a similar API. They encode an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). + +#### Constructor + +##### `MemoryStore` + +It can be constructed using: +```python +from oxigraph import MemoryStore + +store = MemoryStore() +``` + +##### `SledStore` + +The following code creates a store using the directory `foo/bar` for storage. +```python +from oxigraph import SledStore + +store = SledStore('foo/bar') +``` + +It is also possible to use a temporary directory that will be removed when the `SledStore` Python object is dropped: +```python +from oxigraph import SledStore + +store = SledStore() +``` + +#### `add` + +To add a quad to the store: +```python +s = NamedNode('http://example.com/subject') +p = NamedNode('http://example.com/predicate') +o = NamedNode('http://example.com/object') +g = NamedNode('http://example.com/graph') +store.add((s, p, o, g)) +``` + +If a triple is provided, it is added to the default graph i.e. 
`store.add((s, p, o))` is the same as `store.add((s, p, o, DefaultGraph()))` + +#### `remove` + +To remove a quad from the store: +```python +store.remove((s, p, o, g)) +``` + +#### `__contains__` + +Checks if a quad is in the store: +```python +assert (s, p, o, g) in store +``` + +#### `__len__` + +Returns the number of quads in the store: +```python +assert len(store) == 1 +``` + +#### `__iter__` + +Iterates over all quads in the store: +```python +assert list(iter(store)) == [(s, p, o, g)] +``` + +#### `match` + +Returns all the quads matching a given pattern using an iterator. + +Return all the quads with the subject `s`: +```python +assert list(store.match(s, None, None, None)) == [(s, p, o, g)] +``` + +Return all the quads in the default graph: +```python +assert list(store.match(None, None, None, DefaultGraph())) == [] +``` + +#### `query` + +Executes a [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/). + +The `ASK` queries return a boolean: +```python +assert store.query('ASK { ?s ?s ?s }') +``` + +The `SELECT` queries return an iterator of query solutions that can be indexed by variable name or position in the `SELECT` clause: +```python +solutions = list(store.query('SELECT ?s WHERE { ?s ?p ?o }')) +assert solutions[0][0] == s +assert solutions[0]['s'] == s +``` + +The `CONSTRUCT` and `DESCRIBE` queries return an iterator of triples, each one being a `(subject, predicate, object)` tuple: +```python +triples = list(store.query('CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }')) +assert len(triples[0]) == 3 +assert triples[0] == (s, p, o) +``` + +#### `load` + +Loads serialized RDF triples or quads into the store. +The method arguments are: +1. `data`: the serialized RDF triples or quads. +2. `mime_type`: the MIME type of the serialization. See below for the supported MIME types. +3. `base_iri`: the base IRI used to resolve the relative IRIs in the serialization. +4. 
`to_graph`: for triple serialization formats, the name of the named graph the triples should be loaded to. + +The available formats are: +* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` +* [TriG](https://www.w3.org/TR/trig/): `application/trig` +* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` +* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` +* [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` + +Example of loading a Turtle file into the named graph `<http://example.com/graph>` with the base IRI `http://example.com`: +```python +store.load('<http://example.com> <http://example.com> <> .', mime_type='text/turtle', base_iri="http://example.com", to_graph=NamedNode('http://example.com/graph')) +``` + + +## How to contribute + +The Oxigraph bindings are written in Rust using [PyO3](https://github.com/PyO3/pyo3). + +They are built using [Maturin](https://github.com/PyO3/maturin). +Maturin can be installed using the usual `pip install maturin`. +To install the development version of Oxigraph, just run `maturin develop`. + +The Python bindings tests are written in Python. +To run them, use the usual `python -m unittest` in the `tests` directory. 
diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 00000000..548b3531 --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["maturin"] +build-backend = "maturin" \ No newline at end of file diff --git a/python/src/lib.rs b/python/src/lib.rs new file mode 100644 index 00000000..5b8cd3b8 --- /dev/null +++ b/python/src/lib.rs @@ -0,0 +1,31 @@ +#![deny( + future_incompatible, + nonstandard_style, + rust_2018_idioms, + trivial_casts, + trivial_numeric_casts, + unsafe_code, + unused_qualifications +)] + +mod memory_store; +mod model; +mod sled_store; +mod store_utils; + +use crate::memory_store::*; +use crate::model::*; +use crate::sled_store::*; +use pyo3::prelude::*; + +/// Oxigraph library +#[pymodule] +fn oxigraph(_py: Python<'_>, module: &PyModule) -> PyResult<()> { + module.add_class::()?; + module.add_class::()?; + module.add_class::()?; + module.add_class::()?; + module.add_class::()?; + module.add_class::()?; + Ok(()) +} diff --git a/python/src/memory_store.rs b/python/src/memory_store.rs new file mode 100644 index 00000000..7da3f0e3 --- /dev/null +++ b/python/src/memory_store.rs @@ -0,0 +1,168 @@ +use crate::model::*; +use crate::store_utils::*; +use oxigraph::model::*; +use oxigraph::sparql::QueryOptions; +use oxigraph::{DatasetSyntax, FileSyntax, GraphSyntax, MemoryStore}; +use pyo3::basic::CompareOp; +use pyo3::exceptions::{NotImplementedError, RuntimeError, ValueError}; +use pyo3::prelude::*; +use pyo3::types::PyTuple; +use pyo3::{PyIterProtocol, PyObjectProtocol, PySequenceProtocol}; +use std::io::Cursor; + +#[pyclass(name = MemoryStore)] +#[derive(Eq, PartialEq, Clone)] +pub struct PyMemoryStore { + inner: MemoryStore, +} + +#[pymethods] +impl PyMemoryStore { + #[new] + fn new() -> Self { + Self { + inner: MemoryStore::new(), + } + } + + fn add(&self, quad: &PyTuple) -> PyResult<()> { + self.inner.insert(extract_quad(quad)?); + Ok(()) + } + + fn remove(&self, quad: &PyTuple) -> 
PyResult<()> { + self.inner.remove(&extract_quad(quad)?); + Ok(()) + } + + fn r#match( + &self, + subject: &PyAny, + predicate: &PyAny, + object: &PyAny, + graph_name: Option<&PyAny>, + ) -> PyResult { + let (subject, predicate, object, graph_name) = + extract_quads_pattern(subject, predicate, object, graph_name)?; + Ok(QuadIter { + inner: Box::new(self.inner.quads_for_pattern( + subject.as_ref(), + predicate.as_ref(), + object.as_ref(), + graph_name.as_ref(), + )), + }) + } + + fn query(&self, query: &str, py: Python<'_>) -> PyResult { + let query = self + .inner + .prepare_query(query, QueryOptions::default()) + .map_err(|e| ParseError::py_err(e.to_string()))?; + let results = query + .exec() + .map_err(|e| RuntimeError::py_err(e.to_string()))?; + query_results_to_python(py, results, RuntimeError::py_err) + } + + #[args(data, mime_type, "*", base_iri = "\"\"", to_graph = "None")] + fn load( + &self, + data: &str, + mime_type: &str, + base_iri: &str, + to_graph: Option<&PyAny>, + ) -> PyResult<()> { + let to_graph_name = if let Some(graph_name) = to_graph { + Some(extract_graph_name(graph_name)?) 
+ } else { + None + }; + let base_iri = if base_iri.is_empty() { + None + } else { + Some(base_iri) + }; + + if let Some(graph_syntax) = GraphSyntax::from_mime_type(mime_type) { + self.inner + .load_graph( + Cursor::new(data), + graph_syntax, + &to_graph_name.unwrap_or(GraphName::DefaultGraph), + base_iri, + ) + .map_err(|e| ParseError::py_err(e.to_string())) + } else if let Some(dataset_syntax) = DatasetSyntax::from_mime_type(mime_type) { + if to_graph_name.is_some() { + return Err(ValueError::py_err( + "The target graph name parameter is not available for dataset formats", + )); + } + self.inner + .load_dataset(Cursor::new(data), dataset_syntax, base_iri) + .map_err(|e| ParseError::py_err(e.to_string())) + } else { + Err(ValueError::py_err(format!( + "Not supported MIME type: {}", + mime_type + ))) + } + } +} + +#[pyproto] +impl PyObjectProtocol for PyMemoryStore { + fn __str__(&self) -> String { + self.inner.to_string() + } + + fn __richcmp__(&self, other: &PyCell, op: CompareOp) -> PyResult { + let other: &PyMemoryStore = &other.borrow(); + match op { + CompareOp::Eq => Ok(self == other), + CompareOp::Ne => Ok(self != other), + _ => Err(NotImplementedError::py_err("Ordering is not implemented")), + } + } + + fn __bool__(&self) -> bool { + !self.inner.is_empty() + } +} + +#[pyproto] +impl PySequenceProtocol for PyMemoryStore { + fn __len__(&self) -> usize { + self.inner.len() + } + + fn __contains__(&self, quad: &PyTuple) -> PyResult { + Ok(self.inner.contains(&extract_quad(quad)?)) + } +} + +#[pyproto] +impl PyIterProtocol for PyMemoryStore { + fn __iter__(slf: PyRef) -> QuadIter { + QuadIter { + inner: Box::new(slf.inner.quads_for_pattern(None, None, None, None)), + } + } +} + +#[pyclass(unsendable)] +pub struct QuadIter { + inner: Box>, +} + +#[pyproto] +impl PyIterProtocol for QuadIter { + fn __iter__(slf: PyRefMut) -> Py { + slf.into() + } + + fn __next__(mut slf: PyRefMut) -> Option<(PyObject, PyObject, PyObject, PyObject)> { + slf.inner.next().map(move 
|q| quad_to_python(slf.py(), q)) + } +} diff --git a/python/src/model.rs b/python/src/model.rs new file mode 100644 index 00000000..745b7506 --- /dev/null +++ b/python/src/model.rs @@ -0,0 +1,422 @@ +use oxigraph::model::*; +use pyo3::basic::CompareOp; +use pyo3::exceptions::{NotImplementedError, TypeError, ValueError}; +use pyo3::prelude::*; +use pyo3::types::PyTuple; +use pyo3::PyObjectProtocol; +use std::collections::hash_map::DefaultHasher; +use std::hash::Hash; +use std::hash::Hasher; + +#[pyclass(name = NamedNode)] +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub struct PyNamedNode { + inner: NamedNode, +} + +impl From for PyNamedNode { + fn from(inner: NamedNode) -> Self { + Self { inner } + } +} + +impl From for NamedNode { + fn from(node: PyNamedNode) -> Self { + node.inner + } +} + +impl From for NamedOrBlankNode { + fn from(node: PyNamedNode) -> Self { + node.inner.into() + } +} + +impl From for Term { + fn from(node: PyNamedNode) -> Self { + node.inner.into() + } +} + +impl From for GraphName { + fn from(node: PyNamedNode) -> Self { + node.inner.into() + } +} + +#[pymethods] +impl PyNamedNode { + #[new] + fn new(value: String) -> PyResult { + Ok(NamedNode::new(value) + .map_err(|e| ValueError::py_err(e.to_string()))? 
+ .into()) + } + + #[getter] + fn value(&self) -> &str { + self.inner.as_str() + } +} + +#[pyproto] +impl PyObjectProtocol for PyNamedNode { + fn __str__(&self) -> String { + self.inner.to_string() + } + + fn __repr__(&self) -> String { + format!("", self.inner.as_str()) + } + + fn __hash__(&self) -> u64 { + hash(&self.inner) + } + + fn __richcmp__(&self, other: &PyCell, op: CompareOp) -> bool { + eq_ord_compare(self, &other.borrow(), op) + } +} + +#[pyclass(name = BlankNode)] +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct PyBlankNode { + inner: BlankNode, +} + +impl From for PyBlankNode { + fn from(inner: BlankNode) -> Self { + Self { inner } + } +} + +impl From for BlankNode { + fn from(node: PyBlankNode) -> Self { + node.inner + } +} + +impl From for NamedOrBlankNode { + fn from(node: PyBlankNode) -> Self { + node.inner.into() + } +} + +impl From for Term { + fn from(node: PyBlankNode) -> Self { + node.inner.into() + } +} + +impl From for GraphName { + fn from(node: PyBlankNode) -> Self { + node.inner.into() + } +} + +#[pymethods] +impl PyBlankNode { + #[new] + fn new(value: Option) -> PyResult { + Ok(if let Some(value) = value { + BlankNode::new(value).map_err(|e| ValueError::py_err(e.to_string()))? 
+ } else { + BlankNode::default() + } + .into()) + } + + #[getter] + fn value(&self) -> &str { + self.inner.as_str() + } +} + +#[pyproto] +impl PyObjectProtocol for PyBlankNode { + fn __str__(&self) -> String { + self.inner.to_string() + } + + fn __repr__(&self) -> String { + format!("", self.inner.as_str()) + } + + fn __hash__(&self) -> u64 { + hash(&self.inner) + } + + fn __richcmp__(&self, other: &PyCell, op: CompareOp) -> PyResult { + eq_compare(self, &other.borrow(), op) + } +} + +#[pyclass(name = Literal)] +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct PyLiteral { + inner: Literal, +} + +impl From for PyLiteral { + fn from(inner: Literal) -> Self { + Self { inner } + } +} + +impl From for Literal { + fn from(literal: PyLiteral) -> Self { + literal.inner + } +} + +impl From for Term { + fn from(node: PyLiteral) -> Self { + node.inner.into() + } +} + +#[pymethods] +impl PyLiteral { + #[new] + #[args(value, "*", language = "None", datatype = "None")] + fn new( + value: String, + language: Option, + datatype: Option, + ) -> PyResult { + Ok(if let Some(language) = language { + if let Some(datatype) = datatype { + if datatype.value() != "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" { + return Err(ValueError::py_err( + "The literals with a language tag must use the rdf:langString datatype", + )); + } + } + Literal::new_language_tagged_literal(value, language) + .map_err(|e| ValueError::py_err(e.to_string()))? 
+ } else if let Some(datatype) = datatype { + Literal::new_typed_literal(value, datatype) + } else { + Literal::new_simple_literal(value) + } + .into()) + } + + #[getter] + fn value(&self) -> &str { + self.inner.value() + } + + #[getter] + fn language(&self) -> Option<&str> { + self.inner.language() + } + + #[getter] + fn datatype(&self) -> PyNamedNode { + self.inner.datatype().clone().into() + } +} + +#[pyproto] +impl PyObjectProtocol for PyLiteral { + fn __str__(&self) -> String { + self.inner.to_string() + } + + fn __repr__(&self) -> String { + format!( + "", + self.inner.value(), + self.inner.language().unwrap_or(""), + self.inner.datatype().as_str() + ) + } + + fn __hash__(&self) -> u64 { + hash(&self.inner) + } + + fn __richcmp__(&self, other: &PyCell, op: CompareOp) -> PyResult { + eq_compare(self, &other.borrow(), op) + } +} + +#[pyclass(name = DefaultGraph)] +#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +pub struct PyDefaultGraph {} + +impl From for GraphName { + fn from(_: PyDefaultGraph) -> Self { + GraphName::DefaultGraph + } +} + +#[pymethods] +impl PyDefaultGraph { + #[new] + fn new() -> Self { + PyDefaultGraph {} + } + + #[getter] + fn value(&self) -> &str { + "" + } +} + +#[pyproto] +impl PyObjectProtocol for PyDefaultGraph { + fn __str__(&self) -> &'p str { + "DEFAULT" + } + + fn __repr__(&self) -> &'p str { + "" + } + + fn __hash__(&self) -> u64 { + 0 + } + + fn __richcmp__(&self, other: &PyCell, op: CompareOp) -> PyResult { + eq_compare(self, &other.borrow(), op) + } +} + +pub fn extract_named_node(py: &PyAny) -> PyResult { + if let Ok(node) = py.downcast::>() { + Ok(node.borrow().clone().into()) + } else { + Err(TypeError::py_err(format!( + "{} is not a RDF named node", + py.get_type().name(), + ))) + } +} + +pub fn extract_named_or_blank_node(py: &PyAny) -> PyResult { + if let Ok(node) = py.downcast::>() { + Ok(node.borrow().clone().into()) + } else if let Ok(node) = py.downcast::>() { + Ok(node.borrow().clone().into()) + } else { + 
Err(TypeError::py_err(format!( + "{} is not a RDF named or blank node", + py.get_type().name(), + ))) + } +} + +pub fn named_or_blank_node_to_python(py: Python<'_>, node: NamedOrBlankNode) -> PyObject { + match node { + NamedOrBlankNode::NamedNode(node) => PyNamedNode::from(node).into_py(py), + NamedOrBlankNode::BlankNode(node) => PyBlankNode::from(node).into_py(py), + } +} + +pub fn extract_term(py: &PyAny) -> PyResult { + if let Ok(node) = py.downcast::>() { + Ok(node.borrow().clone().into()) + } else if let Ok(node) = py.downcast::>() { + Ok(node.borrow().clone().into()) + } else if let Ok(literal) = py.downcast::>() { + Ok(literal.borrow().clone().into()) + } else { + Err(TypeError::py_err(format!( + "{} is not a RDF named or blank node", + py.get_type().name(), + ))) + } +} + +pub fn term_to_python(py: Python<'_>, term: Term) -> PyObject { + match term { + Term::NamedNode(node) => PyNamedNode::from(node).into_py(py), + Term::BlankNode(node) => PyBlankNode::from(node).into_py(py), + Term::Literal(literal) => PyLiteral::from(literal).into_py(py), + } +} + +pub fn extract_graph_name(py: &PyAny) -> PyResult { + if let Ok(node) = py.downcast::>() { + Ok(node.borrow().clone().into()) + } else if let Ok(node) = py.downcast::>() { + Ok(node.borrow().clone().into()) + } else if let Ok(node) = py.downcast::>() { + Ok(node.borrow().clone().into()) + } else { + Err(TypeError::py_err(format!( + "{} is not a valid RDF graph name", + py.get_type().name(), + ))) + } +} + +pub fn graph_name_to_python(py: Python<'_>, name: GraphName) -> PyObject { + match name { + GraphName::NamedNode(node) => PyNamedNode::from(node).into_py(py), + GraphName::BlankNode(node) => PyBlankNode::from(node).into_py(py), + GraphName::DefaultGraph => PyDefaultGraph::new().into_py(py), + } +} + +pub fn triple_to_python(py: Python<'_>, triple: Triple) -> (PyObject, PyObject, PyObject) { + ( + named_or_blank_node_to_python(py, triple.subject), + PyNamedNode::from(triple.predicate).into_py(py), + 
term_to_python(py, triple.object), + ) +} + +pub fn extract_quad(tuple: &PyTuple) -> PyResult { + let len = tuple.len(); + if len != 3 && len != 4 { + return Err(TypeError::py_err( + "A quad should be tuple with 3 or 4 elements", + )); + } + Ok(Quad { + subject: extract_named_or_blank_node(tuple.get_item(0))?, + predicate: extract_named_node(tuple.get_item(1))?, + object: extract_term(tuple.get_item(2))?, + graph_name: if len == 4 { + extract_graph_name(tuple.get_item(3))? + } else { + GraphName::DefaultGraph + }, + }) +} + +pub fn quad_to_python(py: Python<'_>, quad: Quad) -> (PyObject, PyObject, PyObject, PyObject) { + ( + named_or_blank_node_to_python(py, quad.subject), + PyNamedNode::from(quad.predicate).into_py(py), + term_to_python(py, quad.object), + graph_name_to_python(py, quad.graph_name), + ) +} + +fn eq_compare(a: &T, b: &T, op: CompareOp) -> PyResult { + match op { + CompareOp::Eq => Ok(a == b), + CompareOp::Ne => Ok(a != b), + _ => Err(NotImplementedError::py_err("Ordering is not implemented")), + } +} + +fn eq_ord_compare(a: &T, b: &T, op: CompareOp) -> bool { + match op { + CompareOp::Lt => a < b, + CompareOp::Le => a <= b, + CompareOp::Eq => a == b, + CompareOp::Ne => a != b, + CompareOp::Gt => a > b, + CompareOp::Ge => a >= b, + } +} +fn hash(t: &impl Hash) -> u64 { + let mut s = DefaultHasher::new(); + t.hash(&mut s); + s.finish() +} diff --git a/python/src/sled_store.rs b/python/src/sled_store.rs new file mode 100644 index 00000000..b15fab2e --- /dev/null +++ b/python/src/sled_store.rs @@ -0,0 +1,177 @@ +use crate::model::*; +use crate::store_utils::*; +use oxigraph::model::*; +use oxigraph::sparql::QueryOptions; +use oxigraph::{DatasetSyntax, FileSyntax, GraphSyntax, Result, SledStore}; +use pyo3::create_exception; +use pyo3::exceptions::ValueError; +use pyo3::prelude::*; +use pyo3::types::PyTuple; +use pyo3::{PyIterProtocol, PyObjectProtocol, PySequenceProtocol}; +use std::io::Cursor; + +create_exception!(oxigraph, SledError, 
pyo3::exceptions::RuntimeError);
+
+#[pyclass(name = SledStore)] // exported to Python under the name `SledStore`
+#[derive(Clone)]
+pub struct PySledStore {
+    inner: SledStore, // wrapped Rust store; every method below delegates to it
+}
+
+#[pymethods]
+impl PySledStore {
+    #[new]
+    fn new(path: Option<&str>) -> PyResult<Self> {
+        Ok(Self {
+            inner: if let Some(path) = path {
+                SledStore::open(path).map_err(|e| SledError::py_err(e.to_string()))?
+            } else {
+                SledStore::new().map_err(|e| SledError::py_err(e.to_string()))? // no path given
+            },
+        })
+    }
+
+    fn add(&self, quad: &PyTuple) -> PyResult<()> {
+        self.inner
+            .insert(&extract_quad(quad)?) // tuple -> quad conversion errors propagate as-is
+            .map_err(|e| SledError::py_err(e.to_string())) // store failures raise `SledError`
+    }
+
+    fn remove(&self, quad: &PyTuple) -> PyResult<()> {
+        self.inner
+            .remove(&extract_quad(quad)?)
+            .map_err(|e| SledError::py_err(e.to_string()))
+    }
+
+    fn r#match(
+        &self,
+        subject: &PyAny, // Python `None` in any position is a wildcard
+        predicate: &PyAny,
+        object: &PyAny,
+        graph_name: Option<&PyAny>, // may also be omitted entirely
+    ) -> PyResult<QuadIter> {
+        let (subject, predicate, object, graph_name) =
+            extract_quads_pattern(subject, predicate, object, graph_name)?;
+        Ok(QuadIter {
+            inner: Box::new(self.inner.quads_for_pattern(
+                subject.as_ref(),
+                predicate.as_ref(),
+                object.as_ref(),
+                graph_name.as_ref(),
+            )),
+        })
+    }
+
+    fn query(&self, query: &str, py: Python<'_>) -> PyResult<PyObject> {
+        let query = self
+            .inner
+            .prepare_query(query, QueryOptions::default())
+            .map_err(|e| ParseError::py_err(e.to_string()))?; // bad query text: `ParseError`
+        let results = query.exec().map_err(|e| SledError::py_err(e.to_string()))?;
+        query_results_to_python(py, results, SledError::py_err) // evaluation errors: `SledError`
+    }
+
+    #[args(data, mime_type, "*", base_iri = "\"\"", to_graph = "None")] // after "*": keyword-only
+    fn load(
+        &self,
+        data: &str,
+        mime_type: &str,
+        base_iri: &str,
+        to_graph: Option<&PyAny>,
+    ) -> PyResult<()> {
+        let to_graph_name = if let Some(graph_name) = to_graph {
+            Some(extract_graph_name(graph_name)?)
+        } else {
+            None
+        };
+        let base_iri = if base_iri.is_empty() {
+            None // the empty-string default stands for "no base IRI"
+        } else {
+            Some(base_iri)
+        };
+
+        if let Some(graph_syntax) = GraphSyntax::from_mime_type(mime_type) {
+            self.inner
+                .load_graph(
+                    Cursor::new(data),
+                    graph_syntax,
+                    &to_graph_name.unwrap_or(GraphName::DefaultGraph), // default graph if omitted
+                    base_iri,
+                )
+                .map_err(|e| ParseError::py_err(e.to_string()))
+        } else if let Some(dataset_syntax) = DatasetSyntax::from_mime_type(mime_type) {
+            if to_graph_name.is_some() {
+                return Err(ValueError::py_err(
+                    "The target graph name parameter is not available for dataset formats",
+                ));
+            }
+            self.inner
+                .load_dataset(Cursor::new(data), dataset_syntax, base_iri)
+                .map_err(|e| ParseError::py_err(e.to_string()))
+        } else {
+            Err(ValueError::py_err(format!(
+                "Not supported MIME type: {}",
+                mime_type
+            )))
+        }
+    }
+}
+
+#[pyproto]
+impl PyObjectProtocol for PySledStore {
+    fn __str__(&self) -> String {
+        self.inner.to_string()
+    }
+
+    fn __bool__(&self) -> bool {
+        !self.inner.is_empty() // truthy iff the store holds at least one quad
+    }
+}
+
+#[pyproto]
+impl PySequenceProtocol for PySledStore {
+    fn __len__(&self) -> usize {
+        self.inner.len()
+    }
+
+    fn __contains__(&self, quad: &PyTuple) -> PyResult<bool> {
+        self.inner
+            .contains(&extract_quad(quad)?)
+            .map_err(|e| SledError::py_err(e.to_string()))
+    }
+}
+
+#[pyproto]
+impl PyIterProtocol for PySledStore {
+    fn __iter__(slf: PyRef<Self>) -> QuadIter {
+        QuadIter {
+            inner: Box::new(slf.inner.quads_for_pattern(None, None, None, None)), // all quads
+        }
+    }
+}
+
+#[pyclass(unsendable)]
+pub struct QuadIter {
+    inner: Box<dyn Iterator<Item = Result<Quad>>>, // boxed so `match` and `__iter__` share a type
+}
+
+#[pyproto]
+impl PyIterProtocol for QuadIter {
+    fn __iter__(slf: PyRefMut<Self>) -> Py<Self> {
+        slf.into() // the iterator is its own iterable
+    }
+
+    fn __next__(
+        mut slf: PyRefMut<Self>,
+    ) -> PyResult<Option<(PyObject, PyObject, PyObject, PyObject)>> {
+        slf.inner
+            .next()
+            .map(move |q| {
+                Ok(quad_to_python(
+                    slf.py(),
+                    q.map_err(|e| SledError::py_err(e.to_string()))?,
+                ))
+            })
+            .transpose() // Option<PyResult<_>> -> PyResult<Option<_>>
+    }
+}
diff --git a/python/src/store_utils.rs b/python/src/store_utils.rs
new file mode 100644
index 00000000..f4520cfe
--- /dev/null
+++ b/python/src/store_utils.rs
@@ -0,0 +1,136 @@
+use crate::model::*;
+use oxigraph::model::*;
+use oxigraph::sparql::{QueryResult, QuerySolution};
+use oxigraph::Result;
+use pyo3::exceptions::TypeError;
+use pyo3::prelude::*;
+use pyo3::{create_exception, PyIterProtocol, PyMappingProtocol, PyNativeType};
+use std::vec::IntoIter;
+
+create_exception!(oxigraph, ParseError, pyo3::exceptions::Exception);
+
+pub fn extract_quads_pattern( // Python arguments -> optional quad pattern components
+    subject: &PyAny,
+    predicate: &PyAny,
+    object: &PyAny,
+    graph_name: Option<&PyAny>,
+) -> PyResult<(
+    Option<NamedOrBlankNode>,
+    Option<NamedNode>,
+    Option<Term>,
+    Option<GraphName>,
+)> {
+    Ok((
+        if subject.is_none() {
+            None // Python `None` means "match anything"
+        } else {
+            Some(extract_named_or_blank_node(subject)?)
+        },
+        if predicate.is_none() {
+            None
+        } else {
+            Some(extract_named_node(predicate)?)
+        },
+        if object.is_none() {
+            None
+        } else {
+            Some(extract_term(object)?)
+        },
+        if let Some(graph_name) = graph_name {
+            if graph_name.is_none() {
+                None // an explicit Python `None` is treated like an omitted argument
+            } else {
+                Some(extract_graph_name(graph_name)?)
+            }
+        } else {
+            None
+        },
+    ))
+}
+
+pub fn query_results_to_python( // SELECT/CONSTRUCT -> Python iterator object, ASK -> bool
+    py: Python<'_>,
+    results: QueryResult<'_>,
+    error: impl Fn(String) -> PyErr, // builds the Python exception from an error message
+) -> PyResult<PyObject> {
+    Ok(match results {
+        QueryResult::Solutions(solutions) => QuerySolutionIter {
+            inner: solutions
+                .collect::<Result<Vec<_>>>() // collected eagerly; the first error aborts
+                .map_err(|e| error(e.to_string()))?
+                .into_iter(),
+        }
+        .into_py(py),
+        QueryResult::Graph(triples) => TripleResultIter {
+            inner: triples
+                .collect::<Result<Vec<_>>>()
+                .map_err(|e| error(e.to_string()))?
+                .into_iter(),
+        }
+        .into_py(py),
+        QueryResult::Boolean(b) => b.into_py(py),
+    })
+}
+
+#[pyclass(unsendable)]
+pub struct PyQuerySolution {
+    inner: QuerySolution,
+}
+
+#[pyproto]
+impl PyMappingProtocol for PyQuerySolution {
+    fn __len__(&self) -> usize {
+        self.inner.len()
+    }
+
+    fn __getitem__(&self, input: &PyAny) -> PyResult<Option<PyObject>> {
+        if let Ok(key) = usize::extract(input) { // integer key
+            Ok(self
+                .inner
+                .get(key)
+                .map(|term| term_to_python(input.py(), term.clone())))
+        } else if let Ok(key) = <&str>::extract(input) { // string key
+            Ok(self
+                .inner
+                .get(key)
+                .map(|term| term_to_python(input.py(), term.clone())))
+        } else {
+            Err(TypeError::py_err(format!(
+                "{} is not an integer or a string",
+                input.get_type().name(),
+            )))
+        }
+    }
+}
+
+#[pyclass(unsendable)]
+pub struct QuerySolutionIter {
+    inner: IntoIter<QuerySolution>, // already-collected solutions
+}
+
+#[pyproto]
+impl PyIterProtocol for QuerySolutionIter {
+    fn __iter__(slf: PyRefMut<Self>) -> Py<Self> {
+        slf.into()
+    }
+
+    fn __next__(mut slf: PyRefMut<Self>) -> Option<PyQuerySolution> {
+        slf.inner.next().map(move |inner| PyQuerySolution { inner })
+    }
+}
+
+#[pyclass(unsendable)]
+pub struct TripleResultIter {
+    inner: IntoIter<Triple>, // already-collected triples
+}
+
+#[pyproto]
+impl PyIterProtocol for TripleResultIter {
+    fn __iter__(slf: PyRefMut<Self>) -> Py<Self> {
+        slf.into()
+    }
+
+    fn __next__(mut slf: PyRefMut<Self>) -> Option<(PyObject, PyObject, PyObject)> {
+        slf.inner.next().map(move |t| triple_to_python(slf.py(), t))
+    }
+}
diff --git a/python/tests/test_model.py b/python/tests/test_model.py
new file mode 100644
index 00000000..7b17ad77
--- /dev/null
+++ b/python/tests/test_model.py @@ -0,0 +1,69 @@ +import unittest +from oxigraph import * + +XSD_STRING = NamedNode("http://www.w3.org/2001/XMLSchema#string") +XSD_INTEGER = NamedNode("http://www.w3.org/2001/XMLSchema#integer") +RDF_LANG_STRING = NamedNode("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString") + + +class TestNamedNode(unittest.TestCase): + def test_constructor(self): + self.assertEqual(NamedNode("http://foo").value, "http://foo") + + def test_string(self): + self.assertEqual(str(NamedNode("http://foo")), "") + + def test_equal(self): + self.assertEqual(NamedNode("http://foo"), NamedNode("http://foo")) + self.assertNotEqual(NamedNode("http://foo"), NamedNode("http://bar")) + + +class TestBlankNode(unittest.TestCase): + def test_constructor(self): + self.assertEqual(BlankNode("foo").value, "foo") + self.assertNotEqual(BlankNode(), BlankNode()) + + def test_string(self): + self.assertEqual(str(BlankNode("foo")), "_:foo") + + def test_equal(self): + self.assertEqual(BlankNode("foo"), BlankNode("foo")) + self.assertNotEqual(BlankNode("foo"), BlankNode("bar")) + # TODO self.assertNotEqual(BlankNode('foo'), NamedNode('http://foo')) + # TODO self.assertNotEqual(NamedNode('http://foo'), BlankNode('foo')) + + +class TestLiteral(unittest.TestCase): + def test_constructor(self): + self.assertEqual(Literal("foo").value, "foo") + self.assertEqual(Literal("foo").datatype, XSD_STRING) + + self.assertEqual(Literal("foo", language="en").value, "foo") + self.assertEqual(Literal("foo", language="en").language, "en") + self.assertEqual(Literal("foo", language="en").datatype, RDF_LANG_STRING) + + self.assertEqual(Literal("foo", datatype=XSD_INTEGER).value, "foo") + self.assertEqual(Literal("foo", datatype=XSD_INTEGER).datatype, XSD_INTEGER) + + def test_string(self): + self.assertEqual(str(Literal("foo")), '"foo"') + self.assertEqual(str(Literal("foo", language="en")), '"foo"@en') + self.assertEqual( + str(Literal("foo", datatype=XSD_INTEGER)), + '"foo"^^', + ) + + 
def test_equals(self): + self.assertEqual(Literal("foo", datatype=XSD_STRING), Literal("foo")) + self.assertEqual( + Literal("foo", language="en", datatype=RDF_LANG_STRING), + Literal("foo", language="en"), + ) + # TODO self.assertNotEqual(NamedNode('http://foo'), Literal('foo')) + # TODO self.assertNotEqual(Literal('foo'), NamedNode('http://foo')) + # TODO self.assertNotEqual(BlankNode('foo'), Literal('foo')) + # TODO self.assertNotEqual(Literal('foo'), BlankNode('foo')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/test_store.py b/python/tests/test_store.py new file mode 100644 index 00000000..5901a628 --- /dev/null +++ b/python/tests/test_store.py @@ -0,0 +1,156 @@ +import unittest +from abc import ABC, abstractmethod + +from oxigraph import * + +foo = NamedNode("http://foo") +bar = NamedNode("http://bar") +baz = NamedNode("http://baz") +graph = NamedNode("http://graph") + + +class TestAbstractStore(unittest.TestCase, ABC): + @abstractmethod + def store(self): + pass + + def test_add(self): + store = self.store() + store.add((foo, bar, baz)) + store.add((foo, bar, baz, DefaultGraph())) + store.add((foo, bar, baz, graph)) + self.assertEqual(len(store), 2) + + def test_remove(self): + store = self.store() + store.add((foo, bar, baz)) + store.add((foo, bar, baz, DefaultGraph())) + store.add((foo, bar, baz, graph)) + store.remove((foo, bar, baz)) + self.assertEqual(len(store), 1) + + def test_len(self): + store = self.store() + store.add((foo, bar, baz)) + store.add((foo, bar, baz, graph)) + self.assertEqual(len(store), 2) + + def test_in(self): + store = self.store() + store.add((foo, bar, baz)) + store.add((foo, bar, baz, DefaultGraph())) + store.add((foo, bar, baz, graph)) + self.assertTrue((foo, bar, baz) in store) + self.assertTrue((foo, bar, baz, DefaultGraph()) in store) + self.assertTrue((foo, bar, baz, graph) in store) + self.assertTrue((foo, bar, baz, foo) not in store) + + def test_iter(self): + store = self.store() + 
store.add((foo, bar, baz, DefaultGraph())) + store.add((foo, bar, baz, graph)) + self.assertEqual( + list(store), [(foo, bar, baz, DefaultGraph()), (foo, bar, baz, graph)] + ) + + def test_match(self): + store = self.store() + store.add((foo, bar, baz, DefaultGraph())) + store.add((foo, bar, baz, graph)) + self.assertEqual( + list(store.match(None, None, None)), + [(foo, bar, baz, DefaultGraph()), (foo, bar, baz, graph)], + ) + self.assertEqual( + list(store.match(foo, None, None)), + [(foo, bar, baz, DefaultGraph()), (foo, bar, baz, graph)], + ) + self.assertEqual( + list(store.match(None, None, None, graph)), + [(foo, bar, baz, graph)], + ) + self.assertEqual( + list(store.match(foo, None, None, DefaultGraph())), + [(foo, bar, baz, DefaultGraph())], + ) + + def test_ask_query(self): + store = self.store() + store.add((foo, foo, foo)) + self.assertTrue(store.query("ASK { ?s ?s ?s }")) + self.assertFalse(store.query("ASK { FILTER(false) }")) + + def test_construct_query(self): + store = self.store() + store.add((foo, bar, baz)) + self.assertEqual( + list(store.query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }")), + [(foo, bar, baz)], + ) + + def test_select_query(self): + store = self.store() + store.add((foo, bar, baz)) + results = list(store.query("SELECT ?s WHERE { ?s ?p ?o }")) + self.assertEqual(len(results), 1) + self.assertEqual(results[0][0], foo) + self.assertEqual(results[0]["s"], foo) + + def test_load_ntriples_to_default_graph(self): + store = self.store() + store.load( + " .", + mime_type="application/n-triples", + ) + self.assertEqual(list(store), [(foo, bar, baz, DefaultGraph())]) + + def test_load_ntriples_to_named_graph(self): + store = self.store() + store.load( + " .", + mime_type="application/n-triples", + to_graph=graph, + ) + self.assertEqual(list(store), [(foo, bar, baz, graph)]) + + def test_load_turtle_with_base_iri(self): + store = self.store() + store.load( + " <> .", + mime_type="text/turtle", + base_iri="http://baz", + ) + 
self.assertEqual(list(store), [(foo, bar, baz, DefaultGraph())]) + + def test_load_nquads(self): + store = self.store() + store.load( + " .", + mime_type="application/n-quads", + ) + self.assertEqual(list(store), [(foo, bar, baz, graph)]) + + def test_load_trig_with_base_iri(self): + store = self.store() + store.load( + " { <> . }", + mime_type="application/trig", + base_iri="http://baz", + ) + self.assertEqual(list(store), [(foo, bar, baz, graph)]) + + +class TestMemoryStore(TestAbstractStore): + def store(self): + return MemoryStore() + + +class TestSledStore(TestAbstractStore): + def store(self): + return SledStore() + + +del TestAbstractStore # We do not want to expose this class to the test runner + +if __name__ == "__main__": + unittest.main()