This Jupiter Notebook creates a parquet data base, creates and ontology and then constructs an OBDA mappint to SPARQL query the database

Using Ontop as OBDA mapping

In [4]:
# Step 1: Install Required Libraries
# %pip install pandas pyarrow rdflib SPARQLWrapper

In [1]:
# Step 2: Create the Parquet File

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# Define the schema for the Parquet file
schema = pa.schema([
    ('Name', pa.string()),
    ('Age', pa.int32()),
    ('City', pa.string())
])

# Sample data
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 40],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston']
}
df = pd.DataFrame(data)

# Create a PyArrow Table and write to a Parquet file
table = pa.Table.from_pandas(df, schema=schema)
parquet_file = 'my_Parquet-file.parquet'
pq.write_table(table, parquet_file)

# Read the Parquet file back into a Pandas DataFrame
table_read = pq.read_table(parquet_file)
df_read = table_read.to_pandas()

print("Data read from Parquet file:")
print(df_read)

Data read from Parquet file:
      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
3    David   40      Houston


In [2]:
# Step 3: Create the Ontology

from rdflib import Graph, Namespace, Literal, URIRef

# Create a new RDF Graph
g = Graph()

# Define a namespace for our ontology
ns = Namespace("http://example.org/ontology/")

# Define classes and properties
g.add((ns.Person, ns.type, ns.Class))
g.add((ns.name, ns.type, ns.Property))
g.add((ns.age, ns.type, ns.Property))
g.add((ns.city, ns.type, ns.Property))

# Serialize the ontology to a file
g.serialize(destination='ontology.ttl', format='turtle')

<Graph identifier=N2119f5143a504a56a18f28474fc039cf (<class 'rdflib.graph.Graph'>)>

# Step 4: Create the Ontop Mapping

Now, we need to create an Ontop mapping file that maps the Parquet file's schema to the ontology. This is typically done using the Ontop CLI or a configuration file. For simplicity, we'll create a basic mapping file manually.

Create a file named mapping.obda with the following content:
[PrefixDeclaration]
:       http://example.org/ontology/
ex:     http://example.org/

[MappingDeclaration] @collection [[
mappingId   ~person_mapping
target      :Person/{Name} a :Person ; :name {Name} ; :age {Age} ; :city {City} .
source      SELECT Name, Age, City FROM my_Parquet-file.parquet
]]

In [5]:
# Step 5: Query the Parquet File Using SPARQL

from SPARQLWrapper import SPARQLWrapper, JSON

# Blazegraph endpoint can also write
blazegraph_url = "http://localhost:3838/blazegraph/namespace/kb/update"


# Define the SPARQL endpoint (assuming Ontop is running locally)
sparql = SPARQLWrapper(blazegraph_url)

# Define the SPARQL query
query = """
PREFIX : <http://example.org/ontology/>
SELECT ?name ?age ?city WHERE {
  ?person a :Person ;
          :name ?name ;
          :age ?age ;
          :city ?city .
}
"""

# Set the query and return format
sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# Execute the query and print the results
results = sparql.query().convert()

print("SPARQL Query Results:")
for result in results["results"]["bindings"]:
    print(result)

SPARQL Query Results:
