Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions sdata/sclass/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,27 @@ def to_dict(self) -> Dict[str, Any]:
result['data']['column_metadata'] = self.column_metadata.to_dict()
return result

def _ordered_columns(self):
    """Return the column attributes in the DataFrame's real column order.

    Every label of ``self.df.columns`` is converted with ``str`` and looked up
    in ``self.column_metadata``. Labels whose lookup yields ``None`` are
    skipped. The result follows the column order of ``self.df`` (not an
    alphabetical order).
    """
    ordered = []
    for label in self.df.columns:
        attr = self.column_metadata.get(str(label))
        if attr is not None:
            ordered.append(attr)
    return ordered

def to_jsonld(self, context_mode="inline"):
    """Build the JSON-LD document of the metadata including column metadata.

    Delegates to ``semantic.to_jsonld`` and passes the ordered column
    attributes from ``self._ordered_columns()`` as ``columns`` so they end up
    in the document's column list (``csvw:column``).

    :param context_mode: forwarded unchanged as ``context_mode``.
    :returns: whatever ``semantic.to_jsonld`` returns for these arguments.
    """
    # Imported locally, as in the original implementation.
    from sdata import semantic

    return semantic.to_jsonld(
        self.metadata,
        context_mode=context_mode,
        columns=self._ordered_columns(),
    )

def to_turtle(self):
    """Serialize the metadata, including column metadata, as RDF/Turtle.

    The JSON-LD document is built with ``self.to_jsonld()`` (default
    arguments) and handed to ``semantic.rdf_from_doc`` with ``fmt="turtle"``.

    :returns: the result of ``semantic.rdf_from_doc``.
    """
    # Imported locally, as in the original implementation.
    from sdata import semantic

    jsonld_doc = self.to_jsonld()
    return semantic.rdf_from_doc(jsonld_doc, fmt="turtle")

def write_sidecar(self, path=None, indent=2):
    """Write the sidecar ``<sname>.meta.jsonld`` including column metadata.

    The document comes from ``self.to_jsonld()`` and is written through
    ``semantic.write_sidecar_doc`` using ``self.sname`` as the name.

    :param path: forwarded unchanged to ``semantic.write_sidecar_doc``.
    :param indent: JSON indentation forwarded to the writer.
    :returns: the path of the written sidecar, as returned by
        ``semantic.write_sidecar_doc``.
    """
    # Imported locally, as in the original implementation.
    from sdata import semantic

    jsonld_doc = self.to_jsonld()
    return semantic.write_sidecar_doc(jsonld_doc, path, self.sname, indent=indent)

@classmethod
def from_dict(cls, d: Dict[str, Any]) -> 'DataFrame':
"""
Expand Down
47 changes: 40 additions & 7 deletions sdata/semantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@
_rdflib = None

__all__ = [
"to_jsonld", "from_jsonld", "to_rdf", "to_turtle",
"write_sidecar", "read_sidecar", "to_verifiable_credential",
"verify_credential", "SIDECAR_SUFFIX",
"to_jsonld", "from_jsonld", "to_rdf", "to_turtle", "rdf_from_doc",
"column_node", "write_sidecar", "read_sidecar",
"to_verifiable_credential", "verify_credential", "SIDECAR_SUFFIX",
]

SIDECAR_SUFFIX = ".meta.jsonld"
Expand Down Expand Up @@ -106,8 +106,28 @@ def _attr_node(attr):
return node


def to_jsonld(metadata, context_mode="inline"):
"""Serialisiere ``metadata`` als JSON-LD-Dokument (dict)."""
def column_node(attr):
    """Build the JSON-LD (CSVW) node for one table column.

    ``attr`` is a column entry from ``column_metadata``: ``attr.name`` is the
    column name and ``attr.value`` is the pandas dtype name (e.g.
    ``float64``), which is mapped through ``vocab.xsd_for_dtype``. ``unit``,
    ``label`` and ``description`` are optional and only emitted when set.

    :param attr: column attribute with ``name``, ``value``, ``unit``,
        ``label`` and ``description``.
    :returns: dict with ``name`` and ``datatype`` plus the optional entries.
    """
    node = {
        "name": attr.name,
        "datatype": vocab.xsd_for_dtype(attr.value),
    }

    # None, "" and "-" all count as "no unit given".
    if attr.unit not in (None, "", "-"):
        node.update(units.unit_node(attr.unit))

    # Optional free-text fields are copied only when they are truthy.
    for key in ("label", "description"):
        text = getattr(attr, key)
        if text:
            node[key] = text
    return node


def to_jsonld(metadata, context_mode="inline", columns=None):
"""Serialisiere ``metadata`` als JSON-LD-Dokument (dict).

:param columns: optionale, **geordnete** Iterable von Spalten-``Attribute``s
(DataFrame, in df-Spaltenreihenfolge); wird als ``csvw:column``-Liste ergänzt.
"""
doc = {"@context": vocab.build_context(mode=context_mode)}

sname = metadata.get("_sdata_sname")
Expand Down Expand Up @@ -142,6 +162,10 @@ def to_jsonld(metadata, context_mode="inline"):
# User-Attribute -> sdata:<name>
for attr in metadata.user_attributes.values():
doc[vocab.predicate_for(attr.name)] = _attr_node(attr)

# tabellarische Spalten (DataFrame) als geordnete csvw:column-Liste
if columns:
doc["columns"] = [column_node(a) for a in columns]
return doc


Expand Down Expand Up @@ -212,7 +236,11 @@ def to_rdf(metadata, fmt="turtle"):
(turtle/nt/xml/…) serialisiert. Ohne rdflib wird das JSON-LD selbst
zurückgegeben – ``application/ld+json`` ist bereits gültiges RDF.
"""
doc = to_jsonld(metadata)
return rdf_from_doc(to_jsonld(metadata), fmt=fmt)


def rdf_from_doc(doc, fmt="turtle"):
"""Serialisiere ein bereits gebautes JSON-LD-Dokument als RDF (siehe :func:`to_rdf`)."""
data = json.dumps(doc, default=dtypes.json_default)
if _rdflib is not None:
graph = _rdflib.Graph()
Expand Down Expand Up @@ -275,9 +303,14 @@ def write_sidecar(metadata, path=None, indent=2):
"""Schreibe ``<sname>.meta.jsonld`` (JSON-LD) neben einen Blob; gibt den Pfad zurück."""
sname_attr = metadata.get("_sdata_sname")
sname = sname_attr.value if sname_attr is not None and sname_attr.value else "metadata"
return write_sidecar_doc(to_jsonld(metadata), path, sname, indent=indent)


def write_sidecar_doc(doc, path, sname, indent=2):
    """Write an already-built JSON-LD document as ``<sname>.meta.jsonld``.

    :param doc: JSON-LD document to serialize.
    :param path: location argument passed to ``_sidecar_path``.
    :param sname: name passed to ``_sidecar_path`` to derive the file name.
    :param indent: indentation passed to ``json.dump``.
    :returns: the target path the document was written to.
    """
    target = _sidecar_path(path, sname)
    # Serialize exactly the document handed in, exactly once. This function
    # has no ``metadata`` in scope, so it must not rebuild the document itself.
    with open(target, "w", encoding="utf-8") as fh:
        json.dump(doc, fh, indent=indent, default=dtypes.json_default)
    return target


Expand Down
4 changes: 4 additions & 0 deletions sdata/vocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"xsd": "http://www.w3.org/2001/XMLSchema#",
"bfo": "http://purl.obolibrary.org/obo/",
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
"csvw": "http://www.w3.org/ns/csvw#",
"did": "https://www.w3.org/ns/did#",
}

Expand All @@ -54,6 +55,9 @@
"symbol": "qudt:symbol",
"label": "rdfs:label",
"required": {"@id": "sdata:required", "@type": "xsd:boolean"},
# tabellarische Spalten (CSVW)
"columns": {"@id": "csvw:column", "@container": "@list"},
"datatype": {"@id": "csvw:datatype", "@type": "@id"},
}

#: BFO-2.0-Klassenname -> OBO-CURIE (für ``_sdata_topology_class``).
Expand Down
60 changes: 60 additions & 0 deletions tests/test_semantic_columns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
"""Tests: DataFrame-Spalten-Metadaten (csvw:column) im JSON-LD/RDF/Sidecar."""
import json

import pandas as pd

from sdata.metadata import Metadata, Attribute
from sdata import semantic
from sdata.sclass.dataframe import DataFrame


def _sdf():
    """Return a small sdata ``DataFrame`` fixture named ``tab``.

    The frame has the columns ``force`` (floats), ``n`` (ints) and ``label``
    (strings); only ``force`` gets extra semantic column metadata.
    """
    frame = pd.DataFrame({"force": [1.0, 2.0], "n": [1, 2], "label": ["x", "y"]})
    table = DataFrame(df=frame, name="tab")
    # Semantically enrich a single column.
    table.column_metadata.set_attr(
        "force",
        unit="kN",
        label="Force",
        description="axial force",
    )
    return table


def test_column_node_helper():
    """``semantic.column_node`` maps an ``Attribute`` to a CSVW column node."""
    enriched = Attribute("force", "float64", unit="kN", label="Force", description="d")
    expected = {
        "name": "force",
        "datatype": "xsd:double",
        "unitRef": "unit:KiloN",
        "symbol": "kN",
        "label": "Force",
        "description": "d",
    }
    assert semantic.column_node(enriched) == expected

    # Without unit/label the node only carries name + datatype.
    bare = Attribute("n", "int64")
    assert semantic.column_node(bare) == {"name": "n", "datatype": "xsd:integer"}


def test_dataframe_to_jsonld_columns():
    """``DataFrame.to_jsonld`` lists every column with datatype and extras."""
    doc = _sdf().to_jsonld()
    by_name = {}
    for column in doc["columns"]:
        by_name[column["name"]] = column

    assert set(by_name) == {"force", "n", "label"}

    force = by_name["force"]
    assert force["datatype"] == "xsd:double"
    assert force["unitRef"] == "unit:KiloN"
    assert force["label"] == "Force"

    assert by_name["n"]["datatype"] == "xsd:integer"
    assert by_name["label"]["datatype"] == "xsd:string"  # object dtype

    # The @context knows the CSVW terms.
    assert doc["@context"]["columns"]["@id"] == "csvw:column"


def test_to_jsonld_columns_none_and_empty():
    """No ``columns`` key is emitted for ``columns=None`` or an empty list."""
    meta = Metadata()
    meta.add("x", 1)

    without_columns = semantic.to_jsonld(meta)  # columns=None
    assert "columns" not in without_columns

    with_empty_columns = semantic.to_jsonld(meta, columns=[])  # empty
    assert "columns" not in with_empty_columns


def test_dataframe_turtle_and_sidecar(tmp_path):
    """Turtle output and the written sidecar both contain the columns.

    :param tmp_path: pytest temporary directory the sidecar is written into.
    """
    sdf = _sdf()

    # Turtle fallback (no rdflib) = JSON-LD with columns.
    # NOTE(review): this assertion assumes rdflib is not installed; with
    # rdflib present the result is presumably not JSON — confirm.
    ttl = sdf.to_turtle()
    assert "columns" in json.loads(ttl)

    # The sidecar contains the columns, in DataFrame column order.
    written = sdf.write_sidecar(str(tmp_path))
    assert written.endswith(".meta.jsonld")
    # Use a context manager so the file handle is closed deterministically.
    with open(written) as fh:
        doc = json.load(fh)
    assert [c["name"] for c in doc["columns"]] == ["force", "n", "label"]