# HDF5 and RDF - A linked-data approach

The content of an HDF5 file can be described semantically by linking its groups and attributes to RDF vocabulary terms (IRIs). Here's how:

In [106]:
import h5rdmtoolbox as h5tbx

In [107]:
# Open an HDF5 file without passing a filename and annotate its root attributes.
# NOTE(review): presumably h5tbx.File() creates a temporary file here — confirm.
with h5tbx.File() as h5:
    # Plain attribute; its RDF metadata is attached separately below.
    h5.attrs['title'] = 'Test file'
    
    # Attach a human-readable definition to the existing attribute "title".
    h5.rdf['title'].definition = 'Title describes the content of the file'

    # Tuple key: attribute name plus an IRI. Presumably the IRI is stored as the
    # attribute's RDF predicate in the same step — confirm against the h5rdmtoolbox docs.
    h5.attrs['name', 'https://schema.org/name'] = 'name'
    # Show the annotated file content.
    h5.dump()

In [108]:
# Create a new file with a "contact" group and describe the group and its
# attributes with RDF terms.
with h5tbx.File(mode='w') as h5:
    grp = h5.create_group('contact',
                          attrs=dict(orcid='https://orcid.org/0000-0001-8729-0482'))
    # Group-level RDF annotations: predicate, type and subject IRI of the group.
    grp.rdf.predicate = 'https://schema.org/author'
    grp.rdf.type = 'http://xmlns.com/foaf/0.1/Person'
    grp.rdf.subject = 'https://orcid.org/0000-0001-8729-0482'
    # Attribute-level predicate for the already existing attribute "orcid".
    grp.rdf.predicate['orcid'] = 'http://w3id.org/nfdi4ing/metadata4ing#orcidId'
    # Set the attribute value and its predicate IRI in one step.
    grp.attrs['name', 'http://xmlns.com/foaf/0.1/firstName'] = 'Matthias'

    h5.dump(collapsed=False)

# Remember the file path; later cells pass it to h5tbx.dump_jsonld().
hdf_filename = h5.hdf_filename

## JSON-LD extraction

In [109]:
from h5rdmtoolbox import jsonld

In [110]:
# Serialize the HDF5 file referenced by hdf_filename to a JSON-LD string,
# passing two user-defined prefixes and explicitly requesting structural=True.
jstr = h5tbx.dump_jsonld(
    hdf_filename,
    structural=True,
    context=dict(
        m4i='http://w3id.org/nfdi4ing/metadata4ing#',
        foaf='http://xmlns.com/foaf/0.1/',
    ),
    indent=2,
)

In [111]:
# print() instead of a bare expression so the indented, multi-line JSON-LD
# string is rendered line by line.
print(jstr)

{
  "@context": {
    "foaf": "http://xmlns.com/foaf/0.1/",
    "hdf5": "http://purl.allotrope.org/ontologies/hdf5/1.8#",
    "m4i": "http://w3id.org/nfdi4ing/metadata4ing#",
    "name": "http://xmlns.com/foaf/0.1/firstName",
    "orcid": "http://w3id.org/nfdi4ing/metadata4ing#orcidId"
  },
  "@graph": [
    {
      "@id": "_:N156",
      "@type": "hdf5:File",
      "hdf5:rootGroup": {
        "@id": "_:N155",
        "@type": "hdf5:Group",
        "hdf5:member": {
          "@id": "https://orcid.org/0000-0001-8729-0482",
          "@type": [
            "hdf5:Group",
            "foaf:Person"
          ],
          "hdf5:attribute": [
            {
              "@id": "_:N157",
              "@type": "hdf5:Attribute",
              "hdf5:name": "name",
              "hdf5:value": "Matthias"
            },
            {
              "@id": "_:N158",
              "@type": "hdf5:Attribute",
              "hdf5:name": "orcid",
              "hdf5:value": {
                "@type": "htt

## Find objects based on RDF triples

In [112]:
# FOAF was not imported anywhere in this notebook, so FOAF.Person raised a
# NameError on a fresh kernel; import the namespace from rdflib where it is used.
from rdflib.namespace import FOAF

# NOTE(review): 'rdf_example.hdf' is not written by any cell visible in this
# notebook; it has to exist in the working directory already.
with h5tbx.File('rdf_example.hdf') as h5:
    h5.dump()
    # Search the file for objects whose RDF type is foaf:Person.
    res = h5.rdf.find(rdf_type=FOAF.Person)

In [113]:
# Display the objects returned by the RDF-type search.
res

[<LGroup "/contact" in "rdf_example.hdf">]

In [114]:
# Dump the same file again, this time without passing the structural flag,
# and print the resulting JSON-LD string directly.
print(
    h5tbx.dump_jsonld(
        hdf_filename,
        context=dict(
            m4i='http://w3id.org/nfdi4ing/metadata4ing#',
            foaf='http://xmlns.com/foaf/0.1/',
        ),
        indent=2,
    )
)

{
  "@context": {
    "foaf": "http://xmlns.com/foaf/0.1/",
    "hdf5": "http://purl.allotrope.org/ontologies/hdf5/1.8#",
    "m4i": "http://w3id.org/nfdi4ing/metadata4ing#",
    "name": "http://xmlns.com/foaf/0.1/firstName",
    "orcid": "http://w3id.org/nfdi4ing/metadata4ing#orcidId"
  },
  "@graph": [
    {
      "@id": "_:N160",
      "@type": "hdf5:File",
      "hdf5:rootGroup": {
        "@id": "_:N159",
        "@type": "hdf5:Group",
        "hdf5:member": {
          "@id": "https://orcid.org/0000-0001-8729-0482",
          "@type": [
            "hdf5:Group",
            "foaf:Person"
          ],
          "hdf5:attribute": [
            {
              "@id": "_:N161",
              "@type": "hdf5:Attribute",
              "hdf5:name": "name",
              "hdf5:value": "Matthias"
            },
            {
              "@id": "_:N162",
              "@type": "hdf5:Attribute",
              "hdf5:name": "orcid",
              "hdf5:value": {
                "@type": "htt

In [115]:
# Produce the JSON-LD string through the opened file object's own
# dump_jsonld() method, using the same two prefixes as before.
with h5tbx.File('rdf_example.hdf') as h5:
    js = h5.dump_jsonld(
        context=dict(
            m4i='http://w3id.org/nfdi4ing/metadata4ing#',
            foaf='http://xmlns.com/foaf/0.1/',
        )
    )

In [116]:
# Persist the JSON-LD string so it can be parsed from disk in the SPARQL section.
# The explicit UTF-8 encoding keeps the written file independent of the
# platform's default text encoding.
with open('rdf_example.json', mode='w', encoding='utf-8') as f:
    f.write(js)

## Query with SPARQL

In [117]:
# Import the package itself instead of aliasing the module as `g`: the
# visualization section assigns an rdflib graph to `g`, after which re-running
# this cell would fail at `g.Graph()`.
import rdflib

graph = rdflib.Graph()
# Last expression of the cell, so the parsed graph is displayed as output.
graph.parse('rdf_example.json', format='json-ld')

<Graph identifier=Nc9bf0c1900b7490a83d77ec00b237c2a (<class 'rdflib.graph.Graph'>)>

In [118]:
# Select every node typed foaf:Person together with its foaf:firstName.
# (The m4i prefix is declared but not used by this query.)
res = graph.query("""
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX m4i: <http://w3id.org/nfdi4ing/metadata4ing#>

SELECT ?id ?name
WHERE {
    ?id a foaf:Person .
    ?id foaf:firstName ?name .
    }
""")

In [119]:
# Print each result row of the foaf:Person query, one per line.
for row in res:
    print(row)

(rdflib.term.URIRef('https://orcid.org/0000-0001-8729-0482'), rdflib.term.Literal('Matthias'))


In [120]:
# Select every node typed hdf5:Group together with its hdf5:name.
# (The foaf prefix is declared but not used by this query.)
res = graph.query("""
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX hdf5: <http://purl.allotrope.org/ontologies/hdf5/1.8#>

SELECT ?id ?name
WHERE {
    ?id a hdf5:Group .
    ?id hdf5:name ?name .
    }
""")

In [121]:
# Print each result row of the hdf5:Group query, one per line.
for row in res:
    print(row)

(rdflib.term.BNode('N163'), rdflib.term.Literal('/'))
(rdflib.term.URIRef('https://orcid.org/0000-0001-8729-0482'), rdflib.term.Literal('/contact'))
(rdflib.term.BNode('N167'), rdflib.term.Literal('/grp'))
(rdflib.term.BNode('N171'), rdflib.term.Literal('/processing_info'))


In [122]:
# Select every node typed hdf5:Dataset together with its hdf5:name.
# (The foaf prefix is declared but not used by this query.)
res = graph.query("""
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX hdf5: <http://purl.allotrope.org/ontologies/hdf5/1.8#>

SELECT ?id ?name
WHERE {
    ?id a hdf5:Dataset .
    ?id hdf5:name ?name .
    }
""")

In [123]:
# Print each result row of the hdf5:Dataset query, one per line.
for row in res:
    print(row)

(rdflib.term.BNode('N168'), rdflib.term.Literal('/grp/random_velocity'))


## Visualization

In [124]:
import kglab

In [125]:
# Obtain the graph for the HDF5 file straight from h5rdmtoolbox. The call returns
# two values; they are passed to kglab below as `import_graph` and `namespaces`.
g, ctx = h5tbx.jsonld.get_rdflib_graph('rdf_example.hdf')

In [126]:
# Wrap the graph and its namespace context in a kglab knowledge graph.
kg = kglab.KnowledgeGraph(namespaces=ctx, import_graph=g)

In [127]:
# Node styling passed to the pyvis rendering, keyed by prefix.
# NOTE(review): "_" presumably matches blank nodes — confirm against the kglab docs.
VIS_STYLE = {
    "hdf5": {"color": "red", "size": 30},
    "_": {"color": "black", "size": 20},
}
subgraph = kglab.SubgraphTensor(kg)

In [128]:
# Build the pyvis graph from the subgraph using VIS_STYLE, then show it
# via 'graph.html'.
pyvis_graph = subgraph.build_pyvis_graph(style=VIS_STYLE, notebook=True)
pyvis_graph.show('graph.html', notebook=True)

graph.html
