# metadata4Ing

Metadata4Ing is an ontology developed within the NFDI Consortium NFDI4Ing ["with the aim of providing a thorough framework for the semantic description of research data, with a particular focus on engineering sciences and neighbouring disciplines."](https://nfdi4ing.pages.rwth-aachen.de/metadata4ing/metadata4ing/)

Let's try to find a way to describe, within the HDF5 data model, an engineering process that led to an HDF5 file:

In [1]:
# import h5m4i
import h5rdmtoolbox as h5tbx
import h5py
import json
import pathlib
import pydantic
import sys
from ast import literal_eval
from typing import Dict, Union, List

Failed to import module h5tbx


In [2]:
def to_hdf(jsonld_filename, grp: h5py.Group) -> None:
    """Read a .jsonld file and mirror its content into an HDF5 group.

    The parsed JSON document is walked recursively:

    * a dict value becomes a sub-group named after its key; under the key
      ``'has parameter'`` the object is instead named after the node's
      ``'label'`` (falling back to its ``'@id'``) and the ``'@id'`` is also
      stored as attribute ``'has parameter'`` on the parent,
    * a list value becomes a sub-group named after its key, holding one
      object per dict item (named by the item's ``'@id'`` or its index);
      non-dict items are stored as attributes named by their index,
    * a node carrying ``'has numerical value'`` becomes a dataset holding
      that value, with the node's remaining entries as attributes,
    * any other value is written as an attribute of the current group.

    Parameters
    ----------
    jsonld_filename : str or pathlib.Path
        Path of the JSON-LD file to read.
    grp : h5py.Group
        Group that receives the content.

    Raises
    ------
    TypeError
        If `grp` is not a ``h5py.Group`` or `jsonld_filename` is neither a
        ``str`` nor a ``pathlib.Path``.
    KeyError
        If a ``'has parameter'`` node has no ``'@id'`` entry.
    """
    if not isinstance(grp, h5py.Group):
        raise TypeError(f'Expecting h5py.Group, got {type(grp)}')

    if not isinstance(jsonld_filename, (str, pathlib.Path)):
        raise TypeError(f'Expecting str or pathlib.Path, got {type(jsonld_filename)}')

    numerical_key = 'has numerical value'

    def _has_numerical_value(node: Dict) -> bool:
        """True if the node carries a usable numerical value (0 counts, '' does not)."""
        raw = node.get(numerical_key)
        return raw is not None and raw != ''

    def _numerical_data(raw):
        """Return the value to store: strings are parsed, JSON numbers are used as is."""
        # literal_eval only accepts strings (or AST nodes); a JSON number
        # arrives here already as int/float and must not be passed to it.
        return literal_eval(raw) if isinstance(raw, str) else raw

    def _write_node(parent, name: str, node: Dict) -> None:
        """Create `name` below `parent`: a dataset for numerical nodes, else a group."""
        if _has_numerical_value(node):
            ds = parent.create_dataset(name, data=_numerical_data(node[numerical_key]),
                                       track_order=True)
            for key, value in node.items():
                if key != numerical_key:
                    ds.attrs[key] = value
        else:
            # only groups can hold children, so recursion happens here only
            _to_hdf(parent.create_group(name, track_order=True), node)

    def _to_hdf(_h5, jdict: Dict) -> None:
        """Recursively write the entries of `jdict` into the group `_h5`."""
        for k, v in jdict.items():
            if isinstance(v, dict):
                if k == 'has parameter':
                    node_id = v['@id']
                    _h5.attrs[k] = node_id
                    # name the object by its label, or by its id if unlabeled
                    _write_node(_h5, v.get('label', node_id), v)
                else:
                    _to_hdf(_h5.create_group(k, track_order=True), v)
            elif isinstance(v, list):
                list_grp = _h5.create_group(k, track_order=True)
                for i, item in enumerate(v):
                    if isinstance(item, dict):
                        _write_node(list_grp, item.get('@id', str(i)), item)
                    else:
                        # plain entries (e.g. a list of strings) have no keys
                        # to recurse into; keep them as indexed attributes
                        list_grp.attrs[str(i)] = item
            else:
                _h5.attrs[k] = v

    with open(jsonld_filename, 'r', encoding='utf-8') as f:
        _to_hdf(grp, json.load(f))

In [3]:
# Create a 'metadata' group in a new file, fill it from the JSON-LD example
# and dump the resulting file content.
with h5tbx.File() as h5file:
    h5file.create_group('metadata')
    to_hdf('min_m4i_ex.jsonld', h5file.metadata)
    h5file.dump()

In [4]:
from pprint import pprint

In [5]:
# Same steps again: create the 'metadata' group, write the JSON-LD example
# into it and dump the file content.
with h5tbx.File() as h5file:
    h5file.create_group('metadata')
    to_hdf('min_m4i_ex.jsonld', h5file.metadata)
    h5file.dump()