# Example Layouts

**Note, work in progress!**

This section provides useful layout specifications.

In [1]:
from h5rdmtoolbox import layout
from h5rdmtoolbox.database import hdfdb
import h5rdmtoolbox as h5tbx

## All string datasets must be one-dimensional

In [2]:
# Filter handed to the file search: datasets with exactly one dimension whose
# dtype string matches the regular expression.
# NOTE(review): '^(?!S)*' applies `*` to a zero-width negative lookahead, so it
# presumably matches every dtype string and does not single out string
# datasets -- TODO confirm the intended pattern.
one_dim_dtype_filter = {
    '$ndim': {'$eq': 1},
    '$dtype': {'$regex': '^(?!S)*'},
}

# Register the rule on a fresh layout. The search descends into sub-groups
# (recursive=True) and only considers datasets; `n` presumably constrains the
# number of matches that must be found (here: more than zero).
lay = layout.Layout()
string_dataset_1D = lay.add(
    hdfdb.FileDB.find,
    flt=one_dim_dtype_filter,
    recursive=True,
    objfilter='dataset',
    description='String dataset must be 1D',
    n={'$gt': 0},
)

In [3]:
# Build a temporary HDF5 file holding two string datasets (one at root level,
# one inside the group "grp") and one scalar integer dataset, then remember
# the file name for the validation step.
string_values = ['one', 'two', 'three']
with h5tbx.File() as h5:
    h5.create_string_dataset('a string ds', data=string_values)
    dss = h5.create_string_dataset('grp/a string ds', data=string_values)
    dsn = h5.create_dataset('a', data=4)
    # Show the dtypes the layout filter will be compared against.
    print(dsn.dtype)
    print(dss.dtype)
    hdf_filename = h5.hdf_filename

int32
|S5


In [4]:
# Validate the HDF5 file against the layout and keep the result for reporting.
res = lay.validate(hdf_filename)

In [5]:
# Print the validation summary table, leaving out the 'called' and 'kwargs' entries.
res.print_summary(exclude_keys=('called', 'kwargs'))


Summary of layout validation
+--------------------------------------+--------+--------------------+---------------------------+---------------+---------------+-----------------------------------------+
| id                                   |   flag | flag description   | description               | target_type   | target_name   | func                                    |
|--------------------------------------+--------+--------------------+---------------------------+---------------+---------------+-----------------------------------------|
| 39344777-7032-4f34-9f1f-ad14989430f5 |      1 | SUCCESSFUL         | String dataset must be 1D | Group         | tmp0.hdf      | h5rdmtoolbox.database.hdfdb.filedb.find |
+--------------------------------------+--------+--------------------+---------------------------+---------------+---------------+-----------------------------------------+
--> Layout is valid


In [6]:
import re

In [7]:
# Scratch check of a negative-lookahead pattern: '^(?!S).*' matches strings
# that do not start with 'S', so 'M123' matches in full.
# NOTE(review): this is not the same pattern as the '^(?!S)*' used in the
# layout filter of this notebook -- confirm which one is intended.
re.search('^(?!S).*', 'M123')

<re.Match object; span=(0, 4), match='M123'>

## Defining RDF specifications

All datasets must have the attribute `units`, which shall be semantically described by the predicate `http://w3id.org/nfdi4ing/metadata4ing#hasUnit`.

In [8]:
# Start a new, empty layout that will hold the RDF-related specifications.
rdf_lay = layout.Layout()

In [9]:
# Layout rule: search the whole file (recursively) for datasets that carry a
# `units` attribute. `n` presumably constrains the number of matches that must
# be found (here: more than zero).
# NOTE(review): "more than zero matches" is weaker than "all datasets have
# units" -- confirm this is the intended check.
is_dataset = rdf_lay.add(
    hdfdb.FileDB.find,
    flt={'units': {'$exists': True}},
    recursive=True,
    objfilter='dataset',
    # The description previously read 'String dataset must be 1D', copied from
    # the string-dataset example; it now states what this rule checks.
    description='Dataset must have the attribute "units"',
    n={'$gt': 0}
)

In [10]:
from h5rdmtoolbox.database import rdf_find

In [11]:
# Chain a second specification onto the dataset rule. It is presumably applied
# to the objects found by that rule and requires exactly one RDF match for the
# given predicate.
# Note: `is_dataset` is rebound to the object returned by `.add`, so running
# this cell twice chains onto the new object rather than the original rule.
has_unit_predicate = "http://w3id.org/nfdi4ing/metadata4ing#hasUnit"
is_dataset = is_dataset.add(rdf_find, rdf_predicate=has_unit_predicate, n=1)

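Test the layout specification: first with a dataset whose `units` attribute has no RDF predicate attached (validation fails), then with the predicate assigned (validation succeeds).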

In [16]:
# Write a file whose dataset has a `units` attribute but no RDF predicate
# attached to that attribute.
with h5tbx.File() as h5:
    ds = h5.create_dataset(
        'velocity',
        data=4.5,
        attrs={'units': 'm/s'},
    )

In [17]:
# Validate the file written in the previous cell; `h5` is still used here only
# to look up the file name after its `with` block has ended.
result_without_predicate = rdf_lay.validate(h5.hdf_filename)
result_without_predicate.is_valid()

2024-06-23_15:25:31,621 ERROR    [core.py:330] Applying spec. "LayoutSpecification(kwargs={'rdf_predicate': 'http://w3id.org/nfdi4ing/metadata4ing#hasUnit'})" failed due to not matching the number of results: 1 != 0


False

In [18]:
# Write the same dataset again, this time attaching the hasUnit predicate to
# its `units` attribute.
with h5tbx.File() as h5:
    ds = h5.create_dataset(
        'velocity',
        data=4.5,
        attrs={'units': 'm/s'},
    )
    units_rdf = ds.rdf['units']
    units_rdf.predicate = "http://w3id.org/nfdi4ing/metadata4ing#hasUnit"

In [19]:
# Validate the file written in the previous cell, which now has the RDF
# predicate attached to the `units` attribute.
result_with_predicate = rdf_lay.validate(h5.hdf_filename)
result_with_predicate.is_valid()

True