In [1]:
import json
from enum import Enum
from typing import Optional, List, Dict, Union, Literal

from pydantic import BaseModel, Field, ValidationError

"""
The JHTDB Data Model: pydantic classes.
"""
# one named offset entry; used as the element type of PhysicalVariable.offsets.
class Offset(BaseModel):
    # name of this offset entry.
    name: str
    # NOTE(review): presumably the offset expressed in grid units, one value per dimension — confirm.
    grid: List[float]
    # NOTE(review): presumably the same offset expressed in physical coordinates — confirm.
    coordinate: List[float]

class Feature(BaseModel):
    """
    general quadruple basically, used for listing the various features (methods, operators etc) that are
    available on a Dataset.
    the code is to be used as identifier and references to a Feature in a certain collection.
    """
    # identifier by which this feature is referenced within its collection.
    code: str
    # name of the feature.
    name: str
    # optional id; None when omitted.
    id: Optional[str] = None
    # optional free-text description; None when omitted.
    description: Optional[str] = None

# a Feature (code, name, id, description) extended with component information.
# used as the element type of TurbulenceDB.variables.
class Variable(Feature):
    # codes of the components of this variable.
    component_codes: List[str]
    # NOTE(review): presumably the number of components of the variable — confirm.
    cardinality: int

class VariableOperatorMethod(BaseModel):
    """
    which spatial interpolation methods can be applied to the result of which operator applied to which variable.
    all represented by their codes pointing to the variables/operators/interpolation_methods in the database definition.
    """
    # code of the operator.
    operator: str
    # codes of the spatial interpolation methods that can be applied to the result of that operator.
    methods: List[str]

# a variable as it is available in one dataset; element type of Dataset.physicalVariables.
class PhysicalVariable(BaseModel):
    # NOTE(review): presumably matches the code of an entry in TurbulenceDB.variables — confirm.
    code: str
    # name of the variable.
    name: str
    # whether the variable is stored for each grid point on disk.
    gridded: bool
    # staggered grid and coordinate offsets between dimensions.
    offsets: List[Offset]
    # per operator, the spatial interpolation methods that can be applied (see VariableOperatorMethod).
    spatialOperatorMethods: List[VariableOperatorMethod]
    # NOTE(review): presumably codes of entries in TurbulenceDB.temporal_methods — confirm.
    temporalMethods: List[str]

# pair of integer time index shifts; used as the type of Dimension.timeIndexShift.
class TimeIndexShift(BaseModel):
    # NOTE(review): presumably the shift applied for cutout queries — confirm.
    getcutout: int
    # NOTE(review): presumably the shift applied for point (getdata) queries — confirm.
    getdata: int

# description of a single dimension of a simulation; Simulation holds one of these per field.
class Dimension(BaseModel):
    # physical lower bound on dimension.
    lower: str
    # physical upper bound on dimension. str because it may have "pi" included.
    upper: str
    # number of cells along the dimension.
    n: int
    # whether the dimension is stored as discrete steps. optional; None when omitted.
    discrete: Optional[bool] = None
    # whether the dimension boundary is periodic. defaults to False when omitted.
    isPeriodic: Optional[bool] = False
    # shift applied to the user-specified time index.
    # differs based on query type and whether or not pchip time interpolation is allowed.
    timeIndexShift: Optional[TimeIndexShift] = None
    # grid spacing along the dimension (dx, dy or dz).
    # a dimension with irregular grid spacing is specified as a string instead of a float.
    # NOTE(review): for irregular spacing the grid values themselves are apparently stored on the python side — confirm.
    spacing: Optional[Union[str, float]] = None
    # zarr chunk size.
    chunk: Optional[int] = None

# the four Dimension descriptions of a simulation; all four are required.
# NOTE(review): the t/x/y/z prefixes presumably denote time and the three spatial axes — confirm.
class Simulation(BaseModel):
    tlims: Dimension
    xlims: Dimension
    ylims: Dimension
    zlims: Dimension

# one dataset of the database; element type of TurbulenceDB.datasets.
class Dataset(BaseModel):
    # NOTE(review): presumably the human-readable name shown to users — confirm.
    displayname: str
    # name of the dataset.
    name: str
    # the dimensions (tlims, xlims, ylims, zlims) of the underlying simulation.
    simulation: Simulation
    # optional free-text description; None when omitted.
    description: Optional[str] = None
    # variables available in this dataset, given as full PhysicalVariable entries (each carries its own code).
    physicalVariables: List[PhysicalVariable]

# root model of the data model: the jhtdb schema file is generated from it and the config file is validated against it.
class TurbulenceDB(BaseModel):
    # name of the database.
    name: str
    # optional free-text description; None when omitted.
    description: Optional[str] = None
    # the variables defined for the database.
    variables: List[Variable]
    # operators are derived fields that can be extracted in addition to the original field.
    # examples are hessian, gradient, laplacian.
    spatial_operators: List[Feature]
    # different spatial interpolation methods can be applied for point queries.
    # examples are lag4, lag6, lag8, m2q8, fd4lag4, fd6noint.
    spatial_methods: List[Feature]
    # time interpolation methods.
    # examples are none, pchip.
    temporal_methods: List[Feature]
    # the datasets contained in the database.
    datasets: List[Dataset]
    
"""
GL: these classes can be used server side.
they identify a config file describing the datasets and add some information relevant for server side actions.
"""
# base class of the storage descriptors.
# storageType is the discriminator of the union in DatasetStorageDescriptor.storageDescriptor.
class StorageDescriptor(BaseModel):
    # the base class itself only accepts 'TBD'; each subclass overrides the literal with its own value.
    storageType : Literal['TBD'] = 'TBD'
    
# storage descriptor selected by storageType 'LegacyDB'.
# NOTE(review): the class name looks like a typo of "LegacyDBStorage"; left as is because renaming would break references.
class LegacDBStorage(StorageDescriptor):
    storageType : Literal['LegacyDB'] = 'LegacyDB'
    # URL of the turbinfo database (required).
    turbinfoDatabaseURL: str
    
# storage descriptor selected by storageType 'ZARR'.
class CephZARRStorage(StorageDescriptor):
    storageType : Literal['ZARR'] = 'ZARR'
    # path of the parent directory on ceph (required).
    cephParentDirectoryPath: str

# storage descriptor selected by storageType 'FileDB'.
class FileDBStorage(StorageDescriptor):
    storageType : Literal['FileDB'] = 'FileDB'
    # NOTE(review): presumably the path of the pickled metadata ("MD") file of the filedb — confirm.
    filedbPickledMDFilePath: str

# associates a dataset name with the descriptor of where that dataset is stored.
class DatasetStorageDescriptor(BaseModel):
    # NOTE(review): presumably matches Dataset.name in the referenced config file — confirm.
    datasetName: str
    # required (the Field has no default); the concrete descriptor class is selected by the value of storageType.
    storageDescriptor: Union[LegacDBStorage, CephZARRStorage, FileDBStorage, StorageDescriptor] = Field(discriminator='storageType')

# root model of the server side config.
class JHTDBServerSide(BaseModel):
    # URL of the config file describing the datasets.
    jhtdbConfigFileURL: str
    # storage information, one entry per dataset.
    datasets: List[DatasetStorageDescriptor]

"""
client side config.
config files that add application specific metadata to the datasets.    
"""
# the four coordinate names accepted by CutoutLimit.coordinate and CoordinateValue.coordinate.
# mixing in str makes every member also a plain string equal to its value.
class CoordinateEnum(str, Enum):
    T = 't'
    X = 'x'
    Y = 'y'
    Z = 'z'

"""
cutout service config.
"""
# cutout limits for one coordinate; element type of DatasetCutout.coordinate_lims.
# NOTE(review): nothing here checks that lower <= upper or that the defaults lie within the limits.
class CutoutLimit(BaseModel):
    # the coordinate (t, x, y or z) these limits apply to.
    coordinate: CoordinateEnum
    # lower and upper limit for the coordinate.
    lower: float
    upper: float
    # NOTE(review): presumably the values pre-selected in the cutout service — confirm.
    default_lower: float
    default_upper: float
    
# cutout service settings for one dataset; element type of JHTDBCutout.datasets.
class DatasetCutout(BaseModel):
    # NOTE(review): presumably matches Dataset.name in the referenced config file — confirm.
    datasetName: str
    # NOTE(review): presumably codes of the variables offered for cutouts — confirm.
    cutout_variables: List[str]
    # cutout limits, one entry per coordinate.
    coordinate_lims: List[CutoutLimit]

# root model of the cutout service config; the jhtdb cutout schema file is generated from it.
class JHTDBCutout(BaseModel):
    # URL of the config file describing the datasets.
    jhtdbConfigFileURL: str
    # cutout settings, one entry per dataset.
    datasets: List[DatasetCutout]
        
"""      
point queries config.
"""   
# a value for one coordinate; element type of DatasetDefaults.default_coordinates.
class CoordinateValue(BaseModel):
    # the coordinate (t, x, y or z) the value belongs to.
    coordinate: CoordinateEnum
    value: str  # can contain 'pi'

# point query defaults for one dataset; element type of JHTDBPointQuery.datasets.
class DatasetDefaults(BaseModel):
    # NOTE(review): presumably matches Dataset.name in the referenced config file — confirm.
    datasetName: str
    # default value per coordinate.
    default_coordinates: List[CoordinateValue]
    
# root model of the point queries config; the jhtdb points schema file is generated from it.
class JHTDBPointQuery(BaseModel):
    # URL of the config file describing the datasets.
    jhtdbConfigFileURL: str
    # point query defaults, one entry per dataset.
    datasets: List[DatasetDefaults]
    
# generate the json schema files of the root models.
# every schema is written to the same directory, so the directory is stated once and the
# (model, file name) pairs are listed in a table instead of repeating the write logic per model.
schema_dir = '/home/idies/workspace/Storage/mschnau1/persistent/giverny/pydantic_json'
schema_targets = [
    # TurbulenceDB schema file.
    (TurbulenceDB, 'jhtdb-schema.json'),
    # JHTDB cutout schema file.
    (JHTDBCutout, 'jhtdb-cutout-schema.json'),
    # JHTDB points schema file.
    (JHTDBPointQuery, 'jhtdb-points-schema.json'),
]

for schema_model, schema_filename in schema_targets:
    # after the loop `schema` holds the JHTDBPointQuery schema, the same as before the refactoring.
    schema = schema_model.model_json_schema()

    # save to file. the encoding is explicit so the result does not depend on the platform default.
    with open(f'{schema_dir}/{schema_filename}', 'w', encoding = 'utf-8') as f:
        json.dump(schema, f, indent = 2)

In [2]:
import json

# load your config file.
# the encoding is explicit so non-ascii characters in the config do not depend on the platform default.
with open('/home/idies/workspace/Storage/mschnau1/persistent/giverny/pydantic_json/jhtdb-config.json', 'r', encoding = 'utf-8') as f:
    config_data = json.load(f)

# validate it.
# model_validate takes the parsed json value as is and reports every problem, including a top level
# value that is not an object, as a ValidationError. only that exception is caught, so unrelated
# errors are no longer reported as validation errors.
try:
    db = TurbulenceDB.model_validate(config_data)
except ValidationError as e:
    print(f"validation error: {e}")
else:
    print("config is valid!")

config is valid!
