# Generate NeXus File using NXem structure
This notebook shows how to generate a `.nxs` file using a structure tailored for NOMAD-compatible NXem entries.
It includes:
- User and sample metadata
- Instrument and detector information
- A single SEM measurement event containing multiple images, each stored as plottable NXdata


In [1]:
import h5py
import json
import numpy as np
import tifffile as tf

In [2]:
# Load JSON and image
def load_json(path):
    """Parse the JSON file at ``path`` and return the decoded object.

    The file is opened explicitly as UTF-8 (the encoding JSON is specified
    in) instead of the platform's locale encoding, so non-ASCII metadata
    decodes the same way on every operating system.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the JSON file.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the file content.
    """
    with open(path, 'r', encoding="utf-8") as f:
        return json.load(f)

def load_image_array(image_path):
    """Read the image file at ``image_path`` with tifffile.

    Returns whatever ``tf.imread`` produces for that file.
    """
    pixels = tf.imread(image_path)
    return pixels

In [None]:
# === Define file paths ===
# Placeholders: set these to the experiment-level JSON to read and the
# NeXus (.nxs) file to write.
experiment_json = "path\\to\\sample_name.json"
output_file = "path\\to\\file_name.nxs"

# === Define directories ===
# images_dir is searched for *.tif images; json_dir is expected to hold one
# "<image stem>.json" parameter file per image.
output_dir = "path\\to\\NeXus_files_folder"
images_dir = "path\\to\\images_folder"
json_dir = "path\\to\\json_folder"

# === Load data ===
# tifffile is already imported as `tf` in the imports cell at the top of the
# notebook, so it is not imported again here.
meta = load_json(experiment_json)
experiment_meta = meta["experiment"]
sample_meta = meta["sample"]

In [7]:
from pathlib import Path

# Turn the configured directory strings into Path objects so that the
# globbing and path joining below work.
images_dir = Path(images_dir)
json_dir = Path(json_dir)

if images_dir.exists() and json_dir.exists():
    with h5py.File(output_file, "w") as f:
        # --- Entry: root group, application definition and time stamps ---
        entry = f.create_group("entry")
        entry.attrs["NX_class"] = "NXentry"
        entry.attrs["default"] = "/entry/measurement/events/event_1/image_1/image_2d"
        entry.create_dataset("definition", data="NXem")
        entry["definition"].attrs["version"] = "v2024.02"
        entry["definition"].attrs["URL"] = "https://github.com/FAIRmat-NFDI/nexus_definitions/blob/a85e10cd0289f4e44b0fec011ff54703e6705383/contributed_definitions/NXem.nxdl.xml"

        entry.create_dataset("experiment_identifier", data=experiment_meta["experiment_id"])
        # The same date/time strings are written on the entry and on the event.
        start_timestamp = experiment_meta["date"] + "T" + experiment_meta["start_time"]
        end_timestamp = experiment_meta["date"] + "T" + experiment_meta["end_time"]
        entry.create_dataset("start_time", data=start_timestamp)
        entry.create_dataset("end_time", data=end_timestamp)

        # --- User (operator) metadata ---
        operator_meta = experiment_meta["operator"]
        user = entry.create_group("user")
        user.attrs["NX_class"] = "NXuser"
        user.create_dataset("name", data=operator_meta["name"])
        user.create_dataset("affiliation", data=operator_meta["affiliation"])
        user.create_dataset("address", data=operator_meta["address"])
        user.create_dataset("email", data=operator_meta["email"])

        # --- Sample metadata ---
        sample_info = meta["sample"]
        sample = entry.create_group("sample")
        sample.attrs["NX_class"] = "NXsample"
        sample.create_dataset("is_simulation", data=False)
        sample.create_dataset("physical_form", data="powder")
        sample.create_dataset("identifier_sample", data=sample_info["identifier"])
        sample.create_dataset("preparation_date", data=sample_info["date"])
        sample.create_dataset("name", data=sample_info["name"])
        sample.create_dataset("atom_types", data=sample_info["atom_types"])

        # TODO: how to fill this group needs further investigation. For now it
        # defaults to a right-handed cartesian system with its origin at the sample.
        # If the group is changed to "recommended", the NXcoordinate_system
        # will not be included in the output file.
        coord = entry.create_group("coordinate_system")
        coord.attrs["NX_class"] = "NXcoordinate_system"
        coord.create_dataset("type", data="cartesian")
        coord.create_dataset("handedness", data="right_handed")
        coord.create_dataset("origin", data="sample")

        # --- Measurement: static instrument description ---
        meas = entry.create_group("measurement")
        meas.attrs["NX_class"] = "NXobject"
        instrument_meta = experiment_meta["instrument"]
        instr = meas.create_group("instrument")
        instr.attrs["NX_class"] = "NXinstrument_em"
        instr.create_dataset("name", data=instrument_meta["name"])
        instr.create_dataset("type", data="sem")
        instr.create_dataset("location", data="CNR-IMM Catania")

        fabr = instr.create_group("fabrication")
        fabr.attrs["NX_class"] = "NXfabrication"
        fabr.create_dataset("model", data=instrument_meta["model"])
        fabr.create_dataset("vendor", data=instrument_meta["vendor"])
        fabr.create_dataset("manufacturer", data=instrument_meta["manufacturer"])

        det = instr.create_group("detector")
        det.attrs["NX_class"] = "NXdetector"
        det.create_dataset("type", data="InLens")

        ebeam = instr.create_group("ebeam_column")
        ebeam.attrs["NX_class"] = "NXebeam_column"
        source_meta = instrument_meta["source"]
        source = ebeam.create_group("electron_source")
        source.attrs["NX_class"] = "NXsource"
        source.create_dataset("emitter_type", data=source_meta["emitter_type"])
        source.create_dataset("emitter_material", data=source_meta["emitter_material"])
        source.create_dataset("probe", data="electron")

        software = instr.create_group("control_software_smartsem")
        software.attrs["NX_class"] = "NXprogram"
        software.create_dataset("program", data="Smart SEM")
        software.create_dataset("version", data="v0.5.02.05 11-july-2007")

        events = meas.create_group("events")
        events.attrs["NX_class"] = "NXobject"

        # Single event_data_em group
        event = events.create_group("event_1")
        event.attrs["NX_class"] = "NXevent_data_em"
        event.create_dataset("identifier_sample", data=sample_info["identifier"])
        event.create_dataset("start_time", data=start_timestamp)
        event.create_dataset("end_time", data=end_timestamp)

        # Multiple NXimage groups inside this one event: one per *.tif file,
        # numbered from 1 in sorted file-name order.
        for idx, tif_path in enumerate(sorted(images_dir.glob("*.tif"))):
            img_name = tif_path.stem
            img_data = load_image_array(tif_path)
            # Per-image acquisition parameters live in "<image stem>.json".
            # Reading through load_json closes the file handle; a missing file
            # yields an empty dict so every parameter falls back to 0.0 below.
            json_path = json_dir / f"{img_name}.json"
            param_meta = load_json(json_path) if json_path.exists() else {}
            cz_sem = param_meta.get("CZ_SEM", {})

            image = event.create_group(f"image_{idx+1}")
            image.attrs["NX_class"] = "NXimage"
            image_2d = image.create_group("image_2d")
            image_2d.attrs["NX_class"] = "NXdata"
            image_2d.create_dataset("title", data=img_name)
            image_2d.create_dataset("real", data=img_data)
            image_2d["real"].attrs["long_name"] = "SEM Image Intensity"
            # Pixel-index axes for the first two dimensions of the image array.
            image_2d.create_dataset("axis_i", data=np.arange(img_data.shape[0]))
            image_2d.create_dataset("axis_j", data=np.arange(img_data.shape[1]))
            image_2d.attrs["signal"] = "real"
            image_2d.attrs["axes"] = ["axis_i", "axis_j"]
            image_2d.attrs["axis_i_indices"] = 0
            image_2d.attrs["axis_j_indices"] = 1

            # NOTE(review): values are written exactly as read from the JSON;
            # confirm they are already expressed in the units declared below.
            optical = image.create_group("optical_system_em")
            optical.attrs["NX_class"] = "NXoptical_system_em"
            optical.create_dataset("magnification", data=float(cz_sem.get("Magnification", 0.0)))
            optical.create_dataset("working_distance", data=float(cz_sem.get("Working Distance", 0.0)))
            optical["working_distance"].attrs["units"] = "m"
            optical.create_dataset("probe_current", data=float(cz_sem.get("Probe Current", 0.0)))
            optical["probe_current"].attrs["units"] = "A"

            param = image.create_group("instrument")
            param.attrs["NX_class"] = "NXinstrument_em"
            column = param.create_group("ebeam_column")
            column.attrs["NX_class"] = "NXebeam_column"
            column.create_dataset("operation_mode", data="Linear Scan")

            elec = column.create_group("electron_source")
            elec.attrs["NX_class"] = "NXsource"
            elec.create_dataset("voltage", data=float(cz_sem.get("EHT (Electron High Tension)", 0.0)))
            elec["voltage"].attrs["units"] = "V"
            elec.create_dataset("filament_current", data=float(cz_sem.get("Filament Current", 0.0)))
            elec["filament_current"].attrs["units"] = "A"
else:
    # Make the skipped write visible: without this the cell would still
    # display output_file below even though nothing was written.
    print(f"No NeXus file written: images_dir ({images_dir}) and/or json_dir ({json_dir}) does not exist.")

output_file


'C:\\Users\\lidon\\Desktop\\SEM Experiments\\MDMC NFFA-DI\\IMM-SEM-20250430-Test\\Test.nxs'

Visualizing the tree structure for the generated NeXus file

In [8]:
# Import only the loader that is used instead of a wildcard import, so the
# notebook namespace is not flooded with every public nexusformat name.
from nexusformat.nexus import nxload

# Re-open the generated file and print its group/field hierarchy.
test = nxload(output_file)
print(test.tree)

root:NXroot
  entry:NXentry
    @default = '/entry/measurement/events/event_1/image_1/image_2d'
    coordinate_system:NXcoordinate_system
      handedness = 'right_handed'
      origin = 'sample'
      type = 'cartesian'
    definition = 'NXem'
      @URL = 'https://github.com/FAIRmat-NFDI/nexus_definiti...'
      @version = 'v2024.02'
    end_time = '2025-04-30T15:55:00'
    experiment_identifier = 'IMM-SEM-20250430'
    measurement
    sample:NXsample
      atom_types = 'H, O, C'
      identifier_sample = 'IMM-SEM-TEST-20250430'
      is_simulation = False
      name = 'Test'
      physical_form = 'powder'
      preparation_date = '2025-04-30'
    start_time = '2025-04-30T14:55:00'
    user:NXuser
      address = 'Address'
      affiliation = 'Affiliation'
      email = 'name@email.it'
      name = 'Name Surname'
