In [44]:
from imageMapper import readFile, formatMetadata, extractImageMappings, extractImageData, headerMapping
from acquisitionMapper import xml_to_dict, extract_values, traverse_dict
import json

# Create Mapped Acquisition Metadata Dictionary

In [45]:
def extract_metadata_addressesAcq(json_file):
    """Split the "acquisition" section of a schema-map JSON file by address prefix.

    Args:
        json_file: Path to a JSON file whose top-level "acquisition" entry maps
            keys to address strings.

    Returns:
        A tuple ``(EM_metadata, Image_metadata)`` of dicts. The first holds the
        entries whose address starts with "EMProject", the second those whose
        address starts with "Images". Entries with any other address, or with
        a non-string value, are left out of both.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default encoding.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # `or {}` also covers a section that is present but null.
    acquisition_data = data.get('acquisition') or {}

    # Sort entries by the prefix of their *value* (the address), not their key.
    EM_metadata = {}
    Image_metadata = {}
    for key, value in acquisition_data.items():
        if not isinstance(value, str):
            # Nested objects, nulls and numbers are not addresses; calling
            # .startswith on them would raise AttributeError.
            continue
        if value.startswith('EMProject'):
            EM_metadata[key] = value
        elif value.startswith('Images'):
            Image_metadata[key] = value

    return EM_metadata, Image_metadata

import xml.etree.ElementTree as ET

def xml_to_dict(file_path):
    """Parse an XML file into nested dictionaries keyed by namespace-free tag names.

    A leaf element (one without child elements) becomes its text content.
    An element with children becomes a dict of child tag -> converted child;
    when a tag occurs more than once under the same parent, its values are
    collected into a list in document order. Attributes are not represented.

    Args:
        file_path: Path (or file object) accepted by ``ET.parse``.

    Returns:
        A single-entry dict ``{root_tag: converted_root}``.
    """
    def local_name(tag):
        # '{namespace-uri}Tag' -> 'Tag'; tags without a namespace pass through.
        return tag.split('}', 1)[1] if '}' in tag else tag

    def convert(node):
        children = list(node)
        if not children:
            return node.text

        mapping = {}
        for sub in children:
            name = local_name(sub.tag)
            converted = convert(sub)
            if name not in mapping:
                mapping[name] = converted
            elif type(mapping[name]) is list:
                # Third and later occurrences extend the existing list.
                mapping[name].append(converted)
            else:
                # Second occurrence: promote the single value to a list.
                mapping[name] = [mapping[name], converted]
        return mapping

    document_root = ET.parse(file_path).getroot()
    return {local_name(document_root.tag): convert(document_root)}

def traverse_dict(data, path):
    """Follow a dot-separated key path through nested containers.

    Args:
        data: The outermost container to start from.
        path: Keys joined by '.', e.g. ``'a.b.c'``.

    Returns:
        The value found at the end of the path, or None as soon as a step
        raises KeyError (missing key) or TypeError (value not subscriptable
        with that key).
    """
    node = data
    for part in path.split('.'):
        try:
            node = node[part]
        except (KeyError, TypeError):
            return None
    return node
    
def extract_values(addresses, data, dataset_num = 1):
    """Resolve dot-separated addresses against nested data.

    Args:
        addresses: Dict mapping output keys to dot-separated address strings.
        data: Nested dict to look the addresses up in (for example the result
            of ``xml_to_dict``).
        dataset_num: 1-based number of the entry to pick whenever an address
            passes through a level named "Dataset".

    Returns:
        Dict mapping each key of ``addresses`` to the value found at its address.

    Raises:
        KeyError: If an address level is missing from ``data``.
        IndexError: If a "Dataset" level is traversed and ``dataset_num`` is
            smaller than 1 or larger than the number of datasets available.
    """
    result = {}
    for key, address in addresses.items():
        current_data = data
        for level in address.split('.'):
            current_data = current_data[level]
            if level != 'Dataset':
                continue

            # The emxml holds several "Dataset" elements, so this level is
            # normally a list and the caller has to say which one is wanted
            # (1 is SEM Image, 2 is SEM Image 2, 3 is the one we are not
            # interested in).
            # Still needed: check this against image folder name
            #
            # A file with a single "Dataset" element yields the element itself
            # rather than a one-item list; wrap it so both shapes index alike.
            datasets = current_data if isinstance(current_data, list) else [current_data]
            if dataset_num < 1:
                # Without this check, 0 or a negative number would silently
                # select a dataset counted from the end of the list.
                raise IndexError(
                    f"dataset_num must be >= 1 (1-based), got {dataset_num}"
                )
            # Subtract 1 because list indexing begins at zero.
            current_data = datasets[dataset_num - 1]
        result[key] = current_data
    return result


# Driver for the acquisition metadata: resolves the "acquisition" addresses of
# the schema map against the EMproject XML and the header of one slice image,
# then merges both results into `acquisitionMetadata`.
# NOTE(review): the names assigned here are module-level globals; later cells
# read `imgFile` and `acquisitionMetadata`, so this cell has to run first.
if __name__ == "__main__":
    # Define relevant files
    # NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
    emxml_file = '/Users/elias/Desktop/NFDI Tomographiedaten/20200818_AlSi13 XRM tomo2/EMproject.emxml'
    json_file = '/Users/elias/Desktop/PP13_Mapping/pp13-mapper/schemas/sem_fib_nested_schema_map.json'
    imgFile = '/Users/elias/Desktop/NFDI Tomographiedaten/20200818_AlSi13 XRM tomo2/Images/SEM Image/SEM Image - SliceImage - 001.tif'
    # NOTE(review): output_path is assigned but not used by any code shown in this notebook.
    output_path = '/Users/elias/Desktop/PP13_Mapping/pp13-mapper/results/acquisitionMetadata.json'
    dataset_num = 1 # 1 or 2 (1-based; extract_values subtracts 1 before indexing)

    # Split the "acquisition" addresses into EMProject (XML) and Images (image header) groups.
    emMetadata, imgMetadata = extract_metadata_addressesAcq(json_file)
    # Resolve the EMProject addresses against the parsed XML.
    emData = xml_to_dict(emxml_file)
    mappedEMMetadata = extract_values(emMetadata, emData, dataset_num)
    # Resolve the Images addresses via the helpers imported from imageMapper.
    # Presumably these read and parse the image header; their behaviour is not
    # visible in this notebook (TODO confirm).
    image_data = readFile(imgFile)
    formatted_metadata = formatMetadata(image_data)
    image_metadata = extractImageData(formatted_metadata, imgMetadata)
    mappedImgMetadata = headerMapping(image_metadata, imgMetadata)

    # Merge the two metadata dictionaries
    # On duplicate keys the image-derived value wins because it is unpacked last.
    acquisitionMetadata = {**mappedEMMetadata, **mappedImgMetadata}

In [46]:
# Display the merged acquisition metadata built in the previous cell.
acquisitionMetadata

{'acquisition.genericMetadata.program.programName': 'Auto Slice & View 4',
 'acquisition.genericMetadata.program.programVersion': '4.2.1.1982',
 'acquisition.genericMetadata.applicationId.identifierValue': 'ASV',
 'acquisition.genericMetadata.fileVersion': '1.2',
 'acquisition.genericMetadata.projectName': '20200818_AlSi13 XRM tomo2',
 'acquisition.genericMetadata.zCutSpacing.value': '2.0000000000000002E-07',
 'acquisition.genericMetadata.numberOfCuts': '719',
 'acquisition.genericMetadata.pump': 'TMP',
 'acquisition.genericMetadata.column': 'Elstar',
 'acquisition.genericMetadata.source': 'FEG',
 'acquisition.genericMetadata.eucentricWorkingDistance.value': '0.004',
 'acquisition.genericMetadata.ESEM': 'no',
 'acquisition.genericMetadata.systemType': 'Helios G4 PFIB CXe',
 'acquisition.genericMetadata.stage': '110 x 110'}

# Create Mapped Dataset Metadata Dictionary

In [47]:
def extract_metadata_addresses(json_file, section='dataset'):
    """Split one section of a schema-map JSON file by address prefix.

    Args:
        json_file: Path to a JSON file whose top-level entries map section
            names to ``{key: address string}`` dicts.
        section: Name of the top-level section to read. Defaults to
            "dataset", which is what this function has always read.

    Returns:
        A tuple ``(EM_metadata, Image_metadata)`` of dicts. The first holds the
        entries whose address starts with "EMProject", the second those whose
        address starts with "Images". Entries with any other address, or with
        a non-string value, are left out of both.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default encoding.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # `or {}` also covers a section that is present but null.
    section_data = data.get(section) or {}

    # Sort entries by the prefix of their *value* (the address), not their key.
    EM_metadata = {}
    Image_metadata = {}
    for key, value in section_data.items():
        if not isinstance(value, str):
            # Nested objects, nulls and numbers are not addresses; calling
            # .startswith on them would raise AttributeError.
            continue
        if value.startswith('EMProject'):
            EM_metadata[key] = value
        elif value.startswith('Images'):
            Image_metadata[key] = value

    return EM_metadata, Image_metadata

# Driver for the dataset metadata: same pipeline as the acquisition cell, but
# using the "dataset" addresses of the schema map; the result is `datasetMetadata`.
# NOTE(review): this rebinds module-level names already set by the acquisition
# cell (emxml_file, json_file, imgFile, output_path, dataset_num, imgMetadata,
# emData, mappedEMMetadata, mappedImgMetadata).
if __name__ == "__main__":

    # NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
    emxml_file = '/Users/elias/Desktop/NFDI Tomographiedaten/20200818_AlSi13 XRM tomo2/EMproject.emxml'
    json_file = '/Users/elias/Desktop/PP13_Mapping/pp13-mapper/schemas/sem_fib_nested_schema_map.json'
    imgFile = '/Users/elias/Desktop/NFDI Tomographiedaten/20200818_AlSi13 XRM tomo2/Images/SEM Image/SEM Image - SliceImage - 001.tif'
    # NOTE(review): output_path is assigned but not used by any code shown in this notebook.
    output_path = '/Users/elias/Desktop/PP13_Mapping/pp13-mapper/results/datasetMetadata.json'
    # 1-based dataset selector passed to extract_values.
    dataset_num = 1

    # Split the "dataset" addresses into EMProject (XML) and Images (image header) groups.
    xmlMetadata, imgMetadata = extract_metadata_addresses(json_file)
    # Resolve the EMProject addresses against the parsed XML.
    emData = xml_to_dict(emxml_file)
    mappedEMMetadata = extract_values(xmlMetadata, emData, dataset_num)
    # Resolve the Images addresses via the helpers imported from imageMapper.
    # Presumably these read and parse the image header; their behaviour is not
    # visible in this notebook (TODO confirm).
    imageData = readFile(imgFile)
    formattedMetadata = formatMetadata(imageData)
    imageMetadata = extractImageData(formattedMetadata, imgMetadata)
    mappedImgMetadata = headerMapping(imageMetadata, imgMetadata)

    # On duplicate keys the image-derived value wins because it is unpacked last.
    datasetMetadata = {**mappedEMMetadata, **mappedImgMetadata}

In [48]:
# Display the merged dataset metadata built in the previous cell.
datasetMetadata

{'acquisition.dataset.entry.rows': '1',
 'acquisition.dataset.entry.columns': '1',
 'acquisition.dataset.entry.tileColumn': '0',
 'acquisition.dataset.entry.user.userName': 'user',
 'acquisition.dataset.entry.program.programName': '14.5.1.432',
 'acquisition.dataset.entry.instrument.beamType': 'EBeam',
 'acquisition.dataset.entry.instrument.spot': '1',
 'acquisition.dataset.entry.instrument.eBeam.accelerationVoltage.value': '15000',
 'acquisition.dataset.entry.instrument.eBeam.beamCurrent.value': '1.6e-009',
 'acquisition.dataset.entry.instrument.eBeam.scanRotation.value': '0',
 'acquisition.dataset.entry.instrument.eBeam.imageMode.value': 'Normal',
 'acquisition.dataset.entry.instrument.eBeam.apertureSetting.size.value': '4.53e-005',
 'acquisition.dataset.entry.instrument.eBeam.horizontalFieldWidth.value': '0.000592',
 'acquisition.dataset.entry.instrument.eBeam.verticalFieldWidth.value': '0.000394667',
 'acquisition.dataset.entry.instrument.eBeam.tiltCorrectionIsOn': 'no',
 'acquisit

# Create Mapped Image Metadata Dictionary

In [49]:
# Map the header metadata of a single image using the image mappings of the schema map.
# NOTE(review): this is the same path as `json_file` in the earlier cells.
map_file = "/Users/elias/Desktop/PP13_Mapping/pp13-mapper/schemas/sem_fib_nested_schema_map.json"
    
# NOTE(review): `imgFile` is not defined in this cell; it is a global left over
# from the acquisition/dataset cells, so those have to run first.
# The names image_data, formatted_metadata and image_metadata were already bound
# by the acquisition cell and are rebound here.
image_data = readFile(imgFile)
image_mappings = extractImageMappings(map_file)
formatted_metadata = formatMetadata(image_data)
image_metadata = extractImageData(formatted_metadata, image_mappings)
mapped_metadata = headerMapping(image_metadata, image_mappings)

In [50]:
# Display the mapped image metadata built in the previous cell.
mapped_metadata

{'acquisition.dataset.entry.images.entry.creationTime': '18.08.2020 13:40:03',
 'acquisition.dataset.entry.images.entry.stage.workingDistance.value': '0.00402349',
 'acquisition.dataset.entry.images.entry.stage.stageX.value': '0.000225271',
 'acquisition.dataset.entry.images.entry.stage.stageY.value': '-0.00467317',
 'acquisition.dataset.entry.images.entry.stage.stageZ.value': '0.00402333',
 'acquisition.dataset.entry.images.entry.stage.stageR.value': '0.648119',
 'acquisition.dataset.entry.images.entry.stage.stageTa.value': '0.336851',
 'acquisition.dataset.entry.images.entry.stage.stageTb.value': '0',
 'acquisition.dataset.entry.images.entry.stage.specTilt.value': '',
 'acquisition.dataset.entry.images.entry.stage.activeStage': 'Bulk',
 'acquisition.dataset.entry.images.entry.vacuum.chamberPressure.value': '0.00012',
 'acquisition.dataset.entry.images.entry.vacuum.gas': '',
 'acquisition.dataset.entry.images.entry.vacuum.userMode': 'High vacuum',
 'acquisition.dataset.entry.images.en

# Combine all metadata

In [56]:
# Merge the three flat, dotted-key dictionaries into one. On duplicate keys the
# right-most dictionary wins: datasetMetadata over acquisitionMetadata over
# mapped_metadata.
combinedMetadata = {**mapped_metadata, **acquisitionMetadata, **datasetMetadata}

# Write to JSON: a function that nests the combined metadata so the output matches the schema structure

In [57]:
import json

def writeMetadataToJson(metadata, output_path):
    """Nest dotted-key metadata into a JSON document and write it to a file.

    Each key is split on '.', and every part except the last becomes a nested
    object; for example ``{'a.b.c': 1}`` is written as ``{"a": {"b": {"c": 1}}}``.

    Args:
        metadata: Dict mapping dot-separated key paths to JSON-serialisable values.
        output_path: Path of the JSON file to create or overwrite.

    Raises:
        ValueError: If two keys conflict, i.e. one key's path runs through a
            position where another key has stored a value (such as 'a.b'
            together with 'a.b.c'). Nothing is written in that case.
    """
    json_data = {}

    for key, value in metadata.items():
        *parents, leaf = key.split('.')
        node = json_data
        for part in parents:
            child = node.setdefault(part, {})
            if not isinstance(child, dict):
                # A shorter key already stored a plain value here; descending
                # into it would fail with an unrelated AttributeError/TypeError.
                raise ValueError(
                    f"Cannot nest {key!r}: {part!r} already holds a value, not an object"
                )
            node = child
        if isinstance(node.get(leaf), dict):
            # A longer key already built a subtree here; assigning would
            # silently discard it.
            raise ValueError(
                f"Cannot set {key!r}: it would overwrite nested entries stored under it"
            )
        node[leaf] = value

    # Explicit encoding so the output does not depend on the platform default.
    with open(output_path, 'w', encoding='utf-8') as file:
        json.dump(json_data, file, indent=4)

    print("JSON document created successfully at", output_path)

In [58]:
# Write the nested document to 'combined.json'; the relative path is resolved
# against the kernel's current working directory.
writeMetadataToJson(combinedMetadata, 'combined.json')

JSON document created successfully at combined.json
