In [300]:
import xml.etree.ElementTree as ET
import json

def extract_metadata(emxml_file, json_file):
    # Load JSON file
    with open(json_file, 'r') as f:
        metadata_mapping = json.load(f)

    # Load XML file
    tree = ET.parse(emxml_file)
    root = tree.getroot()

    metadata = {}

    # Extract acquisition metadata
    acquisition_metadata = metadata_mapping.get('acquisition')
    for key, value in acquisition_metadata.items():
        node = root.find(value)
        if node is not None:
            metadata[key] = node.text

    # Extract dataset metadata
    dataset_metadata = metadata_mapping.get('dataset')
    for key, value in dataset_metadata.items():
        node = root.find(value)
        if node is not None:
            metadata[key] = node.text

    # Extract image metadata
    image_metadata = metadata_mapping.get('image')
    for key, value in image_metadata.items():
        node = root.find(value)
        if node is not None:
            metadata[key] = node.text

    return metadata

# Usage example
json_file = '/Users/elias/Desktop/PP13_Mapping/pp13-mapper/schemas/sem_fib_nested_schema_map.json'
emxml_file = '/Users/elias/Desktop/NFDI Tomographiedaten/20200818_AlSi13 XRM tomo2/EMproject.emxml'
metadata = extract_metadata(emxml_file, json_file)
print(metadata)


{}


In [301]:
import json

def extract_EM_metadata(json_file):
    # Read the JSON file
    with open(json_file, 'r') as file:
        data = json.load(file)

    # Extract the "acquisition" information
    acquisition_data = data.get('acquisition', {})

    # Store key-value pairs starting with "EMproject"
    EM_metadata = {}
    for key, value in acquisition_data.items():
        if value.startswith('EMProject'):
            EM_metadata[key] = value

    return EM_metadata

emMetadata = extract_EM_metadata(json_file)

print(type(emMetadata))


# Print each key-value pair in a new line
for key, value in emMetadata.items():
    print(key + ": " + value)

<class 'dict'>
acquisition.genericMetadata.program.programName: EMProject.ApplicationName
acquisition.genericMetadata.program.programVersion: EMProject.ApplicationVersion
acquisition.genericMetadata.applicationId.identifierValue: EMProject.ApplicationId
acquisition.genericMetadata.fileVersion: EMProject.FileVersion
acquisition.genericMetadata.projectName: EMProject.ProjectName
acquisition.genericMetadata.zCutSpacing.value: EMProject.ZCutSpacing
acquisition.genericMetadata.numberOfCuts: EMProject.Datasets.Dataset.NumberOfCuts


In [302]:
import xml.etree.ElementTree as ET

def xml_to_dict(file_path):
    def parse_element(element):
        result = {}
        if len(element) == 0:
            return element.text
        for child in element:
            child_data = parse_element(child)
            if '}' in child.tag:
                child_tag = child.tag.split('}', 1)[1]  # Remove the namespace
            else:
                child_tag = child.tag
            if child_tag in result:
                if type(result[child_tag]) is list:
                    result[child_tag].append(child_data)
                else:
                    result[child_tag] = [result[child_tag], child_data]
            else:
                result[child_tag] = child_data
        return result

    tree = ET.parse(file_path)
    root = tree.getroot()
    root_tag = root.tag
    if '}' in root_tag:
        root_tag = root_tag.split('}', 1)[1]  # Remove the namespace from the root tag
    return {root_tag: parse_element(root)}


In [303]:
emData = xml_to_dict(emxml_file)
emData

{'EMProject': {'ApplicationName': 'Auto Slice & View 4',
  'ApplicationVersion': '4.2.1.1982',
  'ApplicationId': 'ASV',
  'FileVersion': '1.2',
  'ProjectName': '20200818_AlSi13 XRM tomo2',
  'ViewerRotation': '0',
  'ZCutSpacing': '2.0000000000000002E-07',
  'Datasets': {'Dataset': [{'Id': '1',
     'Name': 'SEM Image',
     'Is2d': 'false',
     'BoxCenter': {'CenterX': '0',
      'CenterY': '0',
      'CenterZ': '7.1867700987306052E-05'},
     'BoxSize': {'SizeX': '0.00017999999999999998',
      'SizeY': '8.4999999999999993E-05',
      'SizeZ': '0.0001437354019746121'},
     'RotationAngle': '0',
     'Rows': '1',
     'Columns': '1',
     'OverlapX': '0',
     'OverlapY': '0',
     'Status': 'Finished',
     'NumberOfCuts': '719',
     'EnergyLevels': '1',
     'LiveAcquisition': {'ImagesPath': 'D:\\ASV\\20200818_AlSi13 XRM tomo2\\Images\\SEM Image',
      'TileColumn': '0',
      'TileRow': '0',
      'DisplayedEnergyLevel': '1'}},
    {'Id': '2',
     'Name': 'SEM Image 2',
    

In [304]:
# emData['EMProject']['Datasets']['Dataset']
#EMproject.DataSets.Dataset.NumberOfCuts

In [305]:
def traverse_dict(data, path):
    keys = path.split('.')
    result = data
    try:
        for key in keys:
            result = result[key]
        return result
    except (KeyError, TypeError):
        return None

In [306]:
traverse_dict(emData, 'EMProject.ApplicationName')

'Auto Slice & View 4'

In [307]:
# def extract_values(addresses, nested_dict):
#     result = {}
#     for key, address in addresses.items():
#         value = traverse_dict(nested_dict, address)
#         print(f"Address: {address}, Value: {value}")
#         result[address] = value
#     return result


In [308]:
extract_values(emMetadata, emData)

Address: EMProject.ApplicationName, Value: Auto Slice & View 4
Address: EMProject.ApplicationVersion, Value: 4.2.1.1982
Address: EMProject.ApplicationId, Value: ASV
Address: EMProject.FileVersion, Value: 1.2
Address: EMProject.ProjectName, Value: 20200818_AlSi13 XRM tomo2
Address: EMProject.ZCutSpacing, Value: 2.0000000000000002E-07
Address: EMProject.Datasets.Dataset.NumberOfCuts, Value: None


{'EMProject.ApplicationName': 'Auto Slice & View 4',
 'EMProject.ApplicationVersion': '4.2.1.1982',
 'EMProject.ApplicationId': 'ASV',
 'EMProject.FileVersion': '1.2',
 'EMProject.ProjectName': '20200818_AlSi13 XRM tomo2',
 'EMProject.ZCutSpacing': '2.0000000000000002E-07',
 'EMProject.Datasets.Dataset.NumberOfCuts': None}