In [37]:
from PIL import Image
from PIL.ExifTags import TAGS
import pandas as pd
import os
import re
import matplotlib.pyplot as plt
import numpy as np

### Reader

In [38]:
def readFile(file_path):
    """Return the raw value of TIFF tag 34682 from an image file.

    For the sample image used in this notebook, that tag holds an INI-style
    text block (``[Section]`` headers followed by ``key=value`` lines).

    Parameters
    ----------
    file_path : str or path-like
        Path of the image to open with PIL.

    Returns
    -------
    The value stored under tag 34682, or ``None`` when the image has no EXIF
    data or does not carry that tag.
    """
    metadata_tag_id = 34682

    # The context manager closes the underlying file handle; this matters when
    # the function is called once per image over a whole folder.
    with Image.open(file_path) as image:
        exif = image.getexif()
        if exif is None:
            return None

        # Look the tag up by its numeric id. Returning None for a missing tag
        # (instead of hitting an unbound local) keeps the "no metadata" result
        # consistent with the guard above.
        return exif.get(metadata_tag_id)

In [39]:
# Smoke test of readFile on a single image.
# NOTE(review): the paths below are absolute and machine-specific; adjust them
# before running this cell elsewhere.
# Alternative sample image:
# testImg = '/Users/elias/Desktop/images_to_try/SEM_image_sample_Thermo_Fisher_Helios_G4_PFIB_CXe.tif'
testImg = '/Users/elias/Desktop/NFDI Tomographiedaten/20200818_AlSi13 XRM tomo2/Images/SEM Image/SEM Image - SliceImage - 700.tif'
# Alternative: read the first slice of the same dataset instead.
# mdValueIni = readFile("/Users/elias/Desktop/NFDI Tomographiedaten/20200818_AlSi13 XRM tomo2/Images/SEM Image/SEM Image - SliceImage - 001.tif")
# Raw metadata block of the test image, as returned by readFile.
mdValueIni = readFile(testImg)
# NOTE(review): not referenced by any other cell visible here — possibly a leftover.
fileMetadataList = []

# Show the raw metadata text for inspection.
print(mdValueIni)

[User]
Date=08/20/2020
Time=12:57:26 AM
User=user
UserText=
UserTextUnicode=

[System]
Type=DualBeam
Dnumber=9952707
Software=14.5.1.432
BuildNr=432
Source=FEG
Column=Elstar
FinalLens=Elstar
Chamber=xT-SDB
Stage=110 x 110
Pump=TMP
ESEM=no
Aperture=AVA
Scan=PIA 3.0
Acq=PIA 3.0
EucWD=0.004
SystemType=Helios G4 PFIB CXe
DisplayWidth=0.518
DisplayHeight=0.324

[Beam]
HV=15000
Spot=1
StigmatorX=0.0153243
StigmatorY=0.00747505
BeamShiftX=-1.26504e-009
BeamShiftY=7.41047e-006
ScanRotation=0
ImageMode=Normal
FineStageBias=
Beam=EBeam
Scan=EScan

[EBeam]
Source=FEG
ColumnType=Elstar
FinalLens=Elstar
Acq=PIA 3.0
Aperture=AVA
ApertureDiameter=4.53e-005
HV=15000
HFW=0.000592
VFW=0.000394667
WD=0.0041341
BeamCurrent=1.6e-009
TiltCorrectionIsOn=no
DynamicFocusIsOn=no
DynamicWDIsOn=
ScanRotation=0
LensMode=Field-Free
LensModeA=
ATubeVoltage=
UseCase=
SemOpticalMode=
ImageMode=Normal
SourceTiltX=-0.000607639
SourceTiltY=0.00222222
StageX=

# Mapper (UNDER DEVELOPMENT)

Specs and data preparation ([details](https://github.com/kit-data-manager/Metadata-Schemas-for-Materials-Science/tree/main/SEM-FIB%20Tomography)):

* Acquisition metadata $\rightarrow$ get from .emxml file

* Dataset metadata $\rightarrow$ some metadata is common to every image: identify which fields these are and extract them only from the first image. Open question: should they be checked against a few other images to ensure they really are the same?

* Image metadata $\rightarrow$ get from each image, just as it was for Zeiss


Therefore, the mapper should produce `n+2` JSON metadata documents (one acquisition document, one dataset document, and one document per image), where `n` is the number of images in the dataset.

In [40]:
def formatMetadata(metadata):
    """Turn an INI-style metadata string into a flat dict with dotted keys.

    The text is read line by line. A line of the form ``[Header]`` sets the
    current section; a line containing ``=`` is split at the first ``=`` into
    key and value and stored as::

        'Images.SEM Image.SliceImage.<Header>.<key>' -> value

    Only the last dot-separated segment of ``<key>`` is lower-cased; values
    are whitespace-stripped and kept as strings. Lines that are neither a
    header nor a ``key=value`` pair are ignored. Entries that appear before
    any header are stored under the literal section name ``None``.

    Parameters
    ----------
    metadata : str or None
        Metadata text as returned by ``readFile``. ``None`` or an empty string
        yields an empty dict instead of raising.

    Returns
    -------
    dict
        Mapping of formatted key to string value, in order of appearance.
    """
    metadata_dict = {}

    # readFile returns None for images without metadata; treat that (and an
    # empty string) as "nothing to parse" rather than failing on .strip().
    if not metadata:
        return metadata_dict

    current_header = None

    for raw_line in metadata.strip().split('\n'):
        line = raw_line.strip()  # also drops a trailing '\r' from CRLF input

        if line.startswith('[') and line.endswith(']'):
            current_header = line[1:-1]
            continue

        if '=' not in line:
            continue

        # Split at the first '=' only, so values may themselves contain '='.
        key, value = line.split('=', 1)

        # Lower-case just the final dotted segment of the key.
        prefix, dot, last_variable = key.rpartition('.')
        formatted_key = (
            f'Images.SEM Image.SliceImage.{current_header}.'
            + prefix + dot + last_variable.lower()
        )
        metadata_dict[formatted_key] = value.strip()

    return metadata_dict



# Parse the raw metadata text of the test image into a flat dict with dotted
# keys and show the result for inspection.
image_data = formatMetadata(mdValueIni)
print(image_data)

{'Images.SEM Image.SliceImage.User.date': '08/20/2020', 'Images.SEM Image.SliceImage.User.time': '12:57:26 AM', 'Images.SEM Image.SliceImage.User.user': 'user', 'Images.SEM Image.SliceImage.User.usertext': '', 'Images.SEM Image.SliceImage.User.usertextunicode': '', 'Images.SEM Image.SliceImage.System.type': 'DualBeam', 'Images.SEM Image.SliceImage.System.dnumber': '9952707', 'Images.SEM Image.SliceImage.System.software': '14.5.1.432', 'Images.SEM Image.SliceImage.System.buildnr': '432', 'Images.SEM Image.SliceImage.System.source': 'FEG', 'Images.SEM Image.SliceImage.System.column': 'Elstar', 'Images.SEM Image.SliceImage.System.finallens': 'Elstar', 'Images.SEM Image.SliceImage.System.chamber': 'xT-SDB', 'Images.SEM Image.SliceImage.System.stage': '110 x 110', 'Images.SEM Image.SliceImage.System.pump': 'TMP', 'Images.SEM Image.SliceImage.System.esem': 'no', 'Images.SEM Image.SliceImage.System.aperture': 'AVA', 'Images.SEM Image.SliceImage.System.scan': 'PIA 3.0', 'Images.SEM Image.Slice

In [41]:
import json

def extractImageMappings(json_file):
    """Load the ``"image"`` section of a JSON mapping file.

    Parameters
    ----------
    json_file : str or path-like
        Path of the JSON mapping file.

    Returns
    -------
    The value stored under the top-level ``"image"`` key, or an empty dict
    when that key is absent.
    """
    # JSON is UTF-8 by specification; pin the encoding so the result does not
    # depend on the platform's default locale.
    with open(json_file, encoding='utf-8') as f:
        mappings = json.load(f)
    return mappings.get('image', {})

# Location of the JSON mapping file.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
mapFile = "/Users/elias/Desktop/PP13_Mapping/pp13-mapper/schemas/sem_fib_nested_schema_map.json"
# "image" section of the mapping file; later cells use its keys as target
# names and its values as source keys.
image_mappings = extractImageMappings(mapFile)

# Debug output, disabled:
# print(image_mappings)

def extractImageData(image_data, image_mappings):
    """Filter ``image_data`` down to the entries referenced by the mapping.

    An entry is kept when its key occurs among the *values* of
    ``image_mappings``. Keys and values are passed through unchanged, and the
    order of ``image_data`` is preserved.
    """
    return {
        source_key: source_value
        for source_key, source_value in image_data.items()
        if source_key in image_mappings.values()
    }


# Keep only those parsed entries of the test image whose key appears as a
# value in the image mapping.
image_metadata = extractImageData(image_data, image_mappings)

In [47]:
def headerMapping(image_metadata, image_mappings):
    """Rename metadata entries according to the mapping.

    ``image_mappings`` maps each target name to a source key. For every pair
    whose source key is present in ``image_metadata``, the result stores the
    corresponding value under the target name. Pairs whose source key is
    missing are skipped. The result follows the order of ``image_mappings``.
    """
    return {
        target_name: image_metadata[source_key]
        for target_name, source_key in image_mappings.items()
        if source_key in image_metadata
    }


# Rename the selected entries of the test image to the target names defined by
# the mapping.
mapped_metadata = headerMapping(image_metadata, image_mappings)

# Bare expression: displayed as the cell's output.
mapped_metadata

{'acquisition.dataset.entry.images.entry.creationTime': '20.08.2020 00:57:26',
 'acquisition.dataset.entry.images.entry.stage.workingDistance.value': '0.0041341',
 'acquisition.dataset.entry.images.entry.stage.stageX.value': '0.000225271',
 'acquisition.dataset.entry.images.entry.stage.stageY.value': '-0.00467317',
 'acquisition.dataset.entry.images.entry.stage.stageZ.value': '0.00402333',
 'acquisition.dataset.entry.images.entry.stage.stageR.value': '0.648119',
 'acquisition.dataset.entry.images.entry.stage.stageTa.value': '0.33685',
 'acquisition.dataset.entry.images.entry.stage.stageTb.value': '0',
 'acquisition.dataset.entry.images.entry.stage.specTilt.value': '',
 'acquisition.dataset.entry.images.entry.stage.activeStage': 'Bulk',
 'acquisition.dataset.entry.images.entry.vacuum.chamberPressure.value': '5.41e-005',
 'acquisition.dataset.entry.images.entry.vacuum.gas': '',
 'acquisition.dataset.entry.images.entry.vacuum.userMode': 'High vacuum',
 'acquisition.dataset.entry.images.en

In [48]:
def writeMetadataToJson(mapped_metadata, output_file):
    """Write flat, dot-separated metadata to ``output_file`` as nested JSON.

    Each key of ``mapped_metadata`` is split on ``'.'``: every segment but the
    last names a nested object (created on demand), and the last segment is
    the field that receives the value. The resulting structure is written with
    an indentation of 4 spaces.
    """
    nested = {}

    for dotted_key, leaf_value in mapped_metadata.items():
        *parent_names, leaf_name = dotted_key.split('.')

        # Walk down to the object that owns the leaf, creating any missing
        # intermediate objects along the way.
        node = nested
        for parent_name in parent_names:
            node = node.setdefault(parent_name, {})

        node[leaf_name] = leaf_value

    with open(output_file, 'w') as handle:
        json.dump(nested, handle, indent=4)
        
# Write the mapped metadata of the test image as nested JSON; the relative
# path resolves against the current working directory.
writeMetadataToJson(mapped_metadata, 'exampleResult.json')

In [60]:
import time

def processImageFolder(input_folder, output_folder, map_file):
    """Map the metadata of every ``.tif`` image in a folder to a JSON file.

    For each file in ``input_folder`` whose name ends in ``.tif``, the
    metadata is read, parsed, filtered and renamed according to the "image"
    section of ``map_file``, then written to
    ``<output_folder>/<image name without extension>.json``. A one-line
    summary with the number of processed images and the elapsed time is
    printed at the end.

    Parameters
    ----------
    input_folder : str
        Folder containing the images.
    output_folder : str
        Folder receiving the JSON files; created if it does not exist.
    map_file : str
        Path of the JSON mapping file.
    """
    # os.listdir returns names in arbitrary order; sort for a reproducible run.
    image_files = sorted(f for f in os.listdir(input_folder) if f.endswith('.tif'))
    total_images = len(image_files)
    processed_images = 0

    start_time = time.time()

    image_mappings = extractImageMappings(map_file)

    # Writing into a missing folder would otherwise fail on the first image.
    os.makedirs(output_folder, exist_ok=True)

    for image_file in image_files:
        image_path = os.path.join(input_folder, image_file)
        image_data = readFile(image_path)

        # readFile signals "no metadata" with None. Skip such images so that
        # one stray file does not abort the whole batch; the summary below
        # then shows fewer processed than total images.
        if image_data is None:
            continue

        formatted_metadata = formatMetadata(image_data)
        image_metadata = extractImageData(formatted_metadata, image_mappings)
        mapped_metadata = headerMapping(image_metadata, image_mappings)

        output_filename = os.path.splitext(image_file)[0] + '.json'
        output_path = os.path.join(output_folder, output_filename)
        writeMetadataToJson(mapped_metadata, output_path)

        processed_images += 1

    elapsed_time = time.time() - start_time

    print(f"Processed {processed_images}/{total_images} images in {elapsed_time:.2f} seconds")


# Batch run over one dataset.
# NOTE(review): both folders are absolute, machine-specific paths; adjust them
# before running elsewhere.
input_folder = '/Users/elias/Desktop/NFDI Tomographiedaten/20200818_AlSi13 XRM tomo2/Images/SEM Image'
output_folder = '/Users/elias/Desktop/PP13_Mapping/pp13-mapper/results'

# Writes one JSON file per .tif image and prints a timing summary.
processImageFolder(input_folder, output_folder, mapFile)

Processed 719/719 images in 0.74 seconds
