General purpose imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt

import hystorian as hy
import h5py

import os
from glob import glob

from IPython.display import clear_output

Next, we load all the data in `raw_data_path` into the `.hdf5` format and store it in `data_path`. We make one `.hdf5` file per sample, by assuming the raw data filenames start with the sample name followed by an underscore.

In [2]:
allowed_extensions = set(('.ibw', '.sxm'))
raw_data_path = os.path.join('data', 'to_convert')
data_path = 'data'
skipped_list = []
already_converted = []
to_convert = {}

for fn in glob(os.path.join(raw_data_path, '**'), recursive=True):
    
    #skip anything that doesn't have an allowed extension
    if os.path.splitext(fn)[1] not in allowed_extensions:
        skipped_list.append(fn)
        continue
    
    measurement_name = os.path.splitext(os.path.basename(fn))[0]
    sample_name = measurement_name.split('_')[0]
    combined_name = os.path.join(data_path, sample_name)
    
    #check if the sample file exists 
    if os.path.isfile(combined_name+'.hdf5'):
        with h5py.File(combined_name+'.hdf5', 'r') as f:
            #check if the measurement is already there
            if f'datasets/{measurement_name}' in f:
                already_converted.append(measurement_name)
                continue
    
    #Since the measurement is not there, add it to the list to convert
    to_convert.setdefault(combined_name, []).append(fn.replace('\\', '/'))    
    
for combined_name, filelist in to_convert.items():
    hy.io.read_file.merge_hdf5(filelist, combined_name)

for combined_name in to_convert:
    sample_name = os.path.basename(combined_name)
    with h5py.File(combined_name+'.hdf5', 'r+') as f:
        #check if sample thickness is given, ask for it if not
        if f['datasets'].attrs.get('Thickness_uc') is None:
            try:
                f['datasets'].attrs.create('Thickness_uc',
                                            int(input(f'What is the thickness of {sample_name} in uc?')))
            except ValueError:
                pass
                  
    
print('The following measurements were converted previously and have been ignored:')
for mn in already_converted: print(mn)
print('\n')

print('The following files were skipped:')
for fn in skipped_list: print(fn)
print('If any of these files should have been included, check the allowed extensions.')

The following measurements were converted previously and have been ignored:
m21003b_topo_MC_009
m21004DSOJ4_topo_MC_005
m21004DSOJ4_topo_MC_008
m21005b_topo_MC_006
m21005b_topo_MC_009
m21006DSOI2_topo_MC_005
m21007b_topo_MC_003
m20036_topo_MC_006
m20036_topo_MC_007
m20036_topo_MC_008
m20037_topo_MC_003
m20037_topo_MC_004
m20038_topo_MC_006
m20038_topo_MC_007
m20038_topo_MC_010
m20039_topo_MC_010
m20039_topo_MC_011
m20041_topo_MC_006
m20041_topo_MC_007
m21003DSOJ2_topo_MC_006
m21003DSOJ2_topo_MC_009
m21005DSOF2_topo_MC_005
m21005DSOF2_topo_MC_008
m21007DSOC3_topo_MC_005


The following files were skipped:
data\to_convert\
data\to_convert\old
If any of these files should have been included, check the allowed extensions.


In [30]:
for fn in glob('data/*'):
    print(fn.replace('\\', '/'))

data/Celine
data/LoopsCeline-color.png
data/m20036.hdf5
data/m20037.hdf5
data/m20038.hdf5
data/m20039.hdf5
data/m20041.hdf5
data/m21003b.hdf5
data/m21003DSOJ2.hdf5
data/m21004DSOJ4.hdf5
data/m21005b.hdf5
data/m21005b_topo_MC_006.sxm
data/m21005DSOF2.hdf5
data/m21006DSOI2.hdf5
data/m21007b.hdf5
data/m21007DSOC3.hdf5
data/test.txt
data/to_convert


In [39]:
with h5py.File('data/m20038.hdf5', 'r') as f:
    for key in f['datasets']: print(key)
    

m20038_topo_MC_006
m20038_topo_MC_007
m20038_topo_MC_010
