# Conversion of tif files to HDF5 file

This notebook converts the data from its
 [original TIFF file format](https://doi.psi.ch/detail/10.16907/d699e1f7-e822-4396-8c64-34ed405f07b7) into a single HDF5 file.

## Import necessary modules - need to use the "Tomorec Kernel"

In [None]:
from pathlib import Path

import dxchange
import h5py
import numpy as np

In [None]:
# Directory in which the "wget" command was run; it holds the downloaded data
# in .tif file format.
dirname = "../data/"

# To download the data, change into that directory and run:
#   wget -m -np https://doi2.psi.ch/datasets/sls/X02DA/Data10/e17068/disk1/h11913_4_3_/tif
# After 5 to 6 hours the directory tree below will exist. Its "tif" directory
# contains 2031 .tif files whose names follow the pattern "h11913_4_3_xxxx.tif".
doipath = "doi2.psi.ch/datasets/sls/X02DA/Data10/e17068/disk1/h11913_4_3_/tif/"

# The conversion needs the common prefix shared by all of the .tif file names.
fname = "".join((dirname, doipath, "h11913_4_3_"))

# Location of the output HDF5 file. Make sure at least 16 GB of free space is
# available wherever this points.
foutname = "".join((dirname, "hdf5-conversion/h11913_4_3.h5"))

In [None]:
# Conversion settings.
# Shapes of the three datasets created in the HDF5 file (projections, flat
# fields and dark fields). They differ only in their first dimension; the
# second dimension is the one that is split into chunks during conversion.
size_proj, size_flat, size_dark = (
    [count, 1562, 2560] for count in (1801, 100, 30)
)

# Number of slices converted per chunk, chosen so a chunk fits in memory.
sproj = 128

In [None]:
# Make sure the output directory exists before opening the file; otherwise
# creating the HDF5 file fails on a fresh checkout where the directory has
# not been made by hand.
Path(foutname).parent.mkdir(parents=True, exist_ok=True)

# Create the output file in the location specified above. Mode 'w' overwrites
# any existing file of the same name.
fid = h5py.File(foutname, mode='w')

# Create the structure within the file: one uint16 dataset each for the
# projections, the flat fields and the dark fields, sized as configured above.
projh5 = fid.create_dataset('exchange/data', size_proj, dtype='uint16')
flath5 = fid.create_dataset('exchange/data_flat', size_flat, dtype='uint16')
darkh5 = fid.create_dataset('exchange/data_dark', size_dark, dtype='uint16')

In [None]:
# Loop over the .tif image files and write them to the HDF5 output file,
# one chunk of at most `sproj` slices at a time so that each chunk fits in
# memory. This should take 5 to 10 minutes.
try:
    # Step through the second dimension directly; this avoids np.int, which
    # was removed from NumPy, and yields the same chunk boundaries.
    for start in range(0, size_proj[1], sproj):
        # The last chunk may be shorter than sproj.
        end = min(start + sproj, size_proj[1])
        print("converting slices %d .. %d" % (start, end))
        proj, flat, dark = dxchange.read_sls_tomcat(fname, sino=(start, end))
        projh5[:, start:end, :] = proj
        flath5[:, start:end, :] = flat
        darkh5[:, start:end, :] = dark
finally:
    # Always close the file, even if a read or write fails, so the handle is
    # not left open and the output-file cell can be re-run cleanly.
    fid.close()