![nsdf](https://www.sci.utah.edu/~pascucci/public/NSDF-large.png)  
[National Science Data Fabric](https://nationalsciencedatafabric.org/) 

# Converting HDF5 data

In [None]:
import os ,sys, time, logging,shutil
from datetime import datetime
import numpy as np
import boto3
import h5py
from pprint import pprint

#sys.path.append("C:/projects/OpenVisus/build/RelWithDebInfo")
#sys.path.append("C:/projects/openvisuspy/src")

import OpenVisus as ov
import openvisuspy
# NOTE(review): presumably turns off OpenVisus file locking on writes — confirm against the OpenVisus docs,
# and confirm whether it has to be set before `import OpenVisus` rather than after
os.environ["VISUS_DISABLE_WRITE_LOCK"]="1"
# logger registered under the "OpenVisus" name; in the visible cells it is only referenced by the
# commented-out SetupLogger call below
logger= logging.getLogger("OpenVisus")

# uncomment for debugging
# ov.SetupLogger(logger, stream=True)

from openvisuspy.utils import DownloadObject

# simple marker that every import above succeeded
print("OpenVisus imported")

# /////////////////////////////////////////////////////////////////////////////////
def Traverse(cur):
    """
    Recursively summarise the contents of an HDF5 node.

    A `h5py.Dataset` is reported as a dict holding its type, shape, dtype,
    size, ndim and nbytes. Any other node is expected to expose `keys()` and
    item access, and becomes a dict mapping each child name to the summary
    of that child.
    """
    if not isinstance(cur, h5py.Dataset):
        # container node: descend into every child
        children = {}
        for name in cur.keys():
            children[name] = Traverse(cur[name])
        return children

    # leaf node: collect the descriptive attributes of the dataset
    summary = {'__type': type(cur)}
    for attr in ('shape', 'dtype', 'size', 'ndim', 'nbytes'):
        summary[attr] = getattr(cur, attr)
    return summary
    

I need an HDF5 file to convert.

I can download it from the cloud, but NOTE: **it is a 33GB file**, so the download will take a long time

In [None]:
# local path where the HDF5 file is expected / stored
hdf5_filename='/mnt/data/chess/assets/ff1_000231.h5'
# NOTE(review): presumably copies the S3 object to the local path — confirm whether an
# already-downloaded file is skipped and whether the destination directory is created
DownloadObject("s3://utah/assets/ff1_000231.h5", hdf5_filename)
# opened read-only and not closed here: `f` is indexed again further down in the notebook
f = h5py.File(hdf5_filename, 'r') 
# print the nested layout of the file
pprint(Traverse(f))

# Read binary data

A single dataset is 33GB. 

**Better not to load it all in memory**; here I am reading only a small portion

In [None]:
import matplotlib.pyplot as plt
from scipy import ndimage
from matplotlib.colors import LogNorm

# ///////////////////////////////////////////////////////////////
def ShowImage(img, histogram=True):
    """
    Display an image with a horizontal colorbar and, optionally, its histogram.

    Prints the dtype, shape, minimum and maximum of `img`, then draws the
    image in the left half of a new 18.5 x 10.5 inch figure.

    Args:
        img: array with `dtype` and `shape` attributes, accepted by
            `plt.imshow` and `np.histogram`.
        histogram: when true, a 256-bin histogram of the pixel values is
            plotted in the right half of the figure.
    """
    # value range computed once; reused by the printout, the bins and the x axis
    vmin, vmax = np.min(img), np.max(img)
    print("dtype",img.dtype,"shape",img.shape,"m",vmin,"M",vmax)

    fig = plt.figure()
    fig.set_size_inches(18.5, 10.5)

    fig.add_subplot(1, 2, 1)  # nrows, ncols, index
    plt.imshow(img)
    plt.colorbar(orientation='horizontal')

    if not histogram:
        return

    fig.add_subplot(1, 2, 2)
    # named `counts` so the `histogram` flag is not overwritten by the bin counts
    counts, bin_edges = np.histogram(img, bins=256, range=(vmin, vmax))
    plt.title("Histogram")
    plt.xlabel("value")
    plt.ylabel("pixel count")
    plt.xlim([vmin, vmax])
    # np.histogram returns one more edge than bins; plot against the left edges
    plt.plot(bin_edges[:-1], counts)


# handle to the image stack stored under imageseries/images; its shape is unpacked as (D, H, W)
images=f["imageseries"]["images"]
D,H,W=images.shape
print(f"Images dtype={images.dtype} shape={images.shape} ")

# index along the first axis of the single slice used for the preview
Z=300
img=images[Z,:,:]
# value range of the extracted slice (must be `img`; `slice` is the Python builtin type)
print(np.min(img),np.max(img))

from skimage import io,exposure
# histogram-equalize before display
ShowImage(exposure.equalize_hist(img))

# Create OpenVisus file

NOTE: **Converting only a small portion of the data (I don't want to hold 33GB in memory)**

In [None]:
# note: creating the dataset in a temporary local directory , that is safe to remove
idx_filename="./remove-me/hdf5-example/visus.idx"

# only the first third of the stack (along the first axis) is converted
reduced_depth=D//3
print("Reduced depth to",reduced_depth)

import os,sys,shutil
# safety guard before the recursive delete below; an explicit raise is used because
# `assert` statements are skipped when Python runs with -O
if "remove-me" not in idx_filename:
    raise ValueError(f"refusing to delete {os.path.dirname(idx_filename)!r}: path does not contain 'remove-me'")
shutil.rmtree(os.path.dirname(idx_filename), ignore_errors=True)

# this slice is what gets written to the new dataset
data=images[0:reduced_depth,:,:]

# single field named "data" with the same dtype as the source slices
fields=[ov.Field("data",str(data.dtype),"row_major")]
# dims are passed as [W, H, depth], i.e. the reverse of the (D, H, W) array shape
db=ov.CreateIdx(
    url=idx_filename,
    dims=[W,H,reduced_depth],
    fields=fields,
    # NOTE: we are first creating the dataset with no-compression
    compression="raw")

print(db.getDatasetBody().toString())
print("Dataset created")

# Write  data to OpenVisus

In [None]:
# perf_counter is monotonic, so the measured interval is not skewed by wall-clock adjustments
t1 = time.perf_counter()
# write the in-memory block to the dataset created above
db.write(data)
print(f"db.write done in {time.perf_counter() - t1} seconds")

# (OPTIONAL) Compress using zip

In [None]:
# in production is better to compress the dataset
# flip to True to run the (optional) compression step
do_compress=False
if do_compress:
    # perf_counter is monotonic, so the measured interval is not skewed by wall-clock adjustments
    t1 = time.perf_counter()
    db.compressDataset(["zip"])
    print(f"db.compressDataset done in {time.perf_counter()-t1} seconds")

# Show OpenVisus data

In [None]:
import os,sys
# read the full x/y extent of the z range [Z, Z+1) back from the OpenVisus dataset;
# [0,:,:] then takes the first entry along the leading axis (presumably the single z slice)
# NOTE(review): Z was picked against the full depth D, but only reduced_depth slices were
# written — confirm Z < reduced_depth for this file
img=db.read(x=[0,W],y=[0,H],z=[Z,Z+1], num_refinements=1)[0,:,:]
print(img.dtype,img.shape)
# same equalized display as used for the slice read straight from the HDF5 file
ShowImage(exposure.equalize_hist(img))

# Show coarse to fine

In [None]:
import os,sys
# with num_refinements=3 the result of db.read is iterated, and one figure is shown per item
# NOTE(review): presumably the items go from coarse to fine resolution — confirm against the OpenVisus docs
for img in db.read(x=[0,W],y=[0,H],z=[Z,Z+1], num_refinements=3):
	ShowImage(exposure.equalize_hist(img[0,:,:]))