In [None]:
import h5py
import numpy as np
import cv2
import os
from PIL import Image
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from skimage import io

In [None]:
# Collect the path of every stack file, walking the class sub-folders of the data folder
data_folder = '3D_Nucl_aug'
folder_list = [
    os.path.join(data_folder, class_name, stack_name)
    for class_name in os.listdir(data_folder)
    for stack_name in os.listdir(os.path.join(data_folder, class_name))
]
# Create input X array: read each stack and combine them into one numpy array
loaded_stacks = [np.asarray(io.imread(stack_path)) for stack_path in folder_list]
img_data = np.array(loaded_stacks)
print(img_data.shape)

In [None]:
# Normalize data to 0-1 range
# NOTE(review): the uint8 cast wraps any value above 255 -- presumably the stacks
# are already 8-bit; confirm before feeding 16-bit or float images through here.
img_data_int8 = img_data.astype(np.uint8)
img_data_float = img_data_int8.astype('float32')

# Red and green (channels 0 and 1) are scaled by their joint maximum.
# An all-zero channel group has a maximum of 0; dividing by it would fill the
# data with NaN, so the division is skipped in that case.
red_green_max = img_data_float[:, :, :, :, 0:2].max()
if red_green_max > 0:
    img_data_float[:, :, :, :, 0:2] /= red_green_max

# Blue (channel 2) is scaled by its own maximum.
blue_max = img_data_float[:, :, :, :, 2].max()
if blue_max > 0:
    img_data_float[:, :, :, :, 2] /= blue_max

In [None]:
# Create labels for each class represented by subfolders, e.g. Nucl1, Nucl2,...
# Every label is read back from the stored image path (<data_folder>/<class>/<file>),
# so entry i always describes image i; a second directory walk would only be
# assumed to return its entries in the same order as the one that built img_data.
labels = [os.path.basename(os.path.dirname(stack_path)) for stack_path in folder_list]
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    # scikit-learn < 1.2 only knows the older `sparse` keyword
    enc = OneHotEncoder(sparse=False)
label_enc = LabelEncoder()
labels_arr = np.array(labels)
# Map class names to integer ids, then shape them as one column for the one-hot encoder
y_labels_int = label_enc.fit_transform(labels_arr)
y_labels = np.reshape(y_labels_int, (len(img_data), 1))
# Create condition Y array
y_vector = enc.fit_transform(y_labels)
print(np.shape(y_vector))

In [None]:
# Create reference R array
# Cut the last axis into three parts and join the first two back together
images_split = np.array_split(img_data_float, 3, axis=4)
first_part, second_part = images_split[0], images_split[1]
ref_channel = np.concatenate((first_part, second_part), axis=4)  # Use red and green channels as reference
print(ref_channel.shape)

In [None]:
# Shuffle data: images, labels and reference array go through one shuffle call
# with a fixed seed
X_data_rand, Y_data_rand, R_data_rand = shuffle(
    img_data_float,
    y_vector,
    ref_channel,
    random_state=0,
)

In [None]:
def store_h5_split(images, ref_channel, labels, filename='3D_Nucl_aug.h5'):
    """ Stores images, reference channels and labels in a single HDF5 file.
        Parameters:
        ---------------
        images       images array, (N, 64, 256, 256, 3) to be stored
        ref_channel  reference array, (N, 64, 256, 256, 2) to be stored
        labels       labels array, (N, 10) to be stored
        filename     path of the HDF5 file to write; the file is opened in "w"
                     mode, so an existing file is overwritten
                     (default '3D_Nucl_aug.h5')
    """

    # The with-block closes the file even when one of the writes below raises,
    # so a failed export does not leave an open handle behind.
    with h5py.File(filename, "w") as file:
        # Images go into dataset "X" as big-endian 32-bit floats
        file.create_dataset(
            "X", np.shape(images), h5py.h5t.IEEE_F32BE, data=images
        )

        # Labels go into dataset "Y" as unsigned 8-bit integers
        file.create_dataset(
            "Y", np.shape(labels), h5py.h5t.STD_U8BE, data=labels
        )

        # Reference channels go into dataset "R" as big-endian 32-bit floats
        file.create_dataset(
            "R", np.shape(ref_channel), h5py.h5t.IEEE_F32BE, data=ref_channel
        )

In [None]:
# Write the shuffled images, reference channels and labels to the HDF5 file
store_h5_split(
    images=X_data_rand,
    ref_channel=R_data_rand,
    labels=Y_data_rand,
)