### Read from HDF5 files

In [None]:
%matplotlib notebook
import sys, os
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from cookiebox_forYoussef.src.h5todatasets import *
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import glob

FPATH = '/home/ynashed/workspace/data/cookiebox/*.h5'
# glob returns matches in arbitrary order; sorting keeps the sample order (and
# therefore the seeded train/test split below) stable across runs and machines.
data_files = sorted(glob.glob(FPATH))
if not data_files:
    # Fail early with a clear message instead of an opaque encoder error later.
    raise FileNotFoundError('No HDF5 files match ' + FPATH)

X = []  # one 'hist' array per image entry
Y = []  # one list of phase ids per image entry
for fname in data_files:
    # The context manager closes the file even if reading an entry raises.
    with h5py.File(fname, 'r') as f:
        # Only top-level entries whose name starts with "img<digits>" are used.
        imkeys = [i for i in f.keys() if re.match(r'^img\d+', i)]
        for imkey in imkeys:
            entry = f[imkey]
            carrier = entry.attrs['carrier']
            ephases = entry.attrs['ephases']
            # [()] reads the whole 'hist' dataset into memory.
            h = entry['hist'][()]
            # One id per pulse, computed from carrier + that pulse's 'ephases' value.
            phases = [phase2id(carrier + ephases[i])
                      for i in range(entry.attrs['npulses'])]
            X.append(h)
            Y.append(phases)
X = np.array(X)
# The classification target is the number of phase ids (i.e. 'npulses') per
# image, shaped (n_samples, 1) as OneHotEncoder expects a 2-D input.
targets = np.array([len(y) for y in Y])[..., np.newaxis]
try:
    # scikit-learn >= 1.2 spells the dense-output switch 'sparse_output'.
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    # Older scikit-learn releases only know the original 'sparse' keyword.
    encoder = OneHotEncoder(sparse=False)
y = encoder.fit_transform(targets)
print(X.shape, y.shape)


# Hold out 20% of the samples; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Visualize an example

In [None]:
# Pick one training sample uniformly at random (an int argument to
# np.random.choice is treated as np.arange of that length).
dindex = np.random.choice(X_train.shape[0])
data = X_train[dindex]

print(data.dtype)

# Render the sample as an image; 'auto' aspect lets it fill the axes instead
# of forcing square pixels.
fig, ax = plt.subplots()
img = ax.imshow(data, aspect='auto')
fig.colorbar(img, ax=ax)

# One-hot label belonging to the sample shown above.
print(y_train[dindex])

### Let's try a baseline bi-directional LSTM model

In [None]:
from tensorflow import keras
from tensorflow.keras import layers, utils

# Scale the training data to unit L2 norm along axis 1 (utils.normalize's
# default order is 2).
X_train_normalized = utils.normalize(X_train, axis=1)

nclasses = y_train.shape[1]
# Axes 1 and 2 of the data become the (timesteps, features) axes of the
# recurrent layer's input.
nenergies = X_train_normalized.shape[1]
nangles = X_train_normalized.shape[2]
# Only needed by the commented-out Embedding experiment below.
nvocab = X_train_normalized.max()

# tensorflow.keras.layers only exposes CuDNNLSTM on TF 1.x. On TF 2.x the
# regular LSTM layer selects the cuDNN kernel itself when run on a GPU with
# default arguments, so fall back to it and keep the cell runnable on both.
lstm_layer = getattr(layers, "CuDNNLSTM", layers.LSTM)

# Fixed-shape input: one (nenergies, nangles) float array per sample
inputs = keras.Input(shape=(nenergies, nangles))
x = inputs
# Disabled experiment: embed integer inputs in a 3-dimensional vector
# x = layers.Embedding(nvocab, 3, input_length=nenergies)(x)
# Disabled experiment: stack a second, sequence-returning bidirectional LSTM
# x = layers.Bidirectional(layers.CuDNNLSTM(64, return_sequences=True))(x)
# Single bidirectional LSTM with 64 units per direction
x = layers.Bidirectional(lstm_layer(64))(x)
# Add a classifier
outputs = layers.Dense(nclasses, activation="softmax")(x)
model = keras.Model(inputs, outputs)
model.summary()

model.compile("adam", "categorical_crossentropy", metrics=["accuracy"])
model.fit(X_train_normalized, y_train, batch_size=32, epochs=2)