In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import h5py
import logging
from os import path
from pandas import HDFStore
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import pandas as pd
from keras import regularizers
from utils import get_callbacks,windowed_dataset,plot_report, basic_lstm
import tensorflow_docs.modeling
import tensorflow_docs.plots

tf.executing_eagerly()


%matplotlib inline
seed=2020
tf.random.set_seed(seed) # set up tensorflow's seed
np.random.seed(seed) # set up numpy's seed

Using TensorFlow backend.


In [2]:
coeff = 22
# TODO export load data to data loaders or Utils
root_path = "Z:/elekin"
rdo_root_path = path.join(root_path, "02-RESULTADOS/03-HANDWRITTING")

# Input store with the precomputed results.
# NOTE(review): the input file index (17) is hard-coded and differs from
# `coeff` (22), which is only used for the output file name; confirm this is intended.
h5file = path.join(rdo_root_path, "00-OUTPUT/archimedean-")
h5filename = h5file + str(17) + ".h5"
# Open read-only: the cells below only read from this store. HDFStore's
# default mode is "a", which creates an empty file when the path is wrong
# and allows accidental modification of the source data.
hdf = HDFStore(h5filename, mode="r")

# Output file name for the dataset splits (built here, not opened).
h5_outputfile = path.join(rdo_root_path, "00-OUTPUT/archimedean-")
h5_output_filename = h5_outputfile + str(coeff) + "-splits" + ".h5"

In [3]:
# Load the time series and their labels from the HDF5 store.
# The residues table is transposed before being joined with the labels.
raw_df = hdf.get('results/residues/rd').transpose()
raw_labels_df = hdf.get('source/labels')

# Join the labels onto the series (DataFrame.join matches on the index),
# then shuffle every row reproducibly with the notebook seed.
raw_dataset = (
    raw_df
    .join(raw_labels_df)
    .sample(frac=1, random_state=seed)
)

In [4]:
# Preview the labels of the first five shuffled rows.
raw_dataset['labels'].head(5)

T012_02        1.0
C07_12         0.0
T013_02        1.0
T029_02        1.0
control2_02    0.0
Name: labels, dtype: float64

A window function is applied to the Dataset objects. The time series of each subject is split into windows of the same length, and each window is assigned the right label. All the windows are then gathered into a single Dataset object.

In [5]:
window_size = 1024
shuffle_buffer = 4000
batch_size = 1

# Convert the series to float32 once and reuse the array for both the
# tensor-slices dataset and the windowing loop.
series_values = raw_df.values.astype("float32")
# NOTE(review): `train` is not used by any of the cells visible in this notebook.
train = tf.data.Dataset.from_tensor_slices(series_values)

# Align the labels to the rows of raw_df by index. The loop below pairs
# series and labels by position, so relying on both frames happening to be
# stored in the same row order would silently mislabel subjects.
missing_ids = raw_df.index.difference(raw_labels_df.index)
if len(missing_ids) > 0:
    raise ValueError(
        "No label found for time series: {}".format(list(missing_ids))
    )
labels = raw_labels_df.reindex(raw_df.index).values.astype('int8')

# Build one windowed dataset per time series and chain them together into
# a single Dataset object.
for i, features in enumerate(series_values):
    new = windowed_dataset(features, window_size, batch_size, shuffle_buffer, labels[i])
    if i > 0:
        dataset = dataset.concatenate(new)
    else:
        dataset = new

In [6]:
# Inspect the structure (shapes and dtypes) of the elements yielded by the
# combined dataset.
dataset.element_spec

(TensorSpec(shape=(None, None), dtype=tf.float32, name=None),
 TensorSpec(shape=(None,), dtype=tf.int8, name=None))

In [None]:
# Print the first (features, label) element of the dataset.
for x,y in dataset.take(1):
    print("x =",x)
    print("y =",y)

We can apply filters to a Dataset object

In [None]:
# Keep only elements whose label equals 1 and print the first match.
# y has shape (None,) (see element_spec above), so tf.equal returns one
# comparison per label; [0] selects the first of them, giving the filter a
# single boolean per element.
for x,y in dataset.filter(lambda x,y: tf.equal(y, 1)[0]).take(1):
    print("x =",x)
    print("y =",y)

It is possible to access a single element using a Python iterator. The example below also shows how to shuffle the dataset and batch some examples together into mini-batches:

In [None]:
# Shuffle the dataset, batch it again, and create a Python iterator over it.
iterator = iter(dataset.shuffle(shuffle_buffer*20).batch(batch_size))
# Use the built-in next(): the Python 3 iterator protocol defines __next__,
# and a `.next()` method is not guaranteed to exist on the iterator object.
first = next(iterator)

In [None]:
# `first` is one element drawn from the iterator; print each of its
# components as a NumPy array, separated by a blank line.
for x in first:
    print("first batch: ",x.numpy())
    print()

Now we are able to build the different dataset splits for training purposes.

In [7]:
FEATURES = 4096
# NOTE(review): this size assumes 50 time series and FEATURES/window_size
# elements per series; confirm against what windowed_dataset actually yields.
DATASET_SIZE = int((FEATURES/window_size)*50)
#TODO import from utils API
train_size = int(0.7 * DATASET_SIZE)
val_size = int(0.15 * DATASET_SIZE)
test_size = int(0.15 * DATASET_SIZE)

full_dataset = dataset
# Freeze the shuffle order. By default Dataset.shuffle reshuffles on every
# iteration, so take()/skip() would select different elements each epoch and
# training, validation and test examples would leak into each other.
# NOTE(review): if windowed_dataset shuffles internally with per-iteration
# reshuffling, the partitions can still vary between epochs; confirm.
full_dataset = full_dataset.shuffle(DATASET_SIZE, reshuffle_each_iteration=False)

# First train_size elements -> training; the next test_size -> test;
# whatever remains after that -> validation (val_size is not used to cut it).
train_dataset = full_dataset.take(train_size)
test_dataset = full_dataset.skip(train_size)
val_dataset = test_dataset.skip(test_size)
test_dataset = test_dataset.take(test_size)

# Restore per-epoch shuffling, but only inside the training partition.
train_dataset = train_dataset.shuffle(train_size)

In [None]:
# LSTM basic architectures --> #TODO import from utils API
# Reset Keras' global state so that previous training runs do not affect this one.
tf.keras.backend.clear_session()
# Re-import the model factory under its own alias. Assigning the model to
# `basic_lstm` (the name the following cell uses) shadows the function
# imported at the top of the notebook, so without this the cell could not be
# re-run: the second run would call the model object instead of the factory.
from utils import basic_lstm as build_basic_lstm
basic_lstm = build_basic_lstm(8, window_size, dropout=0.5)
basic_lstm.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=7e-6),
                   loss='binary_crossentropy', metrics=['accuracy'])
basic_lstm.summary()

In [None]:
EPOCHS = 10

# Alternative kept for reference: bound every epoch to a fixed number of steps.
#EVALUATION_INTERVAL = 20
#history = basic_lstm.fit(train_dataset, epochs=EPOCHS, steps_per_epoch=EVALUATION_INTERVAL,
#                      validation_data=val_dataset, validation_steps=50, callbacks=get_callbacks(), verbose=0)

# Train silently (verbose=0) on the training split, validating on the
# validation split after each epoch, then plot the training history.
history = basic_lstm.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=val_dataset,
    callbacks=get_callbacks(),
    verbose=0,
)
plot_report(history)