In [None]:
#pip install git+https://github.com/tensorflow/docs

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import h5py
import logging
from os import path
from pandas import HDFStore
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import shutil
import tempfile
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers

# Load matplotlib inline extension
%matplotlib inline
# Load the TensorBoard notebook extension
%load_ext tensorboard

In [2]:
logs_base_dir = pathlib.Path("./fmri")/"tensorboard_logs"
shutil.rmtree(logs_base_dir, ignore_errors=True)

In [3]:
%tensorboard --logdir "./fmri/tensorboard_logs" --host 0.0.0.0

In [4]:
#Load data from hdf5 file

# origin h5 file location, load and shuffle timeseries
h5_file = '/data/elekin/data/results/00-EXTRACCION-CEREBELO/elekin_fmri_ts_pandas.hdf5'

#windows
#h5_file = "Z:/elekin/02-RESULTADOS/00-EXTRACCION-CEREBELO/elekin_fmri_ts_pandas.hdf5"

tr_key = 'cbl/dataset'#transformed dataset (id,time series matrix 158x28, label)
dataset = pd.read_hdf(h5_file, tr_key)

num_samples = dataset.shape[0]
features = np.stack(dataset['features'].values)
labels = dataset['label'].values.astype('int8')

features.shape, labels.shape

((101, 158, 28), (101,))

In [5]:
#build tensorflow dataset TODO adjust float precission to GPU requirements to see the effect
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
dataset

<TensorSliceDataset shapes: ((158, 28), ()), types: (tf.float32, tf.int8)>

In [6]:
DATASET_SIZE = int(len(list(iter(dataset.cache()))))
STEPS_PER_EPOCH = 1 #gradient descent regularization
FEATURES = features.shape[1] #number of volumes
CHANNELS = features.shape[2] #number of time series
seed=38 #to keep same random states

In [7]:
#split dataset to training and test
train_size = int(0.67 * DATASET_SIZE)
test_size = int(0.33 * DATASET_SIZE)
train_size, test_size

(67, 33)

In [8]:
minibatch_size = 8
np.random.seed(seed) #seting random state
shuffle_buffer = 51 #buffer for splitting
full_dataset = dataset.shuffle(shuffle_buffer)
train_dataset = full_dataset.take(train_size).batch(minibatch_size).prefetch(1).cache() #caching prefetched minibatch
test_dataset = full_dataset.skip(train_size).batch(minibatch_size).prefetch(1).cache() #caching prefetched minibatch

In [17]:
for i,y in enumerate(test_dataset):
    print(i)

0
1
2
3
4


In [9]:
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(0.001,decay_steps=STEPS_PER_EPOCH*1000,
                                                             decay_rate=1,staircase=False)

def get_optimizer():
    return tf.keras.optimizers.Adam(lr_schedule)

In [10]:
#Early stop configuration
earlystop_callback = EarlyStopping( monitor='val_accuracy', min_delta=0.0001,patience=200)
training_earlystop_callback = EarlyStopping( monitor='accuracy', min_delta=0.0001, patience=200)

def get_callbacks(name):
    return [tfdocs.modeling.EpochDots(), earlystop_callback, tf.keras.callbacks.TensorBoard(logs_base_dir/name)]

def compile_and_fit(model, name, optimizer=None, max_epochs=1000):
    if optimizer is None:
        optimizer = get_optimizer()
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()
    history = model.fit(train_dataset, use_multiprocessing=True, validation_data=test_dataset, epochs=max_epochs, callbacks=
                        get_callbacks(name), verbose=0)
    return history

In [17]:
lstm = tf.keras.models.Sequential([
 tf.keras.layers.LSTM(8, activation=tf.nn.tanh,input_shape=[FEATURES, CHANNELS]),
 tf.keras.layers.Dense(16, activation=tf.nn.relu),
 tf.keras.layers.Dense(8, activation=tf.nn.relu),
 tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)])

In [18]:
size_histories = {}

In [19]:
numb='lstm/108'

In [20]:
tf.random.set_seed(seed)
tf.keras.backend.clear_session()# para evitar que entrenamientos annteriores afecten
size_histories['lstm/tiny/'+numb] = compile_and_fit(lstm, "fmri/lstm/tiny/"+numb, optimizer=None, max_epochs=10000)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 8)                 1184      
_________________________________________________________________
dense (Dense)                (None, 16)                144       
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 136       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 9         
Total params: 1,473
Trainable params: 1,473
Non-trainable params: 0
_________________________________________________________________

Epoch: 0, accuracy:0.5522,  loss:0.6937,  val_accuracy:0.5588,  val_loss:0.6908,  
....................................................................................................
Epoch: 100, accuracy:0.9552,  loss:0.1934,  val_accuracy:0.8235,  v

In [16]:
# Evaluate the model on the test data using `evaluate`
print('test acc:', max(size_histories['lstm/tiny/'+numb].history["val_accuracy"]))

test acc: 0.7941176295280457


## PENDING TO REVIEW ------------------------

## Recurrent Neural Network LSTM

Many models train better if you gradually reduce the learning rate during training. 
Use optimizers.schedules to reduce the learning rate over time:

In [None]:
STEPS_PER_EPOCH = 1
FEATURES = 158
CHANNELS = 28

lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
  1e-6,
  decay_steps=STEPS_PER_EPOCH*1000,
  decay_rate=1,
  staircase=False)

def get_optimizer():
    return tf.keras.optimizers.Adam(lr_schedule)

In [None]:
size_histories = {}

In [None]:
tf.keras.backend.clear_session()# para evitar que entrenamientos annteriores afecten
lstm = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(24, activation=tf.nn.tanh,input_shape=[FEATURES, CHANNELS]),
    tf.keras.layers.Dense(16, activation=tf.nn.relu),
    tf.keras.layers.Dense(8, activation=tf.nn.relu),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)])
size_histories['lstm/tiny'] = compile_and_fit(lstm, "lstm/tiny", optimizer=None, max_epochs=10000)

In [None]:
tf.random.set_seed(seed) # establecemos la semilla para tensorflow
tf.keras.backend.clear_session()# para evitar que entrenamientos annteriores afecten
small = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(256, return_sequences=True, activation=tf.nn.tanh,input_shape=[FEATURES, CHANNELS]),
    tf.keras.layers.LSTM(256, activation=tf.nn.tanh),
    tf.keras.layers.Dense(16, activation=tf.nn.relu),
    tf.keras.layers.Dense(8, activation=tf.nn.relu),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)])
size_histories['lstm/small'] = compile_and_fit(small, "lstm/small", optimizer=None, max_epochs=10000)

In [None]:
plotter_loss = tfdocs.plots.HistoryPlotter(metric = 'loss', smoothing_std=10)
plotter_loss.plot(size_histories)
plt.ylim([0., 2.15])

In [None]:
plotter_acc = tfdocs.plots.HistoryPlotter(metric = 'accuracy', smoothing_std=10)
plotter_acc.plot(size_histories)
plt.ylim([0.3, 1.02])

## Bidirectional

In [None]:
shutil.rmtree(logs_base_dir/'lstm/regularizers/tiny', ignore_errors=True)
shutil.copytree(logs_base_dir/'lstm/tiny', logs_base_dir/'lstm/regularizers/tiny')
regularizer_histories = {}
#regularizer_histories['tiny'] = size_histories['lstm/tiny']

In [None]:
tf.random.set_seed(seed) # establecemos la semilla para tensorflow
tf.keras.backend.clear_session()# para evitar que entrenamientos annteriores afecten
lstm = tf.keras.models.Sequential([
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(256, return_sequences=True, activation=tf.nn.relu),input_shape=[FEATURES, CHANNELS]),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(256, activation=tf.nn.relu)),
    tf.keras.layers.Dense(16, activation=tf.nn.relu),
    tf.keras.layers.Dense(8, activation=tf.nn.relu),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)])
regularizer_histories['bi-tiny'] = compile_and_fit(lstm, "lstm/regularizers/bi-tiny", optimizer=None, max_epochs=10000)

### Regularization

In [None]:
# Drop out
dr=0.2
tf.random.set_seed(seed) # establecemos la semilla para tensorflow
tf.keras.backend.clear_session()# para evitar que entrenamientos annteriores afecten
lstm = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(256, return_sequences=True, activation=tf.nn.tanh,input_shape=[FEATURES, CHANNELS]),
    tf.keras.layers.Dropout(dr),
    tf.keras.layers.LSTM(128, activation=tf.nn.tanh),
    tf.keras.layers.Dropout(dr),
    tf.keras.layers.Dense(16, activation=tf.nn.relu),
    tf.keras.layers.Dropout(dr),
    tf.keras.layers.Dense(8, activation=tf.nn.relu),
    tf.keras.layers.Dropout(dr),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)],)
regularizer_histories['drop-tiny'] = compile_and_fit(lstm, "lstm/regularizers/drp-tiny", optimizer=None, max_epochs=10000)

In [None]:
# Drop out
dr=0.25
lr=1e-6
tf.random.set_seed(seed) # establecemos la semilla para tensorflow
tf.keras.backend.clear_session()# para evitar que entrenamientos annteriores afecten
lstm = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(24, activation=tf.nn.tanh, kernel_regularizer=regularizers.l2(lr), input_shape=[FEATURES, CHANNELS]),
    tf.keras.layers.Dense(16, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(lr)),
    tf.keras.layers.Dense(8, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(lr)),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid, kernel_regularizer=regularizers.l2(lr))],)
regularizer_histories['kernel'] = compile_and_fit(lstm, "lstm/regularizers/kernel-reg", optimizer=None, max_epochs=10000)

In [None]:
plotter_loss = tfdocs.plots.HistoryPlotter(metric = 'loss', smoothing_std=10)
plotter_loss.plot(regularizer_histories)
plt.ylim([0., 2.15])

In [None]:
plotter_acc = tfdocs.plots.HistoryPlotter(metric = 'accuracy', smoothing_std=10)
plotter_acc.plot(regularizer_histories)
plt.ylim([0.3, 1.02])