In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import h5py
import logging
from os import path
from pandas import HDFStore
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import pandas as pd
from keras import regularizers
from utils import get_callbacks,get_callbacks_t,windowed_dataset,plot_report,build_mdpi,split_train_test_val
import tensorflow_docs.plots

%matplotlib inline

Using TensorFlow backend.


In [3]:
# Parameters: every tunable value of the experiment lives in this cell.

# --- training ---
EPOCHS = 1000            # upper bound on epochs handed to mdpi.fit

# --- windowing / input pipeline ---
window_size = 515        # used as 2nd and 3rd argument of windowed_dataset and as build_mdpi's argument
shuffle_buffer = 40000   # forwarded to split_train_test_val

# --- data source ---
coeff = 22               # suffix of the HDF5 file name; NOTE: the load-data cell reassigns it

# --- not referenced in the cells visible here (TODO confirm they are still needed) ---
INITIAL_EXAMPLES = 50
FEATURES = 4096

In [4]:
#Load data
# NOTE(review): this reassignment overrides the `coeff` defined in the
# parameters cell; it is kept so that the same file as before is loaded.
coeff = 17

#load source origin hdf5 file
# NOTE(review): absolute, machine-specific path; consider making it configurable.
root_path = "Z:/elekin"
rdo_root_path = path.join(root_path,"02-RESULTADOS/03-HANDWRITTING")
h5file = path.join(rdo_root_path, "00-OUTPUT/archimedean-")
h5filename = h5file + str(coeff) + ".h5"

# Open the store read-only and close it as soon as both tables are in memory.
# The default mode ('a') would create an empty file when the path is wrong and
# the handle would otherwise stay open for the whole kernel session.
with HDFStore(h5filename, mode="r") as hdf:
    raw_df = hdf['results/residues/rd'].T
    source_labels = hdf.get('source/labels').values

#normalization: rescale every column of raw_df to the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_df = pd.DataFrame(scaler.fit_transform(raw_df))
#get labels (assigned positionally, one per row of scaled_df)
scaled_df['labels'] = source_labels
#shuffle dataset rows with a fixed seed so the order is reproducible
raw_dataset = scaled_df.sample(frac=1, random_state=2020)

In [5]:
#Feature engineering

# Separate the label column (cast to int8) from the feature columns (cast to float32).
labels = raw_dataset["labels"].to_numpy().astype("int8")
timeseries = raw_dataset.drop(columns="labels").to_numpy().astype("float32")

# Data augmentation: turn each time series into a dataset of windows via the
# project helper windowed_dataset and chain the per-series datasets in row order.
for index, (features, label) in enumerate(zip(timeseries, labels)):
    windows = windowed_dataset(features, window_size, window_size, label)
    # the first series starts the chain, every later one is appended to it
    dataset = windows if index == 0 else dataset.concatenate(windows)

In [6]:
# Count the elements by streaming through the dataset once. Building a full
# Python list of every element only to take its length keeps the whole dataset
# in memory, and calling .cache() on a throwaway object caches nothing for
# later cells.
DATASET_SIZE = sum(1 for _ in dataset)

In [None]:
# Sanity check on ordering: print the label of the first 15 elements of the
# full dataset to review how well the classes are mixed.
for _, label in dataset.take(15):
    print("y= ", label)

In [7]:
# Build dataset splits: 70 % train, 15 % validation, 15 % test.
train, val, test = split_train_test_val(
    dataset,
    DATASET_SIZE,
    train_ratio=0.7,
    val_ratio=0.15,
    test_ratio=0.15,
    shuffle_buffer=shuffle_buffer,
)

# Batch each split (16 for training, 8 for validation and test) and
# prefetch one batch ahead.
train_dataset = train.batch(16).prefetch(1)
val_dataset = val.batch(8).prefetch(1)
test_dataset = test.batch(8).prefetch(1)

In [8]:
# Expected number of elements per split for the 70 / 15 / 15 ratios.
# int() truncates, so the three values can add up to slightly less than DATASET_SIZE.
train_size, val_size, test_size = (
    int(ratio * DATASET_SIZE) for ratio in (0.7, 0.15, 0.15)
)
train_size, val_size, test_size

(125369, 26865, 26865)

To validate the training splits, run the cell below.

In [None]:
# Number of batches in each split, counted by streaming through the datasets
# instead of materialising every batch in a Python list first.
tuple(sum(1 for _ in split) for split in (train_dataset, val_dataset, test_dataset))

In [None]:
# Sanity check on shuffling: print the label batches of the first two
# training batches to review how well the classes are mixed.
for _, label_batch in train_dataset.take(2):
    print("y= ", label_batch)

In [11]:
# Build the neural network architecture with the project helper and compile it.
mdpi = build_mdpi(window_size)  # presumably a 1D CNN, per the original note; see utils.build_mdpi

# Adam optimizer, binary cross-entropy loss, accuracy as the tracked metric.
# (An SGD optimizer with lr=1e-1, momentum=0.9 was tried as an alternative.)
mdpi.compile(
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=7e-8,
        epsilon=None,
        decay=1e-4,
        beta_1=0.9,
        beta_2=0.99,
        amsgrad=False,
    ),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Uncomment to review the architecture:
#mdpi.summary()

In [12]:
## Training
# Reset Keras' global state before fitting.
# NOTE(review): this runs after `mdpi` was built in the previous cell; confirm
# that clearing the session at this point is intended.
tf.keras.backend.clear_session()

# Training histories, keyed by run name.
histories = {}
histories['mdpi/local'] = mdpi.fit(
    train_dataset.cache(),
    validation_data=val_dataset.cache(),
    epochs=EPOCHS,
    callbacks=get_callbacks_t('mdpi/local',10),
    verbose=0,
)

W0107 19:59:55.108215 12512 callbacks.py:244] Method (on_train_batch_end) is slow compared to the batch update (0.118868). Check your callbacks.



Epoch: 0, accuracy:0.5526,  loss:0.7161,  val_accuracy:0.4801,  val_loss:0.7214,  
..........
Epoch: 10, accuracy:0.5583,  loss:0.7103,  val_accuracy:0.4757,  val_loss:0.7210,  
..........
Epoch: 20, accuracy:0.5586,  loss:0.7102,  val_accuracy:0.4746,  val_loss:0.7209,  
..........
Epoch: 30, accuracy:0.5583,  loss:0.7090,  val_accuracy:0.4738,  val_loss:0.7208,  
..

KeyboardInterrupt: 

In [None]:
#Evaluation: plot training & validation accuracy values
# The training cell stores its History under 'mdpi/local'; the 'mdpi' key is
# never populated, so looking it up raised a KeyError.
plot_report(histories['mdpi/local'])

In [None]:
# Final check on the held-out test split: announce the step, run the
# evaluation, then print whatever mdpi.evaluate returned.
print("\n# Evaluate on test data")
results = mdpi.evaluate(test_dataset)
print("test loss, test acc:", results)