# Test the 1D convolutional neural network (CNN) model

## Import necessary modules

In [1]:
# python built-in libraries
import datetime

In [2]:
# choose the tensorflow log level
# NOTE(review): this runs before the `import tensorflow` cell below; presumably the
# variable has to be set before that import to take effect -- keep this cell first.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}

In [3]:
# import tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [4]:
# other packages
import numpy as np

import tfhelper.core as tfhelper

## Global Variables

In [5]:
# ---- sampling ----
# path to the file of the feature tensor
FEATURES_FILE = "./data/tensors/features_continous.npy"
# path to the file of the label tensor
LABELS_FILE = "./data/tensors/labels_continous.npy"
# fractions of the samples used for (train, validation, test)
FRACTIONS = (0.8, 0.1, 0.1)
# number of samples per batch
BATCH_SIZE = 256
# size of the shuffle buffer: two batches
BUFFER_SIZE = 2 * BATCH_SIZE

# ---- training ----
# starting learning rate
LEARNING_RATE = 0.001
# decay 1
BETA1 = 0.9
# decay 2
BETA2 = 0.999
# number of epochs
EPOCHS = 200

# ---- saving ----
# time stamp taken when this cell runs; it makes the output folders unique
TIME_STAMP = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# path to the folder to save the model
SAVE_LOC = f"./data/models/cnn_2_{TIME_STAMP}"
# path to the log; if you change this, you also need to change it in the run_tensorboard
LOG_LOC = f"./data/logs/fit/cnn_2_{TIME_STAMP}"

## Obtain data

Each sample is a PDF with 209 data points. The label is a 2-dimensional one-hot vector: `10` means major phase >= threshold, `01` means major phase <= threshold.

In [6]:
def load_data(seed=None):
    """Load the feature and label tensors and split them into three batched datasets.

    The arrays are read from ``FEATURES_FILE`` and ``LABELS_FILE``. Every label
    row is sorted in descending order, the samples are shuffled once, and the
    result is split according to ``FRACTIONS`` into train, validation and test
    parts.

    Parameters
    ----------
    seed : int, optional
        Seed used for the sample permutation and for the shuffling of the
        training dataset, so that the split can be reproduced. With the
        default ``None`` the permutation is drawn from numpy's global random
        state and the dataset shuffle is unseeded.

    Returns
    -------
    tuple of tf.data.Dataset
        ``(train_data, valid_data, test_data)``, each batched with
        ``BATCH_SIZE``. Only the training dataset is shuffled (with a buffer
        of ``BUFFER_SIZE`` elements).

    Raises
    ------
    ValueError
        If the feature and label arrays do not hold the same number of samples.
    """
    # load
    features, labels = np.load(FEATURES_FILE), np.load(LABELS_FILE)
    n = features.shape[0]
    # a mismatch would otherwise pair features with the wrong labels or fail later
    if labels.shape[0] != n:
        raise ValueError(
            "features and labels must have the same number of samples, "
            "got {} and {}".format(n, labels.shape[0])
        )
    # sort the values of every label row in descending order
    labels = np.sort(labels, axis=1)[:, ::-1]
    # shuffle features and labels with the same permutation
    if seed is None:
        shuffled_idx = np.random.permutation(n)
    else:
        shuffled_idx = np.random.default_rng(seed).permutation(n)
    features, labels = features[shuffled_idx], labels[shuffled_idx]
    # split: i, j, k are the end indices of the train, validation and test parts
    f0, f1, f2 = FRACTIONS
    i, j, k = round(f0 * n), round((f0 + f1) * n), round((f0 + f1 + f2) * n)
    train_data = (
        tf.data.Dataset.from_tensor_slices((features[:i], labels[:i]))
        .shuffle(BUFFER_SIZE, seed=seed)
        .batch(BATCH_SIZE)
    )
    valid_data = tf.data.Dataset.from_tensor_slices((features[i:j], labels[i:j])).batch(BATCH_SIZE)
    test_data = tf.data.Dataset.from_tensor_slices((features[j:k], labels[j:k])).batch(BATCH_SIZE)
    return train_data, valid_data, test_data

In [7]:
# build the batched train / validation / test datasets
train_data, valid_data, test_data = load_data()

## Create the model

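We use a 1D convolutional neural network: two convolution stages (each made of two convolution layers followed by batch normalization and max pooling), then dropout, three dense ReLU layers, and a final 2-unit layer with a softmax function.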

In [8]:
def create_model():
    """Build the (uncompiled) 1D convolutional network.

    The 209-point input is reshaped into a single-channel sequence and passed
    through two convolution stages (32, then 64 filters). Each stage consists
    of two 'same'-padded ReLU convolutions with kernel size 3, a batch
    normalization and a max pooling with pool size 2. The stages are followed
    by a dropout of 0.2, a flatten layer, three ReLU dense layers
    (512, 128, 32 units) and a 2-unit softmax output layer.

    Returns
    -------
    keras.Sequential
        The assembled model.
    """
    def conv(filters):
        # kernel size 3, stride 1, ReLU activation, 'same' padding
        return keras.layers.Conv1D(filters, 3, strides=1, activation='relu', padding="same")

    model = keras.Sequential()
    stack = [
        keras.Input(shape=(209,)),
        keras.layers.Reshape((209, 1)),
        # first convolution stage
        conv(32),
        conv(32),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPooling1D(2),
        # second convolution stage
        conv(64),
        conv(64),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPooling1D(2),
        keras.layers.Dropout(0.2),
        keras.layers.Flatten(),
    ]
    # dense head
    stack += [keras.layers.Dense(units, activation='relu') for units in (512, 128, 32)]
    stack.append(keras.layers.Dense(2, activation='softmax'))
    # add the layers in declaration order
    for layer in stack:
        model.add(layer)
    return model

In [9]:
# instantiate the network built by create_model
model = create_model()

In [10]:
# print the layers of the model with their output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 209, 1)            0         
_________________________________________________________________
conv1d (Conv1D)              (None, 209, 32)           128       
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 209, 32)           3104      
_________________________________________________________________
batch_normalization (BatchNo (None, 209, 32)           128       
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 104, 32)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 104, 64)           6208      
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 104, 64)           1

## Choose optimization method

In [11]:
# Adam optimizer, mean squared error as the loss to minimize,
# cosine similarity as the monitored metric
model.compile(
    optimizer=keras.optimizers.Adam(
        learning_rate=LEARNING_RATE,
        beta_1=BETA1,
        beta_2=BETA2,
    ),
    loss=keras.losses.MeanSquaredError(),
    metrics=[keras.metrics.CosineSimilarity()],
)

## Train the model

In [12]:
# TensorBoard callback: logs are written to LOG_LOC, with histogram_freq=1
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=LOG_LOC, histogram_freq=1)

In [13]:
# early stopping to avoid overfitting: monitor the validation loss with a
# patience of 50 and restore the best weights
earlystopping_callback = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=50, restore_best_weights=True
)

In [14]:
# train for at most EPOCHS epochs on the training set, validating on the validation set
history = model.fit(
    x=train_data,
    validation_data=valid_data,
    epochs=EPOCHS,
    callbacks=[tensorboard_callback, earlystopping_callback],
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

## Evaluate the model

In [15]:
# evaluate on the test set; return_dict=True yields a {name: value} mapping
result = model.evaluate(x=test_data, return_dict=True)



In [16]:
# one line per entry: name padded to 20 characters, value with 4 decimals
for name, val in result.items():
    print(f"{name:20s} {val:.4f}")

loss                 0.0090
cosine_similarity    0.9887


## Save the model

In [17]:
# write the trained model to the time-stamped folder SAVE_LOC
model.save(SAVE_LOC)

INFO:tensorflow:Assets written to: ./data/models/cnn_2_20210914-150241/assets
