In [1]:
# Restrict TensorFlow to the first detected GPU.
# NOTE: this cell only changes which GPUs are visible; it makes no memory-limit
# or memory-growth call.

import tensorflow as tf
# list the physical GPUs TensorFlow can see (the `if` below skips setup when there are none)
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only use the first GPU
  try:
    # make only gpus[0] visible to the runtime
    tf.config.experimental.set_visible_devices(gpus[0], 'GPU') # Set the list of visible devices
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
  except RuntimeError as e:
    # Visible devices must be set before GPUs have been initialized
    print(e)

`tf.config.experimental.list_physical_devices(device_type=None)`
* Return a list of physical devices visible to the runtime.
* Physical devices are hardware devices locally present on the current machine. By default all discovered CPU and GPU devices are considered visible. The `list_physical_devices` allows querying the hardware prior to runtime initialization.

`tf.config.experimental.list_logical_devices(device_type=None)`
* Return a list of logical devices created by runtime.
* Logical devices may correspond to physical devices or remote devices in the cluster. Operations and tensors may be placed on these devices by using the `name` of the LogicalDevice.

Use `tf.config.list_physical_devices('GPU')` to confirm that TensorFlow can see the GPU.

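The code cell above restricts TensorFlow to the first GPU. It does not limit GPU memory growth; for that, call `tf.config.experimental.set_memory_growth(gpu, True)` on each GPU (see the guide linked below).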


More details about GPU
https://tensorflow.google.cn/guide/gpu

In [5]:
from tensorflow.keras.layers import Flatten, Dense, Dropout, Activation, Input, LSTM, Reshape, Conv2D, MaxPooling2D

In [None]:
# load packages
import pandas as pd
import pickle
import numpy as np
from tensorflow import keras
from keras import backend as K
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Flatten, Dense, Dropout, Activation, Input, LSTM, Reshape, Conv2D, MaxPooling2D
from keras.optimizers import Adam
from keras.layers.advanced_activations import LeakyReLU

from keras.utils import np_utils
import matplotlib.pyplot as plt

# set random seeds: seed NumPy's and TensorFlow's global random generators
np.random.seed(1)
tf.random.set_seed(2)

In [None]:
# # Data preparation

def prepare_x(data):
    """Return the first 40 rows of `data`, transposed, as a fresh NumPy array.

    Args:
        data: 2-D array indexed as (row, column).

    Returns:
        A new array of shape (number of columns, up to 40) holding a copy of
        `data[:40, :]` transposed.
    """
    return np.array(data[:40, :].T)

def get_label(data):
    """Return the last 5 rows of `data`, transposed.

    Args:
        data: 2-D array indexed as (row, column).

    Returns:
        The transpose of `data[-5:, :]`, i.e. one row per column of `data`
        and one column per label row.
    """
    label_rows = data[-5:, :]
    return label_rows.T

def data_classification(X, Y, T):
    """Cut `X` into overlapping windows of `T` consecutive rows and align `Y` with them.

    Window i holds rows i .. i + T - 1 of `X` and is paired with row i + T - 1
    of `Y`, i.e. the label belonging to the window's last row.

    Args:
        X: 2-D array with shape (N, D).
        Y: array-like of labels indexed along its first axis like the rows of `X`.
        T: window length (number of consecutive rows per sample).

    Returns:
        A tuple (windows, labels) where `windows` is a float64 array of shape
        (N - T + 1, T, D, 1) and `labels` is `np.array(Y)[T - 1:N]`.
    """
    n_rows, n_cols = X.shape
    rows = np.array(X)
    labels = np.array(Y)[T - 1:n_rows]

    n_windows = n_rows - T + 1
    windows = np.zeros((n_windows, T, n_cols))
    for start in range(n_windows):
        windows[start] = rows[start:start + T, :]

    # append a trailing axis of size 1 to the window array
    return windows[..., np.newaxis], labels

In [None]:
# please change the data_path to your local path
data_path = '/users/b152278'

# one training file and three test files; the test files are joined column-wise
dec_train = np.loadtxt(data_path + '/Train_Dst_NoAuction_DecPre_CF_7.txt')
dec_test1 = np.loadtxt(data_path + '/Test_Dst_NoAuction_DecPre_CF_7.txt')
dec_test2 = np.loadtxt(data_path + '/Test_Dst_NoAuction_DecPre_CF_8.txt')
dec_test3 = np.loadtxt(data_path + '/Test_Dst_NoAuction_DecPre_CF_9.txt')
dec_test = np.hstack((dec_test1, dec_test2, dec_test3))

# extract limit order book data from the FI-2010 dataset
train_lob = prepare_x(dec_train)
test_lob = prepare_x(dec_test)

# extract label from the FI-2010 dataset
train_label = get_label(dec_train)
test_label = get_label(dec_test)

# prepare training data. We feed the past 100 observations into our algorithms and choose the prediction horizon.
trainX_CNN, trainY_CNN = data_classification(train_lob, train_label, T=100)
# column 3 of the five label columns is the chosen horizon; subtracting 1 makes the
# class ids start at 0 as to_categorical requires (assumes raw labels are 1..3 — confirm)
trainY_CNN = trainY_CNN[:,3] - 1
# one-hot encode with tf.keras (the same Keras the model is built with) rather than the
# standalone `keras.utils.np_utils`, which newer Keras releases no longer ship
trainY_CNN = tf.keras.utils.to_categorical(trainY_CNN, 3)

# prepare test data (same windowing, horizon column and encoding as the training data).
testX_CNN, testY_CNN = data_classification(test_lob, test_label, T=100)
testY_CNN = testY_CNN[:,3] - 1

testY_CNN = tf.keras.utils.to_categorical(testY_CNN, 3)

In [96]:
# Rebuild the network one layer at a time to find the tensor shape at each step.
# Symbolic input of shape (100, 40, 1); the batch axis is not part of `shape`.
input_lmd = Input(shape=(100, 40, 1))

In [97]:
# (1, 2) kernel with stride (1, 2): pairs up neighbouring columns, 40 -> 20
conv_first1 = Conv2D(32, (1, 2), strides=(1, 2))(input_lmd)
conv_first1.shape

In [100]:
# LeakyReLU, then two (4, 1) convolutions each followed by LeakyReLU;
# padding='same' leaves the spatial shape unchanged
conv_first1 = tf.keras.layers.LeakyReLU(alpha=0.01)(conv_first1)
conv_first1 = Conv2D(32, (4, 1), padding='same')(conv_first1)
conv_first1 = tf.keras.layers.LeakyReLU(alpha=0.01)(conv_first1)
conv_first1 = Conv2D(32, (4, 1), padding='same')(conv_first1)
conv_first1 = tf.keras.layers.LeakyReLU(alpha=0.01)(conv_first1)
conv_first1.shape

In [107]:
# second (1, 2) kernel with stride (1, 2): halves the column axis again, 20 -> 10
conv_first1 = Conv2D(32, (1, 2), strides=(1, 2))(conv_first1)
conv_first1.shape

TensorShape([None, 100, 10, 32])

In [108]:
# same pattern as after the first strided convolution: LeakyReLU, then two
# 'same'-padded (4, 1) convolutions with LeakyReLU; shape stays (None, 100, 10, 32)
conv_first1 = tf.keras.layers.LeakyReLU(alpha=0.01)(conv_first1)
conv_first1 = Conv2D(32, (4, 1), padding='same')(conv_first1)
conv_first1 = tf.keras.layers.LeakyReLU(alpha=0.01)(conv_first1)
conv_first1 = Conv2D(32, (4, 1), padding='same')(conv_first1)
conv_first1 = tf.keras.layers.LeakyReLU(alpha=0.01)(conv_first1)
conv_first1.shape

TensorShape([None, 100, 10, 32])

In [109]:
# (1, 10) kernel with the default 'valid' padding spans all 10 remaining columns,
# collapsing the column axis to size 1
conv_first1 = Conv2D(32, (1, 10))(conv_first1)
conv_first1.shape

TensorShape([None, 100, 1, 32])

In [110]:
# LeakyReLU, then two more 'same'-padded (4, 1) convolutions with LeakyReLU;
# shape stays (None, 100, 1, 32)
conv_first1 = tf.keras.layers.LeakyReLU(alpha=0.01)(conv_first1)
conv_first1 = Conv2D(32, (4, 1), padding='same')(conv_first1)
conv_first1 = tf.keras.layers.LeakyReLU(alpha=0.01)(conv_first1)
conv_first1 = Conv2D(32, (4, 1), padding='same')(conv_first1)
conv_first1 = tf.keras.layers.LeakyReLU(alpha=0.01)(conv_first1)
conv_first1.shape

TensorShape([None, 100, 1, 32])

In [111]:
# inception branch 1: (1, 1) convolution then (3, 1) convolution, each followed
# by LeakyReLU; 64 filters -> 64 output channels
convsecond_1 = Conv2D(64, (1, 1), padding='same')(conv_first1)
convsecond_1 = tf.keras.layers.LeakyReLU(alpha=0.01)(convsecond_1)
convsecond_1 = Conv2D(64, (3, 1), padding='same')(convsecond_1)
convsecond_1 = tf.keras.layers.LeakyReLU(alpha=0.01)(convsecond_1)
convsecond_1.shape

TensorShape([None, 100, 1, 64])

In [126]:
conv_first1.shape

TensorShape([None, 100, 1, 32])

In [127]:
# inception branch 2: (1, 1) convolution then (5, 1) convolution, each followed
# by LeakyReLU; starts from the same conv_first1 tensor as branch 1
convsecond_2 = Conv2D(64, (1, 1), padding='same')(conv_first1)
convsecond_2 = tf.keras.layers.LeakyReLU(alpha=0.01)(convsecond_2)
convsecond_2 = Conv2D(64, (5, 1), padding='same')(convsecond_2)
convsecond_2 = tf.keras.layers.LeakyReLU(alpha=0.01)(convsecond_2)

In [128]:
convsecond_2.shape

TensorShape([None, 100, 1, 64])

In [129]:
# inception branch 3: (3, 1) max pooling with stride 1 and 'same' padding (shape
# preserved), then a (1, 1) convolution with LeakyReLU
convsecond_3 = MaxPooling2D((3, 1), strides=(1, 1), padding='same')(conv_first1)
convsecond_3 = Conv2D(64, (1, 1), padding='same')(convsecond_3)
convsecond_3 = tf.keras.layers.LeakyReLU(alpha=0.01)(convsecond_3)
convsecond_3.shape

TensorShape([None, 100, 1, 64])

In [131]:
# join the three branches along axis 3 (channels): 3 x 64 = 192 channels
convsecond_output = tf.keras.layers.concatenate([convsecond_1, convsecond_2, convsecond_3], axis=3)
convsecond_output.shape

TensorShape([None, 100, 1, 192])

In [133]:
convsecond_output.shape[1]

100

In [134]:
convsecond_output.shape[3]

192

In [137]:
# drop the size-1 axis: (None, 100, 1, 192) -> (None, 100, 192), keeping axes 1 and 3
conv_reshape = Reshape((int(convsecond_output.shape[1]), int(convsecond_output.shape[3])))(convsecond_output)

In [138]:
# LSTM with 64 units over the reshaped (None, 100, 192) sequence
conv_lstm = LSTM(64)(conv_reshape)
conv_lstm.shape

In [140]:
# output layer: 3 units with softmax, one probability per class
out = Dense(3, activation='softmax')(conv_lstm)
out.shape

In [None]:
# # Model Architecture
# 
# Please find the detailed discussion of our model architecture in our paper.
def _conv_lrelu(x, filters, kernel_size, **conv_kwargs):
    """Apply Conv2D(filters, kernel_size, **conv_kwargs) followed by LeakyReLU(alpha=0.01)."""
    x = Conv2D(filters, kernel_size, **conv_kwargs)(x)
    return tf.keras.layers.LeakyReLU(alpha=0.01)(x)


def create_deeplob(T, NF, number_of_lstm):
    """Build and compile the convolution + inception + LSTM classifier.

    Args:
        T: number of rows (time steps) in each input window.
        NF: number of columns (features) per row. The two stride-(1, 2)
            convolutions reduce this axis to NF // 4, so NF must be at least 4.
        number_of_lstm: number of units in the LSTM layer.

    Returns:
        A compiled Keras `Model` that maps inputs of shape (batch, T, NF, 1)
        to softmax probabilities of shape (batch, 3). It is compiled with Adam,
        categorical cross-entropy loss and the accuracy metric.
    """
    input_lmd = Input(shape=(T, NF, 1))

    # build the convolutional block
    x = _conv_lrelu(input_lmd, 32, (1, 2), strides=(1, 2))  # (batch, T, NF // 2, 32)
    x = _conv_lrelu(x, 32, (4, 1), padding='same')
    x = _conv_lrelu(x, 32, (4, 1), padding='same')

    x = _conv_lrelu(x, 32, (1, 2), strides=(1, 2))  # (batch, T, NF // 4, 32)
    x = _conv_lrelu(x, 32, (4, 1), padding='same')
    x = _conv_lrelu(x, 32, (4, 1), padding='same')

    # Collapse whatever width is left (10 when NF == 40) instead of hard-coding
    # a (1, 10) kernel, so the model also builds for other feature counts.
    remaining_width = int(x.shape[2])
    x = _conv_lrelu(x, 32, (1, remaining_width))  # (batch, T, 1, 32)
    x = _conv_lrelu(x, 32, (4, 1), padding='same')
    x = _conv_lrelu(x, 32, (4, 1), padding='same')

    # build the inception module: three parallel branches over the same tensor,
    # each ending with 64 channels of shape (batch, T, 1, 64)
    branch_3x1 = _conv_lrelu(x, 64, (1, 1), padding='same')
    branch_3x1 = _conv_lrelu(branch_3x1, 64, (3, 1), padding='same')

    branch_5x1 = _conv_lrelu(x, 64, (1, 1), padding='same')
    branch_5x1 = _conv_lrelu(branch_5x1, 64, (5, 1), padding='same')

    branch_pool = MaxPooling2D((3, 1), strides=(1, 1), padding='same')(x)
    branch_pool = _conv_lrelu(branch_pool, 64, (1, 1), padding='same')

    merged = tf.keras.layers.concatenate([branch_3x1, branch_5x1, branch_pool], axis=3)  # (batch, T, 1, 192)
    # drop the size-1 width axis so the LSTM receives a (batch, T, 192) sequence
    sequence = Reshape((int(merged.shape[1]), int(merged.shape[3])))(merged)

    # build the last LSTM layer
    conv_lstm = LSTM(number_of_lstm)(sequence)  # (batch, number_of_lstm)

    # build the output layer
    out = Dense(3, activation='softmax')(conv_lstm)  # (batch, 3)
    model = Model(inputs=input_lmd, outputs=out)
    # NOTE(review): epsilon=1 is far larger than the Keras default of 1e-7 and strongly
    # damps Adam's per-parameter scaling; kept as in the original, confirm it is intended.
    adam = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

    return model


### Keras layers API

https://keras.io/api/layers/

#### Input object

`Input(shape=None)` 
* `Input()` is used to instantiate a Keras tensor. A Keras tensor is a symbolic tensor-like object, which we augment with certain attributes that allow us to build a Keras model just by knowing the inputs and outputs of the model.
   * `shape`: A shape tuple (integers), not including the batch size.

https://keras.io/api/layers/core_layers/input/

In [77]:
# example input: two identical 3 x 3 matrices, i.e. a float tensor of shape (2, 3, 3)
x = tf.constant([[[1., 2., 3.],
                 [4., 5., 6.],
                 [7., 8., 9.]],
                 
                 [[1., 2., 3.],
                 [4., 5., 6.],
                 [7., 8., 9.]]])

In [80]:
# add a trailing axis of size 1: (2, 3, 3) -> (2, 3, 3, 1)
x=tf.reshape(x, [2, 3, 3, 1])

In [81]:
x

<tf.Tensor: id=1323, shape=(2, 3, 3, 1), dtype=float32, numpy=
array([[[[1.],
         [2.],
         [3.]],

        [[4.],
         [5.],
         [6.]],

        [[7.],
         [8.],
         [9.]]],


       [[[1.],
         [2.],
         [3.]],

        [[4.],
         [5.],
         [6.]],

        [[7.],
         [8.],
         [9.]]]], dtype=float32)>

In [90]:
# the Reshape layer takes the target shape without the batch axis; x is already
# (2, 3, 3, 1), so its shape and values are unchanged
x=tf.keras.layers.Reshape([3,3,1])(x)

In [91]:
x

<tf.Tensor: id=1397, shape=(2, 3, 3, 1), dtype=float32, numpy=
array([[[[1.],
         [2.],
         [3.]],

        [[4.],
         [5.],
         [6.]],

        [[7.],
         [8.],
         [9.]]],


       [[[1.],
         [2.],
         [3.]],

        [[4.],
         [5.],
         [6.]],

        [[7.],
         [8.],
         [9.]]]], dtype=float32)>

#### Conv2D layer

`tf.keras.layers.Conv2D(filters, kernel_size, strides=(1, 1), padding='valid')`
* 2D convolution layer.
* This layer creates a convolution kernel that is convolved with the layer input to produce a tensor of outputs.
* `filters`: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution).
* `kernel_size`: An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window.
* `strides`: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the height and width.
* `padding`: one of `"valid"` or `"same"` (case-insensitive). 

https://keras.io/api/layers/convolution_layers/convolution2d/

In [147]:
# seed first so the randomly initialised kernel, and therefore the output, is repeatable;
# 16 filters -> 16 output channels
tf.random.set_seed(2)
tf.keras.layers.Conv2D(16, (1,3), strides=(1, 1), padding='same')(x)

<tf.Tensor: id=3000, shape=(2, 3, 3, 16), dtype=float32, numpy=
array([[[[ 0.02256036, -0.04763192, -0.37105578,  0.10203108,
           0.5915518 , -0.4305753 ,  0.3330405 , -0.59057033,
           0.80231094,  0.42239928,  0.14835277,  0.75566375,
          -0.41989833,  0.70394146,  0.21496657,  0.0541499 ],
         [-0.15648721, -0.23902962, -0.788972  , -0.09846756,
           1.3815429 , -0.44101706,  0.5103389 , -1.2312247 ,
           1.6579247 ,  0.807556  ,  0.6599337 ,  1.3844982 ,
          -0.3216521 ,  1.0579752 ,  0.10667959,  0.3310047 ],
         [-0.17307481, -0.90741163, -0.6394613 , -1.1054143 ,
           1.6324292 , -0.14045012, -0.11338243, -1.1495548 ,
           1.3848704 ,  0.54594636,  1.4275559 ,  0.79161835,
           0.8729454 ,  0.21263716, -0.99112236,  0.95702004]],

        [[ 0.21208641, -0.54826593, -1.0586529 , -0.19671175,
           1.9618788 , -1.4890447 ,  0.73139715, -1.8205382 ,
           2.362743  ,  1.2045224 ,  0.785442  ,  2.1063693 ,
 

In [124]:
# default stride (1, 1) with 'same' padding: the spatial size stays 3 x 3
tf.keras.layers.Conv2D(16, (1,3), padding='same')(x).shape

TensorShape([2, 3, 3, 16])

In [122]:
# stride (2, 2) with 'same' padding: 3 -> 2 along both spatial axes
tf.keras.layers.Conv2D(16, (1,3), strides=(2,2),padding='same')(x).shape

TensorShape([2, 2, 2, 16])

In [125]:
# 'valid' padding: a width-3 kernel fits only once across 3 columns, so width 3 -> 1
tf.keras.layers.Conv2D(16, (1,3), padding='valid')(x).shape

TensorShape([2, 3, 1, 16])

In [123]:
# stride (2, 2) with 'valid' padding: height 3 -> 2, width 3 -> 1
tf.keras.layers.Conv2D(16, (1,3), strides=(2,2),padding='valid')(x).shape

TensorShape([2, 2, 1, 16])

In [95]:
# strides=(1, 1) is the default, so this gives the same shape as the call without strides
tf.keras.layers.Conv2D(16, (1,3), strides=(1, 1), padding='same')(x).shape

TensorShape([2, 3, 3, 16])

#### LeakyReLU layer

`tf.keras.layers.LeakyReLU(alpha=0.3, **kwargs)`
* Leaky version of a Rectified Linear Unit. It allows a small gradient when the unit is not active.
* `alpha`: Float >= 0. Negative slope coefficient. Defaults to 0.3.


https://keras.io/api/layers/activation_layers/leaky_relu/

In [146]:
# raw Conv2D output (note the negative entries), for comparison with the LeakyReLU cell
tf.random.set_seed(2)
tf.keras.layers.Conv2D(16, (1,3), strides=(1, 1), padding='same')(x)

<tf.Tensor: id=2973, shape=(2, 3, 3, 16), dtype=float32, numpy=
array([[[[ 0.02256036, -0.04763192, -0.37105578,  0.10203108,
           0.5915518 , -0.4305753 ,  0.3330405 , -0.59057033,
           0.80231094,  0.42239928,  0.14835277,  0.75566375,
          -0.41989833,  0.70394146,  0.21496657,  0.0541499 ],
         [-0.15648721, -0.23902962, -0.788972  , -0.09846756,
           1.3815429 , -0.44101706,  0.5103389 , -1.2312247 ,
           1.6579247 ,  0.807556  ,  0.6599337 ,  1.3844982 ,
          -0.3216521 ,  1.0579752 ,  0.10667959,  0.3310047 ],
         [-0.17307481, -0.90741163, -0.6394613 , -1.1054143 ,
           1.6324292 , -0.14045012, -0.11338243, -1.1495548 ,
           1.3848704 ,  0.54594636,  1.4275559 ,  0.79161835,
           0.8729454 ,  0.21263716, -0.99112236,  0.95702004]],

        [[ 0.21208641, -0.54826593, -1.0586529 , -0.19671175,
           1.9618788 , -1.4890447 ,  0.73139715, -1.8205382 ,
           2.362743  ,  1.2045224 ,  0.785442  ,  2.1063693 ,
 

In [145]:
# same seed, so the convolution values match the raw Conv2D cell; with alpha=0 the
# negative slope is 0, so every negative entry becomes 0
tf.random.set_seed(2)
tf.keras.layers.LeakyReLU(alpha=0)(tf.keras.layers.Conv2D(16, (1,3), strides=(1, 1), padding='same')(x))

<tf.Tensor: id=2946, shape=(2, 3, 3, 16), dtype=float32, numpy=
array([[[[0.02256036, 0.        , 0.        , 0.10203108, 0.5915518 ,
          0.        , 0.3330405 , 0.        , 0.80231094, 0.42239928,
          0.14835277, 0.75566375, 0.        , 0.70394146, 0.21496657,
          0.0541499 ],
         [0.        , 0.        , 0.        , 0.        , 1.3815429 ,
          0.        , 0.5103389 , 0.        , 1.6579247 , 0.807556  ,
          0.6599337 , 1.3844982 , 0.        , 1.0579752 , 0.10667959,
          0.3310047 ],
         [0.        , 0.        , 0.        , 0.        , 1.6324292 ,
          0.        , 0.        , 0.        , 1.3848704 , 0.54594636,
          1.4275559 , 0.79161835, 0.8729454 , 0.21263716, 0.        ,
          0.95702004]],

        [[0.21208641, 0.        , 0.        , 0.        , 1.9618788 ,
          0.        , 0.73139715, 0.        , 2.362743  , 1.2045224 ,
          0.785442  , 2.1063693 , 0.        , 1.9162372 , 0.11773005,
          0.47847   ],
  

#### MaxPooling2D layer

`tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding="valid")`
* Max pooling operation for 2D spatial data.
* Downsamples the input along its spatial dimensions (height and width) by taking the maximum value over an input window (of size defined by `pool_size`) for each channel of the input. The window is shifted by strides along each dimension.
* The resulting output, when using the `"valid"` padding option, has a spatial shape (number of rows or columns) of: `output_shape = math.floor((input_shape - pool_size) / strides) + 1 (when input_shape >= pool_size)`
* The resulting output shape when using the `"same"` padding option is: `output_shape = math.floor((input_shape - 1) / strides) + 1`
* `pool_size`: integer or tuple of 2 integers, window size over which to take the maximum. (2, 2) will take the max value over a 2x2 pooling window. If only one integer is specified, the same window length will be used for both dimensions.
* `strides`: Integer, tuple of 2 integers, or None. Strides values. Specifies how far the pooling window moves for each pooling step. If None, it will default to pool_size.
* `padding`: One of `"valid"` or `"same"` (case-insensitive). `"valid"` means no padding. `"same"` results in padding evenly to the left/right or up/down of the input such that output has the same height/width dimension as the input.

https://keras.io/api/layers/pooling_layers/max_pooling2d/

In [168]:
 x = tf.constant([[1., 2., 3.],
                  [4., 5., 6.],
                  [7., 8., 9.]])
x

<tf.Tensor: id=3094, shape=(3, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.],
       [7., 8., 9.]], dtype=float32)>

In [170]:
# reshape the 3 x 3 matrix to a 4-D tensor of shape (1, 3, 3, 1) for the pooling layer
x=tf.reshape(x, [1, 3, 3, 1])
x

<tf.Tensor: id=3098, shape=(1, 3, 3, 1), dtype=float32, numpy=
array([[[[1.],
         [2.],
         [3.]],

        [[4.],
         [5.],
         [6.]],

        [[7.],
         [8.],
         [9.]]]], dtype=float32)>

In [172]:
# 2 x 2 window, stride 1, 'valid' padding: only full windows are used, so 3 x 3 -> 2 x 2
max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='valid')
max_pool_2d(x)

<tf.Tensor: id=3100, shape=(1, 2, 2, 1), dtype=float32, numpy=
array([[[[5.],
         [6.]],

        [[8.],
         [9.]]]], dtype=float32)>

In [171]:
# same window and stride with 'same' padding: the output keeps the 3 x 3 spatial size
max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same')
max_pool_2d(x)

<tf.Tensor: id=3099, shape=(1, 3, 3, 1), dtype=float32, numpy=
array([[[[5.],
         [6.],
         [6.]],

        [[8.],
         [9.],
         [9.]],

        [[8.],
         [9.],
         [9.]]]], dtype=float32)>

#### Concatenate layer

`tf.keras.layers.Concatenate(axis=-1, **kwargs)` (functional form: `tf.keras.layers.concatenate(inputs, axis=-1, **kwargs)`)

* Layer that concatenates a list of inputs.
* It takes as input a list of tensors, all of the same shape except for the concatenation axis, and returns a single tensor that is the concatenation of all inputs.

https://keras.io/api/layers/merging_layers/concatenate/

In [23]:
# first input for the Concatenate demo: integers 0..19 arranged as shape (2, 2, 5)
# NOTE: this rebinds `x` (a TensorFlow tensor in the cells above) to a NumPy array
x = np.arange(20).reshape(2, 2, 5)
print(x)

[[[ 0  1  2  3  4]
  [ 5  6  7  8  9]]

 [[10 11 12 13 14]
  [15 16 17 18 19]]]


In [24]:
# second input: integers 20..29 as shape (2, 1, 5); it matches x on every axis except axis 1
y = np.arange(20, 30).reshape(2, 1, 5)
print(y)

[[[20 21 22 23 24]]

 [[25 26 27 28 29]]]


In [28]:
# concatenate along axis 1: (2, 2, 5) and (2, 1, 5) -> (2, 3, 5)
tf.keras.layers.Concatenate(axis=1)([x, y])

<tf.Tensor: id=1246, shape=(2, 3, 5), dtype=int32, numpy=
array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [20, 21, 22, 23, 24]],

       [[10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [25, 26, 27, 28, 29]]])>

#### Reshape layer

`tf.keras.layers.Reshape(target_shape)`
* Layer that reshapes inputs into the given shape.
* Input shape: Arbitrary, although all dimensions in the input shape must be known/fixed. Use the keyword argument `input_shape` (tuple of integers, does not include the samples/batch size axis) when using this layer as the first layer in a model.
* Output shape: `(batch_size,) + target_shape`

https://keras.io/api/layers/reshaping_layers/reshape/

#### LSTM layer

`LSTM(units)`

* `units`: Positive integer, dimensionality of the output space.

https://stats.stackexchange.com/questions/405163/what-is-the-output-of-an-lstm

https://keras.io/api/layers/recurrent_layers/lstm/

https://colab.research.google.com/github/kmkarakaya/ML_tutorials/blob/master/LSTM_Understanding_Output_Types.ipynb

In [173]:
# random input of shape (32, 10, 8) fed to an LSTM with 4 units -> output shape (32, 4)
tf.random.set_seed(2)
inputs = tf.random.normal([32, 10, 8])
lstm = tf.keras.layers.LSTM(4)
output = lstm(inputs)

In [174]:
output.shape

TensorShape([32, 4])

In [175]:
output

<tf.Tensor: id=3501, shape=(32, 4), dtype=float32, numpy=
array([[ 0.09413459, -0.17695509, -0.18305953, -0.31181994],
       [-0.08445209,  0.2748353 ,  0.12777619,  0.0444978 ],
       [ 0.03007457, -0.13513406, -0.21724427, -0.04993619],
       [ 0.3366659 , -0.03555514, -0.15768133,  0.03883805],
       [ 0.44277942, -0.11839138, -0.1934308 ,  0.09702534],
       [-0.22052634,  0.23573917,  0.09343931, -0.24706258],
       [-0.15747093,  0.04004642,  0.1250249 , -0.2187701 ],
       [ 0.11924846, -0.11971949, -0.02756443,  0.31493527],
       [ 0.07165063, -0.21045978, -0.1805625 ,  0.09383859],
       [ 0.42524016, -0.15774517, -0.04876529,  0.1422765 ],
       [ 0.147457  , -0.02021897, -0.04197685,  0.470117  ],
       [-0.08220198,  0.02921901,  0.19284774, -0.25388905],
       [-0.4847142 ,  0.1034104 ,  0.03589425, -0.7184677 ],
       [-0.32839295,  0.16934715,  0.19576101, -0.07985481],
       [ 0.38353828, -0.23423022, -0.08337194,  0.04048729],
       [ 0.32051775,  0.108

#### Dense layer

`tf.keras.layers.Dense(units, activation=None)`

* Just your regular densely-connected NN layer.
* `Dense` implements the operation: `output = activation(dot(input, kernel) + bias)` where `activation` is the element-wise activation function passed as the `activation` argument, `kernel` is a weights matrix created by the layer, and `bias` is a bias vector created by the layer (only applicable if `use_bias` is `True`).
* `units`: Positive integer, dimensionality of the output space.
* `activation`: Activation function to use. If you don't specify anything, no activation is applied (ie. `"linear"` activation: `a(x) = x`).



https://keras.io/api/layers/core_layers/dense/

In [176]:
# Rebuild the (1, 3, 3, 1) example tensor here: `x` was rebound to a NumPy array in the
# Concatenate example, so relying on the earlier tensor breaks Restart & Run All.
x = tf.reshape(tf.constant([[1., 2., 3.],
                            [4., 5., 6.],
                            [7., 8., 9.]]), [1, 3, 3, 1])
x

<tf.Tensor: id=3098, shape=(1, 3, 3, 1), dtype=float32, numpy=
array([[[[1.],
         [2.],
         [3.]],

        [[4.],
         [5.],
         [6.]],

        [[7.],
         [8.],
         [9.]]]], dtype=float32)>

In [178]:
# Dense acts on the last axis only: (1, 3, 3, 1) -> (1, 3, 3, 3); no activation is applied
tf.random.set_seed(2)
tf.keras.layers.Dense(3)(x)

<tf.Tensor: id=3576, shape=(1, 3, 3, 3), dtype=float32, numpy=
array([[[[-0.8649089 , -0.08755267, -0.67385525],
         [-1.7298177 , -0.17510533, -1.3477105 ],
         [-2.5947266 , -0.262658  , -2.0215657 ]],

        [[-3.4596355 , -0.35021067, -2.695421  ],
         [-4.3245444 , -0.43776333, -3.3692763 ],
         [-5.189453  , -0.525316  , -4.0431314 ]],

        [[-6.0543623 , -0.61286867, -4.7169867 ],
         [-6.919271  , -0.70042133, -5.390842  ],
         [-7.7841797 , -0.787974  , -6.0646973 ]]]], dtype=float32)>

In [179]:
# with an all-ones kernel the three units receive identical values, so the softmax
# over the last axis is uniform (1/3 each)
tf.random.set_seed(2)
tf.keras.layers.Dense(3, activation='softmax',kernel_initializer='ones')(x)

<tf.Tensor: id=3615, shape=(1, 3, 3, 3), dtype=float32, numpy=
array([[[[0.33333334, 0.33333334, 0.33333334],
         [0.33333334, 0.33333334, 0.33333334],
         [0.33333334, 0.33333334, 0.33333334]],

        [[0.33333334, 0.33333334, 0.33333334],
         [0.33333334, 0.33333334, 0.33333334],
         [0.33333334, 0.33333334, 0.33333334]],

        [[0.33333334, 0.33333334, 0.33333334],
         [0.33333334, 0.33333334, 0.33333334],
         [0.33333334, 0.33333334, 0.33333334]]]], dtype=float32)>

#### The Model class

Model groups layers into an object with training and inference features.
* `inputs`: The input(s) of the model: a `keras.Input` object or list of `keras.Input` objects.
* `outputs`: The output(s) of the model.
* `name`: String, the name of the model.
* Once the model is created, you can configure the model with losses and metrics with `model.compile()`, train the model with `model.fit()`, or use the model to do prediction with `model.predict()`.
* `summary` method
    * `Model.summary()`
    * Prints a string summary of the network.
    
https://keras.io/api/models/model/

https://keras.io/api/models/

##### compile method

`Model.compile(optimizer="rmsprop", loss=None, metrics=None)`
* Configures the model for training.
* `optimizer`: String (name of optimizer) or optimizer instance. See `tf.keras.optimizers`.
* `loss`: Loss function. May be a string (name of loss function), or a `tf.keras.losses.Loss` instance. See [`tf.keras.losses`](https://keras.io/api/losses/). A loss function is any callable with the signature `loss = fn(y_true, y_pred)`, where `y_true` are the ground truth values, and `y_pred` are the model's predictions. `y_true` should have shape `(batch_size, d0, .. dN)`. `y_pred` should have shape `(batch_size, d0, .. dN)`. The loss function should return a float tensor. 
* `metrics`: List of metrics to be evaluated by the model during training and testing. Each of these can be a string (name of a built-in function), function or a `tf.keras.metrics.Metric` instance. See `tf.keras.metrics`. Typically you will use `metrics=['accuracy']`. A function is any callable with the signature `result = fn(y_true, y_pred)`. To specify different metrics for different outputs of a multi-output model, you could also pass a dictionary, such as `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`. You can also pass a list to specify a metric or a list of metrics for each output, such as `metrics=[['accuracy'], ['accuracy', 'mse']]` or `metrics=['accuracy', ['accuracy', 'mse']]`.

https://keras.io/api/models/model_training_apis/#compile-method


##### fit method

`Model.fit(x=None,y=None,batch_size=None,epochs=1,verbose="auto", validation_data=None)`
* Trains the model for a fixed number of epochs (iterations on a dataset).
* `x`: Input data. It could be:
  - A Numpy array (or array-like), or a list of arrays
    (in case the model has multiple inputs).
  - A TensorFlow tensor, or a list of tensors
    (in case the model has multiple inputs).
  - A dict mapping input names to the corresponding array/tensors,
    if the model has named inputs.
  - A `tf.data` dataset. 
  - A generator or `keras.utils.Sequence`.
* `y`: Target data. 
    * Like the input data `x`, it could be either Numpy array(s) or TensorFlow tensor(s).
    * It should be consistent with `x` (you cannot have Numpy inputs and tensor targets, or inversely). 
    * If `x` is a dataset, generator, or `keras.utils.Sequence` instance, `y` should not be specified (since targets will be obtained from `x`).
* `batch_size`: Integer or `None`.
    Number of samples per gradient update.
    If unspecified, `batch_size` will default to 32.
    Do not specify the `batch_size` if your data is in the
    form of symbolic tensors, datasets,
    generators, or `keras.utils.Sequence` instances (since they generate
    batches).
* `epochs`: Integer. Number of epochs to train the model.
    An epoch is an iteration over the entire `x` and `y`
    data provided.
* `verbose`: 0, 1, or 2. Verbosity mode.
    * 0 = silent, 1 = progress bar, 2 = one line per epoch.
    * Note that the progress bar is not particularly useful when logged to a file, so verbose=2 is recommended when not running interactively (eg, in a production environment).

* `validation_data`: Data on which to evaluate the loss and any model metrics at the end of each epoch.
    * The model will not be trained on this data.

#### Adam

`tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07)`
* Optimizer that implements the Adam algorithm.
* Adam optimization is a stochastic gradient descent method that is based on adaptive estimation of first-order and second-order moments.
* `learning_rate`: The learning rate. Defaults to 0.001.
* `beta_1`: The exponential decay rate for the 1st moment estimates. Defaults to 0.9. 0 < beta < 1, usually close to 1.
* `beta_2`: The exponential decay rate for the 2nd moment estimates. Defaults to 0.999. 0 < beta < 1, usually close to 1.
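* `epsilon`: A small constant for numerical stability. Float, epsilon > 0. It is normally a very small number that prevents division by zero in the implementation. Note that `create_deeplob` in this notebook passes `epsilon=1`, far larger than the default of `1e-07`.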

https://keras.io/api/optimizers/

https://arxiv.org/pdf/1412.6980v8.pdf

https://www.jianshu.com/p/aebcaf8af76e

https://venkat-rajgopal.github.io/Rectified-ADAM-optimizer/

https://stats.stackexchange.com/questions/265400/deep-learning-how-does-beta-1-and-beta-2-in-the-adam-optimizer-affect-its-lear

In [None]:
# build the model for T=100, NF=40 inputs with a 64-unit LSTM
deeplob = create_deeplob(100,40,64)

# # Model Training
# NOTE(review): the test arrays are passed as validation_data, so the per-epoch
# validation metrics are computed on the test set — confirm this is intended.
deeplob.fit(trainX_CNN, trainY_CNN, epochs=200, batch_size=32, verbose=2, validation_data=(testX_CNN, testY_CNN))

# save the trained model under the path 'paper5.model'
deeplob.save('paper5.model')

# print the layer-by-layer summary of the network
deeplob.summary()

#### links

Conv2d
https://www.leiphone.com/category/yanxishe/fhJ72ZPlmYr12CwV.html


https://blog.csdn.net/Zh_1999a/article/details/107526001

padding

https://stackoverflow.com/questions/37674306/what-is-the-difference-between-same-and-valid-padding-in-tf-nn-max-pool-of-t

Keras tutorial

https://victorzhou.com/blog/keras-cnn-tutorial/