# Resources
https://towardsdatascience.com/the-time-series-transformer-2a521a0efad3

https://github.com/ntakouris/timeseries-pretrain-tests/blob/main/transformers.py

# Setup

In [None]:
import pickle
import time
from functools import partial

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import backend as K

from custom_methods import model_eval

# Relative directory prefix that is prepended to data file names when loading.
datapath = '../Data/'

# Wall-clock timestamp taken when this cell runs; presumably used later to
# report total runtime — TODO confirm against the rest of the notebook.
startTime = time.time()

In [None]:
# Load the pre-built model inputs (X) and labels (Y) from the pickle file.
filename = 'ann_ready.pickle'
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only load files that come from a trusted source.
# The context manager closes the file even if unpickling raises.
with open(datapath + filename, 'rb') as infile:
    data = pickle.load(infile)
X = data['X']
Y = data['Y']

# Quick sanity check: show the container type and one example record.
print(type(X))
print('Format: [num_data_objects, max_sequence_length, num_vars]')
print('\nX')
print(X[1])
print('\nY')
print(Y[1])

## Split Training, Testing Data

In [None]:
# Standardize X in place, fitting one StandardScaler per index along axis 1
# (the sequence-length axis, per the format string printed when X is loaded).
# The fitted scalers are kept in `scalers`, keyed by that index.
# NOTE(review): the scalers are fit on all of X before any train/test split,
# so held-out rows contribute to the scaling statistics — confirm intended.
scalers = {}
for i in range(X.shape[1]):
    scalers[i] = StandardScaler()
    X[:, i, :] = scalers[i].fit_transform(X[:, i, :])

# Passed as `test_frac` to split_on_people below.
TEST_FRAC = 1/3

# Split Train/Test sets
# NOTE(review): `df` and `id_col` are not defined in the visible part of this
# file, and the scaled `X` above is not what gets split here — TODO confirm.
df_train, df_test = model_eval.split_on_people(df, id_col=id_col, test_frac=TEST_FRAC)

# Split X and Y
# Features: every column except the CMIS_MATCH label.
# Labels: the first CMIS_MATCH value within each `id_col` group.
X_train = df_train.drop('CMIS_MATCH', axis=1)
Y_train = df_train[['CMIS_MATCH', id_col]].groupby(id_col).first()

X_test = df_test.drop('CMIS_MATCH', axis=1)
Y_test = df_test[['CMIS_MATCH', id_col]].groupby(id_col).first()

# Model

In [None]:
# Custom time embedding layer
# https://towardsdatascience.com/the-time-series-transformer-2a521a0efad3

class Time2Vec(keras.layers.Layer):
    """Time-embedding layer combining one linear term with `kernel_size` sine terms.

    For an input whose axis 1 has F entries, the output has F * (kernel_size + 1)
    entries per row: one linear ("trend") value and `kernel_size` periodic values
    per input entry.

    Args:
        kernel_size: number of periodic (sine) components.
        **kwargs: standard Keras Layer arguments. `name` defaults to
            'Time2VecLayer' and `trainable` to True, matching the previous
            hard-coded values; accepting them lets `from_config` rebuild the layer.
    """

    def __init__(self, kernel_size=1, **kwargs):
        kwargs.setdefault('trainable', True)
        kwargs.setdefault('name', 'Time2VecLayer')
        super().__init__(**kwargs)
        self.k = kernel_size

    def build(self, input_shape):
        """Create the trend (wb, bb) and periodic (wa, ba) weights."""
        # trend
        self.wb = self.add_weight(name='wb', shape=(input_shape[1],), initializer='uniform', trainable=True)
        self.bb = self.add_weight(name='bb', shape=(input_shape[1],), initializer='uniform', trainable=True)
        # periodic
        self.wa = self.add_weight(name='wa', shape=(1, input_shape[1], self.k), initializer='uniform', trainable=True)
        self.ba = self.add_weight(name='ba', shape=(1, input_shape[1], self.k), initializer='uniform', trainable=True)
        super().build(input_shape)

    def call(self, inputs, **kwargs):
        """Return the flattened linear and sine features for `inputs`."""
        # Linear term: element-wise scale and shift of the inputs.
        bias = self.wb * inputs + self.bb
        # Periodic term: sine of a learned projection of the inputs plus offset.
        dp = K.dot(inputs, self.wa) + self.ba
        wgts = K.sin(dp)  # or K.cos(.)

        # Put the linear value next to its periodic values, then flatten the
        # last two axes into a single feature axis.
        ret = K.concatenate([K.expand_dims(bias, -1), wgts], -1)
        ret = K.reshape(ret, (-1, inputs.shape[1] * (self.k + 1)))
        return ret

    def compute_output_shape(self, input_shape):
        """Return (batch, F * (kernel_size + 1)) for an input of shape (batch, F)."""
        return (input_shape[0], input_shape[1] * (self.k + 1))

    def get_config(self):
        """Return the layer config, including `kernel_size`, for serialization."""
        config = super().get_config()
        config['kernel_size'] = self.k
        return config

In [None]:
# Create attention block
# https://towardsdatascience.com/the-time-series-transformer-2a521a0efad3

from tensorflow_addons.layers import MultiHeadAttention

class AttentionBlock(keras.Model):
    """Attention sub-layer followed by a kernel-size-1 Conv1D feed-forward sub-layer.

    Each sub-layer applies dropout, adds the block's inputs back to the result,
    and layer-normalizes the sum.

    Args:
        name: model name passed to keras.Model.
        num_heads: forwarded to MultiHeadAttention.
        head_size: forwarded to MultiHeadAttention.
        ff_dim: filter count of the first feed-forward convolution; falls back
            to `head_size` when None.
        dropout: rate used for the attention layer and both Dropout layers.
        **kwargs: forwarded to keras.Model.
    """

    def __init__(self, name='AttentionBlock', num_heads=2, head_size=128, ff_dim=None, dropout=0, **kwargs):
        super().__init__(name=name, **kwargs)

        hidden_filters = head_size if ff_dim is None else ff_dim

        # Attention sub-layer.
        self.attention = MultiHeadAttention(num_heads=num_heads, head_size=head_size, dropout=dropout)
        self.attention_dropout = keras.layers.Dropout(dropout)
        self.attention_norm = keras.layers.LayerNormalization(epsilon=1e-6)

        # Feed-forward sub-layer. The second convolution (ff_conv2) needs the
        # input channel count, so it is created in build().
        self.ff_conv1 = keras.layers.Conv1D(filters=hidden_filters, kernel_size=1, activation='relu')
        self.ff_dropout = keras.layers.Dropout(dropout)
        self.ff_norm = keras.layers.LayerNormalization(epsilon=1e-6)

    def build(self, input_shape):
        """Create the output convolution so its width matches the input's last axis."""
        channels = input_shape[-1]
        self.ff_conv2 = keras.layers.Conv1D(filters=channels, kernel_size=1)

    def call(self, inputs):
        """Run both sub-layers and return a tensor with the inputs' last-axis width."""
        # The attention layer receives the inputs as both elements of its input list.
        attended = self.attention_dropout(self.attention([inputs, inputs]))
        normalized = self.attention_norm(inputs + attended)

        projected = self.ff_conv2(self.ff_conv1(normalized))
        projected = self.ff_dropout(projected)

        # NOTE(review): this residual adds the block's raw inputs rather than the
        # attention sub-layer's output — confirm that is intended.
        return self.ff_norm(inputs + projected)

In [None]:
# https://towardsdatascience.com/the-time-series-transformer-2a521a0efad3

class ModelTrunk(keras.Model):
    """Time2Vec embedding followed by a stack of AttentionBlocks, flattened per sample.

    Args:
        name: model name passed to keras.Model.
        time2vec_dim: `kernel_size` of the Time2Vec embedding.
        num_heads: forwarded to every AttentionBlock.
        head_size: forwarded to every AttentionBlock.
        ff_dim: forwarded to every AttentionBlock; falls back to `head_size`
            when None.
        num_layers: number of AttentionBlocks applied in sequence.
        dropout: forwarded to every AttentionBlock.
        **kwargs: forwarded to keras.Model.
    """

    # Method indentation is uniform (4 spaces); mixed 6/4-space indentation in
    # a class body makes Python reject the class with an IndentationError.
    def __init__(self, name='ModelTrunk', time2vec_dim=1, num_heads=2, head_size=128, ff_dim=None, num_layers=1, dropout=0, **kwargs):
        super().__init__(name=name, **kwargs)
        self.time2vec = Time2Vec(kernel_size=time2vec_dim)
        if ff_dim is None:
            ff_dim = head_size
        self.dropout = dropout
        self.attention_layers = [
            AttentionBlock(num_heads=num_heads, head_size=head_size, ff_dim=ff_dim, dropout=dropout)
            for _ in range(num_layers)
        ]

    def call(self, inputs):
        """Embed, attend, and flatten `inputs` into one feature vector per sample."""
        # Apply the shared Time2Vec layer through a TimeDistributed wrapper.
        # NOTE(review): a new wrapper object is created on every call; the
        # wrapped Time2Vec layer (and its weights) is the same instance each time.
        time_embedding = keras.layers.TimeDistributed(self.time2vec)(inputs)
        # Append the time embedding to the raw features along the last axis.
        x = K.concatenate([inputs, time_embedding], -1)
        for attention_layer in self.attention_layers:
            x = attention_layer(x)

        # Flat vector of features out: merge axes 1 and 2.
        return K.reshape(x, (-1, x.shape[1] * x.shape[2]))

In [None]:
# Custom learning rate
# https://towardsdatascience.com/the-time-series-transformer-2a521a0efad3

def lr_scheduler(epoch, lr, warmup_epochs=15, decay_epochs=100, initial_lr=1e-6, base_lr=1e-3, min_lr=5e-5):
    """Return the learning rate for `epoch`: linear warm-up, then linear decay.

    Args:
        epoch: zero-based epoch index.
        lr: current learning rate; unused, present because the Keras
            LearningRateScheduler callback calls `schedule(epoch, lr)`.
        warmup_epochs: epochs over which the rate rises from `initial_lr`
            to `base_lr`. Zero (or less) skips the warm-up phase.
        decay_epochs: epochs over which the rate falls from `base_lr`
            to `min_lr` after warm-up. Zero (or less) skips the decay phase.
        initial_lr: rate at epoch 0 when warm-up is enabled.
        base_lr: peak rate, reached at the end of warm-up.
        min_lr: rate returned once the decay phase is over.

    Returns:
        The learning rate to use for `epoch`.
    """
    # Guard on warmup_epochs so a zero-length warm-up cannot divide by zero.
    if warmup_epochs > 0 and epoch <= warmup_epochs:
        pct = epoch / warmup_epochs
        return ((base_lr - initial_lr) * pct) + initial_lr

    # Any epoch reaching this point is already past the warm-up phase.
    if decay_epochs > 0 and epoch < warmup_epochs + decay_epochs:
        pct = 1 - ((epoch - warmup_epochs) / decay_epochs)
        return ((base_lr - min_lr) * pct) + min_lr

    return min_lr

# Register the warm-up/decay schedule with Keras. Hyper-parameters are bound by
# keyword so the callback's positional (epoch, lr) arguments reach lr_scheduler
# unchanged; a positional `...` placeholder would be bound as `epoch`.
# The values below equal lr_scheduler's defaults — tune them here.
# NOTE(review): `callbacks` must already be a list at this point; it is not
# defined in the visible part of this file — TODO confirm.
callbacks += [keras.callbacks.LearningRateScheduler(
    partial(lr_scheduler, warmup_epochs=15, decay_epochs=100, initial_lr=1e-6, base_lr=1e-3, min_lr=5e-5),
    verbose=0,
)]