In [1]:
import matplotlib.pyplot as plt
import numpy as np

! pip install -q pyyaml h5py  # Required to save models in HDF5 format

### Mount Google Drive

**Requires the `dataset_tensor.npy` file to be present in the "Colab Notebooks/Tensorized Transformers/Data" folder of your Google Drive.**

In [2]:

from google.colab import drive

# Attach Google Drive to the Colab filesystem; the dataset is read from it.
drive.mount('/content/drive')

# Project folder on the mounted Drive, and the data sub-folder inside it.
PATH = '/content/drive/My Drive/Colab Notebooks/Tensorized Transformers/'
DATA_PATH = f'{PATH}Data/'

Mounted at /content/drive


### Clone the Tensorized Transformers GitHub repository

In [3]:
# Ask for the GitHub account name with Python's own prompt. Capturing the
# output of a shell `read` only works when the front-end echoes the typed text
# into the captured stream, and indexing the captured list fails when it is
# empty; input() always returns a string.
print('Github username:')
git_username = input().strip()

Github username:


In [4]:
from getpass import getpass

# Read the personal access token without echoing it, so the secret is neither
# displayed while typing nor stored in the notebook's saved cell output.
print('Github access token (https://github.com/settings/tokens):')
git_token = getpass('').strip()

Github access token (https://github.com/settings/tokens):


In [None]:
# Clone the entire project repository into /content. IPython substitutes the
# git_username and git_token Python variables into the shell command below.
# NOTE(review): -l/-s are git's --local/--shared options, which are intended
# for clones from a local path; they look unnecessary for an https URL - confirm.
# NOTE(review): the token is part of the clone URL, so presumably it is also
# kept in the clone's remote configuration - confirm before sharing the checkout.
%cd /content
!git clone -l -s https://$git_username:$git_token@github.com/onurbil/tensorized_transformers.git tensorized_transformers
# List the checkout as a quick sanity check, then go back to /content.
%cd tensorized_transformers
!ls
%cd ..

# Root of the checkout; a later cell appends it to sys.path.
REPO_PATH = '/content/tensorized_transformers'

In [None]:
import sys

# Make the cloned repository importable. The membership check keeps the cell
# idempotent: re-running it no longer adds duplicate entries to sys.path.
if REPO_PATH not in sys.path:
    sys.path.append(REPO_PATH)
print(sys.path)

## Experiments

In [None]:
import tensorflow as tf
import tensorflow.keras as kr
import numpy as np
import matplotlib.pyplot as plt

import os

import model.tt_mod_weights as tt ###### MODEL 1 SHOULD CHANGE TO ---> import model.tensorized_transformer as tt
import dataset_tools.split
from visualization_tools.visualization import visualize_pos_encoding, attention_plotter

from tensorflow.keras.callbacks import TensorBoard
import datetime

# Load the TensorBoard notebook extension and open the dashboard on the parent
# directory under which the training cell creates its timestamped log folder.
%load_ext tensorboard
%tensorboard --logdir '/notebooks/output'

In [None]:
# Load dataset: the tensor file lives in the Drive data folder.
filename = DATA_PATH + 'dataset_tensor.npy'
dataset = np.load(filename, allow_pickle=True)

# Per-run TensorBoard log directory, named after the current timestamp.
# NOTE: `dir` shadows the Python builtin; the name is kept because the
# training call further down reads it.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
dir = '/notebooks/output/' + run_stamp

print(dataset.shape)

###### ALL PARAMETERS HERE######:
# Windowing arguments forwarded to dataset_tools.split.get_xy.
input_length, lag = 16, 4

# Arguments of the encoder layers.
softmax_type = 2
head_num = 16
d_model = 32
dense_units = 64
initializer = 'RandomNormal'

# Optimisation settings.
epoch = 100
learning_rate = 0.001
batch_size = 64

# Sizes of the trailing slices used for training and validation.
num_examples = 10_000
num_valid_examples = 500

# Split the dataset and build (input window, target) pairs for each part.
train, test = dataset_tools.split.split_train_test(dataset)
window_args = dict(input_length=input_length, lag=lag)
x_train, y_train = dataset_tools.split.get_xy(train, **window_args)
x_test, y_test = dataset_tools.split.get_xy(test, **window_args)

# Give every sample the dataset's two trailing axes again.
trailing_axes = (dataset.shape[1], dataset.shape[2])
x_train = tf.reshape(x_train, (x_train.shape[0], x_train.shape[1], *trailing_axes))
y_train = tf.reshape(y_train, (y_train.shape[0], *trailing_axes))
x_test = tf.reshape(x_test, (x_test.shape[0], x_test.shape[1], *trailing_axes))
y_test = tf.reshape(y_test, (y_test.shape[0], *trailing_axes))

# Choosing first 29 cities
x_train, x_test = x_train[:, :, :29, :], x_test[:, :, :29, :]
y_train, y_test = y_train[:, :29, :], y_test[:, :29, :]

# Shapes handed to the model: one window of inputs in, a (1, 1) value out.
input_shape = (input_length, x_train.shape[-2], x_train.shape[-1])
output_shape = (1, 1)

# Choosing temperature as output (index 0 on the first kept axis, 4 on the last).
y_train, y_test = y_train[..., 0, 4], y_test[..., 0, 4]

print(f'x_train.shape: {x_train.shape}')
print(f'x_test.shape: {x_test.shape}')

# Depth of the encoder stack (previously six copy-pasted constructor calls).
num_encoder_layers = 6

# Positional encoding, a stack of identical encoder layers, then a linear
# read-out that is reshaped to `output_shape`.
model = kr.Sequential([
    kr.Input(shape=input_shape),
    tt.PositionalEncoding(broadcast=True),
    *[
        tt.EncoderLayer(input_length, d_model, head_num, dense_units, initializer, softmax_type)
        for _ in range(num_encoder_layers)
    ],
    kr.layers.Flatten(),
    # Dense documents `units` as a positive integer; convert the tensor
    # returned by tf.reduce_prod to a plain int instead of passing it through.
    kr.layers.Dense(int(tf.reduce_prod(output_shape)), activation='linear'),
    kr.layers.Reshape(output_shape),
])

model.summary()
model.compile(optimizer=kr.optimizers.Adam(learning_rate=learning_rate), loss='mse', metrics=['mae'])

# Hold out the num_valid_examples samples located directly before the last
# num_examples samples. This must run before x_train/y_train are trimmed below.
valid_span = slice(-num_examples - num_valid_examples, -num_examples)
x_valid, y_valid = x_train[valid_span], y_train[valid_span]
print(f'x_valid.shape: {x_valid.shape}')

# Keep only the last num_examples samples for training.
train_span = slice(-num_examples, None)
x_train, y_train = x_train[train_span], y_train[train_span]

# Callbacks
# Prints the attention weights held by model.layers[1] once training ends.
# It is defined here but is not part of the callback list given to fit().
print_attention_weights = kr.callbacks.LambdaCallback(
    on_train_end=lambda logs: print(model.layers[1].attention_weights),
)
early_stopping = kr.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
    verbose=1,
)
# Writes the run's logs to the timestamped directory stored in `dir`.
tensorboard_logger = TensorBoard(log_dir=dir)

model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epoch,
    validation_data=(x_valid, y_valid),
    callbacks=[early_stopping, tensorboard_logger],
)

# Run ten test samples through the model before reading the attention weights
# (presumably this is what populates `attention_weights` - confirm in the layer).
pred = model.predict(x_test[0:10])

first_encoder = model.layers[1]
labels = np.arange(first_encoder.attention_weights.shape[-2]).tolist()

# Plot entries 1, 2 and 3 of the stored weights, each flattened to
# input_length rows.
for idx in (1, 2, 3):
    weights_2d = tf.reshape(first_encoder.attention_weights[idx][0], (input_length, -1))
    attention_plotter(weights_2d, labels)

# Predict the validation samples one at a time and stack the outputs.
preds = []
for i in range(x_valid.shape[0]):
    if (i + 1) % 100 == 0:
        print(f'prediction: {i + 1}/{x_valid.shape[0]}')
    preds.append(model.predict(x_valid[i][np.newaxis, ...]))
pred = np.concatenate(preds, axis=0)

# The model ends in Reshape((1, 1)), so `pred` has shape (n, 1, 1) while
# `y_valid` has shape (n,). Passing them as-is makes the metric functions
# broadcast to (n, 1, n) and average the error over every prediction/target
# pair. Flatten both so that each prediction is compared with its own target,
# as the test-set evaluation further down already does.
y_valid_flat = y_valid.numpy().flatten()
pred_flat = pred.flatten()
mse = np.mean(kr.metrics.mse(y_valid_flat, pred_flat))
mae = np.mean(kr.metrics.mae(y_valid_flat, pred_flat))
print(f'mse: {mse}, mae: {mae}')

# Overlay the validation predictions and the true targets on one figure.
plt.figure(figsize=(14, 8))
for series, series_label in ((pred.flatten(), 'pred'), (y_valid, 'true')):
    plt.plot(range(len(series)), series, label=series_label)
plt.legend()
plt.show()

print("\n\n######################## Model description ################################")
model.summary()

# Settings of this run as (label, value) pairs, printed one per line.
run_settings = (
    ("softmax_type = ", softmax_type),
    ("Input_length = ", input_length),
    ("Lag = ", lag),
    ("Epoch = ", epoch),
    ("LR = ", learning_rate),
    ("Head_num = ", head_num),
    ("d_model = ", d_model),
    ("dense_units = ", dense_units),
    ("batch_size = ", batch_size),
    ("num_examples = ", num_examples),
    ("num_valid_examples = ", num_valid_examples),
    ("input_shape = ", input_shape),
)
for setting_label, setting_value in run_settings:
    print(setting_label, setting_value)

# Score the whole test split; targets and predictions are both flattened to
# one dimension before the error is computed.
pred = model.predict(x_test)
y_test_flat = y_test.numpy().flatten()
pred_flat = pred.flatten()
mae = kr.metrics.mae(y_test_flat, pred_flat)
test_mae = np.mean(mae)
print("\n\n######################## Results ##########################################")
print(f'test mae: {test_mae}')

### Saving Model:
# Store the trained model on Drive under a timestamped name.
save_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
model.save('/content/drive/My Drive/Colab Notebooks/Model/' + save_stamp)
# To load a saved model again:
# new_model = tf.keras.models.load_model('saved_model/my_model')