In [1]:
import matplotlib.pyplot as plt
import numpy as np

### Mount Google Drive

**Requires the `dataset_tensor.npy` file to be present in the "Colab Notebooks/Tensorized Transformers/Data" folder of your Google Drive ("My Drive").**

In [None]:
from google.colab import drive

# Expose the user's Google Drive to this runtime under /content/drive.
drive.mount('/content/drive')

# Project folder inside the mounted Drive, and the sub-folder holding the data files.
PATH = '/content/drive/My Drive/Colab Notebooks/Tensorized Transformers/'
DATA_PATH = f'{PATH}Data/'

### Clone the Tensorized Transformers GitHub repository

In [None]:
# Ask for the GitHub username with the builtin prompt rather than by capturing
# the output of a shell `read`, so the cell does not depend on how the shell
# and its terminal behave in the hosting environment.
git_username = input('Github username: ').strip()

In [None]:
from getpass import getpass

# Read the personal access token without echoing it, so the secret is not left
# behind in the notebook's saved cell output.
git_token = getpass('Github access token (https://github.com/settings/tokens): ').strip()

In [None]:
# Clone the entire repo into /content, authenticating over HTTPS with the
# username and access token collected in the previous cells.
# NOTE(review): the token is embedded in the clone URL, so it may be kept in the
# clone's stored remote URL and can show up in git error output — confirm this
# is acceptable, or clear the cell output before sharing the notebook.
# NOTE(review): `-l -s` are git's --local/--shared options, which the git
# documentation describes for clones of repositories on the local machine —
# confirm they are needed for this remote URL.
%cd /content
!git clone -l -s https://$git_username:$git_token@github.com/onurbil/tensorized_transformers.git tensorized_transformers
# List the top-level contents of the checkout, then go back to /content.
%cd tensorized_transformers
!ls
%cd ..

# Location of the checkout; appended to sys.path in the next cell.
REPO_PATH = '/content/tensorized_transformers'

In [None]:
import sys

# Put the cloned repository on the import path. The membership check keeps the
# cell idempotent: re-running it does not append the same entry again.
if REPO_PATH not in sys.path:
    sys.path.append(REPO_PATH)
print(sys.path)

## Experiments

In [None]:
import tensorflow as tf
import tensorflow.keras as kr
import numpy as np
import matplotlib.pyplot as plt

import model.tt_mod_weights as tt
import dataset_tools.split
from visualization_tools.visualization import visualize_pos_encoding, attention_plotter

from tensorflow.keras.callbacks import TensorBoard
import datetime

# Load the TensorBoard notebook extension and open a TensorBoard panel on
# /notebooks/output; the training cell writes its logs to a timestamped
# sub-directory of that path.
%load_ext tensorboard
%tensorboard --logdir '/notebooks/output'

In [None]:
# TensorBoard run directory: one timestamped folder per execution of this cell.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
dir = '/notebooks/output/' + run_stamp

# Load dataset:
filename = DATA_PATH + 'dataset_tensor.npy'
dataset = np.load(filename, allow_pickle=True)

print(dataset.shape)

# Windowing parameters forwarded to get_xy for both splits.
input_length = 4
lag = 4

train, test = dataset_tools.split.split_train_test(dataset)
x_train, y_train = dataset_tools.split.get_xy(train, input_length=input_length, lag=lag)
x_test, y_test = dataset_tools.split.get_xy(test, input_length=input_length, lag=lag)

# Give every sample the dataset's two trailing axes:
#   inputs  -> (samples, steps, dataset.shape[1], dataset.shape[2])
#   targets -> (samples, dataset.shape[1], dataset.shape[2])
frame_shape = (dataset.shape[1], dataset.shape[2])
x_train = tf.reshape(x_train, (x_train.shape[0], x_train.shape[1]) + frame_shape)
y_train = tf.reshape(y_train, (y_train.shape[0],) + frame_shape)
x_test = tf.reshape(x_test, (x_test.shape[0], x_test.shape[1]) + frame_shape)
y_test = tf.reshape(y_test, (y_test.shape[0],) + frame_shape)

# Disabled alternative that swaps the two trailing axes:
# x_train = tf.transpose(x_train, perm=(0, 1, 3, 2))
# y_train = tf.transpose(y_train, perm=(0, 2, 1))
# x_test = tf.transpose(x_test, perm=(0, 1, 3, 2))
# y_test = tf.transpose(y_test, perm=(0, 2, 1))

for split_name, split_x in (('x_train', x_train), ('x_test', x_test)):
    print(f'{split_name}.shape: {split_x.shape}')

# Parameters:
epoch = 300
learning_rate = 0.001
head_num = 1
d_model = head_num * 36
dense_units = 64
batch_size = 64
num_encoder_layers = 4  # depth of the encoder stack built below
input_shape = (input_length, x_train.shape[-2], x_train.shape[-1])
output_shape = (1, 1)

# Regression target: the single entry at index [0, 4] of each label's two
# trailing axes. The commented variant indexes the same pair in swapped order.
# NOTE(review): after this indexing the labels have shape (samples,) while the
# model emits (samples, 1, 1); confirm the loss aligns them as intended, or
# reshape the labels to (-1,) + output_shape.
# y_train = y_train[..., 4, 0]
# y_test = y_test[..., 4, 0]
y_train = y_train[..., 0, 4]
y_test = y_test[..., 0, 4]
initializer = 'RandomNormal'

# Dense documents `units` as a positive integer, so compute the product of the
# output shape as a plain Python int instead of passing a TensorFlow tensor.
output_units = int(np.prod(output_shape))

# Positional encoding -> stack of identical-configuration encoder layers ->
# flatten -> linear head reshaped to `output_shape`.
model = kr.Sequential([
    kr.Input(shape=input_shape),
    tt.PositionalEncoding(broadcast=True),
    *[
        tt.EncoderLayer(input_length, d_model, head_num, dense_units, initializer)
        for _ in range(num_encoder_layers)
    ],
    kr.layers.Flatten(),
    kr.layers.Dense(output_units, activation='linear'),
    kr.layers.Reshape(output_shape),
])

model.summary()
model.compile(optimizer=kr.optimizers.Adam(learning_rate=learning_rate), loss='mse', metrics=['mae'])

# Keep only the leading samples of each split for this run.
num_examples = 10000
x_train, y_train = x_train[:num_examples], y_train[:num_examples]

num_test_examples = 200
x_test, y_test = x_test[:num_test_examples, ...], y_test[:num_test_examples]

# Log training to the timestamped run directory; the truncated test split
# doubles as the validation set.
tensorboard_callback = TensorBoard(log_dir=dir)
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epoch,
    validation_data=(x_test, y_test),
    callbacks=[tensorboard_callback],
)

from visualization_tools.visualization import visualize_pos_encoding, attention_plotter

# Plot entry 5 of the attention weights stored on model.layers[1], using
# 0..n-1 as tick labels, where n is the size of the weights' second axis.
attn_layer = model.layers[1]
attn_weights = attn_layer.attention_weights
labels = list(range(attn_weights.shape[1]))
attention_plotter(attn_weights[5], labels)

import matplotlib.pyplot as plt

# Predict the whole test subset in one batched call instead of issuing a
# separate model.predict call for every sample.
pred = model.predict(x_test, batch_size=batch_size)

# Score predictions against targets as flat, equally long vectors. Handing the
# (N,)-shaped targets and the (N, 1, 1)-shaped predictions to
# kr.metrics.mse / kr.metrics.mae broadcasts them against each other, so the
# result is averaged over every prediction/target pair rather than over
# matching pairs.
pred_flat = np.asarray(pred).reshape(-1)
true_flat = np.asarray(y_test).reshape(-1)
assert pred_flat.shape == true_flat.shape, 'expected one prediction per target'

errors = pred_flat - true_flat
mse = np.mean(np.square(errors))
mae = np.mean(np.abs(errors))
print(f'mse: {mse}, mae: {mae}')

print(pred_flat.shape)
print(y_test.shape)

# Overlay predicted and true values, indexed by position in the test subset.
plt.plot(range(pred_flat.size), pred_flat, label='pred')
plt.plot(range(true_flat.size), true_flat, label='true')
plt.legend()
plt.show()

#### Old experiments