### Mount Google Drive

**Requires dataset_tensor.npy file in "Colab Notebooks/Tensorized Transformers/Data" folder!**

In [None]:
# Mount Google Drive into the Colab filesystem and define the project folders
# that the rest of the notebook reads from and writes to.
from google.colab import drive

drive.mount('/content/drive')

# Project root on Drive (trailing slash is relied on by the concatenations below
# and in later cells).
PATH = '/content/drive/My Drive/Colab Notebooks/Tensorized Transformers/'
DATA_PATH = f'{PATH}Data/'

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.io
import sklearn.model_selection
import datetime
import tensorflow as tf
import tensorflow.keras as kr
import torch
from torchsummary import summary

! pip install -q pyyaml h5py  # Required to save models in HDF5 format
! pip install torch
! pip install einops
! pip install tqdm
! pip install torchsummary
! pip install scipy

### Clone the Tensorized Transformers and multidim conv GitHub repository

In [None]:
# Disabled: interactive prompt for a GitHub username and access token.
# Nothing in the visible notebook reads git_username / git_token; the clone
# cell uses a plain HTTPS URL without credentials.
# NOTE(review): presumably a leftover from when the repository required
# authentication — confirm, then delete this cell.
# git_username = ''
# git_token =  ''

# if git_username == '':
#   print('Github username:')
#   git_username = %sx read -p ''
#   git_username = git_username[0]

# if git_token == '':
#   print('Github access token (https://github.com/settings/tokens):')
#   print('Github Token:')
#   git_token = %sx read -p ''
#   git_token = git_token[0]

In [None]:
# Clone the entire repo.
%cd /content
!git clone -l -s https://github.com/onurbil/TENT.git tensorized_transformers
%cd tensorized_transformers
!ls
%cd ..

# Update the repository
%cd tensorized_transformers
!git pull
!ls

In [None]:
import sys

# Location of the cloned repository.
TT_REPO_PATH = '/content/tensorized_transformers'

# Make the repository's packages importable. The membership check keeps the
# cell idempotent: re-running it no longer appends duplicate sys.path entries.
if TT_REPO_PATH not in sys.path:
  sys.path.append(TT_REPO_PATH)
print(sys.path)

### Get Kaggle data and save it to your Drive

**Only if you don't have it saved in your Drive or want to update it.**

In [None]:
import shutil
import os
from common.paths import PROCESSED_DATASET_DIR
from common.paths import EU_PROCESSED_DATASET_DIR
# NOTE(review): `main` is not referenced in this cell; presumably imported for
# its side effects — confirm before removing.
from tensorized_transformers import main

# Files to copy to Drive, listed per source directory.
filesToMoveUS = ['dataset_tensor.npy', 'scale.npy']
filesToMoveEU = ['eu_dataset_tensor.npy', 'eu_scale.npy']

# DATA_PATH ends with '/', so dirname() yields the data folder itself.
os.makedirs(os.path.dirname(DATA_PATH), exist_ok=True)

# US files first, then EU files — same order as two separate loops.
for source_dir, file_names in ((PROCESSED_DATASET_DIR, filesToMoveUS),
                               (EU_PROCESSED_DATASET_DIR, filesToMoveEU)):
  for files in file_names:
    shutil.copy(source_dir + '/' + files, DATA_PATH)


---

---

# Dataset




## USA+Canada

In [None]:
import experiment_tools.load_dataset as load_dataset
from common.variables import city_labels

# Configuration of the USA+Canada dataset; these globals are also read by
# later cells.
input_length = 16
prediction_time = 4
y_feature = 4
y_city = 0
num_cities = 30
remove_last_from_test = 800
valid_size = 1024

dataset, dataset_params = load_dataset.get_usa_dataset(
    DATA_PATH,
    input_length, prediction_time,
    y_feature, y_city,
    end_city=num_cities,
    remove_last_from_test=remove_last_from_test,
    valid_split=valid_size, split_random=1337)

denorm_min, denorm_max = load_dataset.get_usa_normalization(DATA_PATH, y_feature)

Xtr, Ytr, Xvalid, Yvalid, Xtest, Ytest = dataset

# Print the shape of every split, in the order they were unpacked.
for label, split in (('Xtr.shape', Xtr), ('Ytr.shape', Ytr),
                     ('Xvalid.shape', Xvalid), ('Yvalid.shape', Yvalid),
                     ('Xtest.shape', Xtest), ('Ytest.shape', Ytest)):
  print(label, split.shape)

# Print the normalization bounds returned for the target feature.
for label, bound in (('denorm_min', denorm_min), ('denorm_max', denorm_max)):
  print(label, bound)

## EU

In [None]:
import experiment_tools.load_dataset as load_dataset
from common.variables import eu_city_labels

# Later cells read `city_labels`, so point it at the EU labels.
city_labels = eu_city_labels

# Configuration of the EU dataset; these globals are also read by later cells.
input_length = 8
prediction_time = 6
y_feature = 3  # 3=avg_temp(F)
y_city = 1
valid_size = 512
test_size = 1095  # 3 years of measurements

dataset, dataset_params = load_dataset.get_eu_dataset(
    DATA_PATH, test_size,
    input_length, prediction_time,
    y_feature, y_city,
    valid_split=valid_size, split_random=1337)

denorm_min, denorm_max = load_dataset.get_eu_normalization(DATA_PATH, y_feature)

Xtr, Ytr, Xvalid, Yvalid, Xtest, Ytest = dataset

# Print the shape of every split, in the order they were unpacked.
for label, split in (('Xtr.shape', Xtr), ('Ytr.shape', Ytr),
                     ('Xvalid.shape', Xvalid), ('Yvalid.shape', Yvalid),
                     ('Xtest.shape', Xtest), ('Ytest.shape', Ytest)):
  print(label, split.shape)

# Print the normalization bounds returned for the target feature.
for label, bound in (('denorm_min', denorm_min), ('denorm_max', denorm_max)):
  print(label, bound)


---

---

# Experiments







## Tensorized Transformer
**Run on TPU**

In [None]:
# Train the Tensorized Transformer on `dataset` and optionally store its
# attention weights on Drive. The timer is started before the project imports,
# so the reported time includes them.
import time
start = time.time()

import experiment_tools.tt_training as tt_training
from visualization_tools.AW_save import save_weights
import datetime

save_aw = True  # Set to True to store the attention weights after training.

# Build the folder name from ONE timestamp. Two separate now() calls could
# straddle midnight and combine the date of one day with the time of the next.
folder = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# model
softmax_type = 3
epoch = 300
patience = 20
num_layers = 3
head_num = 32
d_model = 256
dense_units = 128
batch_size = 16
loss = 'mse'

model, model_params, history = tt_training.train_model(dataset,
                                                       softmax_type, epoch, patience,
                                                       num_layers, head_num, d_model, dense_units,
                                                       batch_size, loss, use_tpu=True, save_aw=save_aw)
if save_aw:
  # Derive the output folder from PATH instead of a second hard-coded Drive
  # path, so it follows the project root configured in the mount cell.
  save_weights(model, city_labels, layer=1, folder_name=PATH + 'AW/' + folder)

end = time.time()
print('Time elapsed: ', str(np.round(end - start, decimals=2)), 'seconds')

In [None]:
import experiment_tools.results as results

# Combine dataset and model parameters into one record for printing/saving.
params = dataset_params + model_params
results.print_params(params)

# `folder` and `name` are passed on to the plotting call below.
folder, name = results.save_results_with_datetime(model, 'TT', PATH, params)

Xtr, Ytr, Xvalid, Yvalid, Xtest, Ytest = dataset

results.plot_valid_test_predictions(
    model, Xvalid, Yvalid, Xtest, Ytest,
    y_feature=None,
    denorm_min=denorm_min, denorm_max=denorm_max,
    folder=folder, base_name=name)

## Vanilla Transformer
**Run on GPU**

In [None]:
# Train the vanilla Transformer baseline on `dataset`. The timer is started
# before the project import, so the reported time includes it.
import time
start = time.time()

import experiment_tools.vanilla_training as vanilla_training

# Architecture
num_layers = 3
head_num = 32
d_model = 512
dense_units = 512
dropout_rate = 0.01

# Training schedule
epoch = 300
patience = 20
batch_size = 128
loss = kr.losses.mean_squared_error
use_tpu = False

model, model_params = vanilla_training.train_model(
    dataset,
    epoch, patience,
    num_layers, head_num,
    d_model, dense_units,
    batch_size, dropout_rate,
    loss, use_tpu=use_tpu)

end = time.time()
elapsed = np.round(end - start, decimals=2)
print('Time elapsed: ', str(elapsed), 'seconds')

In [None]:
import experiment_tools.results as results
import experiment_tools.load_dataset as experiment_dataset

# Combine dataset and model parameters into one record for printing/saving.
params = dataset_params + model_params
results.print_params(params)

folder, name = results.save_results_with_datetime(model, 'Vanilla', PATH, params)

Xtr, Ytr, Xvalid, Yvalid, Xtest, Ytest = dataset

# Note the argument/result order used here: train, test, valid.
Xtr_flat, Xtest_flat, Xvalid_flat = experiment_dataset.to_flatten_dataset(
    Xtr, Xtest, Xvalid)

results.plot_valid_test_predictions(
    model, Xvalid_flat, Yvalid, Xtest_flat, Ytest,
    y_feature=None,
    denorm_min=denorm_min, denorm_max=denorm_max,
    folder=folder, base_name=name,
    model_returns_activations=True)

## 3D CNN
**Run on GPU**

In [None]:
# Train the 3D CNN baseline on `dataset`. The timer is started before the
# project imports, so the reported time includes them.
import time
start = time.time()

import experiment_tools.load_dataset as load_dataset
import experiment_tools.cnn3d_training as cnn3d_training

# Architecture
filters = 10
kernel_size = 2

# Training schedule
# NOTE(review): epoch equals patience here, whereas the other experiments in
# this notebook use epoch = 300 — confirm this is intentional.
epoch = 20
patience = 20
batch_size = 128
learning_rate = 0.0001
loss = 'mse'
use_tpu = False

model, model_params, history = cnn3d_training.train_model(
    dataset,
    epoch, patience,
    filters, kernel_size,
    batch_size,
    learning_rate, loss, use_tpu=use_tpu)

end = time.time()
elapsed = np.round(end - start, decimals=2)
print('Time elapsed: ', str(elapsed), 'seconds')

In [None]:
import experiment_tools.results as results
import experiment_tools.cnn3d_training as cnn3d_training

# Combine dataset and model parameters into one record for printing/saving.
params = dataset_params + model_params
results.print_params(params)

folder, name = results.save_results_with_datetime(model, 'CNN3D', PATH, params)

Xtr, Ytr, Xvalid, Yvalid, Xtest, Ytest = dataset

# Note the argument/result order used here: train, valid, test.
Xtr_t, Xvalid_t, Xtest_t = cnn3d_training.transform_dataset(Xtr, Xvalid, Xtest)

# Alternative arguments kept from the original cell:
# y_feature=None, denorm_min=None, denorm_max=None,
results.plot_valid_test_predictions(
    model, Xvalid_t, Yvalid, Xtest_t, Ytest,
    y_feature=None,
    denorm_min=denorm_min, denorm_max=denorm_max,
    folder=folder, base_name=name)

## LSTM
**Run on GPU**

In [None]:
# Train the LSTM baseline on `dataset`. The timer starts after the imports.
import experiment_tools.load_dataset as load_dataset
import experiment_tools.lstm_training as lstm_training

import time
start = time.time()

# Architecture
num_layers = 2
hidden_units = 128
dropout_rate = 0.1

# Training schedule
epoch = 300
patience = 20
batch_size = 128
learning_rate = 0.0001
loss = 'mse'

model, model_params = lstm_training.train_lstm(
    dataset,
    epoch, patience,
    num_layers, hidden_units, dropout_rate,
    learning_rate, batch_size, loss)

end = time.time()
elapsed = np.round(end - start, decimals=2)
print('Time elapsed: ', str(elapsed), 'seconds')

In [None]:
import experiment_tools.results as results
import experiment_tools.load_dataset as experiment_dataset

# Combine dataset and model parameters into one record for printing/saving.
params = dataset_params + model_params
results.print_params(params)

folder, name = results.save_results_with_datetime(model, 'LSTM', PATH, params)

Xtr, Ytr, Xvalid, Yvalid, Xtest, Ytest = dataset

# Note the argument/result order used here: train, test, valid.
Xtr_flat, Xtest_flat, Xvalid_flat = experiment_dataset.to_flatten_dataset(
    Xtr, Xtest, Xvalid)

# Alternative arguments kept from the original cell:
# y_feature=None, denorm_min=None, denorm_max=None,
r = results.plot_valid_test_predictions(
    model, Xvalid_flat, Yvalid, Xtest_flat, Ytest,
    y_feature=None,
    denorm_min=denorm_min, denorm_max=denorm_max,
    folder=folder, base_name=name)

# Tab-separated dump of entries 2..5 of the returned sequence.
print(f'{r[2]}\t{r[3]}\t{r[4]}\t{r[5]}')

## ConvLSTM
**Run on GPU**

In [None]:
# Train the ConvLSTM baseline on `dataset`. The timer starts after the imports.
import experiment_tools.load_dataset as load_dataset
import experiment_tools.lstm_training as lstm_training

import time
start = time.time()

# Architecture
num_layers = 2
filters = 16
kernel_size = 3
padding = 'same'
dropout_rate = 0.1

# Training schedule
epoch = 300
patience = 20
batch_size = 128
learning_rate = 0.0001
loss = 'mse'

model, model_params = lstm_training.train_conv_lstm(
    dataset,
    epoch, patience,
    num_layers, filters, kernel_size,
    dropout_rate, padding,
    learning_rate, batch_size, loss)

end = time.time()
elapsed = np.round(end - start, decimals=2)
print('Time elapsed: ', str(elapsed), 'seconds')

In [None]:
# Save and plot the results of the ConvLSTM model trained in the previous cell.
import experiment_tools.results as results
import experiment_tools.lstm_training as lstm_training

# Combine dataset and model parameters into one record for printing/saving.
params = dataset_params + model_params
results.print_params(params)

# Label the run 'ConvLSTM'. The cell previously reused the 'LSTM' label from the
# plain LSTM section, so ConvLSTM results were saved under the wrong model name.
folder, name = results.save_results_with_datetime(model, 'ConvLSTM', PATH, params)

Xtr, Ytr, Xvalid, Yvalid, Xtest, Ytest = dataset
# Note the argument/result order used here: train, test, valid.
Xtr_flat, Xtest_flat, Xvalid_flat = lstm_training.transform_dataset_for_conv_lstm(Xtr, Xtest, Xvalid)

# Alternative arguments kept from the original cell:
# y_feature=None, denorm_min=None, denorm_max=None,
r = results.plot_valid_test_predictions(model, Xvalid_flat, Yvalid, Xtest_flat, Ytest,
                                        y_feature=None, denorm_min=denorm_min, denorm_max=denorm_max,
                                        folder=folder, base_name=name)
# Tab-separated dump of entries 2..5 of the returned sequence.
print(f'{r[2]}\t{r[3]}\t{r[4]}\t{r[5]}')