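# Introduction

This notebook exercises the `GRU_Cell` implementation from `src.grus` on two toy tasks: predicting the next character of a repeated `abcd` sequence, and forecasting a sine wave from sliding windows.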

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('..')

import numpy as np
import torch
from sklearn.preprocessing import OneHotEncoder

from src.grus import GRU_Cell, compute_loss, train_gru, cross_entropy

# TORCH CONFIGURATION
# Seed torch's global RNG so that everything drawing from it (for example the
# shuffling DataLoader used for testing) gives the same result on every
# Restart Kernel -> Run All.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Predict Repeated Sequence

In [3]:
# basic parameters: width of the one-hot input rows, width of the output,
# and the hidden size handed to GRU_Cell
input_size, output_size, hidden_size = 4, 4, 3

# utils
def encode_character(char, c2n):
    """Return the one-hot vector for ``char``.

    Args:
        char: key to look up in ``c2n``.
        c2n: mapping from character to integer index; its length sets the
            length of the returned vector.

    Returns:
        1-D float numpy array of length ``len(c2n)`` holding 1 at position
        ``c2n[char]`` and 0 everywhere else.

    Raises:
        KeyError: if ``char`` is not a key of ``c2n``.
    """
    position = c2n[char]
    # Row ``position`` of the identity matrix is exactly the one-hot vector.
    return np.eye(len(c2n))[position]
    
def decode_character(onehot):
    """Map a one-hot (or score) tensor back to its character.

    Size-1 dimensions are squeezed away, the position of the largest entry is
    taken, and that position is looked up in the module-level ``n2c`` mapping
    (index -> character), which must exist by the time this is called.

    Args:
        onehot: torch tensor of per-class values.

    Returns:
        The entry of ``n2c`` for the arg-max position.
    """
    flattened = onehot.squeeze()
    best_index = torch.argmax(flattened).item()
    return n2c[best_index]

# data preparation
examples = 1000
seq = 'abcd'
string = seq*examples
c2n = {val: index for index, val in enumerate(seq)}
n2c = {v:k for k,v in c2n.items()}

# One one-hot row per time step: shape (len(string), len(seq)).
# Time must stay on axis 0. Transposing here and reshaping afterwards would
# re-chunk the buffer class by class (all 'a' rows, then all 'b' rows, ...)
# and the repeated 'abcd' ordering would be lost.
features = np.array([encode_character(char, c2n) for char in string])
assert features.shape == (len(string), input_size)

# Next-character targets: shift by one step along the time axis. The
# wrap-around is correct here because the sequence is periodic: the final 'd'
# is followed by 'a'.
targets = np.roll(features, shift=-1, axis=0)

# convert numpy arrays to tensors (already laid out as (time, input_size))
features = torch.tensor(features, dtype=torch.float32)
targets = torch.tensor(targets, dtype=torch.float32)

# make a dataset from these tensors
training_data = torch.utils.data.TensorDataset(features, targets)

# build a dataloader; shuffle=False keeps the rows in sequence order
loader_params = dict(batch_size=64, shuffle=False)
train_loader = torch.utils.data.DataLoader(dataset=training_data, **loader_params)

# how you iterate: show the first batch's shapes and its first (input, target) pair
for x_train, y_train in train_loader:
    print(x_train.shape)
    print(y_train.shape)
    print(x_train[0], decode_character(x_train[0]))
    print(y_train[0], decode_character(y_train[0]))
    break

torch.Size([64, 4])
torch.Size([64, 4])
tensor([1., 0., 0., 0.]) a
tensor([0., 1., 0., 0.]) b


In [5]:
# instantiate GRU Model
# GRU_Cell comes from src.grus; here it is built positionally from
# (hidden_size, input_size, output_size) as set in the parameters cell.
gru = GRU_Cell(hidden_size, input_size, output_size)

# define training parameters
# plain SGD over the model's parameters with a fixed learning rate
epochs = 100
optimizer = torch.optim.SGD(gru.parameters(), lr=0.01)

# train
# train_gru (src.grus) receives the model, the loader, the epoch count, the
# loss function, the optimizer and the device. Its return value is kept as
# `state` and later passed back into `gru(x, state)` by the testing cell.
# NOTE(review): presumably `state` is the final hidden state and verbose=True
# enables the periodic loss printout -- confirm against src.grus.
state = train_gru(gru, train_loader, epochs, cross_entropy, optimizer, device, verbose=True)

epoch 0 loss = 1.42
epoch 10 loss = 0.24
epoch 20 loss = 0.12
epoch 30 loss = 0.08
epoch 40 loss = 0.06
epoch 50 loss = 0.05
epoch 60 loss = 0.04
epoch 70 loss = 0.04
epoch 80 loss = 0.03
epoch 90 loss = 0.03


In [6]:
# testing
def softmax_to_onehot(x):
    """Convert a vector of scores/probabilities into a one-hot vector.

    The input is treated as flat, so any shape is accepted; the result is
    sized from the input instead of assuming exactly four classes.

    Args:
        x: torch tensor of per-class values.

    Returns:
        1-D float tensor with ``x.numel()`` entries, holding 1 at the arg-max
        position of the flattened ``x`` and 0 everywhere else.
    """
    res = torch.zeros(x.numel())
    # torch.argmax without `dim` returns the index into the flattened input.
    res[torch.argmax(x)] = 1
    return res

# Single-example batches drawn in random order from the training data.
test_loader = torch.utils.data.DataLoader(dataset=training_data, batch_size=1, shuffle=True)

# Number of batches to display. This has its own name so the `examples`
# count used during data preparation is not overwritten by this cell.
n_test_batches = 10

# Inference only: no autograd graph is needed.
with torch.no_grad():
    # zip with range() stops after n_test_batches without draining the loader.
    for _batch_idx, (x_test, y_test) in zip(range(n_test_batches), test_loader):
        activations, _ = gru(x_test, state)
        for input_example, output_example in zip(x_test, activations):
            # Collapse the model output to a one-hot prediction, then decode it.
            prediction_onehot = softmax_to_onehot(output_example)
            print(decode_character(input_example), decode_character(prediction_onehot))

a b
d a
b d
b d
d a
c d
d a
c d
a b
b d


# Timeseries Forecasting

In [10]:
from src.grus import GRU_Cell, compute_loss

class TS_DS(torch.utils.data.Dataset):
    """Sliding-window dataset over a series and its targets.

    Item ``i`` is the pair
    ``(data[i : i + input_window], targets[i : i + output_window])``.
    """

    def __init__(self, data, targets, input_window, output_window):
        """Store the series, the targets and the two window lengths.

        Args:
            data: sliceable series the input windows are cut from.
            targets: sliceable series the target windows are cut from.
            input_window: number of elements in each input window.
            output_window: number of elements in each target window.
        """
        self.data = data
        self.targets = targets
        self.input_window = input_window
        self.output_window = output_window

    def __getitem__(self, index):
        """Return the ``(input window, target window)`` pair at ``index``.

        Negative indices count from the end.

        Raises:
            IndexError: if ``index`` is outside ``[-len(self), len(self))``.
                Without this, out-of-range slices silently come back empty
                and iterating the dataset directly would never stop.
        """
        n_items = len(self)
        if index < 0:
            # Not in-place, so a tensor index owned by the caller is untouched.
            index = index + n_items
        if not 0 <= index < n_items:
            raise IndexError(f"index out of range for TS_DS of length {n_items}")
        x = self.data[index: index + self.input_window]
        y = self.targets[index: index + self.output_window]
        return x, y

    def __len__(self):
        """Number of items for which both windows are full.

        ``len(data) - input_window`` is the original convention and is kept.
        The count is additionally capped by the number of full target windows
        and floored at 0, so a long ``output_window`` cannot yield truncated
        targets and a too-short series yields an empty dataset.
        """
        full_inputs = len(self.data) - self.input_window
        full_targets = len(self.targets) - self.output_window + 1
        return max(0, min(full_inputs, full_targets))
    
# window lengths, model size and batch size for the forecasting task
input_window = 2
output_window = 2
hidden_size = 5
batch_size = 32

# Sine wave: n_periods full periods sampled at n_steps evenly spaced points.
n_periods = 1000
n_steps = 4000
data = torch.sin(torch.linspace(0, n_periods*2*np.pi, steps=n_steps))
# targets[i] == data[i + input_window], so the target window of item i starts
# right after its input window.
# NOTE: torch.roll wraps around, so the last `input_window` entries of
# `targets` are values taken from the start of the series.
targets = torch.roll(data, shifts=-input_window, dims=0)

# make sliding window datasets
train_ds = TS_DS(data, targets, input_window, output_window)

# build a dataloader; shuffle=False keeps the windows in time order
loader_params = dict(batch_size=batch_size, shuffle=False)
train_loader = torch.utils.data.DataLoader(dataset=train_ds, **loader_params)

# sanity check: the first batch has the expected window shapes
x_batch, y_batch = next(iter(train_loader))
assert x_batch.shape == (batch_size, input_window)
assert y_batch.shape == (batch_size, output_window)

# instantiate GRU Model: input width = input_window, output width =
# output_window, with a tanh output activation requested from GRU_Cell
gru_ts = GRU_Cell(hidden_size, input_window, output_window, output_activation='tanh')

# define training parameters
epochs = 100
optimizer = torch.optim.SGD(gru_ts.parameters(), lr=0.01)

# train
def mse_loss(y_pred, y_test):
    """Squared-error loss: sum along dim 0, then mean of what remains.

    Args:
        y_pred: predicted tensor.
        y_test: reference tensor, broadcast-compatible with ``y_pred``.

    Returns:
        0-dim tensor holding the loss.

    NOTE(review): which axis dim 0 represents (batch or window position)
    depends on how train_gru lays out its outputs -- confirm against src.grus.
    """
    squared_error = (y_pred - y_test).pow(2)
    return squared_error.sum(dim=0).mean()

# Train the forecasting model with the MSE loss defined above.
# NOTE: this rebinds `state`, which previously held the value returned by
# training the character model.
state = train_gru(gru_ts, train_loader, epochs, mse_loss, optimizer, device, verbose=True)

epoch 0 loss = 0.21
epoch 10 loss = 0.12
epoch 20 loss = 0.09
epoch 30 loss = 0.08
epoch 40 loss = 0.07
epoch 50 loss = 0.06
epoch 60 loss = 0.06
epoch 70 loss = 0.06
epoch 80 loss = 0.06
epoch 90 loss = 0.06
