In [2]:
from random import randint
import time
import torch

from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# --- Experiment configuration -------------------------------------------
BATCH_SIZE = 20          # sequences per mini-batch fed to the data loaders
SEQUENCE_SIZE = 20       # number of digits in every generated sequence
NUM_SEQUENCES = 50_000   # total number of (input, target) pairs to generate
NUM_EPOCHS = 20          # full passes over the training split
LEARNING_RATE = 0.05     # step size handed to the optimizer

# Use the first CUDA device when one is available, otherwise stay on the CPU.
DEVICE = (
    torch.device("cuda:0")
    if torch.cuda.is_available()
    else torch.device("cpu")
)

In [11]:
class Generator(object):
    """Builds random digit sequences and their shifted targets.

    For an input sequence ``x`` of digits 0-9 the target ``y`` is defined as
    ``y[0] = x[0]`` and ``y[i] = (x[i] + x[0]) % 10`` for ``i > 0``.
    Generated pairs accumulate in ``self.x`` and ``self.y``.
    """

    def __init__(self):
        self.x = []  # list of input sequences (each a list of ints)
        self.y = []  # list of target sequences, aligned with self.x

    def __len__(self):
        """Return the number of sequences generated so far."""
        return len(self.x)

    def __str__(self):
        return f"X is:\n{self.x}\nY is:\n{self.y}"

    def generate_sequence(self, sequence_size=None):
        """Create one random ``(x, y)`` pair.

        Args:
            sequence_size: Number of digits in the sequence. Defaults to the
                module-level ``SEQUENCE_SIZE`` when omitted.

        Returns:
            A tuple ``(x, y)`` of two equally long lists of ints in ``[0, 9]``.

        Raises:
            ValueError: If ``sequence_size`` is smaller than 1; the target is
                defined relative to the first digit, so one is required.
        """
        if sequence_size is None:
            sequence_size = SEQUENCE_SIZE
        if sequence_size < 1:
            raise ValueError(
                f"sequence_size must be at least 1, got {sequence_size}"
            )
        x = [randint(0, 9) for _ in range(sequence_size)]
        offset = x[0]
        # The first target echoes the first input; the rest are shifted by it.
        y = [offset] + [(digit + offset) % 10 for digit in x[1:]]
        return x, y

    def generate_sequences(self, num_sequences=None, sequence_size=None):
        """Generate several pairs and append them to ``self.x`` / ``self.y``.

        Existing content is kept, so repeated calls grow the stored data.

        Args:
            num_sequences: How many pairs to add. Defaults to the module-level
                ``NUM_SEQUENCES`` when omitted.
            sequence_size: Length of each sequence, forwarded to
                ``generate_sequence``.
        """
        if num_sequences is None:
            num_sequences = NUM_SEQUENCES
        for _ in range(num_sequences):
            x, y = self.generate_sequence(sequence_size)
            self.x.append(x)
            self.y.append(y)


In [12]:
class SequenceDataset(torch.utils.data.Dataset):
    """Map-style dataset holding paired input/target sequences as tensors.

    Both tensors are created once, directly on the target device, and
    ``__getitem__`` indexes into them.
    """

    def __init__(self, x, y, device=None):
        """Store ``x`` and ``y`` as tensors.

        Args:
            x: Nested sequence of input values accepted by ``torch.tensor``.
            y: Nested sequence of target values, one entry per entry of ``x``.
            device: Device to place the tensors on. Defaults to the
                module-level ``DEVICE`` when omitted.

        Raises:
            ValueError: If ``x`` and ``y`` contain a different number of items.
        """
        super().__init__()
        if len(x) != len(y):
            raise ValueError(
                f"x and y must have the same length, got {len(x)} and {len(y)}"
            )
        if device is None:
            device = DEVICE
        self._len = len(x)
        self.x = torch.tensor(x, device=device)
        self.y = torch.tensor(y, device=device)

    def __len__(self):
        """Return the number of stored (input, target) pairs."""
        return self._len

    def __getitem__(self, idx):
        """Return the ``(input, target)`` tensors at position ``idx``."""
        return self.x[idx], self.y[idx]

In [13]:
# Generate the synthetic corpus of (input, target) digit sequences.
gen = Generator()
gen.generate_sequences()

# Hold out 20 % of the data, then split that hold-out evenly: one half
# (X_test / Y_test) is evaluated after every epoch, the other half
# (X_val / Y_val) is kept for the final check at the end of the notebook.
X_train, X_test, Y_train, Y_test = train_test_split(
    gen.x, gen.y, test_size=0.2, shuffle=True
)
X_test, X_val, Y_test, Y_val = train_test_split(
    X_test, Y_test, test_size=0.5, shuffle=True
)

# The final hold-out is used as whole tensors rather than through a loader.
X_val = torch.tensor(X_val)
Y_val = torch.tensor(Y_val)

train_dl = torch.utils.data.DataLoader(
    SequenceDataset(X_train, Y_train),
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
)
# Evaluation must see every sample exactly once: no shuffling is needed and
# the last (possibly smaller) batch must not be dropped from the metrics.
test_dl = torch.utils.data.DataLoader(
    SequenceDataset(X_test, Y_test),
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
)

In [14]:
class RNNModel(torch.nn.Module):
    """Token embedding -> single-layer LSTM -> per-step linear classifier."""

    def __init__(self, vocab_size=10, embedding_dim=32, hidden_size=128,
                 num_classes=10):
        """Create the layers.

        Args:
            vocab_size: Number of distinct input tokens.
            embedding_dim: Size of each token embedding.
            hidden_size: Size of the LSTM hidden state.
            num_classes: Number of output classes predicted at every step.
        """
        super().__init__()
        self.embedding = torch.nn.Embedding(vocab_size, embedding_dim)
        self.rnn = torch.nn.LSTM(embedding_dim, hidden_size, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size, num_classes)

    def forward(self, sentence, state=None):
        """Compute class logits for every position of ``sentence``.

        Args:
            sentence: Integer token tensor; the LSTM is configured with
                ``batch_first=True``, so the leading dimension is the batch.
            state: Optional initial ``(h_0, c_0)`` tuple for the LSTM. When
                ``None`` the LSTM starts from its default zero state.

        Returns:
            Logits with one vector of ``num_classes`` scores per input token.
        """
        embedded = self.embedding(sentence)
        # Forward the caller-supplied state; it was previously ignored.
        outputs, _ = self.rnn(embedded, state)
        return self.linear(outputs)

In [15]:
# Network under training, moved onto the selected device.
model = RNNModel()
model = model.to(DEVICE)

# Cross-entropy over the per-token class scores produced by the network.
loss = torch.nn.CrossEntropyLoss()
loss = loss.to(DEVICE)

# Plain stochastic gradient descent over all network parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=LEARNING_RATE)

In [16]:
def _run_epoch(net, batches, criterion, optimizer=None):
    """Run one pass over ``batches`` and return ``(summed_loss, mean_accuracy)``.

    When ``optimizer`` is given, the network is put in training mode and its
    parameters are updated after every batch. Otherwise the network is put in
    eval mode and gradient tracking is disabled, so no autograd graph is built.

    Args:
        net: Module mapping an input batch to per-token class logits.
        batches: Iterable yielding ``(inputs, targets)`` tensor pairs.
        criterion: Callable computing the loss from flattened logits/targets.
        optimizer: Optimizer to step after each batch, or ``None`` to evaluate.

    Returns:
        The loss summed over all batches and the accuracy averaged over
        batches (0.0 when ``batches`` is empty).
    """
    training = optimizer is not None
    net.train(training)
    loss_sum, acc_sum, num_batches = 0.0, 0.0, 0
    with torch.set_grad_enabled(training):
        for x_in, y_in in batches:
            # Flatten to one label per token; reshape(-1) stays 1-D even when
            # the batch holds a single label.
            targets = y_in.reshape(-1)
            if training:
                optimizer.zero_grad()
            # Call the module (not .forward) so registered hooks still run.
            logits = net(x_in)
            logits = logits.reshape(-1, logits.size(-1))
            batch_loss = criterion(logits, targets)
            loss_sum += batch_loss.item()
            hits = logits.argmax(dim=1) == targets
            acc_sum += hits.sum().item() / hits.shape[0]
            if training:
                batch_loss.backward()
                optimizer.step()
            num_batches += 1
    # Guard against an empty loader instead of dividing by zero.
    return loss_sum, acc_sum / max(num_batches, 1)


for epoch in range(NUM_EPOCHS):
    start_epoch_time = time.time()

    # Reported loss is the sum over batches; accuracy is the per-batch mean.
    train_loss, train_acc = _run_epoch(model, train_dl, loss, optimizer)
    print(
        f"Epoch: {epoch}, loss: {train_loss:.4f}, acc: "
        f"{train_acc:.4f}",
        end=" | "
    )

    test_loss, test_acc = _run_epoch(model, test_dl, loss)
    print(
        f"test loss: {test_loss:.4f}, test acc: {test_acc:.4f} | "
        f"{time.time() - start_epoch_time:.2f} sec."
    )

Epoch: 0, loss: 4594.1671, acc: 0.1405 | test loss: 573.7687, test acc: 0.1444 | 4.59 sec.
Epoch: 1, loss: 4587.4591, acc: 0.1450 | test loss: 573.4676, test acc: 0.1444 | 4.55 sec.
Epoch: 2, loss: 4584.6806, acc: 0.1450 | test loss: 573.0368, test acc: 0.1445 | 4.64 sec.
Epoch: 3, loss: 4580.0654, acc: 0.1450 | test loss: 572.2070, test acc: 0.1444 | 4.58 sec.
Epoch: 4, loss: 4566.1841, acc: 0.1452 | test loss: 569.0390, test acc: 0.1457 | 4.43 sec.
Epoch: 5, loss: 4512.5608, acc: 0.1533 | test loss: 557.8044, test acc: 0.1620 | 4.52 sec.
Epoch: 6, loss: 4345.9271, acc: 0.1889 | test loss: 556.7754, test acc: 0.1492 | 4.51 sec.
Epoch: 7, loss: 4397.8383, acc: 0.1568 | test loss: 536.9816, test acc: 0.1750 | 4.55 sec.
Epoch: 8, loss: 4081.8502, acc: 0.2214 | test loss: 472.3137, test acc: 0.2629 | 4.53 sec.
Epoch: 9, loss: 3270.7293, acc: 0.3770 | test loss: 254.4637, test acc: 0.6770 | 4.50 sec.
Epoch: 10, loss: 818.8022, acc: 0.9301 | test loss: 28.8318, test acc: 1.0000 | 4.48 sec.


In [27]:
idx = 5  # position of the validation sequence printed as an example

# Pure inference: switch off training-only behaviour and gradient tracking so
# scoring the whole validation set does not build an autograd graph.
model.eval()
with torch.no_grad():
    val_results = model(X_val.to(DEVICE)).argmax(dim=2)

# Fraction of individual tokens predicted correctly across all sequences.
val_acc = (val_results == Y_val.to(DEVICE)).flatten()
val_acc = val_acc.float().mean().item()

out = val_results[idx]
true = Y_val[idx]
print(f"Validation accuracy is : {val_acc:.4f}")
print("-" * 20)
print(f"Input sequence is:  \"{X_val[idx].numpy()}\"")
print("-" * 20)
print(f"Output sequence is: \"{out.cpu().numpy()}\"")
print("-" * 20)
print(f"True sequence is:   \"{true.numpy()}\"")

Validation accuracy is : 0.9999
--------------------
Input sequence is:  "[9 4 5 0 6 2 8 6 9 3 6 0 8 8 3 1 1 3 0 5]"
--------------------
Output sequence is: "[9 3 4 9 5 1 7 5 8 2 5 9 7 7 2 0 0 2 9 4]"
--------------------
True sequence is:   "[9 3 4 9 5 1 7 5 8 2 5 9 7 7 2 0 0 2 9 4]"
