Сгенерировать последовательности, которые состоят из цифр (от 0 до 9) и задаются следующим образом:

x - последовательность цифр

    y1 = x1

    yi = xi + x1

Если yi >= 10, то

    yi = yi - 10

Научить модель рекуррентной нейронной сети предсказывать yi по xi 
Использовать: RNN, LSTM, GRU

6 баллов за правильно выполненное задание.


In [1]:
import torch
from torch.utils.data import *
import time

In [2]:
# Prefer the GPU when PyTorch can see one; otherwise everything runs on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Optimisation settings.
batch_size = 256   # sequences per mini-batch
num_epoch = 100    # passes over the training set
lr = 1e-3          # learning rate handed to the optimizers
# Synthetic data settings.
len_obs = 50       # digits per generated sequence
num_obs = 10_000   # number of training sequences

Для начала сгенерируем обучающие последовательности по описанным выше правилам

Для обучения подаем пару **x, y** длины **len_obs**

In [3]:
def digit_encoder(x):
    """Encode a digit sequence by adding its first digit to every later digit.

    The first element is copied unchanged. Every other element becomes
    ``x[i] + x[0]``, reduced by 10 whenever that sum is 10 or more.

    Args:
        x: 1-D tensor of digits (values are expected to lie in 0..9).

    Returns:
        A new tensor with the same shape, dtype and device as ``x``; the
        input is not modified. An empty input yields an empty output.
    """
    if x.numel() == 0:
        # Nothing to encode; indexing x[0] below would fail on an empty tensor.
        return torch.zeros_like(x)
    # One vectorised pass instead of a Python loop over tensor elements.
    shifted = x + x[0]
    y = torch.where(shifted < 10, shifted, shifted - 10)
    # The first digit acts as the key and is passed through untouched.
    y[0] = x[0]
    return y

Создаем датасет по сгенерированным последовательностям

In [4]:
def make_dataset(num_obs, test_ratio=0.2):
    """Generate random digit sequences together with their encoded targets.

    Args:
        num_obs: number of training sequences to generate.
        test_ratio: size of the test split relative to ``num_obs``
            (``int(num_obs * test_ratio)`` sequences are generated).

    Returns:
        ``(x_train, y_train, x_test, y_test)``. Each x tensor has shape
        ``(n, len_obs)``; each y tensor holds ``digit_encoder`` applied to the
        matching row of x.
    """
    def sample(n):
        # randint's upper bound is exclusive: high=10 is needed to cover the
        # whole digit range 0..9 (high=9 never produced the digit 9).
        x = torch.randint(low=0, high=10, size=(n, len_obs))
        y = torch.stack([digit_encoder(row) for row in x])
        return x, y

    x_train, y_train = sample(num_obs)
    x_test, y_test = sample(int(num_obs * test_ratio))

    return x_train, y_train, x_test, y_test

In [5]:
# Generate the training tensors and a separate, smaller test split in one call.
x_train, y_train, x_test, y_test = make_dataset(num_obs)

In [6]:
# Mini-batch iterators over the generated tensors. Training batches are
# reshuffled every epoch; the evaluation loader keeps a fixed order so test
# metrics are computed over identical batches on every pass.
train_ds = DataLoader(TensorDataset(x_train, y_train),
                      batch_size=batch_size,
                      shuffle=True)
test_ds = DataLoader(TensorDataset(x_test, y_test),
                     batch_size=batch_size,
                     shuffle=False)

Функция тренировки модели

In [7]:
def _run_epoch(loader, model, loss, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(loss_sum, accuracy)``.

    With an ``optimizer`` the model is updated after every batch. Without one
    the pass is evaluation-only and runs with gradient tracking disabled.
    ``accuracy`` is the fraction of correctly predicted positions over the
    whole pass, or NaN when the loader yields no data.
    """
    training = optimizer is not None
    loss_sum, correct, total = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for x, y in loader:
            x = x.to(device)
            # Flatten targets to one label per position to match the logits.
            y = y.reshape(-1).to(device)
            logits = model(x)
            # Collapse batch and time: one row of class scores per position.
            logits = logits.reshape(-1, logits.shape[-1])
            batch_loss = loss(logits, y)
            if training:
                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()
            loss_sum += batch_loss.item()
            correct += (logits.argmax(dim=1) == y).sum().item()
            total += y.numel()
    accuracy = correct / total if total else float("nan")
    return loss_sum, accuracy


def train(train_ds, test_ds, model, optimizer, loss, num_epoch, DEVICE,
          log_every=10):
    """Train ``model`` and evaluate it on ``test_ds`` after every epoch.

    Args:
        train_ds: iterable of ``(x, y)`` training batches.
        test_ds: iterable of ``(x, y)`` evaluation batches.
        model: module mapping a batch of digit indices to per-position logits.
        optimizer: optimizer over ``model``'s parameters.
        loss: callable ``loss(logits, targets)`` returning a scalar tensor.
        num_epoch: number of passes over ``train_ds``.
        DEVICE: device the batches are moved to.
        log_every: print metrics on every ``log_every``-th epoch, starting
            with the first one. Must be a positive integer.

    Returns:
        None. Progress is printed. The reported loss is the sum of the batch
        losses of the pass; the accuracy is computed per position over the
        whole pass. The model is left in eval mode.
    """
    for epoch in range(num_epoch):
        should_log = epoch % log_every == 0
        start_epoch_time = time.time()

        model.train()
        train_loss, train_acc = _run_epoch(train_ds, model, loss, DEVICE,
                                           optimizer)
        if should_log:
            print(f"Epoch: {epoch+1}, loss: {train_loss:.4f}, acc: "
                  f"{train_acc:.4f}",
                  end=" | ")

        model.eval()
        test_loss, test_acc = _run_epoch(test_ds, model, loss, DEVICE)
        if should_log:
            print(
                f"test loss: {test_loss:.4f}, test acc: {test_acc:.4f} | "
                f"{time.time() - start_epoch_time:.2f} sec."
            )

Простая RNN модель

In [8]:
class RNNModel(torch.nn.Module):
    """Digit embedding -> single ``torch.nn.RNN`` layer -> per-step classifier.

    Args:
        num_digits: vocabulary size and number of output classes.
        embed_dim: size of each digit embedding.
        hidden_size: size of the recurrent hidden state.
    """

    def __init__(self, num_digits=10, embed_dim=10, hidden_size=128):
        super().__init__()
        self.embed = torch.nn.Embedding(num_digits, embed_dim)
        self.rnn1 = torch.nn.RNN(embed_dim, hidden_size, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size, num_digits)

    def forward(self, sentence, state=None):
        """Return per-position logits for a tensor of digit indices.

        Args:
            sentence: integer tensor of digit indices, batch-first.
            state: optional initial hidden state for the RNN; ``None`` lets
                the RNN start from its default (zero) state.
        """
        embed = self.embed(sentence)
        # Forward the caller's initial state; it used to be accepted but dropped.
        o, _ = self.rnn1(embed, state)
        return self.linear(o)

In [9]:
# Build the RNN on the selected device together with its loss and optimizer.
model = RNNModel().to(DEVICE)
# This `loss` object is shared: the LSTM and GRU training calls reuse it.
loss = torch.nn.CrossEntropyLoss().to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [10]:
# Fit the RNN for num_epoch epochs; train() prints metrics every 10th epoch.
train(train_ds, test_ds, model, optimizer, loss, num_epoch, DEVICE)

Epoch: 1, loss: 90.9431, acc: 0.1172 | test loss: 18.0378, test acc: 0.1153 | 1.36 sec.
Epoch: 11, loss: 86.5629, acc: 0.1788 | test loss: 17.1725, test acc: 0.2038 | 0.20 sec.
Epoch: 21, loss: 67.6984, acc: 0.2489 | test loss: 13.6187, test acc: 0.2468 | 0.20 sec.
Epoch: 31, loss: 42.3193, acc: 0.4701 | test loss: 8.3550, test acc: 0.4734 | 0.19 sec.
Epoch: 41, loss: 38.7775, acc: 0.4811 | test loss: 7.7497, test acc: 0.4872 | 0.20 sec.
Epoch: 51, loss: 29.8151, acc: 0.5902 | test loss: 5.5860, test acc: 0.5996 | 0.20 sec.
Epoch: 61, loss: 18.6927, acc: 0.6958 | test loss: 3.7925, test acc: 0.6928 | 0.20 sec.
Epoch: 71, loss: 14.7642, acc: 0.7844 | test loss: 2.7309, test acc: 0.7909 | 0.20 sec.
Epoch: 81, loss: 10.1838, acc: 0.8887 | test loss: 1.6379, test acc: 0.8953 | 0.20 sec.
Epoch: 91, loss: 6.0375, acc: 0.9133 | test loss: 1.2455, test acc: 0.9095 | 0.21 sec.


In [11]:
# Sanity check on one fresh, unbatched sequence of 10 digits. randint's upper
# bound is exclusive, so 10 is required for the digit 9 to be exercised.
x = torch.randint(0, 10, (10,)).to(DEVICE)
with torch.no_grad():  # inference only, no autograd graph needed
    out = model(x).argmax(dim=1).view(-1)
y = digit_encoder(x).view(-1)

print(f'Test seq {x}\nPredict seq {out}\nReal seq {y}\nAcc {(out == y).sum()/len(out)}')

Test seq tensor([4, 5, 5, 0, 3, 3, 4, 4, 0, 1], device='cuda:0')
Predict seq tensor([4, 9, 9, 4, 7, 7, 8, 8, 4, 5], device='cuda:0')
Real seq tensor([4, 9, 9, 4, 7, 7, 8, 8, 4, 5], device='cuda:0')
Acc 1.0


LSTM модель, просто для разнообразия двуслойная

In [12]:
class LSTMModel(torch.nn.Module):
    """Digit embedding -> two stacked LSTM layers -> per-step classifier.

    Args:
        num_digits: vocabulary size and number of output classes.
        embed_dim: size of each digit embedding.
        hidden_size: hidden size of both LSTM layers.
    """

    def __init__(self, num_digits=10, embed_dim=10, hidden_size=128):
        super().__init__()
        self.embed = torch.nn.Embedding(num_digits, embed_dim)
        self.lstm1 = torch.nn.LSTM(embed_dim, hidden_size, batch_first=True)
        self.lstm2 = torch.nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size, num_digits)

    def forward(self, sentence, state=None):
        """Return per-position logits for a tensor of digit indices.

        Args:
            sentence: integer tensor of digit indices, batch-first.
            state: optional initial ``(h_0, c_0)`` pair for the first LSTM
                layer; ``None`` lets it start from its default (zero) state.
        """
        embed = self.embed(sentence)
        o, _ = self.lstm1(embed, state)
        # The second layer starts from its own zero state. Seeding it with
        # lstm1's *final* state would make the output at every position
        # depend on digits that come later in the sequence.
        o, _ = self.lstm2(o)
        return self.linear(o)

In [13]:
# Build the LSTM model on the selected device with its own Adam optimizer.
model_LSTM = LSTMModel().to(DEVICE)
optimizer_LSTM = torch.optim.Adam(model_LSTM.parameters(), lr=lr)

In [14]:
# Fit the LSTM on the same loaders, reusing the `loss` created for the RNN run.
train(train_ds, test_ds, model_LSTM, optimizer_LSTM, loss, num_epoch, DEVICE)

Epoch: 1, loss: 91.8264, acc: 0.1105 | test loss: 18.2614, test acc: 0.1157 | 0.54 sec.
Epoch: 11, loss: 27.9808, acc: 0.7065 | test loss: 4.7486, test acc: 0.7518 | 0.59 sec.
Epoch: 21, loss: 0.5986, acc: 1.0000 | test loss: 0.1106, test acc: 1.0000 | 0.55 sec.
Epoch: 31, loss: 0.1813, acc: 1.0000 | test loss: 0.0350, test acc: 1.0000 | 0.55 sec.
Epoch: 41, loss: 0.0908, acc: 1.0000 | test loss: 0.0178, test acc: 1.0000 | 0.55 sec.
Epoch: 51, loss: 0.0543, acc: 1.0000 | test loss: 0.0107, test acc: 1.0000 | 0.56 sec.
Epoch: 61, loss: 0.0355, acc: 1.0000 | test loss: 0.0071, test acc: 1.0000 | 0.56 sec.
Epoch: 71, loss: 0.0246, acc: 1.0000 | test loss: 0.0049, test acc: 1.0000 | 0.56 sec.
Epoch: 81, loss: 0.0177, acc: 1.0000 | test loss: 0.0035, test acc: 1.0000 | 0.57 sec.
Epoch: 91, loss: 0.0131, acc: 1.0000 | test loss: 0.0026, test acc: 1.0000 | 0.57 sec.


In [15]:
# Sanity check on one fresh, unbatched sequence of 10 digits. randint's upper
# bound is exclusive, so 10 is required for the digit 9 to be exercised.
x = torch.randint(0, 10, (10,)).to(DEVICE)
with torch.no_grad():  # inference only, no autograd graph needed
    out = model_LSTM(x).argmax(dim=1).view(-1)
y = digit_encoder(x).view(-1)

print(f'Test seq {x}\nPredict seq {out}\nReal seq {y}\nAcc {(out == y).sum()/len(out)}')

Test seq tensor([1, 1, 0, 7, 4, 8, 6, 7, 6, 5], device='cuda:0')
Predict seq tensor([1, 2, 1, 8, 5, 9, 7, 8, 7, 6], device='cuda:0')
Real seq tensor([1, 2, 1, 8, 5, 9, 7, 8, 7, 6], device='cuda:0')
Acc 1.0


GRU модель

In [16]:
class GRUModel(torch.nn.Module):
    """Digit embedding -> single ``torch.nn.GRU`` layer -> per-step classifier.

    Args:
        num_digits: vocabulary size and number of output classes.
        embed_dim: size of each digit embedding.
        hidden_size: size of the recurrent hidden state.
    """

    def __init__(self, num_digits=10, embed_dim=10, hidden_size=128):
        super().__init__()
        self.embed = torch.nn.Embedding(num_digits, embed_dim)
        self.gru = torch.nn.GRU(embed_dim, hidden_size, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size, num_digits)

    def forward(self, sentence, state=None):
        """Return per-position logits for a tensor of digit indices.

        Args:
            sentence: integer tensor of digit indices, batch-first.
            state: optional initial hidden state for the GRU; ``None`` lets
                the GRU start from its default (zero) state.
        """
        embed = self.embed(sentence)
        # Forward the caller's initial state; it used to be accepted but dropped.
        o, _ = self.gru(embed, state)
        return self.linear(o)

In [17]:
# Build the GRU model on the selected device with its own Adam optimizer.
model_GRU = GRUModel().to(DEVICE)
optimizer_GRU = torch.optim.Adam(model_GRU.parameters(), lr=lr)

In [18]:
# Fit the GRU on the same loaders, reusing the `loss` created for the RNN run.
train(train_ds, test_ds, model_GRU, optimizer_GRU, loss, num_epoch, DEVICE)

Epoch: 1, loss: 91.2936, acc: 0.1145 | test loss: 18.1154, test acc: 0.1180 | 0.27 sec.
Epoch: 11, loss: 49.8879, acc: 0.6558 | test loss: 6.8605, test acc: 0.8507 | 0.27 sec.
Epoch: 21, loss: 0.7635, acc: 1.0000 | test loss: 0.1389, test acc: 1.0000 | 0.32 sec.
Epoch: 31, loss: 0.2454, acc: 1.0000 | test loss: 0.0468, test acc: 1.0000 | 0.27 sec.
Epoch: 41, loss: 0.1237, acc: 1.0000 | test loss: 0.0239, test acc: 1.0000 | 0.26 sec.
Epoch: 51, loss: 0.0734, acc: 1.0000 | test loss: 0.0142, test acc: 1.0000 | 0.27 sec.
Epoch: 61, loss: 0.0479, acc: 1.0000 | test loss: 0.0093, test acc: 1.0000 | 0.26 sec.
Epoch: 71, loss: 0.0330, acc: 1.0000 | test loss: 0.0065, test acc: 1.0000 | 0.27 sec.
Epoch: 81, loss: 0.0238, acc: 1.0000 | test loss: 0.0046, test acc: 1.0000 | 0.27 sec.
Epoch: 91, loss: 0.0175, acc: 1.0000 | test loss: 0.0034, test acc: 1.0000 | 0.27 sec.


In [19]:
# Sanity check on one fresh, unbatched sequence of 10 digits. randint's upper
# bound is exclusive, so 10 is required for the digit 9 to be exercised.
x = torch.randint(0, 10, (10,)).to(DEVICE)
with torch.no_grad():  # inference only, no autograd graph needed
    out = model_GRU(x).argmax(dim=1).view(-1)
y = digit_encoder(x).view(-1)

print(f'Test seq {x}\nPredict seq {out}\nReal seq {y}\nAcc {(out == y).sum()/len(out)}')

Test seq tensor([7, 1, 3, 8, 7, 3, 8, 8, 5, 7], device='cuda:0')
Predict seq tensor([7, 8, 0, 5, 4, 0, 5, 5, 2, 4], device='cuda:0')
Real seq tensor([7, 8, 0, 5, 4, 0, 5, 5, 2, 4], device='cuda:0')
Acc 1.0
