In [1]:
import torch
import random
import time
from torch import nn
from typing import List

### Функция генерации новой последовательности из исходной

In [2]:
def get_y_sequence(x: List[int]) -> List[int]:
    y = [0 for _ in x]
    y[0] = x[0]
    for i in range(1, len(x)):
        y[i] = x[i] + x[0]
        if y[i] >= 10:
            y[i] -= 10
    return y

In [3]:
x = [5, 2, 6, 7]
y = get_y_sequence(x)
y

[5, 7, 1, 2]

### Создание датасета

In [4]:
SEQUENCE_LEN = 15
DATASET_LEN = 10000
X = [[random.randint(0, 9) for i in range(SEQUENCE_LEN)] for _ in range(DATASET_LEN)]
Y = [get_y_sequence(x) for x in X]

In [5]:
X[:2]

[[6, 7, 2, 2, 1, 6, 1, 0, 1, 2, 8, 9, 4, 2, 4],
 [1, 6, 3, 8, 0, 8, 8, 3, 0, 6, 0, 2, 3, 5, 0]]

In [6]:
Y[:2]

[[6, 3, 8, 8, 7, 2, 7, 6, 7, 8, 4, 5, 0, 8, 0],
 [1, 7, 4, 9, 1, 9, 9, 4, 1, 7, 1, 3, 4, 6, 1]]

По-скольку значения уже типа int, дополнительных преобразований не требуется, можно сразу переводить в тензоры.

In [7]:
X_train = torch.tensor(X)
Y_train = torch.tensor(Y)

In [8]:
X_train[:2]

tensor([[6, 7, 2, 2, 1, 6, 1, 0, 1, 2, 8, 9, 4, 2, 4],
        [1, 6, 3, 8, 0, 8, 8, 3, 0, 6, 0, 2, 3, 5, 0]])

In [9]:
Y_train[:2]

tensor([[6, 3, 8, 8, 7, 2, 7, 6, 7, 8, 4, 5, 0, 8, 0],
        [1, 7, 4, 9, 1, 9, 9, 4, 1, 7, 1, 3, 4, 6, 1]])

### RNN

In [10]:
class Network(torch.nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.embedding = torch.nn.Embedding(10, 30)
        self.rnn = torch.nn.RNN(30, 128, batch_first=True)
        self.out = torch.nn.Linear(128, 10)

    def forward(self, sentences, state=None):
        x = self.embedding(sentences)
        x, s = self.rnn(x) # берём выход с последнего слоя для всех токенов, а не скрытое состояние
        return self.out(x)

In [11]:
model = Network()
device = "mps" if torch.backends.mps.is_available() else "cpu"
model.to(device)

Network(
  (embedding): Embedding(10, 30)
  (rnn): RNN(30, 128, batch_first=True)
  (out): Linear(in_features=128, out_features=10, bias=True)
)

In [129]:
criterion = torch.nn.CrossEntropyLoss()  # типичный лосс многоклассовой классификации
optimizer = torch.optim.SGD(model.parameters(), lr=.05)

In [133]:
for ep in range(100):
    start = time.time()
    train_loss = 0.
    train_passed = 0

    for i in range(int(len(X_train) / 100)):
        # берём батч в 100 элементов
        X_batch = X_train[i * 100:(i + 1) * 100].to(device) # Shape [100, 15]
        Y_batch = Y_train[i * 100:(i + 1) * 100].to(device) # Shape [100, 15]

        optimizer.zero_grad()
        answers = model.forward(X_batch)              # Shape: [100, 15, 10]
        answers_view = answers.view(-1, 10)           # Shape: [100, 15]
        Y_view = Y_batch.view(-1)                     # Shape: [1500]
        loss = criterion(answers.view(-1, 10), Y_batch.view(-1))
        train_loss += loss.item()

        loss.backward()
        optimizer.step()
        train_passed += 1

    print("Epoch {}. Time: {:.3f}, Train loss: {:.3f}".format(ep, time.time() - start, train_loss / train_passed))

Epoch 0. Time: 0.888, Train loss: 2.283
Epoch 1. Time: 0.789, Train loss: 2.282
Epoch 2. Time: 0.895, Train loss: 2.280
Epoch 3. Time: 0.808, Train loss: 2.278
Epoch 4. Time: 0.775, Train loss: 2.275
Epoch 5. Time: 0.774, Train loss: 2.270
Epoch 6. Time: 0.775, Train loss: 2.263
Epoch 7. Time: 0.777, Train loss: 2.253
Epoch 8. Time: 0.784, Train loss: 2.241
Epoch 9. Time: 0.778, Train loss: 2.220
Epoch 10. Time: 0.778, Train loss: 2.193
Epoch 11. Time: 0.775, Train loss: 2.167
Epoch 12. Time: 0.764, Train loss: 2.137
Epoch 13. Time: 0.819, Train loss: 2.107
Epoch 14. Time: 0.771, Train loss: 2.076
Epoch 15. Time: 0.768, Train loss: 2.045
Epoch 16. Time: 0.781, Train loss: 2.017
Epoch 17. Time: 0.769, Train loss: 1.994
Epoch 18. Time: 0.816, Train loss: 1.974
Epoch 19. Time: 0.805, Train loss: 1.958
Epoch 20. Time: 0.773, Train loss: 1.945
Epoch 21. Time: 0.775, Train loss: 1.933
Epoch 22. Time: 0.789, Train loss: 1.923
Epoch 23. Time: 0.786, Train loss: 1.915
Epoch 24. Time: 0.779, Tra

In [134]:
def generate_sentence(sentence):
    answers = model.forward(torch.tensor(sentence).to(device))
    probas, indices = answers.topk(1)
    return [ind.item() for ind in indices.flatten()]

In [135]:
TEST_DATASET_LEN = 10
X_test = [[random.randint(0, 9) for i in range(SEQUENCE_LEN)] for _ in range(TEST_DATASET_LEN)]
Y_test = [get_y_sequence(x) for x in X_test]

all = 0
right = 0

for x, y in zip(X_test, Y_test):
    all += 1
    answer = generate_sentence(x)
    print(x, answer, y)
    if answer == y:
        right += 1

print(f'Accuracy: {right/all:.3f}')

[5, 2, 9, 0, 8, 4, 0, 9, 3, 9, 1, 8, 6, 7, 9] [5, 7, 4, 5, 3, 9, 5, 4, 8, 4, 6, 3, 1, 2, 4] [5, 7, 4, 5, 3, 9, 5, 4, 8, 4, 6, 3, 1, 2, 4]
[7, 3, 3, 2, 8, 5, 5, 5, 4, 4, 6, 1, 3, 1, 9] [7, 0, 0, 9, 5, 2, 2, 2, 1, 1, 3, 8, 0, 8, 6] [7, 0, 0, 9, 5, 2, 2, 2, 1, 1, 3, 8, 0, 8, 6]
[0, 8, 8, 9, 5, 1, 5, 3, 3, 7, 2, 9, 6, 0, 4] [0, 8, 8, 9, 5, 1, 5, 3, 3, 7, 2, 9, 6, 0, 4] [0, 8, 8, 9, 5, 1, 5, 3, 3, 7, 2, 9, 6, 0, 4]
[3, 3, 8, 6, 9, 7, 5, 8, 0, 3, 4, 0, 6, 6, 7] [3, 6, 1, 9, 2, 0, 8, 1, 3, 6, 7, 3, 9, 9, 0] [3, 6, 1, 9, 2, 0, 8, 1, 3, 6, 7, 3, 9, 9, 0]
[2, 6, 6, 5, 8, 1, 6, 3, 0, 1, 2, 2, 5, 8, 1] [2, 8, 8, 7, 0, 3, 8, 5, 2, 3, 4, 4, 7, 0, 3] [2, 8, 8, 7, 0, 3, 8, 5, 2, 3, 4, 4, 7, 0, 3]
[2, 0, 2, 1, 4, 7, 6, 8, 6, 6, 4, 0, 8, 2, 6] [2, 2, 4, 3, 6, 9, 8, 0, 8, 8, 6, 2, 0, 4, 8] [2, 2, 4, 3, 6, 9, 8, 0, 8, 8, 6, 2, 0, 4, 8]
[2, 5, 5, 2, 7, 7, 0, 6, 3, 6, 4, 5, 5, 6, 5] [2, 7, 7, 4, 9, 9, 2, 8, 5, 8, 6, 7, 7, 8, 7] [2, 7, 7, 4, 9, 9, 2, 8, 5, 8, 6, 7, 7, 8, 7]
[7, 5, 3, 1, 4, 4, 2, 3, 3, 5, 2, 

RNN обучилась за 80 эпох до 100% точности.

### GRU

In [8]:
BATCH_SIZE=512
dataset = torch.utils.data.TensorDataset(X_train, Y_train)
data = torch.utils.data.DataLoader(dataset, BATCH_SIZE, shuffle=True)

In [9]:
class NeuralNetwork(nn.Module):
    def __init__(self, rnnClass, dictionary_size, embedding_size, num_hiddens, num_classes):
        super().__init__()
        
        self.num_hiddens = num_hiddens
        self.embedding = nn.Embedding(dictionary_size, embedding_size)
        self.hidden = rnnClass(embedding_size, num_hiddens, batch_first=True)
        self.output = nn.Linear(num_hiddens, num_classes)
        
    def forward(self, X):
        out = self.embedding(X)
        x, state = self.hidden(out)
        return self.output(x)

In [10]:
model = NeuralNetwork(nn.GRU, 10, 64, 128, 10)

In [11]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
model.to(device)

NeuralNetwork(
  (embedding): Embedding(10, 64)
  (hidden): GRU(64, 128, batch_first=True)
  (output): Linear(in_features=128, out_features=10, bias=True)
)

In [12]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

In [16]:
for ep in range(10):
    start = time.time()
    train_loss = 0.
    train_passed = 0

    model.train()
    for X_b, y_b in data:
        y_b = y_b.float()       # Shape 512, 15
        X_b, y_b = X_b.to(device), y_b.to(device)
        optimizer.zero_grad()
        answers = model(X_b)    # Shape 512, 15, 10
        loss = criterion(answers.view(-1, 10), y_b.view(-1) )
        train_loss += loss.item()

        loss.backward()
        optimizer.step()
        train_passed += 1

    print("Epoch {}. Time: {:.3f}, Train loss: {:.3f}".format(ep, time.time() - start, train_loss / train_passed))

Epoch 0. Time: 0.921, Train loss: 2.293
Epoch 1. Time: 0.306, Train loss: 2.275
Epoch 2. Time: 0.299, Train loss: 2.237
Epoch 3. Time: 0.294, Train loss: 2.149
Epoch 4. Time: 0.290, Train loss: 1.891
Epoch 5. Time: 0.297, Train loss: 1.334
Epoch 6. Time: 0.296, Train loss: 0.729
Epoch 7. Time: 0.297, Train loss: 0.335
Epoch 8. Time: 0.298, Train loss: 0.162
Epoch 9. Time: 0.294, Train loss: 0.093


In [17]:
def generate_sentence(sentence):
    answers = model.forward(torch.tensor(sentence).to(device))
    probas, indices = answers.topk(1)
    return [ind.item() for ind in indices.flatten()]

In [18]:
SEQUENCE_LEN = 10
TEST_DATASET_LEN = 10
X_test = [[random.randint(0, 9) for i in range(SEQUENCE_LEN)] for _ in range(TEST_DATASET_LEN)]
Y_test = [get_y_sequence(x) for x in X_test]

all = 0
right = 0

for x, y in zip(X_test, Y_test):
    all += 1
    answer = generate_sentence(x)
    print(x, answer, y)
    if answer == y:
        right += 1

print(f'Accuracy: {right/all:.3f}')

[0, 6, 8, 9, 9, 9, 0, 1, 7, 6] [0, 6, 8, 9, 9, 9, 0, 1, 7, 6] [0, 6, 8, 9, 9, 9, 0, 1, 7, 6]
[7, 5, 4, 1, 7, 7, 0, 8, 8, 9] [7, 2, 1, 8, 4, 4, 7, 5, 5, 6] [7, 2, 1, 8, 4, 4, 7, 5, 5, 6]
[3, 9, 2, 2, 0, 9, 8, 1, 3, 9] [3, 2, 5, 5, 3, 2, 1, 4, 6, 2] [3, 2, 5, 5, 3, 2, 1, 4, 6, 2]
[8, 3, 1, 5, 2, 9, 7, 4, 1, 1] [8, 1, 9, 3, 0, 7, 5, 2, 9, 9] [8, 1, 9, 3, 0, 7, 5, 2, 9, 9]
[6, 0, 4, 2, 7, 6, 6, 2, 5, 1] [6, 6, 0, 8, 3, 2, 2, 8, 1, 7] [6, 6, 0, 8, 3, 2, 2, 8, 1, 7]
[5, 4, 7, 7, 9, 2, 6, 0, 8, 1] [5, 9, 2, 2, 4, 7, 1, 5, 3, 6] [5, 9, 2, 2, 4, 7, 1, 5, 3, 6]
[2, 2, 3, 2, 5, 3, 0, 5, 3, 0] [2, 4, 5, 4, 7, 5, 2, 7, 5, 2] [2, 4, 5, 4, 7, 5, 2, 7, 5, 2]
[8, 6, 5, 3, 6, 9, 0, 3, 9, 8] [8, 4, 3, 1, 4, 7, 8, 1, 7, 6] [8, 4, 3, 1, 4, 7, 8, 1, 7, 6]
[9, 7, 5, 3, 9, 6, 6, 4, 4, 0] [9, 6, 4, 2, 8, 5, 5, 3, 3, 9] [9, 6, 4, 2, 8, 5, 5, 3, 3, 9]
[4, 9, 4, 2, 0, 9, 1, 9, 4, 8] [4, 3, 8, 6, 4, 3, 5, 3, 8, 2] [4, 3, 8, 6, 4, 3, 5, 3, 8, 2]
Accuracy: 1.000


GRU обучилась за 10 эпох до 100% точности.

### LSTM

In [21]:
model = NeuralNetwork(nn.LSTM, 10, 64, 128, 10)

In [22]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
model.to(device)

NeuralNetwork(
  (embedding): Embedding(10, 64)
  (hidden): LSTM(64, 128, batch_first=True)
  (output): Linear(in_features=128, out_features=10, bias=True)
)

In [28]:
for ep in range(50):
    start = time.time()
    train_loss = 0.
    train_passed = 0

    model.train()
    for X_b, y_b in data:
        y_b = y_b.float()       # Shape 512, 15
        X_b, y_b = X_b.to(device), y_b.to(device)
        optimizer.zero_grad()
        answers = model(X_b)    # Shape 512, 15, 10
        loss = criterion(answers.view(-1, 10), y_b.view(-1) )
        train_loss += loss.item()

        loss.backward()
        optimizer.step()
        train_passed += 1

    print("Epoch {}. Time: {:.3f}, Train loss: {:.3f}".format(ep, time.time() - start, train_loss / train_passed))

Epoch 0. Time: 0.488, Train loss: 2.306
Epoch 1. Time: 0.182, Train loss: 2.306
Epoch 2. Time: 0.185, Train loss: 2.306
Epoch 3. Time: 0.183, Train loss: 2.307
Epoch 4. Time: 0.191, Train loss: 2.306
Epoch 5. Time: 0.203, Train loss: 2.306
Epoch 6. Time: 0.186, Train loss: 2.306
Epoch 7. Time: 0.186, Train loss: 2.307
Epoch 8. Time: 0.261, Train loss: 2.306
Epoch 9. Time: 0.184, Train loss: 2.306
Epoch 10. Time: 0.181, Train loss: 2.306
Epoch 11. Time: 0.182, Train loss: 2.306
Epoch 12. Time: 0.207, Train loss: 2.306
Epoch 13. Time: 0.186, Train loss: 2.306
Epoch 14. Time: 0.184, Train loss: 2.306
Epoch 15. Time: 0.183, Train loss: 2.306
Epoch 16. Time: 0.185, Train loss: 2.306
Epoch 17. Time: 0.181, Train loss: 2.306
Epoch 18. Time: 0.182, Train loss: 2.306
Epoch 19. Time: 0.182, Train loss: 2.306
Epoch 20. Time: 0.206, Train loss: 2.307
Epoch 21. Time: 0.182, Train loss: 2.306
Epoch 22. Time: 0.182, Train loss: 2.306
Epoch 23. Time: 0.182, Train loss: 2.306
Epoch 24. Time: 0.183, Tra

In [24]:
def generate_sentence(sentence):
    answers = model.forward(torch.tensor(sentence).to(device))
    probas, indices = answers.topk(1)
    return [ind.item() for ind in indices.flatten()]

In [27]:
SEQUENCE_LEN = 10
TEST_DATASET_LEN = 10
X_test = [[random.randint(0, 9) for i in range(SEQUENCE_LEN)] for _ in range(TEST_DATASET_LEN)]
Y_test = [get_y_sequence(x) for x in X_test]

all = 0
right = 0

for x, y in zip(X_test, Y_test):
    all += 1
    answer = generate_sentence(x)
    print(x, answer, y)
    if answer == y:
        right += 1

print(f'Accuracy: {right/all:.3f}')

[0, 6, 6, 5, 0, 5, 1, 6, 9, 4] [3, 0, 0, 5, 5, 5, 5, 0, 0, 5] [0, 6, 6, 5, 0, 5, 1, 6, 9, 4]
[1, 2, 9, 1, 0, 9, 0, 9, 0, 0] [2, 5, 0, 0, 2, 0, 3, 0, 3, 3] [1, 3, 0, 2, 1, 0, 1, 0, 1, 1]
[0, 3, 9, 3, 6, 1, 6, 8, 7, 8] [3, 0, 0, 0, 0, 0, 0, 2, 0, 0] [0, 3, 9, 3, 6, 1, 6, 8, 7, 8]
[0, 8, 8, 6, 9, 6, 6, 6, 0, 2] [3, 2, 0, 0, 0, 0, 0, 0, 3, 3] [0, 8, 8, 6, 9, 6, 6, 6, 0, 2]
[9, 6, 0, 6, 7, 2, 8, 1, 5, 7] [0, 0, 3, 0, 0, 3, 0, 5, 5, 0] [9, 5, 9, 5, 6, 1, 7, 0, 4, 6]
[4, 4, 0, 4, 5, 7, 3, 5, 1, 1] [5, 5, 5, 5, 5, 0, 0, 5, 5, 5] [4, 8, 4, 8, 9, 1, 7, 9, 5, 5]
[5, 6, 9, 8, 3, 1, 1, 2, 1, 8] [5, 0, 0, 0, 0, 0, 5, 5, 5, 2] [5, 1, 4, 3, 8, 6, 6, 7, 6, 3]
[9, 6, 6, 2, 7, 0, 0, 5, 6, 6] [0, 0, 0, 0, 0, 3, 3, 5, 2, 0] [9, 5, 5, 1, 6, 9, 9, 4, 5, 5]
[4, 3, 1, 7, 4, 3, 2, 6, 2, 6] [5, 0, 5, 0, 5, 0, 3, 0, 3, 0] [4, 7, 5, 1, 8, 7, 6, 0, 6, 0]
[7, 4, 3, 3, 3, 5, 6, 5, 7, 7] [0, 5, 0, 0, 0, 5, 0, 5, 0, 0] [7, 1, 0, 0, 0, 2, 3, 2, 4, 4]
Accuracy: 0.000


LSTM по непонятной причине не обучается совсем.