In [41]:
import torch
import random
import time
from torch import nn
from typing import List

### Функция генерации новой последовательности из исходной

In [29]:
def get_y_sequence(x: List[int]) -> List[int]:
    y = [0 for _ in x]
    y[0] = x[0]
    for i in range(1, len(x)):
        y[i] = x[i] + x[0]
        if y[i] >= 10:
            y[i] -= 10
    return y

In [30]:
x = [5, 2, 6, 7]
y = get_y_sequence(x)
y

[5, 7, 1, 2]

### Создание датасета

In [106]:
SEQUENCE_LEN = 15
DATASET_LEN = 10000
X = [[random.randint(0, 9) for i in range(SEQUENCE_LEN)] for _ in range(DATASET_LEN)]
Y = [get_y_sequence(x) for x in X]

In [107]:
X[:2]

[[7, 6, 9, 5, 4, 3, 8, 9, 2, 2, 0, 0, 8, 1, 4],
 [0, 4, 4, 8, 8, 3, 2, 5, 2, 9, 4, 1, 6, 1, 8]]

In [108]:
Y[:2]

[[7, 3, 6, 2, 1, 0, 5, 6, 9, 9, 7, 7, 5, 8, 1],
 [0, 4, 4, 8, 8, 3, 2, 5, 2, 9, 4, 1, 6, 1, 8]]

По-скольку значения уже типа int, дополнительных преобразований не требуется, можно сразу переводить в тензоры.

In [109]:
X_train = torch.tensor(X)
Y_train = torch.tensor(Y)

In [110]:
X_train[:2]

tensor([[7, 6, 9, 5, 4, 3, 8, 9, 2, 2, 0, 0, 8, 1, 4],
        [0, 4, 4, 8, 8, 3, 2, 5, 2, 9, 4, 1, 6, 1, 8]])

In [111]:
Y_train[:2]

tensor([[7, 3, 6, 2, 1, 0, 5, 6, 9, 9, 7, 7, 5, 8, 1],
        [0, 4, 4, 8, 8, 3, 2, 5, 2, 9, 4, 1, 6, 1, 8]])

### RNN

In [127]:
class Network(torch.nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.embedding = torch.nn.Embedding(10, 30)
        self.rnn = torch.nn.RNN(30, 128, batch_first=True)
        self.out = torch.nn.Linear(128, 10)

    def forward(self, sentences, state=None):
        x = self.embedding(sentences)
        x, s = self.rnn(x) # берём выход с последнего слоя для всех токенов, а не скрытое состояние
        return self.out(x)

In [128]:
model = Network()
device = "mps" if torch.backends.mps.is_available() else "cpu"
model.to(device)

Network(
  (embedding): Embedding(10, 30)
  (rnn): RNN(30, 128, batch_first=True)
  (out): Linear(in_features=128, out_features=10, bias=True)
)

In [129]:
criterion = torch.nn.CrossEntropyLoss()  # типичный лосс многоклассовой классификации
optimizer = torch.optim.SGD(model.parameters(), lr=.05)

In [133]:
for ep in range(100):
    start = time.time()
    train_loss = 0.
    train_passed = 0

    for i in range(int(len(X_train) / 100)):
        # берём батч в 100 элементов
        X_batch = X_train[i * 100:(i + 1) * 100].to(device) # Shape [100, 15]
        Y_batch = Y_train[i * 100:(i + 1) * 100].to(device) # Shape [100, 15]

        optimizer.zero_grad()
        answers = model.forward(X_batch)              # Shape: [100, 15, 10]
        answers_view = answers.view(-1, 10)           # Shape: [100, 15]
        Y_view = Y_batch.view(-1)                     # Shape: [1500]
        loss = criterion(answers.view(-1, 10), Y_batch.view(-1))
        train_loss += loss.item()

        loss.backward()
        optimizer.step()
        train_passed += 1

    print("Epoch {}. Time: {:.3f}, Train loss: {:.3f}".format(ep, time.time() - start, train_loss / train_passed))

Epoch 0. Time: 0.888, Train loss: 2.283
Epoch 1. Time: 0.789, Train loss: 2.282
Epoch 2. Time: 0.895, Train loss: 2.280
Epoch 3. Time: 0.808, Train loss: 2.278
Epoch 4. Time: 0.775, Train loss: 2.275
Epoch 5. Time: 0.774, Train loss: 2.270
Epoch 6. Time: 0.775, Train loss: 2.263
Epoch 7. Time: 0.777, Train loss: 2.253
Epoch 8. Time: 0.784, Train loss: 2.241
Epoch 9. Time: 0.778, Train loss: 2.220
Epoch 10. Time: 0.778, Train loss: 2.193
Epoch 11. Time: 0.775, Train loss: 2.167
Epoch 12. Time: 0.764, Train loss: 2.137
Epoch 13. Time: 0.819, Train loss: 2.107
Epoch 14. Time: 0.771, Train loss: 2.076
Epoch 15. Time: 0.768, Train loss: 2.045
Epoch 16. Time: 0.781, Train loss: 2.017
Epoch 17. Time: 0.769, Train loss: 1.994
Epoch 18. Time: 0.816, Train loss: 1.974
Epoch 19. Time: 0.805, Train loss: 1.958
Epoch 20. Time: 0.773, Train loss: 1.945
Epoch 21. Time: 0.775, Train loss: 1.933
Epoch 22. Time: 0.789, Train loss: 1.923
Epoch 23. Time: 0.786, Train loss: 1.915
Epoch 24. Time: 0.779, Tra

In [134]:
def generate_sentence(sentence):
    answers = model.forward(torch.tensor(sentence).to(device))
    probas, indices = answers.topk(1)
    return [ind.item() for ind in indices.flatten()]

In [135]:
TEST_DATASET_LEN = 10
X_test = [[random.randint(0, 9) for i in range(SEQUENCE_LEN)] for _ in range(TEST_DATASET_LEN)]
Y_test = [get_y_sequence(x) for x in X_test]

all = 0
right = 0

for x, y in zip(X_test, Y_test):
    all += 1
    answer = generate_sentence(x)
    print(x, answer, y)
    if answer == y:
        right += 1

print(f'Accuracy: {right/all:.3f}')

[5, 2, 9, 0, 8, 4, 0, 9, 3, 9, 1, 8, 6, 7, 9] [5, 7, 4, 5, 3, 9, 5, 4, 8, 4, 6, 3, 1, 2, 4] [5, 7, 4, 5, 3, 9, 5, 4, 8, 4, 6, 3, 1, 2, 4]
[7, 3, 3, 2, 8, 5, 5, 5, 4, 4, 6, 1, 3, 1, 9] [7, 0, 0, 9, 5, 2, 2, 2, 1, 1, 3, 8, 0, 8, 6] [7, 0, 0, 9, 5, 2, 2, 2, 1, 1, 3, 8, 0, 8, 6]
[0, 8, 8, 9, 5, 1, 5, 3, 3, 7, 2, 9, 6, 0, 4] [0, 8, 8, 9, 5, 1, 5, 3, 3, 7, 2, 9, 6, 0, 4] [0, 8, 8, 9, 5, 1, 5, 3, 3, 7, 2, 9, 6, 0, 4]
[3, 3, 8, 6, 9, 7, 5, 8, 0, 3, 4, 0, 6, 6, 7] [3, 6, 1, 9, 2, 0, 8, 1, 3, 6, 7, 3, 9, 9, 0] [3, 6, 1, 9, 2, 0, 8, 1, 3, 6, 7, 3, 9, 9, 0]
[2, 6, 6, 5, 8, 1, 6, 3, 0, 1, 2, 2, 5, 8, 1] [2, 8, 8, 7, 0, 3, 8, 5, 2, 3, 4, 4, 7, 0, 3] [2, 8, 8, 7, 0, 3, 8, 5, 2, 3, 4, 4, 7, 0, 3]
[2, 0, 2, 1, 4, 7, 6, 8, 6, 6, 4, 0, 8, 2, 6] [2, 2, 4, 3, 6, 9, 8, 0, 8, 8, 6, 2, 0, 4, 8] [2, 2, 4, 3, 6, 9, 8, 0, 8, 8, 6, 2, 0, 4, 8]
[2, 5, 5, 2, 7, 7, 0, 6, 3, 6, 4, 5, 5, 6, 5] [2, 7, 7, 4, 9, 9, 2, 8, 5, 8, 6, 7, 7, 8, 7] [2, 7, 7, 4, 9, 9, 2, 8, 5, 8, 6, 7, 7, 8, 7]
[7, 5, 3, 1, 4, 4, 2, 3, 3, 5, 2, 

RNN обучилась за 80 эпох до 100% точности.

### GRU

In [136]:
BATCH_SIZE=512
dataset = torch.utils.data.TensorDataset(X_train, Y_train)
data = torch.utils.data.DataLoader(dataset, BATCH_SIZE, shuffle=True)

In [137]:
class NeuralNetwork(nn.Module):
    def __init__(self, rnnClass, dictionary_size, embedding_size, num_hiddens, num_classes):
        super().__init__()
        
        self.num_hiddens = num_hiddens
        self.embedding = nn.Embedding(dictionary_size, embedding_size)
        self.hidden = rnnClass(embedding_size, num_hiddens, batch_first=True)
        self.output = nn.Linear(num_hiddens, num_classes)
        
    def forward(self, X):
        out = self.embedding(X)
        _, state = self.hidden(out)
        predictions = self.output(state[0])#.squeeze())
        return predictions

In [138]:
model = NeuralNetwork(nn.GRU, 10, 64, 128, 10)

In [139]:
model.to(device)

NeuralNetwork(
  (embedding): Embedding(10, 64)
  (hidden): GRU(64, 128, batch_first=True)
  (output): Linear(in_features=128, out_features=10, bias=True)
)

In [None]:
# def sample(preds):
#     softmaxed = torch.softmax(preds, 0)
#     probas = torch.distributions.multinomial.Multinomial(1, softmaxed).sample()
#     return probas.argmax()

# def generate_sequence():
#     generated = [random.randint(0, 9) for i in range(SEQUENCE_LEN)]

#     for i in range(MAX_LEN):
#         x_pred = torch.zeros((1, MAX_LEN), dtype=int)
#         for t, char in enumerate(generated[-MAX_LEN:]):
#             x_pred[0, t] = CHAR_TO_INDEX[char]

#         preds = model.forward(x_pred.to(device))[0].to('cpu')
#         next_char = INDEX_TO_CHAR[sample(preds)]
#         generated = generated + next_char

#     print(generated[:MAX_LEN] + '|' + generated[MAX_LEN:])

In [140]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

In [143]:
for ep in range(10):
    start = time.time()
    train_loss = 0.
    train_passed = 0

    model.train()
    for X_b, y_b in data:
        y_b = y_b.float()       # Shape 512, 15
        X_b, y_b = X_b.to(device), y_b.to(device)
        optimizer.zero_grad()
        answers = model(X_b)    # Shape 512, 10
        loss = criterion(answers, y_b)
        train_loss += loss.item()

        loss.backward()
        optimizer.step()
        train_passed += 1

    print("Epoch {}. Time: {:.3f}, Train loss: {:.3f}".format(ep, time.time() - start, train_loss / train_passed))

RuntimeError: 0D or 1D target tensor expected, multi-target not supported

In [98]:
def generate_sentence(sentence):
    answers = model.forward(torch.tensor(sentence).to(device))
    probas, indices = answers.topk(1)
    return [ind.item() for ind in indices.flatten()]
    # return [ind.item() for ind in answers]

In [99]:
SEQUENCE_LEN = 10
TEST_DATASET_LEN = 10
X_test = [[random.randint(0, 9) for i in range(SEQUENCE_LEN)] for _ in range(TEST_DATASET_LEN)]
Y_test = [get_y_sequence(x) for x in X_test]

all = 0
right = 0

for x, y in zip(X_test, Y_test):
    all += 1
    answer = generate_sentence(x)
    print(x, answer, y)
    if answer == y:
        right += 1

print(f'Accuracy: {right/all:.3f}')

[5, 9, 3, 8, 9, 1, 3, 0, 6, 1] [0.07498680055141449, -0.0982784628868103, 0.4880581796169281, -0.4027999937534332, -0.14040526747703552, 0.32545006275177, 0.5708295702934265, 0.04787703603506088, -1.619030475616455, 0.22371217608451843] [5, 4, 8, 3, 4, 6, 8, 5, 1, 6]
[4, 9, 7, 6, 1, 9, 0, 1, 8, 8] [0.9444923996925354, 0.5247421860694885, -0.5133309960365295, -4.38349723815918, 0.9919009804725647, 0.6777141094207764, 0.9149038791656494, 1.0781443119049072, 0.1123666912317276, 0.21075138449668884] [4, 3, 1, 0, 5, 3, 4, 5, 2, 2]
[9, 6, 9, 0, 4, 4, 9, 5, 2, 7] [0.6963919997215271, 0.10238951444625854, 0.5821854472160339, 0.7114826440811157, -0.3683471381664276, -0.39141035079956055, 0.5761909484863281, -0.10388124734163284, -1.770476222038269, 0.3202802240848541] [9, 5, 8, 9, 3, 3, 8, 4, 1, 6]
[5, 6, 8, 6, 7, 3, 5, 6, 0, 2] [0.9244573712348938, -0.45975539088249207, 0.6095533967018127, -0.44103994965553284, 0.10744420439004898, 1.5253572463989258, -4.515803813934326, -0.5593655109405518, 0