In [414]:
import pandas as pd
import time
import torch
from torch import nn
import re
import numpy as np
import tqdm

## Делаем массив с данными

In [415]:
# 500 random sequences, each made of 50 digits drawn uniformly from 0..9.
X = np.random.randint(0, 10, size=(500, 50))
# Preview the first ten digits of the first three sequences.
tuple(X[row, :10] for row in range(3))

(array([6, 2, 0, 8, 2, 7, 9, 8, 4, 2]),
 array([6, 8, 1, 7, 5, 7, 2, 2, 2, 3]),
 array([0, 5, 8, 0, 7, 3, 8, 0, 5, 4]))

In [416]:
# Build the targets: the first digit of every sequence is kept unchanged, and
# each later digit is shifted by that first digit, wrapping around modulo 10.
Y = X.copy()
# X[:, :1] keeps a trailing axis of length 1, so it broadcasts across the
# remaining columns; no aliasing view or temporary full-size matrix is needed.
Y[:, 1:] = (X[:, 1:] + X[:, :1]) % 10

Y[0, : 10], Y[1, : 10], Y[2, : 10]

(array([6, 8, 6, 4, 8, 3, 5, 4, 0, 8]),
 array([6, 4, 7, 3, 1, 3, 8, 8, 8, 9]),
 array([0, 5, 8, 0, 7, 3, 8, 0, 5, 4]))

In [417]:
# Convert the NumPy integer arrays to tensors (torch.tensor copies the data).
X, Y = torch.tensor(X), torch.tensor(Y)

In [418]:
# Display the input and target tensors to check the conversion.
X, Y

(tensor([[6, 2, 0,  ..., 2, 4, 6],
         [6, 8, 1,  ..., 5, 6, 3],
         [0, 5, 8,  ..., 6, 7, 9],
         ...,
         [5, 4, 1,  ..., 8, 7, 3],
         [4, 4, 9,  ..., 3, 1, 7],
         [1, 7, 3,  ..., 7, 8, 7]]), tensor([[6, 8, 6,  ..., 8, 0, 2],
         [6, 4, 7,  ..., 1, 2, 9],
         [0, 5, 8,  ..., 6, 7, 9],
         ...,
         [5, 9, 6,  ..., 3, 2, 8],
         [4, 8, 3,  ..., 7, 5, 1],
         [1, 8, 4,  ..., 8, 9, 8]]))

In [419]:
# NOTE(review): the training loops visible in this notebook slice X / Y by hand
# in chunks of 20 and never iterate over this loader.
BATCH_SIZE = 512
dataset = torch.utils.data.TensorDataset(X, Y)
data = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

**Модель RNN.**

In [420]:
class Network(torch.nn.Module):
    """Per-step sequence classifier: embedding -> recurrent layer -> linear head.

    Args:
        rnnClass: recurrent layer constructor (e.g. ``nn.RNN``, ``nn.GRU``,
            ``nn.LSTM``); it is called as
            ``rnnClass(embedding_size, num_hiddens, batch_first=True)``.
        dictionary_size: number of distinct token ids the embedding accepts.
        embedding_size: size of each token embedding vector.
        num_hiddens: hidden size of the recurrent layer.
        num_classes: number of logits produced for every time step.
    """

    def __init__(self, rnnClass, dictionary_size, embedding_size, num_hiddens, num_classes):
        super().__init__()
        self.embed = nn.Embedding(dictionary_size, embedding_size)
        self.hidden = rnnClass(embedding_size, num_hiddens, batch_first=True)
        self.linear = nn.Linear(num_hiddens, num_classes)

    def forward(self, sentences, state=None):
        """Compute class logits for every position of ``sentences``.

        Args:
            sentences: integer tensor of token ids; the recurrent layer is
                built with ``batch_first=True``, so batched input is
                ``(batch, seq_len)``.
            state: optional initial state handed to the recurrent layer
                (a tensor for RNN/GRU, an ``(h_0, c_0)`` tuple for LSTM).
                ``None`` lets the layer start from its default zero state.

        Returns:
            Tensor of logits shaped like ``sentences`` with a trailing
            ``num_classes`` axis.
        """
        embed = self.embed(sentences)
        # Pass the caller's state through; previously it was accepted but ignored.
        o, _ = self.hidden(embed, state)
        return self.linear(o)

In [421]:
# Vanilla RNN: 10-symbol vocabulary, 64-dim embeddings, 128 hidden units, 10 classes.
model = Network(
    nn.RNN,
    dictionary_size=10,
    embedding_size=64,
    num_hiddens=128,
    num_classes=10,
).cuda()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

**Обучаем модель.**

In [422]:
# Train the RNN for 200 epochs over consecutive (non-shuffled) slices of 20 sequences.
for ep in range(200):
    start = time.time()
    batch_losses = []

    for i in range(len(X) // 20):
        rows = slice(i * 20, (i + 1) * 20)
        X_batch = X[rows].cuda()
        # Targets are flattened so they line up with the flattened logits below.
        Y_batch = Y[rows].flatten().cuda()

        optimizer.zero_grad()
        answers = model(X_batch).view(-1, 10)
        loss = criterion(answers, Y_batch)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    mean_loss = sum(batch_losses) / len(batch_losses)
    # Every fifth epoch gets its own line; the others overwrite the current line.
    if ep % 5 == 0:
        print("\nEpoch {}. Time: {:.3f}, Train loss: {:.3f}".format(ep, time.time() - start, mean_loss))
    else:
        print(f"\rEpoch {ep}, loss: {mean_loss:.3f}", end='')


Epoch 0. Time: 0.062, Train loss: 2.355
Epoch 4, loss: 1.950
Epoch 5. Time: 0.058, Train loss: 1.825
Epoch 9, loss: 1.690
Epoch 10. Time: 0.058, Train loss: 1.681
Epoch 14, loss: 1.675
Epoch 15. Time: 0.056, Train loss: 1.664
Epoch 19, loss: 1.460
Epoch 20. Time: 0.060, Train loss: 1.365
Epoch 24, loss: 1.304
Epoch 25. Time: 0.060, Train loss: 1.543
Epoch 29, loss: 1.205
Epoch 30. Time: 0.058, Train loss: 1.209
Epoch 34, loss: 1.099
Epoch 35. Time: 0.061, Train loss: 1.088
Epoch 39, loss: 1.153
Epoch 40. Time: 0.058, Train loss: 1.160
Epoch 44, loss: 0.902
Epoch 45. Time: 0.059, Train loss: 0.955
Epoch 49, loss: 0.867
Epoch 50. Time: 0.057, Train loss: 0.858
Epoch 54, loss: 0.879
Epoch 55. Time: 0.061, Train loss: 0.866
Epoch 59, loss: 0.881
Epoch 60. Time: 0.059, Train loss: 0.853
Epoch 64, loss: 0.670
Epoch 65. Time: 0.057, Train loss: 0.820
Epoch 69, loss: 0.650
Epoch 70. Time: 0.058, Train loss: 1.175
Epoch 74, loss: 0.813
Epoch 75. Time: 0.068, Train loss: 0.818
Epoch 79, loss: 1

**Модель GRU.**

In [433]:
# GRU variant with the same sizes as the RNN model, trained with a smaller learning rate.
model = Network(
    nn.GRU,
    dictionary_size=10,
    embedding_size=64,
    num_hiddens=128,
    num_classes=10,
).cuda()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)

In [434]:
# Train the GRU for 100 epochs over consecutive (non-shuffled) slices of 20 sequences.
for ep in range(100):
    start = time.time()
    batch_losses = []

    for i in range(len(X) // 20):
        rows = slice(i * 20, (i + 1) * 20)
        X_batch = X[rows].cuda()
        # Targets are flattened so they line up with the flattened logits below.
        Y_batch = Y[rows].flatten().cuda()

        optimizer.zero_grad()
        answers = model(X_batch).view(-1, 10)
        loss = criterion(answers, Y_batch)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    mean_loss = sum(batch_losses) / len(batch_losses)
    # Every fifth epoch gets its own line; the others overwrite the current line.
    if ep % 5 == 0:
        print("\nEpoch {}. Time: {:.3f}, Train loss: {:.3f}".format(ep, time.time() - start, mean_loss))
    else:
        print(f"\rEpoch {ep}, loss: {mean_loss:.3f}", end='')


Epoch 0. Time: 0.073, Train loss: 2.308
Epoch 4, loss: 2.275
Epoch 5. Time: 0.064, Train loss: 2.242
Epoch 9, loss: 1.383
Epoch 10. Time: 0.064, Train loss: 0.845
Epoch 14, loss: 0.080
Epoch 15. Time: 0.068, Train loss: 0.057
Epoch 19, loss: 0.024
Epoch 20. Time: 0.064, Train loss: 0.020
Epoch 24, loss: 0.013
Epoch 25. Time: 0.060, Train loss: 0.012
Epoch 29, loss: 0.008
Epoch 30. Time: 0.062, Train loss: 0.008
Epoch 34, loss: 0.006
Epoch 35. Time: 0.061, Train loss: 0.006
Epoch 39, loss: 0.004
Epoch 40. Time: 0.065, Train loss: 0.004
Epoch 44, loss: 0.004
Epoch 45. Time: 0.060, Train loss: 0.003
Epoch 49, loss: 0.003
Epoch 50. Time: 0.063, Train loss: 0.003
Epoch 54, loss: 0.002
Epoch 55. Time: 0.059, Train loss: 0.002
Epoch 59, loss: 0.002
Epoch 60. Time: 0.068, Train loss: 0.002
Epoch 64, loss: 0.002
Epoch 65. Time: 0.067, Train loss: 0.002
Epoch 69, loss: 0.001
Epoch 70. Time: 0.060, Train loss: 0.001
Epoch 74, loss: 0.001
Epoch 75. Time: 0.059, Train loss: 0.001
Epoch 79, loss: 0

**Модель LSTM.**

In [436]:
# LSTM variant with the same sizes and learning rate as the GRU model.
model = Network(
    nn.LSTM,
    dictionary_size=10,
    embedding_size=64,
    num_hiddens=128,
    num_classes=10,
).cuda()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)

In [437]:
# Train the LSTM for 100 epochs over consecutive (non-shuffled) slices of 20 sequences.
for ep in range(100):
    start = time.time()
    batch_losses = []

    for i in range(len(X) // 20):
        rows = slice(i * 20, (i + 1) * 20)
        X_batch = X[rows].cuda()
        # Targets are flattened so they line up with the flattened logits below.
        Y_batch = Y[rows].flatten().cuda()

        optimizer.zero_grad()
        answers = model(X_batch).view(-1, 10)
        loss = criterion(answers, Y_batch)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    mean_loss = sum(batch_losses) / len(batch_losses)
    # Every fifth epoch gets its own line; the others overwrite the current line.
    if ep % 5 == 0:
        print("\nEpoch {}. Time: {:.3f}, Train loss: {:.3f}".format(ep, time.time() - start, mean_loss))
    else:
        print(f"\rEpoch {ep}, loss: {mean_loss:.3f}", end='')


Epoch 0. Time: 0.074, Train loss: 2.303
Epoch 4, loss: 2.268
Epoch 5. Time: 0.063, Train loss: 2.171
Epoch 9, loss: 1.252
Epoch 10. Time: 0.061, Train loss: 1.038
Epoch 14, loss: 0.597
Epoch 15. Time: 0.062, Train loss: 0.493
Epoch 19, loss: 0.182
Epoch 20. Time: 0.062, Train loss: 0.128
Epoch 24, loss: 0.045
Epoch 25. Time: 0.067, Train loss: 0.037
Epoch 29, loss: 0.021
Epoch 30. Time: 0.065, Train loss: 0.019
Epoch 34, loss: 0.013
Epoch 35. Time: 0.064, Train loss: 0.012
Epoch 39, loss: 0.009
Epoch 40. Time: 0.067, Train loss: 0.008
Epoch 44, loss: 0.007
Epoch 45. Time: 0.062, Train loss: 0.006
Epoch 49, loss: 0.005
Epoch 50. Time: 0.063, Train loss: 0.005
Epoch 54, loss: 0.004
Epoch 55. Time: 0.070, Train loss: 0.004
Epoch 59, loss: 0.003
Epoch 60. Time: 0.061, Train loss: 0.003
Epoch 64, loss: 0.003
Epoch 65. Time: 0.059, Train loss: 0.003
Epoch 69, loss: 0.002
Epoch 70. Time: 0.065, Train loss: 0.002
Epoch 74, loss: 0.002
Epoch 75. Time: 0.070, Train loss: 0.002
Epoch 79, loss: 0

**Проверка модели.**

In [476]:
def predict_digits(x):
    """Predict the most likely digit at every position of ``x`` with the global ``model``.

    Args:
        x: integer tensor of token ids, either one sequence ``(seq_len,)`` or a
            batch ``(batch, seq_len)``.

    Returns:
        ``np.ndarray`` of predicted class indices with the same shape as ``x``.
    """
    # Run on whichever device the model lives on instead of assuming CUDA.
    device = next(model.parameters()).device

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        o = model(x.to(device))

    # argmax over the class axis works for single sequences and batches alike;
    # a per-row argmax without ``dim`` would flatten each (seq_len, classes) slice.
    return o.argmax(dim=-1).cpu().numpy()

In [477]:
# Predict the digit sequence for sample 7 using the current global `model`.
predict_digits(X[7])

array([1, 0, 8, 9, 6, 7, 8, 8, 1, 4, 2, 3, 1, 7, 6, 9, 2, 2, 6, 1, 4, 4,
       5, 6, 1, 3, 9, 9, 1, 4, 1, 0, 3, 1, 3, 4, 2, 1, 4, 4, 7, 6, 0, 7,
       6, 4, 5, 0, 1, 1])

In [478]:
# Ground-truth targets for sample 7, for comparison with the prediction above.
Y[7]

tensor([1, 0, 8, 9, 6, 7, 8, 8, 1, 4, 2, 3, 1, 7, 6, 9, 2, 2, 6, 1, 4, 4, 5, 6,
        1, 3, 9, 9, 1, 4, 1, 0, 3, 1, 3, 4, 2, 1, 4, 4, 7, 6, 0, 7, 6, 4, 5, 0,
        1, 1])

**Видно, что модель LSTM отлично обучена.**

Выводы: модели GRU и LSTM обучаются практически идеально, loss опускается почти до нуля, а модель RNN показывает среднее качество: самый низкий loss = 0.732.
Для получения лучшего качества модели RNN требуется увеличить обучающую выборку.