### Original code from the article

In [1]:
from random import randint
from numpy import array
from numpy import argmax

 
# generate a sequence of random numbers in [0, 99]
def generate_sequence(length=25):
    """Return a list of `length` random integers, each in the range 0..99 inclusive."""
    sequence = []
    for _ in range(length):
        # randint includes both endpoints, so 99 is a possible value.
        sequence.append(randint(0, 99))
    return sequence
 
# one hot encode sequence
def one_hot_encode(sequence, n_unique=100):
    """One-hot encode a sequence of integer class ids.

    Args:
        sequence: iterable of integers, each expected in ``[0, n_unique)``.
        n_unique: number of classes, i.e. the width of every one-hot row.

    Returns:
        Integer numpy array of shape ``(len(sequence), n_unique)`` with a
        single 1 per row. An empty sequence gives shape ``(0, n_unique)``.

    Raises:
        IndexError: if a value lies outside ``[0, n_unique)``.
    """
    encoding = []
    for value in sequence:
        # Reject negatives explicitly: list indexing would silently wrap
        # them around and mark the wrong class.
        if not 0 <= value < n_unique:
            raise IndexError(
                'value %r is outside the range [0, %d)' % (value, n_unique))
        vector = [0] * n_unique
        vector[value] = 1
        encoding.append(vector)
    # Force a 2-D integer result so that empty input still exposes shape[1].
    return array(encoding, dtype=int).reshape(len(encoding), n_unique)
 
# decode a one hot encoded string
def one_hot_decode(encoded_seq):
    """Turn a sequence of score / one-hot rows into a list of class indices.

    Each row is reduced with numpy's argmax, so the index of its largest
    entry is returned.
    """
    decoded = []
    for row in encoded_seq:
        decoded.append(argmax(row))
    return decoded
 
# Round-trip demo: draw a random sequence, one-hot encode it, then decode it back.
sequence = generate_sequence()
encoded = one_hot_encode(sequence)
decoded = one_hot_decode(encoded)
# Print the three stages one per line, in pipeline order.
print(sequence, encoded, decoded, sep='\n')

[15, 30, 85, 93, 59, 52, 98, 40, 47, 70, 66, 85, 81, 69, 3, 99, 41, 16, 87, 8, 24, 44, 79, 74, 73]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[15, 30, 85, 93, 59, 52, 98, 40, 47, 70, 66, 85, 81, 69, 3, 99, 41, 16, 87, 8, 24, 44, 79, 74, 73]


In [2]:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense

# generate data for the lstm
def generate_data():
    """Build one training batch for the echo task.

    Returns:
        (X, y): `y` is the one-hot encoding of a freshly generated sequence,
        and `X` is the same data reshaped to (n_samples, 1, n_classes), so
        every sample is a one-step sequence whose target is itself.
    """
    targets = one_hot_encode(generate_sequence())
    n_samples, n_classes = targets.shape
    # Insert a time axis of length 1 between the sample and class axes.
    inputs = targets.reshape(n_samples, 1, n_classes)
    return inputs, targets
 
# Model: a 15-unit LSTM over one-step sequences of 100 features, followed by
# a softmax over the 100 classes.
model = Sequential([
    LSTM(15, input_shape=(1, 100)),
    Dense(100, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Training: 500 rounds, each a single pass over a newly generated sequence.
for i in range(500):
    X, y = generate_data()
    model.fit(X, y, epochs=1, batch_size=1, verbose=2)

# Evaluation on a sequence the model has not been fitted on.
X, y = generate_data()
yhat = model.predict(X)
print('Expected:  %s' % one_hot_decode(y))
print('Predicted: %s' % one_hot_decode(yhat))

25/25 - 1s - loss: 4.6023 - accuracy: 0.0800 - 734ms/epoch - 29ms/step
25/25 - 0s - loss: 4.6071 - accuracy: 0.0000e+00 - 18ms/epoch - 721us/step
25/25 - 0s - loss: 4.6053 - accuracy: 0.0000e+00 - 18ms/epoch - 721us/step
25/25 - 0s - loss: 4.5996 - accuracy: 0.0000e+00 - 18ms/epoch - 721us/step
25/25 - 0s - loss: 4.5982 - accuracy: 0.1200 - 18ms/epoch - 721us/step
25/25 - 0s - loss: 4.5860 - accuracy: 0.1600 - 17ms/epoch - 681us/step
25/25 - 0s - loss: 4.6157 - accuracy: 0.0000e+00 - 19ms/epoch - 761us/step
25/25 - 0s - loss: 4.6036 - accuracy: 0.0800 - 19ms/epoch - 761us/step
25/25 - 0s - loss: 4.6144 - accuracy: 0.0800 - 18ms/epoch - 721us/step
25/25 - 0s - loss: 4.5838 - accuracy: 0.0400 - 18ms/epoch - 721us/step
25/25 - 0s - loss: 4.5672 - accuracy: 0.0400 - 18ms/epoch - 721us/step
25/25 - 0s - loss: 4.5870 - accuracy: 0.0000e+00 - 17ms/epoch - 681us/step
25/25 - 0s - loss: 4.5705 - accuracy: 0.0400 - 18ms/epoch - 721us/step
25/25 - 0s - loss: 4.5594 - accuracy: 0.0000e+00 - 18ms/e

### PyTorch

In [3]:
from torch import nn
import torch

from sklearn.metrics import accuracy_score

In [4]:
class CustomLSTM(nn.Module):
    """LSTM encoder followed by a linear classification head.

    The final hidden state of the last LSTM layer is mapped to `num_classes`
    raw scores (logits); no softmax is applied here.

    Args:
        num_classes: number of output scores per sample.
        input_size: number of features per time step.
        hidden_size: number of hidden units in each LSTM layer.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size

        # batch_first=True: inputs are laid out as (batch, seq_len, input_size).
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for input `x`.

        `x` is expected as (batch, seq_len, input_size).
        """
        _, (hn, _) = self.lstm(x)
        # hn stacks one final hidden state per layer along dim 0. Only the
        # last layer feeds the classifier; flattening all layers together
        # would multiply the batch dimension by num_layers.
        last_hidden = hn[-1].reshape(-1, self.hidden_size)
        return self.fc(last_hidden)

In [5]:
# Number of training iterations; each one uses a freshly generated batch.
n_epochs = 700

# Network sized like the Keras model above: 100 input features, 15 hidden
# units, 100 output classes.
lstm_net = CustomLSTM(
    num_classes=100,
    input_size=100,
    hidden_size=15,
    num_layers=1,
)
optimizer = torch.optim.Adam(lstm_net.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [6]:
for epoch in range(n_epochs):
    # Every iteration trains on one newly generated batch.
    X, y = generate_data()
    X = torch.tensor(X, dtype=torch.float32)
    # One-hot float targets: CrossEntropyLoss treats them as class probabilities.
    y = torch.tensor(y, dtype=torch.float32)

    optimizer.zero_grad()  # clear gradients left over from the previous step
    # Call the module rather than .forward() so registered hooks are honoured.
    output = lstm_net(X)
    loss = criterion(output, y)

    loss.backward()   # back-propagate the loss through the network
    optimizer.step()  # apply the Adam update to the model parameters

    if epoch % 20 == 0:  # report accuracy on the current batch every 20th iteration
        # Predictions are only needed for logging, so compute them here.
        predicted = torch.max(output.detach(), 1).indices
        # accuracy_score expects (y_true, y_pred).
        accuracy_train = accuracy_score(one_hot_decode(y), predicted)

        print(f'epoch: {epoch}, accuracy: {accuracy_train}')

epoch: 0, accuracy: 0.04
epoch: 20, accuracy: 0.0
epoch: 40, accuracy: 0.0
epoch: 60, accuracy: 0.0
epoch: 80, accuracy: 0.08
epoch: 100, accuracy: 0.0
epoch: 120, accuracy: 0.08
epoch: 140, accuracy: 0.04
epoch: 160, accuracy: 0.12
epoch: 180, accuracy: 0.08
epoch: 200, accuracy: 0.08
epoch: 220, accuracy: 0.0
epoch: 240, accuracy: 0.16
epoch: 260, accuracy: 0.24
epoch: 280, accuracy: 0.36
epoch: 300, accuracy: 0.48
epoch: 320, accuracy: 0.6
epoch: 340, accuracy: 0.76
epoch: 360, accuracy: 0.6
epoch: 380, accuracy: 0.64
epoch: 400, accuracy: 0.68
epoch: 420, accuracy: 0.88
epoch: 440, accuracy: 0.92
epoch: 460, accuracy: 0.96
epoch: 480, accuracy: 0.96
epoch: 500, accuracy: 0.92
epoch: 520, accuracy: 1.0
epoch: 540, accuracy: 1.0
epoch: 560, accuracy: 1.0
epoch: 580, accuracy: 0.92
epoch: 600, accuracy: 1.0
epoch: 620, accuracy: 1.0
epoch: 640, accuracy: 1.0
epoch: 660, accuracy: 1.0
epoch: 680, accuracy: 1.0


In [7]:
with torch.no_grad():  # inference only: gradient tracking is disabled
    X, y = generate_data()
    X = torch.tensor(X, dtype=torch.float32)
    # Call the module rather than .forward() so registered hooks are honoured.
    output = lstm_net(X)
    # No .detach() needed: nothing is tracked inside no_grad.
    yhat = torch.max(output, 1).indices
    print('Expected:  %s' % one_hot_decode(y))
    print('Predicted: %s' % yhat.tolist())

Expected:  [54, 82, 83, 55, 15, 71, 67, 31, 51, 27, 90, 18, 52, 5, 38, 35, 12, 86, 76, 6, 23, 34, 8, 12, 79]
Predicted: [54, 82, 83, 55, 15, 71, 67, 31, 51, 27, 90, 18, 52, 5, 38, 35, 12, 86, 76, 6, 23, 34, 8, 12, 79]


### Second task

In [8]:

from random import randint
from numpy import array
from numpy import argmax
from pandas import concat
from pandas import DataFrame
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
 
# generate a sequence of random numbers in [0, 99]
def generate_sequence(length=25):
    """Return a list of `length` random integers, each in the range 0..99 inclusive.

    Same helper as in the first task, redefined so this cell is self-contained.
    """
    sequence = []
    for _ in range(length):
        # randint includes both endpoints, so 99 is a possible value.
        sequence.append(randint(0, 99))
    return sequence
 
# one hot encode sequence
def one_hot_encode(sequence, n_unique=100):
    """One-hot encode a sequence of integer class ids.

    Args:
        sequence: iterable of integers, each expected in ``[0, n_unique)``.
        n_unique: number of classes, i.e. the width of every one-hot row.

    Returns:
        Integer numpy array of shape ``(len(sequence), n_unique)`` with a
        single 1 per row. An empty sequence gives shape ``(0, n_unique)``.

    Raises:
        IndexError: if a value lies outside ``[0, n_unique)``.
    """
    encoding = []
    for value in sequence:
        # Reject negatives explicitly: list indexing would silently wrap
        # them around and mark the wrong class.
        if not 0 <= value < n_unique:
            raise IndexError(
                'value %r is outside the range [0, %d)' % (value, n_unique))
        vector = [0] * n_unique
        vector[value] = 1
        encoding.append(vector)
    # Force a 2-D integer result so that empty input still exposes shape[1].
    return array(encoding, dtype=int).reshape(len(encoding), n_unique)
 
# decode a one hot encoded string
def one_hot_decode(encoded_seq):
    """Map every row of `encoded_seq` to the index of its largest entry.

    Returns a plain list with one numpy-argmax result per row.
    """
    return list(map(argmax, encoded_seq))
 
# generate data for the lstm
def generate_data():
    """Build one batch of 5-step windows for the lagged echo task.

    A new sequence is generated and one-hot encoded, then turned into
    sliding windows of five consecutive time steps.

    Returns:
        (X, y): `X` has shape (n_windows, 5, 100). `y` holds, for each
        window, the one-hot row of the window's second-to-last time step.
    """
    encoded = one_hot_encode(generate_sequence())
    frame = DataFrame(encoded)
    # Lay t-4, t-3, t-2, t-1 and t side by side; the first shifted rows hold NaN.
    lagged = concat([frame.shift(lag) for lag in (4, 3, 2, 1)] + [frame], axis=1)
    # Keep rows from index 5 on, so the row count lines up with `y` below.
    windows = lagged.values[5:, :]
    # Unflatten each 500-wide row into 5 time steps of 100 features.
    X = windows.reshape(len(windows), 5, 100)
    # Rows 4 .. n-2 of the encoding: one step behind the end of each window.
    y = encoded[4:-1, :]
    return X, y
 
# Model: a stateful 50-unit LSTM fed fixed batches of five 5-step windows,
# followed by a softmax over the 100 classes.
model = Sequential([
    LSTM(50, batch_input_shape=(5, 5, 100), stateful=True),
    Dense(100, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Training: 2000 rounds, each one unshuffled pass over a newly generated
# sequence; the LSTM state is reset after every sequence.
for i in range(2000):
    X, y = generate_data()
    model.fit(X, y, epochs=1, batch_size=5, verbose=2, shuffle=False)
    model.reset_states()

# Evaluation on a sequence the model has not been fitted on.
X, y = generate_data()
yhat = model.predict(X, batch_size=5)
print('Expected:  %s' % one_hot_decode(y))
print('Predicted: %s' % one_hot_decode(yhat))

4/4 - 1s - loss: 4.5944 - accuracy: 0.0000e+00 - 690ms/epoch - 172ms/step
4/4 - 0s - loss: 4.6144 - accuracy: 0.0000e+00 - 5ms/epoch - 1ms/step
4/4 - 0s - loss: 4.6012 - accuracy: 0.1000 - 6ms/epoch - 2ms/step
4/4 - 0s - loss: 4.6120 - accuracy: 0.0000e+00 - 5ms/epoch - 1ms/step
4/4 - 0s - loss: 4.6108 - accuracy: 0.0000e+00 - 6ms/epoch - 2ms/step
4/4 - 0s - loss: 4.6124 - accuracy: 0.0500 - 5ms/epoch - 1ms/step
4/4 - 0s - loss: 4.5926 - accuracy: 0.1000 - 5ms/epoch - 1ms/step
4/4 - 0s - loss: 4.6006 - accuracy: 0.0000e+00 - 5ms/epoch - 1ms/step
4/4 - 0s - loss: 4.6051 - accuracy: 0.0000e+00 - 5ms/epoch - 1ms/step
4/4 - 0s - loss: 4.6010 - accuracy: 0.0000e+00 - 5ms/epoch - 1ms/step
4/4 - 0s - loss: 4.6002 - accuracy: 0.0000e+00 - 6ms/epoch - 2ms/step
4/4 - 0s - loss: 4.6086 - accuracy: 0.0500 - 5ms/epoch - 1ms/step
4/4 - 0s - loss: 4.6027 - accuracy: 0.0000e+00 - 5ms/epoch - 1ms/step
4/4 - 0s - loss: 4.6092 - accuracy: 0.0500 - 6ms/epoch - 2ms/step
4/4 - 0s - loss: 4.6000 - accuracy: 

### PyTorch

In [9]:
# Number of training iterations; each one uses a freshly generated batch.
n_epochs = 1100

# Network sized like the stateful Keras model above: 100 input features,
# 50 hidden units, 100 output classes.
lstm_net_second = CustomLSTM(
    num_classes=100,
    input_size=100,
    hidden_size=50,
    num_layers=1,
)
optimizer = torch.optim.Adam(lstm_net_second.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [10]:
for epoch in range(n_epochs):
    # Every iteration trains on one newly generated batch of lagged windows.
    X, y = generate_data()
    X = torch.tensor(X, dtype=torch.float32)
    # One-hot float targets: CrossEntropyLoss treats them as class probabilities.
    y = torch.tensor(y, dtype=torch.float32)

    optimizer.zero_grad()  # clear gradients left over from the previous step
    # Call the module rather than .forward() so registered hooks are honoured.
    output = lstm_net_second(X)
    loss = criterion(output, y)

    loss.backward()   # back-propagate the loss through the network
    optimizer.step()  # apply the Adam update to the model parameters

    if epoch % 20 == 0:  # report accuracy on the current batch every 20th iteration
        # Predictions are only needed for logging, so compute them here.
        predicted = torch.max(output.detach(), 1).indices
        # accuracy_score expects (y_true, y_pred).
        accuracy_train = accuracy_score(one_hot_decode(y), predicted)

        print(f'epoch: {epoch}, accuracy: {accuracy_train}')

epoch: 0, accuracy: 0.0
epoch: 20, accuracy: 0.05
epoch: 40, accuracy: 0.0
epoch: 60, accuracy: 0.05
epoch: 80, accuracy: 0.05
epoch: 100, accuracy: 0.0
epoch: 120, accuracy: 0.0
epoch: 140, accuracy: 0.1
epoch: 160, accuracy: 0.05
epoch: 180, accuracy: 0.1
epoch: 200, accuracy: 0.25
epoch: 220, accuracy: 0.0
epoch: 240, accuracy: 0.0
epoch: 260, accuracy: 0.05
epoch: 280, accuracy: 0.1
epoch: 300, accuracy: 0.15
epoch: 320, accuracy: 0.3
epoch: 340, accuracy: 0.1
epoch: 360, accuracy: 0.25
epoch: 380, accuracy: 0.2
epoch: 400, accuracy: 0.15
epoch: 420, accuracy: 0.15
epoch: 440, accuracy: 0.4
epoch: 460, accuracy: 0.15
epoch: 480, accuracy: 0.2
epoch: 500, accuracy: 0.2
epoch: 520, accuracy: 0.25
epoch: 540, accuracy: 0.3
epoch: 560, accuracy: 0.35
epoch: 580, accuracy: 0.45
epoch: 600, accuracy: 0.4
epoch: 620, accuracy: 0.3
epoch: 640, accuracy: 0.55
epoch: 660, accuracy: 0.4
epoch: 680, accuracy: 0.55
epoch: 700, accuracy: 0.6
epoch: 720, accuracy: 0.6
epoch: 740, accuracy: 0.6
ep

In [11]:
with torch.no_grad():  # inference only: gradient tracking is disabled
    X, y = generate_data()
    X = torch.tensor(X, dtype=torch.float32)
    # Call the module rather than .forward() so registered hooks are honoured.
    output = lstm_net_second(X)
    # No .detach() needed: nothing is tracked inside no_grad.
    yhat = torch.max(output, 1).indices
    print('Expected:  %s' % one_hot_decode(y))
    print('Predicted: %s' % yhat.tolist())

Expected:  [61, 17, 80, 68, 90, 15, 47, 96, 5, 61, 62, 68, 47, 16, 80, 12, 19, 24, 66, 20]
Predicted: [61, 17, 80, 68, 90, 15, 47, 96, 5, 61, 62, 68, 47, 16, 80, 12, 19, 24, 66, 20]
