In [36]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np


In [37]:
# Next-character task: each character of input_str is paired with the
# character that follows it in the phrase, which is what label_str holds.
input_str = 'python progra'
label_str = 'ython program'

# Vocabulary = the distinct characters of both strings, sorted so that the
# index assigned to each character is deterministic.
char_vocab = sorted(set(input_str) | set(label_str))
vocab_size = len(char_vocab)
print(char_vocab, vocab_size)

[' ', 'a', 'g', 'h', 'm', 'n', 'o', 'p', 'r', 't', 'y'] 11


In [38]:
# Model dimensions and optimiser step size.
input_size = vocab_size    # one input feature per vocabulary character (one-hot width)
hidden_size = 11           # width of the RNN hidden state; a free hyperparameter
# One output value (logit) per vocabulary entry. Tied to vocab_size instead of
# a literal 11 so the output layer keeps matching the number of classes when
# the training text changes.
output_size = vocab_size
learning_rate = 0.1


In [39]:
# Lookup table from each character to its position in the sorted vocabulary.
char_to_index = {ch: idx for idx, ch in enumerate(char_vocab)}
print(char_to_index)

{' ': 0, 'a': 1, 'g': 2, 'h': 3, 'm': 4, 'n': 5, 'o': 6, 'p': 7, 'r': 8, 't': 9, 'y': 10}


In [40]:
# Inverse lookup (index -> character), built by flipping every
# (character, index) pair of char_to_index.
index_to_char = {idx: ch for ch, idx in char_to_index.items()}
print(index_to_char)


{0: ' ', 1: 'a', 2: 'g', 3: 'h', 4: 'm', 5: 'n', 6: 'o', 7: 'p', 8: 'r', 9: 't', 10: 'y'}


In [41]:
# Encode both strings as lists of vocabulary indices (KeyError on any
# character that is missing from char_to_index).
x_data = list(map(char_to_index.__getitem__, input_str))
y_data = list(map(char_to_index.__getitem__, label_str))
print(x_data, y_data, sep='\n')

[7, 10, 9, 3, 6, 5, 0, 7, 8, 6, 2, 8, 1]
[10, 9, 3, 6, 5, 0, 7, 8, 6, 2, 8, 1, 4]


In [42]:
# Add a leading batch dimension (a batch holding one sequence).
# The lists are rebuilt from the strings instead of wrapping the previous
# value (x_data=[x_data]); the self-referential form nests one level deeper
# every time the cell is re-run, this form always gives the same result.
x_data = [[char_to_index[c] for c in input_str]]
y_data = [[char_to_index[c] for c in label_str]]
print(x_data, y_data)

[[7, 10, 9, 3, 6, 5, 0, 7, 8, 6, 2, 8, 1]] [[10, 9, 3, 6, 5, 0, 7, 8, 6, 2, 8, 1, 4]]


In [43]:
# Row i of the identity matrix is the one-hot vector for index i, so indexing
# it with a sequence of indices yields one one-hot row per character.
one_hot_rows = np.eye(vocab_size)
x_one_hot = [one_hot_rows[seq] for seq in x_data]
print(x_one_hot)

[array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])]


In [44]:
# Build the tensors fed to the model.
# The one-hot list is stacked into a single ndarray first: passing a Python
# list of ndarrays straight to a tensor constructor is the slow path and makes
# recent PyTorch versions emit a UserWarning. torch.tensor with an explicit
# dtype also replaces the legacy FloatTensor / LongTensor constructors while
# producing the same float32 / int64 tensors.
X = torch.tensor(np.array(x_one_hot), dtype=torch.float32)  # one-hot inputs
Y = torch.tensor(y_data, dtype=torch.long)                  # target class indices
print(X.shape, X)
print(Y.shape, Y)

torch.Size([1, 13, 11]) tensor([[[0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]])
torch.Size([1, 13]) tensor([[10,  9,  3,  6,  5,  0,  7,  8,  6,  2,  8,  1,  4]])


In [45]:
class Net(nn.Module):
    """Single nn.RNN layer followed by a linear layer applied to every time step."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the recurrent layer and the output projection.

        Args:
            input_size: number of features per time step of the input.
            hidden_size: number of features in the RNN hidden state.
            output_size: number of values produced per time step.
        """
        super().__init__()
        # batch_first=True: the batch axis comes first in the input and output.
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size, bias=True)

    def forward(self, x):
        """Run the RNN over ``x`` and project each step's hidden state.

        The final hidden state returned by the RNN is discarded; only the
        per-step outputs are passed through the linear layer and returned.
        """
        hidden_seq, _ = self.rnn(x)
        return self.fc(hidden_seq)
    

In [46]:
# Fix the RNG seed before the weights are drawn so the initial outputs (and
# any training that starts from these weights) are identical on every
# Restart & Run All.
torch.manual_seed(0)
net = Net(input_size, hidden_size, output_size)

# Forward pass through the still-untrained network, only to inspect the
# shape and values it produces.
outputs = net(X)
print(outputs.shape, outputs)

torch.Size([1, 13, 11]) tensor([[[-0.1234,  0.1175,  0.2140, -0.0013, -0.3184, -0.3095, -0.1935,
           0.3784,  0.2096, -0.1200,  0.2034],
         [ 0.0407, -0.0797,  0.0219,  0.1945, -0.2623, -0.2075, -0.4469,
           0.3390,  0.4528, -0.3562,  0.2720],
         [-0.0091, -0.1582,  0.3170,  0.2806, -0.0519, -0.2846, -0.3503,
           0.4468,  0.1999, -0.1510,  0.1624],
         [-0.2277, -0.1014,  0.2664,  0.1470, -0.1313, -0.2047, -0.3524,
           0.3566,  0.2298,  0.0172,  0.0128],
         [-0.1264, -0.1892,  0.1973,  0.0299, -0.2011, -0.4049, -0.4643,
           0.3137,  0.2142,  0.0770,  0.1279],
         [ 0.0430, -0.0180,  0.3261, -0.0247, -0.1806, -0.5101, -0.4899,
           0.4686,  0.2562, -0.2094,  0.1634],
         [-0.1807,  0.0489,  0.1675, -0.0640, -0.1776, -0.4308, -0.4334,
           0.3281,  0.3332, -0.1463,  0.0814],
         [-0.1293,  0.1058,  0.1737, -0.0904, -0.2792, -0.4211, -0.3813,
           0.3304,  0.2783, -0.1167,  0.1764],
         [-0.007

In [47]:
# Merge the batch and time axes so that each row holds one time step's outputs.
# The last axis of the network output has output_size entries (it merely
# coincides with input_size in this notebook), so that is the width to use.
print(outputs.view(-1, output_size).shape)

torch.Size([13, 11])


In [48]:
# Target shape before and after flattening to a single axis.
print(Y.shape, Y.view(-1).shape, sep='\n')

torch.Size([1, 13])
torch.Size([13])


In [49]:
# Adam updates every parameter of `net`; the loss is cross-entropy between the
# per-step outputs and the target class indices.
optimizer = optim.Adam(params=net.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [50]:
# Train for a fixed number of steps on the single sequence, printing the loss
# and the decoded prediction of every step.
for i in range(10):
    optimizer.zero_grad()
    outputs = net(X)
    # CrossEntropyLoss takes (N, classes) scores and (N,) class indices. The
    # class axis of `outputs` has output_size entries, so flatten by that
    # rather than by input_size (the two only happen to be equal here).
    loss = criterion(outputs.view(-1, output_size), Y.view(-1))
    loss.backward()
    optimizer.step()

    # Predicted class per time step, taken from the outputs computed before
    # this step's weight update. detach() is the supported replacement for
    # the legacy .data attribute.
    result = outputs.detach().numpy().argmax(axis=2)
    # Decode the first (and only) sequence of the batch. Indexing the batch
    # axis, unlike np.squeeze, still yields an iterable row when the sequence
    # has length 1.
    result_str = ''.join(index_to_char[c] for c in result[0])
    print(i, "loss:", loss.item(), "prediction:", result, "true Y:", y_data, "prediction_str:", result_str)

0 loss: 2.419865608215332 prediction: [[7 8 7 7 7 7 8 7 8 7 7 8 7]] true Y: [[10, 9, 3, 6, 5, 0, 7, 8, 6, 2, 8, 1, 4]] prediction_str: prpppprprpprp
1 loss: 2.1031877994537354 prediction: [[ 8 10  3  6 10 10  6  8  6  6  8  6  6]] true Y: [[10, 9, 3, 6, 5, 0, 7, 8, 6, 2, 8, 1, 4]] prediction_str: ryhoyyorooroo
2 loss: 1.8838578462600708 prediction: [[8 6 4 6 6 0 4 8 6 4 8 6 4]] true Y: [[10, 9, 3, 6, 5, 0, 7, 8, 6, 2, 8, 1, 4]] prediction_str: romoo mromrom
3 loss: 1.4198862314224243 prediction: [[8 6 4 6 5 0 8 8 6 4 8 1 4]] true Y: [[10, 9, 3, 6, 5, 0, 7, 8, 6, 2, 8, 1, 4]] prediction_str: romon rromram
4 loss: 1.001065731048584 prediction: [[8 9 3 6 5 0 8 8 6 2 8 1 4]] true Y: [[10, 9, 3, 6, 5, 0, 7, 8, 6, 2, 8, 1, 4]] prediction_str: rthon rrogram
5 loss: 0.6831492781639099 prediction: [[8 9 3 6 5 0 7 8 6 2 8 1 4]] true Y: [[10, 9, 3, 6, 5, 0, 7, 8, 6, 2, 8, 1, 4]] prediction_str: rthon program
6 loss: 0.4379277229309082 prediction: [[8 9 3 6 5 0 7 8 6 2 8 1 4]] true Y: [[10, 9, 3, 