In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0xe03f046cd0>

In [22]:
lstm = nn.LSTM(3, 3)
inputs = [torch.randn(1, 3) for _ in range(5)]

hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))

for i in inputs:
    out, hidden = lstm(i.view(1, 1, -1), hidden)

inputs = torch.cat(inputs).view(len(inputs), 1, -1)

hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # clean out hidden state
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)

tensor([[[-0.2932, -0.0950, -0.0214]],

        [[-0.1582, -0.3642, -0.0805]],

        [[-0.0498, -0.2909, -0.0352]],

        [[ 0.0043, -0.6981, -0.0893]],

        [[-0.0660, -0.4401, -0.1084]]])
(tensor([[[-0.0660, -0.4401, -0.1084]]]), tensor([[[-0.2149, -1.2208, -0.3779]]]))


In [85]:
class LSTMNumSeq(nn.Module):
    
    def __init__(self,embedding_dim, hidden_dim, num_size):
        super(LSTMNumSeq, self).__init__()
        self.hidden_dim = hidden_dim
        
        self.word_embeddings = nn.Embedding(num_size, embedding_dim)
        
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, num_size)
        self.hidden = self.init_hidden()
        
    def init_hidden(self):
        return (torch.zeros(1, 1, self.hidden_dim),
                torch.zeros(1, 1, self.hidden_dim))
    
    def forward(self, seq):
        embeds = self.word_embeddings(seq)
        lstm_out, self.hidden = self.lstm(embeds.view(len(seq), 1, -1), self.hidden)
        tag_space = self.hidden2tag(lstm_out.view(len(seq), -1))
        out_seq = F.log_softmax(tag_space, dim=1)
        return out_seq
        

In [86]:
input_seq = [("012345678","123456789"),("123456789","1234567890"),("234567890","345678901"),("345678901","456789012"),("456789012","567890123")]
num_to_ix = {"0": 0, "1":1,"2":2,"3":3,"4":4,"5":5,"6":6,"7":7,"8":8,"9":9}

def prepare_sequence(seq, to_ix):
    idxs = [to_ix[seq[w]] for w in range(len(seq))]
    return torch.tensor(idxs, dtype = torch.long)


In [88]:
model = LSTMNumSeq(6, 6, 10)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.1)

with torch.no_grad():
    inputs = prepare_sequence(input_seq[0][0], num_to_ix)
    output_seq = model(inputs)
    #print(output_seq)
    
for epoch in range(300):
    for sequence, ty in input_seq:
        model.zero_grad()
        
        model.hidden = model.init_hidden()
        
        seq_in = prepare_sequence(sequence, num_to_ix)
        target = prepare_sequence(ty, num_to_ix)
        
        out = model(seq_in)
        
        loss = loss_function(out, target)
        loss.backward()
        optimizer.step()
        
with torch.no_grad():
    inputs = prepare_sequence(input_seq[0][0], num_to_ix)
    output_seq = model(inputs)
    print(output_seq)

tensor([[-2.4385, -2.6567, -2.0355, -2.3784, -2.6826, -2.5152, -2.1656,
         -2.1878, -2.2799, -1.9600],
        [-2.4365, -2.5437, -1.8834, -2.4181, -2.7020, -2.4230, -2.1658,
         -2.2280, -2.3139, -2.1560],
        [-2.4477, -2.5282, -2.0162, -2.3762, -2.5720, -2.4863, -2.1858,
         -2.2142, -2.3490, -2.0328],
        [-2.5365, -2.4025, -1.9334, -2.3550, -2.5532, -2.5245, -2.1194,
         -2.2775, -2.3665, -2.1505],
        [-2.4448, -2.5392, -1.9304, -2.4246, -2.6111, -2.4764, -2.1588,
         -2.1750, -2.3415, -2.1373],
        [-2.5267, -2.4027, -1.8951, -2.2971, -2.5637, -2.4664, -2.1975,
         -2.3182, -2.4391, -2.1197],
        [-2.4105, -2.4562, -1.9312, -2.4383, -2.5246, -2.4104, -2.2275,
         -2.2066, -2.4103, -2.1658],
        [-2.4357, -2.4900, -2.0147, -2.3343, -2.5145, -2.4335, -2.2581,
         -2.2229, -2.4379, -2.0363],
        [-2.4694, -2.5328, -1.9690, -2.3487, -2.5996, -2.4840, -2.1819,
         -2.2120, -2.3689, -2.0618]])


ValueError: Expected input batch_size (9) to match target batch_size (10).