In [44]:
import torch
from torch import LongTensor
from torch.nn import Embedding, LSTM
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence, pad_sequence

In [45]:
seqs = ['long_str',  # len = 8
        'tiny',      # len = 4
        'medium']    # len = 6

In [46]:
vocab = ['<pad>'] + sorted(set([char for seq in seqs for char in seq]))

In [47]:
vectorized_seqs = [[vocab.index(tok) for tok in seq]for seq in seqs]
vectorized_seqs

[[6, 9, 8, 4, 1, 11, 12, 10], [12, 5, 8, 14], [7, 3, 2, 5, 13, 7]]

In [48]:
embed = Embedding(len(vocab), 4) # embedding_dim = 4
lstm = LSTM(input_size=4, hidden_size=5, batch_first=True) # input_dim = 4, hidden_dim = 5

In [59]:
vectorized_seqs_t = [torch.Tensor([vocab.index(tok) for tok in seq]) for seq in seqs]
#vectorized_seqs_t
seq_tensor_t=pad_sequence(vectorized_seqs_t,batch_first=True)
seq_tensor_t
#embedded_seq_tensor_t = embed(seq_tensor_t)
#packed_input_t = pack_padded_sequence(embedded_seq_tensor_t, np.array([8,4,6]), batch_first=True)


tensor([[ 6.,  9.,  8.,  4.,  1., 11., 12., 10.],
        [12.,  5.,  8., 14.,  0.,  0.,  0.,  0.],
        [ 7.,  3.,  2.,  5., 13.,  7.,  0.,  0.]])

In [50]:
seq_lengths = LongTensor(list(map(len, vectorized_seqs)))
seq_lengths

tensor([8, 4, 6])

In [51]:
seq_tensor = Variable(torch.zeros((len(vectorized_seqs), seq_lengths.max()))).long()
seq_tensor

tensor([[0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0]])

In [53]:
for idx, (seq, seqlen) in enumerate(zip(vectorized_seqs, seq_lengths)):
    seq_tensor[idx, :seqlen] = LongTensor(seq)
seq_tensor

tensor([[ 6,  9,  8,  4,  1, 11, 12, 10],
        [12,  5,  8, 14,  0,  0,  0,  0],
        [ 7,  3,  2,  5, 13,  7,  0,  0]])

In [54]:
seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
seq_tensor = seq_tensor[perm_idx]
seq_tensor

tensor([[ 6,  9,  8,  4,  1, 11, 12, 10],
        [ 7,  3,  2,  5, 13,  7,  0,  0],
        [12,  5,  8, 14,  0,  0,  0,  0]])

In [16]:
embedded_seq_tensor = embed(seq_tensor)
embedded_seq_tensor
#embedded_seq_tensor.shape

tensor([[[-0.0641,  0.9109, -0.9087,  0.3410],
         [ 0.4785,  0.5911,  0.1120,  0.0364],
         [ 0.5495, -0.0577, -0.5352, -1.0014],
         [ 1.0056, -1.4505, -0.7981, -0.3690],
         [-0.5623,  1.1768, -1.8777, -1.0699],
         [-1.2558, -2.5015, -0.0727,  0.5966],
         [-1.5572, -1.9498, -0.0357,  0.0061],
         [-0.9151,  0.8572, -0.0613,  1.1927]],

        [[-0.4312,  0.4526, -0.9976, -0.9833],
         [-1.7393,  0.4898,  0.1344, -0.2936],
         [-1.4355,  0.2238, -0.5792, -1.1298],
         [ 0.8304, -0.4705, -0.5836,  0.3855],
         [-0.7402,  1.5710, -0.8247, -0.6009],
         [-0.4312,  0.4526, -0.9976, -0.9833],
         [-0.2549, -1.2754, -0.3007,  1.1824],
         [-0.2549, -1.2754, -0.3007,  1.1824]],

        [[-1.5572, -1.9498, -0.0357,  0.0061],
         [ 0.8304, -0.4705, -0.5836,  0.3855],
         [ 0.5495, -0.0577, -0.5352, -1.0014],
         [-0.2883,  1.3329, -0.9172, -1.6868],
         [-0.2549, -1.2754, -0.3007,  1.1824],
         

In [18]:
packed_input = pack_padded_sequence(embedded_seq_tensor, seq_lengths.cpu().numpy(), batch_first=True)
packed_input

PackedSequence(data=tensor([[-0.0641,  0.9109, -0.9087,  0.3410],
        [-0.4312,  0.4526, -0.9976, -0.9833],
        [-1.5572, -1.9498, -0.0357,  0.0061],
        [ 0.4785,  0.5911,  0.1120,  0.0364],
        [-1.7393,  0.4898,  0.1344, -0.2936],
        [ 0.8304, -0.4705, -0.5836,  0.3855],
        [ 0.5495, -0.0577, -0.5352, -1.0014],
        [-1.4355,  0.2238, -0.5792, -1.1298],
        [ 0.5495, -0.0577, -0.5352, -1.0014],
        [ 1.0056, -1.4505, -0.7981, -0.3690],
        [ 0.8304, -0.4705, -0.5836,  0.3855],
        [-0.2883,  1.3329, -0.9172, -1.6868],
        [-0.5623,  1.1768, -1.8777, -1.0699],
        [-0.7402,  1.5710, -0.8247, -0.6009],
        [-1.2558, -2.5015, -0.0727,  0.5966],
        [-0.4312,  0.4526, -0.9976, -0.9833],
        [-1.5572, -1.9498, -0.0357,  0.0061],
        [-0.9151,  0.8572, -0.0613,  1.1927]],
       grad_fn=<PackPaddedSequenceBackward0>), batch_sizes=tensor([3, 3, 3, 3, 2, 2, 1, 1]), sorted_indices=None, unsorted_indices=None)

In [24]:
packed_input.batch_sizes

tensor([3, 3, 3, 3, 2, 2, 1, 1])

In [20]:
packed_output, (ht, ct) = lstm(packed_input)

In [21]:
output, input_sizes = pad_packed_sequence(packed_output, batch_first=True)

In [22]:
output.shape

torch.Size([3, 8, 5])

In [23]:
input_sizes

tensor([8, 6, 4])

In [24]:
output

tensor([[[ 0.2006,  0.1382,  0.0585, -0.1302,  0.0500],
         [ 0.1740,  0.0941,  0.1232, -0.2105,  0.1127],
         [ 0.1037,  0.0493,  0.1047, -0.4676,  0.1194],
         [-0.0187,  0.0871, -0.0560, -0.5884,  0.1054],
         [ 0.3081,  0.1872,  0.0327, -0.6560,  0.1076],
         [ 0.0388,  0.1274,  0.1071, -0.3349,  0.3023],
         [ 0.0344,  0.1551,  0.1896, -0.1020,  0.4512],
         [ 0.3019,  0.2200,  0.1950,  0.0981,  0.3258]],

        [[ 0.1591,  0.0360,  0.0780, -0.3081,  0.0786],
         [ 0.3399, -0.0783,  0.1871,  0.0289,  0.2781],
         [ 0.3705, -0.0738,  0.1806,  0.0538,  0.3511],
         [ 0.0824,  0.0746,  0.1143, -0.0474,  0.1796],
         [ 0.4045,  0.1213,  0.1057, -0.1597,  0.2097],
         [ 0.2996,  0.0975,  0.1260, -0.3423,  0.2200],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000]],

        [[-0.0175,  0.0406,  0.1812,  0.1457,  0.2247],
         [-0.0379,  0.1187,  0.0389, -0.0332

In [28]:
ht

tensor([[[ 0.3019,  0.2200,  0.1950,  0.0981,  0.3258],
         [ 0.2996,  0.0975,  0.1260, -0.3423,  0.2200],
         [ 0.2823, -0.0844,  0.0716, -0.5676,  0.1308]]],
       grad_fn=<StackBackward0>)

In [29]:
ct

tensor([[[ 0.5474,  0.4745,  0.4630,  0.2259,  1.1334],
         [ 0.5341,  0.2016,  0.2938, -0.5261,  0.4660],
         [ 0.4441, -0.1284,  0.1968, -0.9596,  0.2366]]],
       grad_fn=<StackBackward0>)

In [64]:
a=[10,100,1000,10000]
for i in range(1,len(a),1):
    print(a[i])


100
1000
10000
