In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

device = torch.device('cpu')

torch.manual_seed(1)

<torch._C.Generator at 0x1121c4450>

In [7]:
from glob import glob
import pickle

def loadall(filenames):
    out = []
    for filename in filenames:
        print(filename)
        with open(filename, "rb") as f:
            out.extend(pickle.load(f))
    return out

ndata = loadall(glob('n/*'))
cdata = loadall(glob('c/*'))
icdata = loadall(glob('ic/*'))

x = ndata + cdata + icdata
# n 0 c 1 ic 2
y = [0] * len(ndata) + [1] * len(cdata) + [2] * len(icdata)



n/dennis_jade.pickle
n/sunho_early1.pickle
c/dennis_jade.pickle
c/sunho_early1.pickle
c/sunho_early2.pickle
ic/dennis_jade.pickle
ic/sunho_early1.pickle
ic/sunho_early2.pickle


In [8]:
class Model(nn.Module):
    def __init__(self, hidden_dim, output_size, n_layers,\
                 drop_lstm=0.1, drop_out = 0.1):

        super().__init__()

        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        
        # LSTM layers
        self.lstm = nn.LSTM(5, hidden_dim, n_layers, 
                            dropout=drop_lstm, batch_first=True)
        
        # dropout layer
        self.dropout = nn.Dropout(drop_out)
        
        # linear and sigmoid layers
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Softmax()
        

    def forward(self, x, seq_lengths):
                
        # pack, remove pads
        packed_input = nn.utils.rnn.pack_padded_sequence(x, seq_lengths.cpu().numpy(), batch_first=True, enforce_sorted=False)
        packed_output, (ht, ct) = self.lstm(packed_input, None)
          # https://pytorch.org/docs/stable/_modules/torch/nn/modules/rnn.html
          # If `(h_0, c_0)` is not provided, both **h_0** and **c_0** default to zero

        # unpack, recover padded sequence
        output, input_sizes = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
       
        # collect the last output in each batch
        last_idxs = (input_sizes - 1).to(device) # last_idxs = input_sizes - torch.ones_like(input_sizes)
        output = torch.gather(output, 1, last_idxs.view(-1, 1).unsqueeze(2).repeat(1, 1, self.hidden_dim)).squeeze() # [batch_size, hidden_dim]
        
        # dropout and fully-connected layer
        output = self.dropout(output)
        output = self.fc(output).squeeze()
               
        # sigmoid function
        output = self.sig(output)
        
        return output

In [14]:
seq_lengths = torch.LongTensor(list(map(len, x)))
# Add padding(0)
seq_tensor = Variable(torch.zeros((len(x), seq_lengths.max(), 5))).float()
for idx, (seq, seqlen) in enumerate(zip(x, seq_lengths)):
  seq_tensor[idx, :seqlen] = torch.FloatTensor(seq)

# targets = torch.nn.functional.one_hot(torch.LongTensor(y))


# print(targets)


model = Model(10,3,10)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.2)

with torch.no_grad():
    tag_scores = model(seq_tensor, seq_lengths)
    print(tag_scores)

for epoch in range(20):  # again, normally you would NOT do 300 epochs, it is toy data:
    # Step 1. Remember that Pytorch accumulates gradients.
    # We need to clear them out before each instance
    model.zero_grad()
    
    tag_scores = model(seq_tensor, seq_lengths)

    # Step 4. Compute the loss, gradients, and update the parameters by
    #  calling optimizer.step()
    loss = loss_function(tag_scores, torch.LongTensor(y))
    print('loss:', loss, 'epoch:', epoch)
    loss.backward()
    optimizer.step()

# See what the scores are after training
with torch.no_grad():
    tag_scores = model(seq_tensor, seq_lengths)

    # The sentence is "the dog ate the apple".  i,j corresponds to score for tag j
    # for word i. The predicted tag is the maximum scoring tag.
    # Here, we can see the predicted sequence below is 0 1 2 0 1
    # since 0 is index of the maximum value of row 1,
    # 1 is the index of maximum value of row 2, etc.
    # Which is DET NOUN VERB DET NOUN, the correct sequence!
    print(tag_scores)




tensor([[0.3367, 0.3789, 0.2844],
        [0.3512, 0.3652, 0.2835],
        [0.3588, 0.3637, 0.2776],
        [0.3442, 0.3765, 0.2793],
        [0.3749, 0.3596, 0.2655],
        [0.3470, 0.3716, 0.2813],
        [0.3466, 0.3735, 0.2799],
        [0.3597, 0.3669, 0.2734],
        [0.3435, 0.3783, 0.2782],
        [0.3480, 0.3709, 0.2811],
        [0.3478, 0.3725, 0.2796],
        [0.3456, 0.3754, 0.2789],
        [0.3433, 0.3781, 0.2785],
        [0.3510, 0.3609, 0.2881],
        [0.3596, 0.3665, 0.2739],
        [0.3574, 0.3711, 0.2715],
        [0.3598, 0.3666, 0.2736],
        [0.3544, 0.3762, 0.2693],
        [0.3410, 0.3835, 0.2755],
        [0.3452, 0.3755, 0.2793],
        [0.3509, 0.3661, 0.2830],
        [0.3497, 0.3671, 0.2832],
        [0.3575, 0.3688, 0.2737],
        [0.3608, 0.3685, 0.2707],
        [0.3486, 0.3716, 0.2797],
        [0.3597, 0.3661, 0.2742],
        [0.3630, 0.3556, 0.2813],
        [0.3509, 0.3616, 0.2875],
        [0.3514, 0.3612, 0.2874],
        [0.343