In [48]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import os
from simple import *
import math
from collections import defaultdict
import random

In [2]:
path = '../data/NER-RNN/ner'
train_path = os.path.join(path, 'eng.train')
testa_path = os.path.join(path, 'eng.testa')
testb_path = os.path.join(path, 'eng.testb')


In [20]:
def read_corpus(path):
    groups = [[]]
    for line in open(path):
        parts = line.strip().split()
        if len(parts):
            groups[-1].append((parts[0], parts[-1]))
        else:
            groups.append([])
    return groups

all_groups = flatten([read_corpus(f) for f in [train_path, testa_path, testb_path]])[1:]
all_tokens = flatten(all_groups)

entities = list(set([tk[1] for tk in all_tokens]))
chars = list(set(flatten([tk[0] for tk in all_tokens]))) + [' ']


In [59]:
char_lookup = {c: i+1 for i, c in enumerate(chars)}
entity_lookup = {e: i for i, e in enumerate(entities)}

def vectorize_sentence(tokens, bucket_rounding):
    seq_len = sum([len(tk) for tk, ent in tokens]) + len(tokens)
    seq_len = int(math.ceil(seq_len * 1.0 / bucket_rounding) * bucket_rounding)
    x = torch.LongTensor(seq_len)
    x.fill_(0)
    y = torch.LongTensor(seq_len)
    y.fill_(0)
    i = 0
    for token, ent in tokens:
        ent_idx = entity_lookup[ent]
        for c in token + ' ':
            x[i] = char_lookup[c]
            y[i] = ent_idx
            i += 1
    return x, y

def epoch(batch_size=16):
    sentences = all_groups[:]
    random.shuffle(sentences)
    buckets = defaultdict(list)
    for sentence in sentences:
        x, y = vectorize_sentence(sentence, 10)
        bucket = x.size()[0]
        buckets[bucket].append((x,y))
        if len(buckets[bucket]) == batch_size:
            xs = torch.stack([x for x,y in buckets[bucket]])
            ys = torch.stack([y for x,y in buckets[bucket]])
            yield xs, ys

# print all_groups[0]
# print vectorize_sentence(all_groups[0], 10)
for x, y in epoch(12):
    print x
    break



Columns 0 to 12 
   42    34    43    33    44    50    49    45    34    86    16    24    24
   49    40    32    86    52    59    59    72    81    67    70    69    86
   30    59    71    67    78    86    48    73    68    59    67    72    86
   50    72    83    77    77    63    77    86    36    78    59    74    80
   49    73    77    63    74    62    73    78    65    86    22    86    43
   36    72    59    71    73    78    65    59    74    86    16    18    86
   47    59    78    80    67    61    69    86    16    23    86    23    86
   23    12    17    12    19    23    12    17    86    13    86    33    63
   36    67    72    72    67    74    65    68    59    71    86    19    19
   40    30    51    39    42    30    45    35    50    86    16    24    24
   35    34    51    49    44    38    51    86    21    20    86    25    19
   38    80    86    81    59    77    86    59    86    76    63    78    66

Columns 13 to 25 
   23    12    16    19   

In [94]:
# the model has a field-of-view of 2**n_layers + 1 chars

class NER(nn.Module):
    def __init__(self, n_chars=len(chars)+1, n_entities=len(entities), embedding_dim=64, conv_layers=5):
        super(NER, self).__init__()
        self.dropout = torch.nn.Dropout(0.9)
        self.convs = []
        self.embedding = torch.nn.Embedding(n_chars, embedding_dim)
        for i in xrange(conv_layers):
            ksize = 3
            dilation = 2 ** i
            padding = dilation * (ksize-1) / 2
            in_channels = embedding_dim if i == 0 else 256
            c = torch.nn.Conv1d(in_channels, 256, ksize, dilation=dilation, padding=padding)
            self.convs.append(c)
        self.final_conv = torch.nn.Conv1d(256, n_entities, 1)
        self.log_softmax = torch.nn.LogSoftmax()
        self.criterion = torch.nn.NLLLoss2d()
    
    def forward(self, x):
        x = self.embedding(x)
        x.data.transpose_(1, 2)
        for c in self.convs:
            x = self.dropout(F.relu(c(x)))
        x = self.final_conv(x)
        x.data.transpose_(1, 2) # [batch, seq_len, n_classes]
        x = self.log_softmax(x)
        
        x.data.transpose_(1, 2) # [batch, n_classes, seq_len]
        depth = x.size()[1]
        seq_len = x.size()[2]
        x = x.contiguous().view(-1, depth, 1, seq_len) # [batch, n_classes, 1, seq_len]
        return x
    
    def infer(self, x):
        x_batch = torch.stack([x])
        result = self.forward(x_batch).data.numpy()[0]
        ents = [entities[idx] for idx in np.argmax(result, axis=1)]
        return ents
    
    def compute_loss(self, xs, ys):
        x = self.forward(xs)
        seq_len = ys.size()[1]
        y = ys.contiguous().view(-1, 1, seq_len)
        return self.criterion(x, y)

# sent = all_groups[0]
# x, y = vectorize_sentence(sent, 10)
# print NER().infer(Variable(x))

net = NER()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.0001)

for i, (xs, ys) in enumerate(epoch()):
    xs = Variable(xs)
    ys = Variable(ys)
    optimizer.zero_grad()
    loss = net.compute_loss(xs, ys)
    loss.backward()
    optimizer.step()
    if i % 10 == 0:
        print 'Loss:', loss
    


RuntimeError: input and gradOutput shapes do not match: input [16 x 20 x 8], gradOutput [16 x 8 x 20] at /home/soumith/local/builder/wheel/pytorch-src/torch/lib/THNN/generic/LogSoftMax.c:78