In [1]:
import tensorflow as tf
import numpy as np
import os
from simple import *
import math
from collections import defaultdict
import random

In [2]:
path = '../data/NER-RNN/ner'
train_path = os.path.join(path, 'eng.train')
testa_path = os.path.join(path, 'eng.testa')
testb_path = os.path.join(path, 'eng.testb')


In [3]:
def read_corpus(path):
    sents = [[]]
    for line in open(path):
        parts = line.strip().split()
        if len(parts):
            sents[-1].append((parts[0], parts[-1]))
        else:
            sents.append([])
    return sents

all_sents = flatten([read_corpus(f) for f in [train_path, testa_path, testb_path]])[1:]
all_tokens = flatten(all_sents)

entities = list(set([tk[1] for tk in all_tokens]))
chars = list(set(flatten([tk[0] for tk in all_tokens]))) + [' ']


In [11]:
char_lookup = {c: i+1 for i, c in enumerate(chars)}
entity_lookup = {e: i for i, e in enumerate(entities)}

PADDING = 32 # should correspond roughly to field of view of the model

total_len = PADDING
for sent in all_sents:
    total_len += sum([len(tk) for tk, ent in sent]) + max(len(sent) - 1, 0) + PADDING

x = np.zeros(total_len, dtype=int)
y = np.zeros(total_len, dtype=int)

i = PADDING
for sent in all_sents:
    for j, (token, entity) in enumerate(sent):
        x[i:i+len(token)] = [char_lookup[c] for c in token]
        y[i:i+len(token)] = [entity_lookup[entity]] * len(token)
        i += len(token)
        if j+1 < len(sent):
            x[i] = char_lookup[' ']
            y[i] += entity_lookup[entity]
            i += 1
    i += PADDING


In [12]:
def tensor_to_text(t):
    return u''.join([chars[i-1] for i in t if i > 0])

print tensor_to_text(x[50:100])

call to boycott British lamb .


In [10]:
class Net(object):
    def __init__(self):
        self.global_step = tf.contrib.framework.get_or_create_global_step()
        
    session = None
    
    def setup(self, path=None):
        if self.session: self.session.close()
        self.session = tf.InteractiveSession()
        
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        self.session.run(init_op)
        tf.train.start_queue_runners(sess=self.session)
        
        self.saver = None
        self.path = path
        if path:
            if not os.path.exists(path):
                os.mkdir(path)
            self.saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(path)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(self.session, ckpt.model_checkpoint_path)
                print 'Restored from checkpoint', ckpt.model_checkpoint_path
            else:
                print 'Did not restore from checkpoint'
        else:
            print 'Will not save progress'
    
    def save(self):
        if self.saver:
            step_ = self.session.run(self.global_step)
            saver.save(session, self.path + '/model.ckpt', global_step=step_)
            print 'Saved'
    
    def train_step(self, verbose):
        # should return the current step
        assert False, 'train_step() not implemented'
    
    def train(self, print_every=2, save_every=100):
        while True:
            step_ = self.session.run(self.global_step)
            verbose = (step_ % print_every == 0)
            self.train_step(verbose)
            if step_ % save_every == 1:
                self.save()


In [31]:
def identity(x):
    return x

class NER(Net):
    def __init__(self, x, y):
        with tf.variable_scope('ner8'):
            super(NER, self).__init__()
            
            batch_size = 32
            length = 64
            
            indices = [tf.random_uniform([], minval=0, maxval=len(x) - length, dtype=tf.int64) for _ in xrange(batch_size)]
            x_batch = tf.stack([tf.slice(x, [idx], [length]) for idx in indices])
            y_batch = tf.stack([tf.slice(y, [idx], [length]) for idx in indices])
            
            inferred = self.fwd(x_batch)

            loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_batch, logits=inferred))

            self.train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step=self.global_step)

            self.loss = loss
            self.x_batch = x_batch
            self.y_batch = y_batch
            self.inferred = inferred

            self.losses = []
    
    def fwd(self, batch):
        with tf.variable_scope('fwd'):
            depth = len(chars) + 1
            length = batch.get_shape()[-1]
            batch = tf.one_hot(batch, depth)

            n_layers = 4
            fov = 1 + 2 ** (n_layers)
            print 'field of view:', fov
            for i in xrange(n_layers):
                channels = 32 * 2**i
                convolved = tf.layers.conv1d(batch, channels, 3, dilation_rate=2**i, padding='same', activation=tf.nn.relu)
                batch = tf.concat([batch, convolved], 2) # skip connection
                batch = tf.layers.batch_normalization(batch)
            batch = tf.layers.conv1d(batch, len(entities), 1, activation=identity)
            return batch
    
    def train_step(self, verbose):
        loss_, step_, _ = self.session.run([self.loss, self.global_step, self.train_op])
        self.losses.append(loss_)
        if verbose:
            print "{}: {}".format(step_, sum(self.losses) / len(self.losses))
            self.losses = []

n = NER(x, y)
print 'ok'

field of view: 17
ok
