In [1]:
import tensorflow as tf

# Load the original training and dev data

In [2]:
def _read_tagged_sentences(fname):
    """Read a whitespace-separated token/label file into parallel lists.

    Every non-blank line contributes its first field as the token and its
    second field as the label.  A blank line terminates the current sentence.

    Args:
        fname: path of the file to read.

    Returns:
        (sentences, labels): two lists of lists, aligned index by index.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
    """
    sentences, labels = [], []
    tokens, tags = [], []
    with open(fname, 'r') as f:
        for line in f:
            fields = line.split()
            if fields:
                tokens.append(fields[0])
                tags.append(fields[1])
            else:
                sentences.append(tokens)
                labels.append(tags)
                tokens, tags = [], []
    # Flush the final sentence: without this it is silently dropped whenever
    # the file does not end with a blank line.
    if tokens:
        sentences.append(tokens)
        labels.append(tags)
    return sentences, labels


# Training split
data_fname = '../wnut_ner_evaluation/data/train_notypes'
xs, ys = _read_tagged_sentences(data_fname)

# Development split (used as the held-out test set further down)
data_fname = '../wnut_ner_evaluation/data/dev_notypes'
dev_xs, dev_ys = _read_tagged_sentences(data_fname)

# Word Level Model

In [3]:
from preprocess import *
from vocab import Vocab
from evaluation import macro_f1

In [4]:
all_words = [ word for x in xs for word in x ]
all_labels = [l for y in ys for l in y ]
xvocab = Vocab(all_words, min_count=0)
yvocab = Vocab(all_labels, min_count=0)
print xvocab.n

46469


In [5]:
all_xs = xs + dev_xs
all_ys = ys + dev_ys

all_xs = sequences_to_index(all_xs, xvocab)
all_xs, all_ws, max_time = pad_sequences(all_xs, xvocab.ipad)
all_ys = sequences_to_index(all_ys, yvocab)
all_ys, _, _ = pad_sequences(all_ys, yvocab.ipad)

train_xs, test_xs = all_xs[:len(xs)], all_xs[len(xs):]
train_ws, test_ws = all_ws[:len(xs)], all_ws[len(xs):]
train_ys, test_ys = all_ys[:len(xs)], all_ys[len(xs):]

print max_time
print len(train_xs), len(test_xs)

41
2394 1000


In [6]:
print train_xs[0]
print train_ws[0]
print train_ys[0]

[9115, 5972, 85, 50, 27, 47, 232, 96, 133, 1816, 30, 6666, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [7]:
from wordner import WordNER

In [9]:
# Build the word-level tagger on a fresh graph and session.
#
# tf.reset_default_graph() must not be called while a session is still
# active, and an InteractiveSession stays installed as the default until it
# is closed.  Close whatever session a previous run of this (or another
# model) cell left behind so the cell can be re-run safely.
previous_session = globals().get('session')
if previous_session is not None:
    previous_session.close()

tf.reset_default_graph()
session = tf.InteractiveSession()

# Keyword arguments forwarded to the WordNER constructor.
params = {
    'learning_rate':.001,
    'xvocab':xvocab,
    'yvocab':yvocab,
    'max_time':max_time,
    'word_embed_size':50,
    'hidden_size':100,
    'dropout':.0
}
ner = WordNER(session, **params)

 Model Loading... Done


In [10]:
# Train the word-level tagger; each input is paired with its `ws` row.
ner.fit(
    zip(train_xs, train_ws),
    train_ys,
    n_epoch=5,
    batch_size=64,
)


 Epoch 1/5 : Batch 38/38: Loss 9.9356
 Epoch 2/5 : Batch 38/38: Loss 4.1789
 Epoch 3/5 : Batch 38/38: Loss 1.9030
 Epoch 4/5 : Batch 38/38: Loss 0.8518
 Epoch 5/5 : Batch 38/38: Loss 0.4250


In [11]:
# Predict on the held-out split and convert the result to plain Python lists
# via .tolist().
preds = ner.predict(zip(test_xs, test_ws)).tolist()

In [12]:
# Strip padding from predictions and gold labels before scoring.
# NOTE(review): the two calls pass different kinds of second argument
# (`test_ws` here, the scalar-looking `yvocab.ipad` below) -- confirm that
# depad_sequences supports both.
predictions = depad_sequences(preds, test_ws)
target_ys = depad_sequences(test_ys, yvocab.ipad)

In [13]:
# Map every label index back to its token via yvocab and flatten across
# sentences, then score the two flat streams.
flat_targets = []
for sentence_labels in target_ys:
    flat_targets.extend(yvocab.token(index) for index in sentence_labels)

flat_predictions = []
for sentence_preds in predictions:
    flat_predictions.extend(yvocab.token(index) for index in sentence_preds)

stats = macro_f1(flat_targets, flat_predictions)

In [14]:
# Display the evaluation summary returned by macro_f1.
print stats

{'macro_recall': 0.33333333333333331, 'macro_f1': 0.32135652247775565, 'macro_precision': 0.31021052415800587, 'scores': {'I': {'recall': 0.0, 'support': 467, 'precision': 0.0, 'f1': 0.0}, 'B': {'recall': 0.0, 'support': 661, 'precision': 0.0, 'f1': 0.0}, 'O': {'recall': 1.0, 'support': 15133, 'precision': 0.9306315724740176, 'f1': 0.964069567433267}}}


# Character Level Model

In [15]:
all_chargrams = [ c for x in xs for g in chargrams(x) for c in g ]
xvocab = Vocab(all_chargrams, min_count=0)
yvocab = Vocab([l for y in ys for l in y ], min_count=0)
print xvocab.n

286687


In [16]:
all_xs = xs + dev_xs
all_ys = ys + dev_ys

all_xs, all_ws = sentences_to_chargrams(all_xs, xvocab)
all_xs, all_ws, max_time, max_chargrams = pad_chargrams(all_xs, all_ws, xvocab.ipad)
all_ys = sequences_to_index(all_ys, yvocab)
all_ys, _, _ = pad_sequences(all_ys, yvocab.ipad)

train_xs, test_xs = all_xs[:len(xs)], all_xs[len(xs):]
train_ws, test_ws = all_ws[:len(xs)], all_ws[len(xs):]
train_ys, test_ys = all_ys[:len(xs)], all_ys[len(xs):]

print max_time, max_chargrams
print len(train_xs), len(test_xs)

41 123
2394 1000


In [17]:
from charner import ChargramNER

In [18]:
# Build the character-level tagger on a fresh graph and session.
#
# The session created for the previous model is still open at this point.
# Resetting the default graph and opening a second InteractiveSession while
# the first is active is what triggers the "Nesting violated for default
# stack" AssertionError when the old session is later garbage-collected, so
# close it explicitly first.
previous_session = globals().get('session')
if previous_session is not None:
    previous_session.close()

tf.reset_default_graph()
session = tf.InteractiveSession()

# Keyword arguments forwarded to the ChargramNER constructor.
params = {
    'learning_rate':.001,
    'xvocab':xvocab,
    'yvocab':yvocab,
    'max_time':max_time,
    'max_chargrams':max_chargrams,
    'char_embed_size':50,
    'hidden_size':100,
    'dropout':.0
}
ner = ChargramNER(session, **params)

Model Loading... Done


Exception AssertionError: AssertionError("Nesting violated for default stack of <type 'weakref'> objects",) in <bound method InteractiveSession.__del__ of <tensorflow.python.client.session.InteractiveSession object at 0x10fac8fd0>> ignored


In [19]:
# Train the character-level tagger; each input is paired with its `ws` row.
ner.fit(
    zip(train_xs, train_ws),
    train_ys,
    n_epoch=5,
    batch_size=64,
)


 Epoch 1/5 : Batch 38/38: Loss 2.0062
 Epoch 2/5 : Batch 38/38: Loss 0.2833
 Epoch 3/5 : Batch 38/38: Loss 0.2075
 Epoch 4/5 : Batch 38/38: Loss 0.2057
 Epoch 5/5 : Batch 38/38: Loss 0.2117


In [20]:
# Predict on the held-out split and convert the result to plain Python lists
# via .tolist().
preds = ner.predict(zip(test_xs, test_ws)).tolist()

In [21]:
# Strip padding from predictions and gold labels before scoring.
# NOTE(review): the two calls pass different kinds of second argument
# (`test_ws` here, the scalar-looking `yvocab.ipad` below), and `test_ws` now
# comes from pad_chargrams rather than pad_sequences -- confirm that
# depad_sequences supports both.
predictions = depad_sequences(preds, test_ws)
target_ys = depad_sequences(test_ys, yvocab.ipad)

In [22]:
# Map every label index back to its token via yvocab and flatten across
# sentences, then score the two flat streams.
flat_targets = []
for sentence_labels in target_ys:
    flat_targets.extend(yvocab.token(index) for index in sentence_labels)

flat_predictions = []
for sentence_preds in predictions:
    flat_predictions.extend(yvocab.token(index) for index in sentence_preds)

stats = macro_f1(flat_targets, flat_predictions)

In [23]:
# Display the evaluation summary returned by macro_f1.
print stats

{'macro_recall': 0.33333333333333331, 'macro_f1': 0.32135652247775565, 'macro_precision': 0.31021052415800587, 'scores': {'I': {'recall': 0.0, 'support': 467, 'precision': 0.0, 'f1': 0.0}, 'B': {'recall': 0.0, 'support': 661, 'precision': 0.0, 'f1': 0.0}, 'O': {'recall': 1.0, 'support': 15133, 'precision': 0.9306315724740176, 'f1': 0.964069567433267}}}
