In [1]:
!pip install dynet
!git clone https://github.com/neubig/nn4nlp-code.git

Collecting dynet
[?25l  Downloading https://files.pythonhosted.org/packages/4f/de/181a8380e9fdb89d9aa5838059336bb535503d5f2053e621438e69081407/dyNET-2.0.3-cp27-cp27mu-manylinux1_x86_64.whl (27.6MB)
[K    100% |████████████████████████████████| 27.6MB 857kB/s 
Collecting cython (from dynet)
[?25l  Downloading https://files.pythonhosted.org/packages/fe/d6/a097bd9913cc0fc974b968f5586d3f0609f46ca58b2aae3b8dfd51c1fe18/Cython-0.28.2-cp27-cp27mu-manylinux1_x86_64.whl (3.3MB)
[K    100% |████████████████████████████████| 3.3MB 6.3MB/s 
[?25hInstalling collected packages: cython, dynet
Successfully installed cython-0.28.2 dynet-2.0.3
fatal: destination path 'nn4nlp-code' already exists and is not an empty directory.


In [0]:
from collections import defaultdict
import time
import random
import dynet as dy
import numpy as np

In [0]:
# Functions to read in the corpus
# Both vocabularies grow on first lookup: an unseen key is assigned the next free
# integer id (the current size of the mapping).
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
# Looked up first, while w2i is still empty, so "<unk>" receives id 0.
UNK = w2i["<unk>"]
def read_dataset(filename):
    """Lazily read a labelled corpus and yield ``(word_ids, tag_id)`` pairs.

    Each non-empty line must look like ``tag ||| word word ...``.  The line is
    lower-cased and stripped before parsing.  Words and tags are converted to
    integer ids through the module-level ``w2i`` / ``t2i`` mappings as they are
    bound at the time the generator is consumed.

    Args:
        filename: path of the text file to read.

    Yields:
        A tuple ``(list_of_word_ids, tag_id)`` for every non-empty line.

    Raises:
        ValueError: if a non-empty line does not contain the `` ||| `` separator.
    """
    with open(filename, "r") as f:
        for line in f:
            line = line.lower().strip()
            # Blank lines (e.g. a trailing newline at end of file) carry no example.
            if not line:
                continue
            # Split on the first separator only, so text that itself contains
            # " ||| " stays part of the sentence instead of breaking the unpacking.
            tag, words = line.split(" ||| ", 1)
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])

# Locations of the two corpus splits used by this notebook
train_path = "nn4nlp-code/data/classes/train.txt"
dev_path = "nn4nlp-code/data/classes/test.txt"

# The training split is read first, while w2i still hands out fresh ids for new words.
train = list(read_dataset(train_path))
# From here on the vocabulary is frozen: any word not seen in training maps to UNK.
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset(dev_path))

# Vocabulary and label-set sizes, used to dimension the model parameters
nwords, ntags = len(w2i), len(t2i)

In [0]:
# Hyper-parameters of the CNN classifier
EMB_SIZE = 64     # size of each word embedding
WIN_SIZE = 3      # width of the convolution window
FILTER_SIZE = 64  # number of convolution filters

# Create the DyNet model and the Adam trainer that updates its parameters
model = dy.Model()
trainer = dy.AdamTrainer(model)

# Parameters are registered in the same order as before (embeddings, CNN, softmax).
W_emb = model.add_lookup_parameters((nwords, 1, 1, EMB_SIZE))  # word embeddings
W_cnn = model.add_parameters((1, WIN_SIZE, EMB_SIZE, FILTER_SIZE))  # CNN weights
b_cnn = model.add_parameters(FILTER_SIZE)  # CNN bias

W_sm = model.add_parameters((ntags, FILTER_SIZE))  # softmax weights
b_sm = model.add_parameters(ntags)  # softmax bias

In [0]:
def calc_scores(words):
    """Build a fresh computation graph and return the tag scores for one sentence.

    The sentence's word embeddings are concatenated along dimension 1, passed
    through a 2-D convolution with bias, max-pooled over dimension 1, reshaped
    to a ``FILTER_SIZE`` vector, rectified, and finally mapped to tag scores
    with ``W_sm * h + b_sm``.

    Args:
        words: sequence of integer word ids.  It is never modified; inputs
            shorter than ``WIN_SIZE`` are padded with ``UNK`` on a private copy.

    Returns:
        A DyNet expression holding the (unnormalised) score of every tag.
    """
    # Every call starts a new graph; expressions from earlier calls become stale.
    dy.renew_cg()
    W_cnn_express = dy.parameter(W_cnn)
    b_cnn_express = dy.parameter(b_cnn)
    W_sm_express = dy.parameter(W_sm)
    b_sm_express = dy.parameter(b_sm)

    # Pad short sentences up to the window width.  Build a new list rather than
    # using `+=`, which would permanently extend the caller's list (i.e. the
    # sentence stored inside the train/dev data).
    if len(words) < WIN_SIZE:
        words = list(words) + [UNK] * (WIN_SIZE - len(words))

    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in words], d=1)
    # NOTE(review): is_valid=False presumably selects "same"-style padding in DyNet;
    # confirm against the conv2d_bias documentation.
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn_express, b_cnn_express, stride=(1, 1), is_valid=False)
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    return W_sm_express * pool_out + b_sm_express

In [7]:
for ITER in range(100):
    # ---- training pass: one parameter update per sentence, in a fresh random order ----
    random.shuffle(train)
    train_loss, train_correct = 0.0, 0.0
    start = time.time()
    for words, tag in train:
        scores = calc_scores(words)
        # Count the sentence as correct when the top-scoring tag equals the gold tag
        train_correct += int(np.argmax(scores.npvalue()) == tag)

        # Negative log-likelihood of the gold tag under a softmax over the scores
        my_loss = dy.pickneglogsoftmax(scores, tag)
        train_loss += my_loss.value()
        my_loss.backward()
        trainer.update()
    elapsed = time.time() - start
    print("iter %r: train loss/sent=%.4f, acc=%.4f, time=%.2fs" % (ITER, train_loss/len(train), train_correct/len(train), elapsed))

    # ---- evaluation pass: forward computation only, no parameter updates ----
    test_correct = 0.0
    for words, tag in dev:
        predict = np.argmax(calc_scores(words).npvalue())
        test_correct += int(predict == tag)
    print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))

iter 0: train loss/sent=1.6019, acc=0.2983, time=29.13s
iter 0: test acc=0.3439
iter 1: train loss/sent=1.2562, acc=0.4945, time=29.20s
iter 1: test acc=0.3593
iter 2: train loss/sent=0.8432, acc=0.6857, time=29.63s
iter 2: test acc=0.3643
iter 3: train loss/sent=0.4574, acc=0.8406, time=29.07s
iter 3: test acc=0.3697
iter 4: train loss/sent=0.2109, acc=0.9329, time=29.09s
iter 4: test acc=0.3448
iter 5: train loss/sent=0.0857, acc=0.9760, time=29.20s
iter 5: test acc=0.3457
iter 6: train loss/sent=0.0309, acc=0.9915, time=29.79s
iter 6: test acc=0.3570
iter 7: train loss/sent=0.0131, acc=0.9966, time=29.06s
iter 7: test acc=0.3611
iter 8: train loss/sent=0.0088, acc=0.9979, time=29.09s
iter 8: test acc=0.3710
iter 9: train loss/sent=0.0046, acc=0.9986, time=29.08s
iter 9: test acc=0.3615
iter 10: train loss/sent=0.0060, acc=0.9984, time=29.42s
iter 10: test acc=0.3552
iter 11: train loss/sent=0.0036, acc=0.9991, time=29.06s
iter 11: test acc=0.3484
iter 12: train loss/sent=0.0030, acc

iter 34: test acc=0.3462
iter 35: train loss/sent=0.0028, acc=0.9995, time=29.29s
iter 35: test acc=0.3566
iter 36: train loss/sent=0.0031, acc=0.9988, time=29.26s
iter 36: test acc=0.3511
iter 37: train loss/sent=0.0027, acc=0.9992, time=29.31s
iter 37: test acc=0.3688
iter 38: train loss/sent=0.0031, acc=0.9992, time=29.94s
iter 38: test acc=0.3584
iter 39: train loss/sent=0.0032, acc=0.9988, time=37.57s
iter 39: test acc=0.3457
iter 40: train loss/sent=0.0040, acc=0.9989, time=29.17s
iter 40: test acc=0.3584
iter 41: train loss/sent=0.0033, acc=0.9991, time=29.37s
iter 41: test acc=0.3593
iter 42: train loss/sent=0.0024, acc=0.9991, time=29.00s
iter 42: test acc=0.3471
iter 43: train loss/sent=0.0058, acc=0.9980, time=29.17s
iter 43: test acc=0.3480
iter 44: train loss/sent=0.0023, acc=0.9989, time=29.06s
iter 44: test acc=0.3502
iter 45: train loss/sent=0.0028, acc=0.9992, time=29.35s
iter 45: test acc=0.3724
iter 46: train loss/sent=0.0068, acc=0.9977, time=29.07s
iter 46: test ac

iter 69: train loss/sent=0.0046, acc=0.9987, time=29.22s
iter 69: test acc=0.3480
iter 70: train loss/sent=0.0060, acc=0.9982, time=29.20s
iter 70: test acc=0.3416
iter 71: train loss/sent=0.0087, acc=0.9973, time=29.34s
iter 71: test acc=0.3452
iter 72: train loss/sent=0.0084, acc=0.9984, time=29.15s
iter 72: test acc=0.3421
iter 73: train loss/sent=0.0056, acc=0.9987, time=29.00s
iter 73: test acc=0.3597
iter 74: train loss/sent=0.0049, acc=0.9985, time=29.10s
iter 74: test acc=0.3516
iter 75: train loss/sent=0.0064, acc=0.9980, time=29.01s
iter 75: test acc=0.3385
iter 76: train loss/sent=0.0067, acc=0.9984, time=29.27s
iter 76: test acc=0.3566
iter 77: train loss/sent=0.0067, acc=0.9984, time=28.95s
iter 77: test acc=0.3570
iter 78: train loss/sent=0.0078, acc=0.9985, time=29.18s
iter 78: test acc=0.3471
iter 79: train loss/sent=0.0047, acc=0.9985, time=29.01s
iter 79: test acc=0.3443
iter 80: train loss/sent=0.0059, acc=0.9979, time=29.69s
iter 80: test acc=0.3615
iter 81: train l