In [1]:
# Install DyNet and fetch the nn4nlp example repository, whose data/classes files are read below.
!pip install dynet
# Clone only when the checkout is missing, so re-running this cell does not fail with
# "destination path 'nn4nlp-code' already exists and is not an empty directory".
!test -d nn4nlp-code || git clone https://github.com/neubig/nn4nlp-code.git

Looking in indexes: https://pypi.org/simple, https://legacy.pypi.org/simple
Collecting dynet
[?25l  Downloading https://files.pythonhosted.org/packages/4f/de/181a8380e9fdb89d9aa5838059336bb535503d5f2053e621438e69081407/dyNET-2.0.3-cp27-cp27mu-manylinux1_x86_64.whl (27.6MB)
[K    100% |████████████████████████████████| 27.6MB 834kB/s 
Collecting cython (from dynet)
[?25l  Downloading https://files.pythonhosted.org/packages/fe/d6/a097bd9913cc0fc974b968f5586d3f0609f46ca58b2aae3b8dfd51c1fe18/Cython-0.28.2-cp27-cp27mu-manylinux1_x86_64.whl (3.3MB)
[K    100% |████████████████████████████████| 3.3MB 12.6MB/s 
[?25hInstalling collected packages: cython, dynet
Successfully installed cython-0.28.2 dynet-2.0.3
fatal: destination path 'nn4nlp-code' already exists and is not an empty directory.


In [0]:
from collections import defaultdict
import time
import random
import dynet as dy
import numpy as np

In [0]:
# Functions to read in the corpus
# w2i / t2i map words / tags to integer ids. Looking up a missing key calls the
# default factory, which returns the current size of the mapping, so every new
# word or tag receives the next unused id.
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
# Looked up while w2i is still empty, so "<unk>" is assigned id 0.
UNK = w2i["<unk>"]
def read_dataset(filename):
  """Yield one (word_ids, tag_id) pair per example line of a corpus file.

  Each example line has the form "tag ||| word word ...". The line is
  lower-cased and stripped, then split at the first " ||| ": the left part
  is the tag and the right part is split on single spaces into words.
  Words and tags are converted to integer ids through the module-level
  w2i / t2i mappings, so new entries are added to them as a side effect.

  Blank lines are skipped. A non-blank line without the " ||| " separator
  still raises ValueError.
  """
  with open(filename, "r") as f:
    for line in f:
      line = line.lower().strip()
      if not line:
        # An empty line carries no example; unpacking it below would raise.
        continue
      # maxsplit=1 keeps a literal " ||| " inside the sentence with the words.
      tag, words = line.split(" ||| ", 1)
      yield ([w2i[x] for x in words.split(" ")], t2i[tag])

# Read in the data
train = list(read_dataset("nn4nlp-code/data/classes/train.txt"))
# Freeze the vocabulary: from here on an unseen word resolves to UNK instead of
# receiving a fresh id.
w2i = defaultdict(lambda: UNK, w2i)
# Take the vocabulary size before reading the dev set. The frozen defaultdict
# still stores every unseen word as a key (with value UNK), so measuring
# len(w2i) afterwards would count words that never get an id of their own and
# would allocate embedding rows that are never looked up.
nwords = len(w2i)
dev = list(read_dataset("nn4nlp-code/data/classes/test.txt"))
# t2i is not frozen, so the tag count is taken after both files have been read.
ntags = len(t2i)

In [0]:
# Start DyNet and define trainer
# `model` is the container that the parameter definitions in this notebook
# register their weights with (via model.add_parameters / add_lookup_parameters).
model = dy.Model()
# Adam-based trainer attached to `model`; the training loop calls
# trainer.update() after each backward pass.
trainer = dy.AdamTrainer(model)

In [0]:
# Define the model
EMB_SIZE = 64   # width of a word embedding
HID_SIZE = 64   # width of each hidden layer
HID_LAY = 2     # number of hidden layers

# Word embeddings: one EMB_SIZE-wide row per word id
W_emb = model.add_lookup_parameters((nwords, EMB_SIZE))

# Hidden-layer weights: the first layer takes an EMB_SIZE-wide input,
# every later layer takes the HID_SIZE-wide output of the layer before it
W_h = []
for lay in range(HID_LAY):
  W_h.append(model.add_parameters((HID_SIZE, EMB_SIZE if lay == 0 else HID_SIZE)))

# Hidden-layer biases, one per layer (created after all the weights)
b_h = []
for lay in range(HID_LAY):
  b_h.append(model.add_parameters((HID_SIZE)))

# Softmax weights and bias: map the last hidden layer to one score per tag
W_sm = model.add_parameters((ntags, HID_SIZE))
b_sm = model.add_parameters((ntags))

In [0]:
# Score a single example
def calc_scores(words):
  """Build the score expression for one sentence.

  Resets DyNet's computation graph (dy.renew_cg) on every call, sums the
  embeddings of the given word ids, passes the sum through each tanh hidden
  layer in turn, and returns the final affine output: a DyNet expression
  with one entry per tag (W_sm has ntags rows). No softmax is applied here.

  words -- iterable of word ids used to index W_emb
  """
  dy.renew_cg()
  hidden = dy.esum([dy.lookup(W_emb, word_id) for word_id in words])
  for weight, bias in zip(W_h, b_h):
    pre_activation = dy.parameter(weight) * hidden + dy.parameter(bias)
    hidden = dy.tanh(pre_activation)
  scores = dy.parameter(W_sm) * hidden + dy.parameter(b_sm)
  return scores

In [9]:
for ITER in range(100):
  # Training pass: visit the training examples in a new random order
  random.shuffle(train)
  train_loss = 0.0
  start = time.time()
  for words, tag in train:
    # Loss of the gold tag under the scores for this sentence
    loss_expr = dy.pickneglogsoftmax(calc_scores(words), tag)
    train_loss += loss_expr.value()
    # Backpropagate, then let the trainer apply the update
    loss_expr.backward()
    trainer.update()
  avg_loss = train_loss/len(train)
  elapsed = time.time()-start
  print("iter %r: train loss/sent=%.4f, time=%.2fs" % (ITER, avg_loss, elapsed))
  # Evaluation pass: an example counts as correct when the highest-scoring
  # tag equals its gold tag
  hits = [np.argmax(calc_scores(words).npvalue()) == tag for words, tag in dev]
  test_correct = float(sum(hits))
  print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))

iter 0: train loss/sent=1.5440, time=0.74s
iter 0: test acc=0.3520
iter 1: train loss/sent=1.2655, time=0.77s
iter 1: test acc=0.3738
iter 2: train loss/sent=1.0374, time=0.76s
iter 2: test acc=0.3887
iter 3: train loss/sent=0.8177, time=0.78s
iter 3: test acc=0.3932
iter 4: train loss/sent=0.6163, time=0.77s
iter 4: test acc=0.3787
iter 5: train loss/sent=0.4603, time=0.76s
iter 5: test acc=0.3801
iter 6: train loss/sent=0.3376, time=0.78s
iter 6: test acc=0.3674
iter 7: train loss/sent=0.2424, time=0.78s
iter 7: test acc=0.3765
iter 8: train loss/sent=0.1654, time=0.75s
iter 8: test acc=0.3674
iter 9: train loss/sent=0.1260, time=0.76s
iter 9: test acc=0.3796
iter 10: train loss/sent=0.0820, time=0.75s
iter 10: test acc=0.3557
iter 11: train loss/sent=0.0621, time=0.77s
iter 11: test acc=0.3529
iter 12: train loss/sent=0.0502, time=0.75s
iter 12: test acc=0.3561
iter 13: train loss/sent=0.0355, time=0.75s
iter 13: test acc=0.3674
iter 14: train loss/sent=0.0317, time=0.77s
iter 14: t

iter 53: train loss/sent=0.0281, time=0.76s
iter 53: test acc=0.3534
iter 54: train loss/sent=0.0312, time=0.77s
iter 54: test acc=0.3792
iter 55: train loss/sent=0.0251, time=0.77s
iter 55: test acc=0.3674
iter 56: train loss/sent=0.0380, time=0.77s
iter 56: test acc=0.3520
iter 57: train loss/sent=0.0254, time=0.75s
iter 57: test acc=0.3701
iter 58: train loss/sent=0.0355, time=0.75s
iter 58: test acc=0.3647
iter 59: train loss/sent=0.0286, time=0.77s
iter 59: test acc=0.3674
iter 60: train loss/sent=0.0246, time=0.75s
iter 60: test acc=0.3629
iter 61: train loss/sent=0.0209, time=0.76s
iter 61: test acc=0.3633
iter 62: train loss/sent=0.0189, time=0.76s
iter 62: test acc=0.3756
iter 63: train loss/sent=0.0275, time=0.76s
iter 63: test acc=0.3624
iter 64: train loss/sent=0.0204, time=0.76s
iter 64: test acc=0.3588
iter 65: train loss/sent=0.0217, time=0.74s
iter 65: test acc=0.3606
iter 66: train loss/sent=0.0220, time=0.75s
iter 66: test acc=0.3652
iter 67: train loss/sent=0.0279, t