In [1]:
!pip install torch torchtext
!git clone https://github.com/neubig/nn4nlp-code.git

Collecting torch
[?25l  Downloading https://files.pythonhosted.org/packages/69/43/380514bd9663f1bf708abeb359b8b48d3fabb1c8e95bb3427a980a064c57/torch-0.4.0-cp36-cp36m-manylinux1_x86_64.whl (484.0MB)
[K    100% |████████████████████████████████| 484.0MB 24kB/s 
tcmalloc: large alloc 1073750016 bytes == 0x5b048000 @  0x7f4c4fd9d1c4 0x46d6a4 0x5fcbcc 0x4c494d 0x54f3c4 0x553aaf 0x54e4c8 0x54f4f6 0x553aaf 0x54efc1 0x54f24d 0x553aaf 0x54efc1 0x54f24d 0x553aaf 0x54efc1 0x54f24d 0x551ee0 0x54e4c8 0x54f4f6 0x553aaf 0x54efc1 0x54f24d 0x551ee0 0x54efc1 0x54f24d 0x551ee0 0x54e4c8 0x54f4f6 0x553aaf 0x54e4c8
[?25hCollecting torchtext
[?25l  Downloading https://files.pythonhosted.org/packages/78/90/474d5944d43001a6e72b9aaed5c3e4f77516fbef2317002da2096fd8b5ea/torchtext-0.2.3.tar.gz (42kB)
[K    100% |████████████████████████████████| 51kB 16.0MB/s 
[?25hCollecting tqdm (from torchtext)
[?25l  Downloading https://files.pythonhosted.org/packages/93/24/6ab1df969db228aed36a648a8959d1027099ce45fad675

remote: Total 372 (delta 0), reused 0 (delta 0), pack-reused 372[K
Receiving objects: 100% (372/372), 6.33 MiB | 23.05 MiB/s, done.
Resolving deltas: 100% (131/131), done.


In [0]:
from collections import defaultdict
import time
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [0]:
# Functions to read in the corpus
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
UNK = w2i["<unk>"]
def read_dataset(filename):
    with open(filename, "r") as f:
        for line in f:
            tag, words = line.lower().strip().split(" ||| ")
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])

# Read in the data
train = list(read_dataset("nn4nlp-code/data/classes/train.txt"))
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("nn4nlp-code/data/classes/test.txt"))
nwords = len(w2i)
ntags = len(t2i)

In [0]:
class CNNModel(nn.Module):
  def __init__(self, vocab_size, emb_size, win_size, filter_size):
    super(CNNModel, self).__init__()
    self.filter_size = filter_size
    self.embeddings = nn.Embedding(vocab_size, emb_size)
    self.conv1 = nn.Conv1d(emb_size, filter_size, win_size)
    self.maxpool1 = nn.AdaptiveAvgPool1d(1)
    self.fcl = nn.Linear(filter_size, ntags)
  
  def forward(self, x):
    x = self.embeddings(x)
    x = self.conv1(x.transpose(1,2))
    x = self.maxpool1(x).view(-1, self.filter_size)
    x = self.fcl(x)
    return x

In [0]:
# Define the model
EMB_SIZE = 64
# W_emb = model.add_lookup_parameters((nwords, 1, 1, EMB_SIZE)) # Word embeddings
WIN_SIZE = 3
FILTER_SIZE = 64
# W_cnn = model.add_parameters((1, WIN_SIZE, EMB_SIZE, FILTER_SIZE)) # cnn weights
# b_cnn = model.add_parameters((FILTER_SIZE)) # cnn bias

# W_sm = model.add_parameters((ntags, FILTER_SIZE))          # Softmax weights
# b_sm = model.add_parameters((ntags))                      # Softmax bias

# Start DyNet and define trainer
model = CNNModel(nwords, EMB_SIZE, WIN_SIZE, FILTER_SIZE)
trainer = torch.optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss()

In [0]:
def calc_scores(words):
    if len(words) < WIN_SIZE:
      words += [0] * (WIN_SIZE-len(words))

    logit = model(torch.tensor(words).view(1, -1))
    return logit
#     cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in words], d=1)
#     cnn_out = dy.conv2d_bias(cnn_in, W_cnn_express, b_cnn_express, stride=(1, 1), is_valid=False)
#     pool_out = dy.max_dim(cnn_out, d=1)
#     pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
#     pool_out = dy.rectify(pool_out)
#     return W_sm_express * pool_out + b_sm_express

In [0]:
for ITER in range(100):
    # Perform training
    random.shuffle(train)
    train_loss = 0.0
    train_correct = 0.0
    start = time.time()
    for words, tag in train:
        scores = calc_scores(words)
        predict = np.argmax(scores.detach().numpy())
        if predict == tag:
            train_correct += 1
        
#         print(scores, tag)
        my_loss = F.cross_entropy(scores, torch.LongTensor([tag]))
#         my_loss = dy.pickneglogsoftmax(scores, tag)
        train_loss += my_loss.detach().numpy()
        my_loss.backward()
        trainer.step()
    print("iter %r: train loss/sent=%.4f, acc=%.4f, time=%.2fs" % (ITER, train_loss/len(train), train_correct/len(train), time.time()-start))
    # Perform testing
    test_correct = 0.0
    for words, tag in dev:
        scores = calc_scores(words)
        predict = np.argmax(scores.detach().numpy())
        if predict == tag:
            test_correct += 1
    print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))

iter 0: train loss/sent=1301.1692, acc=0.3641, time=64.08s
iter 0: test acc=0.3050
iter 1: train loss/sent=2634.9131, acc=0.5372, time=58.52s
iter 1: test acc=0.3367
iter 2: train loss/sent=4796.3728, acc=0.6945, time=57.26s
iter 2: test acc=0.3344
