In [1]:
%matplotlib inline
import argparse
import codecs
import collections
import random
import re
import time
import matplotlib.pyplot as plt
import numpy as np

import chainer
from chainer import cuda
import chainer.functions as F
import chainer.links as L
from chainer import optimizers, serializers

plt.style.use('ggplot')

In [2]:
#学習数
n_epoch = 400  
# 中間層の数
n_units   = 30
#バッチサイズ
batchsize = 25      # minibatch size
#ラベル(肯定、否定、、、)の数
n_label = 5 
# number of epochs per evaluation
epoch_per_eval = 5  

In [3]:
class SexpParser(object):

    def __init__(self, line):
        self.tokens = re.findall(r'\(|\)|[^\(\) ]+', line)
        self.pos = 0

    def parse(self):
        assert self.pos < len(self.tokens)
        token = self.tokens[self.pos]
        assert token != ')'
        self.pos += 1

        if token == '(':
            children = []
            while True:
                assert self.pos < len(self.tokens)
                if self.tokens[self.pos] == ')':
                    self.pos += 1
                    break
                else:
                    children.append(self.parse())
            return children
        else:
            return token

In [4]:
def convert_tree(vocab, exp):
    assert isinstance(exp, list) and (len(exp) == 2 or len(exp) == 3)

    if len(exp) == 2:
        label, leaf = exp
        if leaf not in vocab:
            vocab[leaf] = len(vocab)
        return {'label': int(label), 'node': vocab[leaf]}
    elif len(exp) == 3:
        label, left, right = exp
        node = (convert_tree(vocab, left), convert_tree(vocab, right))
        return {'label': int(label), 'node': node}

In [5]:
def read_corpus(path, vocab, max_size):
    with codecs.open(path, encoding='utf-8') as f:
        trees = []
        for line in f:
            line = line.strip()
            tree = SexpParser(line).parse()
            trees.append(convert_tree(vocab, tree))
            if max_size and len(trees) >= max_size:
                break

        return trees

In [6]:
class RecursiveNet(chainer.Chain):

    def __init__(self, n_vocab, n_units):
        super(RecursiveNet, self).__init__(
            embed=L.EmbedID(n_vocab, n_units),
            l=L.Linear(n_units * 2, n_units),
            w=L.Linear(n_units, n_label))

    def leaf(self, x):
        return self.embed(x)

    def node(self, left, right):
        return F.tanh(self.l(F.concat((left, right))))

    def label(self, v):
        return self.w(v)

In [7]:
def traverse(model, node, train=True, evaluate=None, root=True):
    if isinstance(node['node'], int):
        # leaf node
        word = np.array([node['node']], np.int32)
        loss = 0
        x = chainer.Variable(word, volatile=not train)
        v = model.leaf(x)
    else:
        # internal node
        left_node, right_node = node['node']
        left_loss, left = traverse(
            model, left_node, train=train, evaluate=evaluate, root=False)
        right_loss, right = traverse(
            model, right_node, train=train, evaluate=evaluate, root=False)
        v = model.node(left, right)
        loss = left_loss + right_loss

    y = model.label(v)

    if train:
        label = np.array([node['label']], np.int32)
        t = chainer.Variable(label, volatile=not train)
        loss += F.softmax_cross_entropy(y, t)

    if evaluate is not None:
        predict = cuda.to_cpu(y.data.argmax(1))
        if predict[0] == node['label']:
            evaluate['correct_node'] += 1
        evaluate['total_node'] += 1

        if root:
            if predict[0] == node['label']:
                evaluate['correct_root'] += 1
            evaluate['total_root'] += 1

    return loss, v

In [8]:
def evaluate(model, test_trees):
    m = model.copy()
    m.volatile = True
    result = collections.defaultdict(lambda: 0)
    for tree in test_trees:
        traverse(m, tree, train=False, evaluate=result)

    acc_node = 100.0 * result['correct_node'] / result['total_node']
    acc_root = 100.0 * result['correct_root'] / result['total_root']
    print(' Node accuracy: {0:.2f} %% ({1:,d}/{2:,d})'.format(
        acc_node, result['correct_node'], result['total_node']))
    print(' Root accuracy: {0:.2f} %% ({1:,d}/{2:,d})'.format(
        acc_root, result['correct_root'], result['total_root']))

In [9]:
vocab = {}
max_size=10
train_trees = read_corpus('trees/train.txt', vocab, max_size)
test_trees = read_corpus('trees/test.txt', vocab, max_size)
develop_trees = read_corpus('trees/dev.txt', vocab, max_size)

In [10]:
#Setup model
model = RecursiveNet(len(vocab), n_units)

In [11]:
# Setup optimizer
optimizer = optimizers.AdaGrad(lr=0.1)
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

In [12]:
accum_loss = 0
count = 0
start_at = time.time()
cur_at = start_at

In [13]:
for epoch in range(n_epoch):
    print('Epoch: {0:d}'.format(epoch))
    total_loss = 0
    cur_at = time.time()
    random.shuffle(train_trees)
    for tree in train_trees:
        loss, v = traverse(model, tree, train=True)
        accum_loss += loss
        count += 1

        if count >= batchsize:
            model.zerograds()
            accum_loss.backward()
            optimizer.update()
            total_loss += float(accum_loss.data)

            accum_loss = 0
            count = 0

    print('loss: {:.2f}'.format(total_loss))

    now = time.time()
    throuput = float(len(train_trees)) / (now - cur_at)
    print('{:.2f} iters/sec, {:.2f} sec'.format(throuput, now - cur_at))
    print()

    if (epoch + 1) % epoch_per_eval == 0:
        print('Train data evaluation:')
        evaluate(model, train_trees)
        print('Develop data evaluation:')
        evaluate(model, develop_trees)
        print('')

Epoch: 0
loss: 0.00
19.36 iters/sec, 0.52 sec

Epoch: 1
loss: 0.00
20.23 iters/sec, 0.49 sec

Epoch: 2
loss: 2054.80
12.83 iters/sec, 0.78 sec

Epoch: 3
loss: 0.00
20.59 iters/sec, 0.49 sec

Epoch: 4
loss: 1286.08
12.21 iters/sec, 0.82 sec

Train data evaluation:
 Node accuracy: 68.47 %% (304/444)
 Root accuracy: 20.00 %% (2/10)
Develop data evaluation:
 Node accuracy: 47.54 %% (174/366)
 Root accuracy: 20.00 %% (2/10)

Epoch: 5
loss: 0.00
20.98 iters/sec, 0.48 sec

Epoch: 6
loss: 0.00
21.37 iters/sec, 0.47 sec

Epoch: 7
loss: 1102.45
13.01 iters/sec, 0.77 sec

Epoch: 8
loss: 0.00
12.97 iters/sec, 0.77 sec

Epoch: 9
loss: 1286.13
8.29 iters/sec, 1.21 sec

Train data evaluation:
 Node accuracy: 80.18 %% (356/444)
 Root accuracy: 40.00 %% (4/10)
Develop data evaluation:
 Node accuracy: 47.81 %% (175/366)
 Root accuracy: 10.00 %% (1/10)

Epoch: 10
loss: 0.00
20.70 iters/sec, 0.48 sec

Epoch: 11
loss: 0.00
8.94 iters/sec, 1.12 sec

Epoch: 12
loss: 776.58
11.91 iters/sec, 0.84 sec

Epoch: 1

In [14]:
print('Test evaluateion')
evaluate(model, test_trees)

Test evaluateion
 Node accuracy: 61.86 %% (193/312)
 Root accuracy: 50.00 %% (5/10)


In [15]:
# Save the model and the optimizer
print('save the model')
serializers.save_npz('mnist.model', model)
print('save the optimizer')
serializers.save_npz('mnist.state', optimizer)

save the model
save the optimizer


In [16]:
# load the model and the optimizer
model = RecursiveNet(len(vocab), n_units)
optimizer = optimizers.AdaGrad(lr=0.1)
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))
print('load the model')
serializers.load_npz('mnist.model',  model)
print('load the optimizer')
serializers.load_npz('mnist.state', optimizer)

load the model
load the optimizer


In [32]:
train_trees [0]

{'label': 2,
 'node': ({'label': 2,
   'node': ({'label': 2, 'node': 80},
    {'label': 2,
     'node': ({'label': 2, 'node': 81},
      {'label': 2,
       'node': ({'label': 2, 'node': 82},
        {'label': 2,
         'node': ({'label': 2, 'node': 83}, {'label': 2, 'node': 84})})})})},
  {'label': 2,
   'node': ({'label': 2, 'node': 85},
    {'label': 2,
     'node': ({'label': 2,
       'node': ({'label': 2, 'node': 86},
        {'label': 1,
         'node': ({'label': 2, 'node': 87},
          {'label': 2,
           'node': ({'label': 2,
             'node': ({'label': 2, 'node': 88},
              {'label': 1,
               'node': ({'label': 2, 'node': 89},
                {'label': 2,
                 'node': ({'label': 2, 'node': 37},
                  {'label': 2,
                   'node': ({'label': 2, 'node': 90},
                    {'label': 2,
                     'node': ({'label': 2, 'node': 91},
                      {'label': 1, 'node': 92})})})})})},
           

In [44]:
train_trees[0]['node'].

({'label': 2,
  'node': ({'label': 2, 'node': 80},
   {'label': 2,
    'node': ({'label': 2, 'node': 81},
     {'label': 2,
      'node': ({'label': 2, 'node': 82},
       {'label': 2,
        'node': ({'label': 2, 'node': 83}, {'label': 2, 'node': 84})})})})},
 {'label': 2,
  'node': ({'label': 2, 'node': 85},
   {'label': 2,
    'node': ({'label': 2,
      'node': ({'label': 2, 'node': 86},
       {'label': 1,
        'node': ({'label': 2, 'node': 87},
         {'label': 2,
          'node': ({'label': 2,
            'node': ({'label': 2, 'node': 88},
             {'label': 1,
              'node': ({'label': 2, 'node': 89},
               {'label': 2,
                'node': ({'label': 2, 'node': 37},
                 {'label': 2,
                  'node': ({'label': 2, 'node': 90},
                   {'label': 2,
                    'node': ({'label': 2, 'node': 91},
                     {'label': 1, 'node': 92})})})})})},
           {'label': 4,
            'node': ({'label': 2, '