In [1]:
# -*- coding: utf8 -*-
from __future__ import print_function
import os
# import sys
import time
import json
import argparse
import random
random.seed(49999)
from collections import OrderedDict

import numpy
numpy.random.seed(49999)
import tensorflow as tf
tf.set_random_seed(49999)
import keras
import keras.backend as K
from keras.models import Sequential, Model
from keras.layers import *
from keras.optimizers import Adadelta
from keras.initializers import VarianceScaling
from keras.regularizers import *

Using TensorFlow backend.


In [2]:
from utils import *
import inputs
import metrics
from losses import *
from optimizers import *

In [3]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config = config)

In [4]:
# Model specific imports
from models.model import BasicModel
from layers.DynamicMaxPooling import *
from utils.utility import *

In [5]:
class CrossATT(Layer):

    def __init__(self, output_dim, c_maxlen, q_maxlen, dropout, **kwargs):
        self.output_dim=output_dim
        self.c_maxlen = c_maxlen
        self.q_maxlen = q_maxlen
        self.dropout = dropout
        super(CrossATT, self).__init__(**kwargs)

    def build(self, input_shape):
        # input_shape: [(None, ?, 128), (None, ?, 128)]
        init = VarianceScaling(scale=1.0, mode='fan_in', distribution='normal')
        self.W0 = self.add_weight(name='W0',
                                  shape=(input_shape[0][-1], self.output_dim),
                                  initializer=init,
                                  regularizer=l2(3e-7),
                                  trainable=True)
        self.W1 = self.add_weight(name='W1',
                                  shape=(input_shape[1][-1], self.output_dim),
                                  initializer=init,
                                  regularizer=l2(3e-7),
                                  trainable=True)

        super(CrossATT, self).build(input_shape)

    def mask_logits(self, inputs, mask, mask_value = -1e30):
        mask = tf.cast(mask, tf.float32)
        return inputs + mask_value * (1 - mask)

    def call(self, x, mask=None):
        x_cont, x_ques, c_mask, q_mask = x
        S = K.batch_dot(x_cont, K.permute_dimensions(x_ques, pattern=(0, 2, 1)))
        S_ = tf.nn.softmax(S)
        S_n = tf.expand_dims(S_, 3)
        vs = K.tile(S_n, [1, 1, 1, self.output_dim])
        v0 = tf.expand_dims(x_ques, 1)
        v1 = K.tile(v0, [1, self.c_maxlen, 1, 1])
        c2q = tf.multiply(vs, v1)
        v11 = K.sum(c2q, axis=2)
        v2 = K.dot(v11, self.W1)
        v3 = K.dot(x_cont, self.W0)
        result = v2 + v3

        return result

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][1], self.output_dim)

class context2query_attention(Layer):

    def __init__(self, output_dim, c_maxlen, q_maxlen, dropout, **kwargs):
        self.output_dim=output_dim
        self.c_maxlen = c_maxlen
        self.q_maxlen = q_maxlen
        self.dropout = dropout
        super(context2query_attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # input_shape: [(None, ?, 128), (None, ?, 128)]
        init = VarianceScaling(scale=1.0, mode='fan_in', distribution='normal')
        self.W0 = self.add_weight(name='W0',
                                  shape=(input_shape[0][-1], 1),
                                  initializer=init,
                                  regularizer=l2(3e-7),
                                  trainable=True)
        self.W1 = self.add_weight(name='W1',
                                  shape=(input_shape[1][-1], 1),
                                  initializer=init,
                                  regularizer=l2(3e-7),
                                  trainable=True)
        self.W2 = self.add_weight(name='W2',
                                  shape=(1, 1, input_shape[0][-1]),
                                  initializer=init,
                                  regularizer=l2(3e-7),
                                  trainable=True)
        self.bias = self.add_weight(name='linear_bias',
                                    shape=([1]),
                                    initializer='zero',
                                    regularizer=l2(3e-7),
                                    trainable=True)
        super(context2query_attention, self).build(input_shape)

    def mask_logits(self, inputs, mask, mask_value = -1e30):
        mask = tf.cast(mask, tf.float32)
        return inputs + mask_value * (1 - mask)

    def call(self, x, mask=None):
        x_cont, x_ques, c_mask, q_mask = x

        # get similarity matrix S
        subres0 = K.tile(K.dot(x_cont, self.W0), [1, 1, self.q_maxlen])
        subres1 = K.tile(K.permute_dimensions(K.dot(x_ques, self.W1), pattern=(0, 2, 1)), [1, self.c_maxlen, 1])
        subres2 = K.batch_dot(x_cont * self.W2, K.permute_dimensions(x_ques, pattern=(0, 2, 1)))
        S = subres0 + subres1 + subres2
        S += self.bias
        q_mask = tf.expand_dims(q_mask, 1)
        S_ = tf.nn.softmax(self.mask_logits(S, q_mask))
        c_mask = tf.expand_dims(c_mask, 2)
        S_T = K.permute_dimensions(tf.nn.softmax(self.mask_logits(S, c_mask), axis=1), (0, 2, 1))
        c2q = tf.matmul(S_, x_ques)
        q2c = tf.matmul(tf.matmul(S_, S_T), x_cont)
        result = K.concatenate([x_cont, c2q, x_cont * c2q, x_cont * q2c], axis=-1)

        return result

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][1], self.output_dim)

class AttLayer(Layer):
    def __init__(self, attention_dim):
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = attention_dim
        super(AttLayer, self).__init__()

    def build(self, input_shape):
        # assert len(input_shape) == 3
        print("input_shape", input_shape)
        self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
        self.b = K.variable(self.init((self.attention_dim, )))
        self.u = K.variable(self.init((self.attention_dim, 1)))
        self.trainable_weights = [self.W, self.b, self.u]
        super(AttLayer, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        return mask

    def call(self, x, mask=None):
        # size of x :[batch_size, sel_len, attention_dim]
        # size of u :[batch_size, attention_dim]
        # uit = tanh(xW+b)
        # print("x", x.get_shape().as_list())
        # print("W", self.W.get_shape().as_list())
        # print("b", self.b.get_shape().as_list())
        # print("u", self.u.get_shape().as_list())
        # # v1 = tf.matmul(x, self.W)
        # W = K.variable(self.init((x.get_shape().as_list()[-1], self.attention_dim)))
        # print("W1", W.get_shape().as_list())
        v1 = K.dot(x, self.W)
        v2 = K.bias_add(v1, self.b)
        uit = K.tanh(v2)
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)
        # 自然对数为底的指数
        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        # ait是概率List
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)
        return output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])
    
class AttLayernew(Layer):
    def __init__(self, attention_dim):
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = attention_dim
        super(AttLayernew, self).__init__()

    def build(self, input_shape):
        # assert len(input_shape) == 3
        print("input_shape", input_shape)
        self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
        self.b = K.variable(self.init((self.attention_dim, )))
        self.u = K.variable(self.init((self.attention_dim, 1)))
        self.trainable_weights = [self.W, self.b, self.u]
        super(AttLayernew, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        return mask

    def call(self, x, mask=None):
        v1 = K.dot(x, self.W)
        v2 = K.bias_add(v1, self.b)
        uit = K.tanh(v2)
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)
        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        # ait是概率List
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)
        return [ait, output]

    def compute_output_shape(self, input_shape):
        return [(input_shape[0], input_shape[1]), (input_shape[0], input_shape[-1])]

class TileLayer(Layer):
    def __init__(self, dim):
        self.dim = dim
        super(TileLayer, self).__init__()

    def call(self, q_embed, mask=None):
        q_emb_exp = K.expand_dims(q_embed, axis=1)
        show_layer_info('exp 1', q_emb_exp)
        q_emb_reshape = K.tile(q_emb_exp, (1, self.dim, 1))
        show_layer_info('tile 1', q_emb_reshape)
        return q_emb_reshape
    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.dim, input_shape[1])


class SqueezeLayer(Layer):
    def __init__(self, dim):
        # self.init = initializers.get('normal')
        # self.supports_masking = True
        self.dim = dim
        super(SqueezeLayer, self).__init__()

    def call(self, q_embed, mask=None):
        q_emb_exp = K.squeeze(q_embed, axis=self.dim)
        # q_emb_exp = K.squeeze(q_embed)
        show_layer_info('squeeze 1', q_emb_exp)
        return q_emb_exp

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[2], input_shape[3])

class MYMODEL(BasicModel):
    def __init__(self, config):
        super(MYMODEL, self).__init__(config)
        self.__name = 'MYMODEL'
        self.check_list = [ 'text1_maxlen', 'text2_maxlen',
                   'embed', 'embed_size', 'train_embed',  'vocab_size',
                   'hidden_size', 'topk', 'dropout_rate']
        self.embed_trainable = config['train_embed']

        self.setup(config)
        if not self.check():
            raise TypeError('[MYMODEL] parameter check wrong')
        self.sent_num = int(self.config['text2_maxlen']/self.config['text1_maxlen'])
        print('[MYMODEL] init done', end='\n')

    def setup(self, config):
        if not isinstance(config, dict):
            raise TypeError('parameter config should be dict:', config)

        self.set_default('hidden_size', 32)
        self.set_default('topk', 100)
        self.set_default('dropout_rate', 0)
        self.config.update(config)

    def build(self):

        query = Input(name='query', shape=(self.config['text1_maxlen'],))
        show_layer_info('Input', query)
        doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
        show_layer_info('Input', doc)
        sent = Input(shape=(self.config['text1_maxlen'],))

        embedding = Embedding(self.config['vocab_size'], self.config['embed_size'], weights=[self.config['embed']], trainable = self.embed_trainable)
        q_embed = embedding(query)
        show_layer_info('Embedding', q_embed)
        # d_embed = embedding(doc)
        # show_layer_info('Embedding', d_embed)
        s_embed = embedding(sent)
        show_layer_info('Embedding', s_embed)

        q_rep = Bidirectional(LSTM(self.config['hidden_size'], return_sequences=True, dropout=self.config['dropout_rate']))(q_embed)
        show_layer_info('Bidirectional-LSTM', q_rep)
        # d_rep = Bidirectional(GRU(self.config['hidden_size'], return_sequences=True, dropout=self.config['dropout_rate']))(d_embed)
        # show_layer_info('Bidirectional-LSTM', d_rep)
        s_rep = Bidirectional(LSTM(self.config['hidden_size'], return_sequences=True, dropout=self.config['dropout_rate']))(s_embed)
        show_layer_info('Bidirectional-LSTM', s_rep)


        c_mask = Lambda(lambda x: tf.cast(x, tf.bool))(doc) # [bs, c_len]
        q_mask = Lambda(lambda x: tf.cast(x, tf.bool))(query)
        s_mask = Lambda(lambda x: tf.cast(x, tf.bool))(sent)
        # cont_len = Lambda(lambda x: tf.expand_dims(tf.reduce_sum(tf.cast(x, tf.int32), axis=1), axis=1))(c_mask)
        # ques_len = Lambda(lambda x: tf.expand_dims(tf.reduce_sum(tf.cast(x, tf.int32), axis=1), axis=1))(q_mask)
        x = context2query_attention(8 * self.config['hidden_size'], self.config['text1_maxlen'], self.config['text1_maxlen'], self.config['dropout_rate'])([s_rep, q_rep, s_mask, q_mask])
        # x = CrossATT(100, self.config['text1_maxlen'], self.config['text1_maxlen'], self.config['dropout_rate'])([s_rep, q_rep, s_mask, q_mask])
        show_layer_info('context2query_attention', x)
        
#         l_att = AttLayer(2 * self.config['hidden_size'])(x)
        l_att = AttLayer(2 * self.config['hidden_size'])(x)
        show_layer_info('att 1', l_att)
        sentEncoder = Model([sent, query], l_att)

        query4 = TileLayer(self.sent_num)(query)
        query4_s = Reshape((self.sent_num, self.config['text1_maxlen']))(query4)
        show_layer_info('query4_s', query4_s)
        doc4 = Reshape((self.sent_num, self.config['text1_maxlen']))(doc)
        show_layer_info('doc4', doc4)

        concat = concatenate([query4_s, doc4])
        show_layer_info('concat 1', concat)
        out_model = TimeDistributed(Lambda(lambda x: sentEncoder([x[:,:self.config['text1_maxlen']], x[:, self.config['text1_maxlen']:]])))(concat)
        l_att_sent_weights, l_att_sent = AttLayernew(2 * self.config['hidden_size'])(out_model)
        show_layer_info('att 2', l_att_sent)
        s_att_d = Dense(2 * self.config['hidden_size'], activation='relu')(l_att_sent)

        q_att = AttLayer(2 * self.config['hidden_size'])(q_rep)
        show_layer_info('att q', q_att)
        cross = multiply([q_att, s_att_d])
        # -1 flatten 
        cross_reshape = Reshape((-1, ))(cross)
        show_layer_info('Reshape', cross_reshape)

        # mm_k = Lambda(lambda x: K.tf.nn.top_k(x, k=self.config['topk'], sorted=True)[0])(cross_reshape)
        # show_layer_info('Lambda-topk', mm_k)

        pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(cross_reshape)
        show_layer_info('Dropout', pool1_flat_drop)

        if self.config['target_mode'] == 'classification':
            out_ = Dense(2, activation='softmax')(pool1_flat_drop)
        elif self.config['target_mode'] in ['regression', 'ranking']:
            #temp1 = Dense(self.config['hidden_size']//2, activation='relu')(pool1_flat_drop)
            #temp2 = Dense(self.config['hidden_size']//10, activation='relu')(temp1)
            out_ = Dense(1)(pool1_flat_drop)
        show_layer_info('Dense', out_)

        #model = Model(inputs=[query, doc, dpool_index], outputs=out_)
        model = Model(inputs=[query, doc], outputs=[out_, l_att_sent_weights])
#         model = Model(inputs=[query, doc], outputs=out_)
        return model

In [6]:
def rank_hinge_loss(kwargs=None):
    margin = 1.
    if isinstance(kwargs, dict) and 'margin' in kwargs:
        margin = kwargs['margin']

    def _margin_loss(y_true, y_pred):
        # output_shape = K.int_shape(y_pred)
        y_pos = Lambda(lambda a: a[::2, :], output_shape= (1,))(y_pred)
        y_neg = Lambda(lambda a: a[1::2, :], output_shape= (1,))(y_pred)
        loss = K.maximum(0., margin + y_neg - y_pos)
        return K.mean(loss)
    return _margin_loss


def hinge_loss_over_sentence(num_sent, margin=0.1):
    def _margin_loss_sent(y_true, y_pred):
        # sentence loss only over positive samples
        y_pos = Lambda(lambda a: a[::2, :1], output_shape= (1,))(y_pred)
        y_neg = Lambda(lambda a: a[::2, 1:], output_shape= (num_sent - 1,))(y_pred)
        y_neg_sum = K.sum(y_neg, axis=-1, keepdims=False)
        show_layer_info('y pos', y_pos)
        show_layer_info('y neg', y_neg)
        show_layer_info('y neg sum', y_neg_sum)
        loss = K.maximum(0., margin + y_neg_sum - y_pos)
        return K.mean(loss)
    return _margin_loss_sent

In [7]:
def load_model(config):
    model_config = config['model']['setting']
    model_config.update(config['inputs']['share'])
    model = MYMODEL(model_config)
    mo = model.build()
    return mo

In [8]:
def train(config):

    print(json.dumps(config, indent=2), end='\n')
    # read basic config
    global_conf = config["global"]
#     optimizer = global_conf['optimizer']
#     optimizer = optimizers.get(optimizer)
#     K.set_value(optimizer.lr, global_conf['learning_rate'])
    weights_file = str(global_conf['weights_file']) + '.%d'
    display_interval = int(global_conf['display_interval'])
    num_iters = int(global_conf['num_iters'])
    save_weights_iters = int(global_conf['save_weights_iters'])

    # read input config
    input_conf = config['inputs']
    share_input_conf = input_conf['share']


    # collect embedding
    if 'embed_path' in share_input_conf:
        embed_dict = read_embedding(filename=share_input_conf['embed_path'])
        _PAD_ = share_input_conf['vocab_size'] - 1
        embed_dict[_PAD_] = np.zeros((share_input_conf['embed_size'], ), dtype=np.float32)
        embed = np.float32(np.random.uniform(-0.2, 0.2, [share_input_conf['vocab_size'], share_input_conf['embed_size']]))
        share_input_conf['embed'] = convert_embed_2_numpy(embed_dict, embed = embed)
    else:
        embed = np.float32(np.random.uniform(-0.2, 0.2, [share_input_conf['vocab_size'], share_input_conf['embed_size']]))
        share_input_conf['embed'] = embed
    print('[Embedding] Embedding Load Done.', end='\n')

    # list all input tags and construct tags config
    input_train_conf = OrderedDict()
    input_eval_conf = OrderedDict()
    for tag in input_conf.keys():
        if 'phase' not in input_conf[tag]:
            continue
        if input_conf[tag]['phase'] == 'TRAIN':
            input_train_conf[tag] = {}
            input_train_conf[tag].update(share_input_conf)
            input_train_conf[tag].update(input_conf[tag])
        elif input_conf[tag]['phase'] == 'EVAL':
            input_eval_conf[tag] = {}
            input_eval_conf[tag].update(share_input_conf)
            input_eval_conf[tag].update(input_conf[tag])
    print('[Input] Process Input Tags. %s in TRAIN, %s in EVAL.' % (input_train_conf.keys(), input_eval_conf.keys()), end='\n')

    # collect dataset identification
    dataset = {}
    for tag in input_conf:
        if tag != 'share' and input_conf[tag]['phase'] == 'PREDICT':
            continue
        if 'text1_corpus' in input_conf[tag]:
            datapath = input_conf[tag]['text1_corpus']
            if datapath not in dataset:
                dataset[datapath], _ = read_data(datapath)
        if 'text2_corpus' in input_conf[tag]:
            datapath = input_conf[tag]['text2_corpus']
            if datapath not in dataset:
                dataset[datapath], _ = read_data(datapath)
    print('[Dataset] %s Dataset Load Done.' % len(dataset), end='\n')

    # initial data generator
    train_gen = OrderedDict()
    eval_gen = OrderedDict()

    for tag, conf in input_train_conf.items():
        print(conf, end='\n')
        conf['data1'] = dataset[conf['text1_corpus']]
        conf['data2'] = dataset[conf['text2_corpus']]
        generator = inputs.get(conf['input_type'])
        train_gen[tag] = generator( config = conf )

    for tag, conf in input_eval_conf.items():
        print(conf, end='\n')
        conf['data1'] = dataset[conf['text1_corpus']]
        conf['data2'] = dataset[conf['text2_corpus']]
        generator = inputs.get(conf['input_type'])
        eval_gen[tag] = generator( config = conf )

    ######### Load Model #########
    model = load_model(config)

    loss = []
#     loss.append(rank_hinge_loss(config['losses'][0]['object_params']))
#     loss.append(hinge_loss_over_sentence(int(config['inputs']['share']['text2_maxlen'] / config['inputs']['share']['text1_maxlen'])))
    for lobj in config['losses']:
        if lobj['object_name'] in mz_specialized_losses:
            loss.append(rank_losses.get(lobj['object_name'])(lobj['object_params']))
        else:
            loss.append(rank_losses.get(lobj['object_name']))
    eval_metrics = OrderedDict()
    for mobj in config['metrics']:
        mobj = mobj.lower()
        if '@' in mobj:
            mt_key, mt_val = mobj.split('@', 1)
            eval_metrics[mobj] = metrics.get(mt_key)(int(mt_val))
        else:
            eval_metrics[mobj] = metrics.get(mobj)
    optimizer = Adadelta(lr=config["global"]["learning_rate"], rho=0.95)
    model.compile(optimizer = optimizer, loss=loss)
    print('[Model] Model Compile Done.', end='\n')

    for i_e in range(num_iters):
        for tag, generator in train_gen.items():
            genfun = generator.get_batch_generator()
            print('[%s]\t[Train:%s] ' % (time.strftime('%m-%d-%Y %H:%M:%S', time.localtime(time.time())), tag), end='')
            history = model.fit_generator(
                    genfun,
                    steps_per_epoch = display_interval,
                    epochs = 1,
                    shuffle=False,
                    verbose = 0
                ) #callbacks=[eval_map])
            print('Iter:%d\tloss=%.6f' % (i_e, history.history['loss'][0]), end='\n')

#         for tag, generator in eval_gen.items():
#             genfun = generator.get_batch_generator()
#             print('[%s]\t[Eval:%s] ' % (time.strftime('%m-%d-%Y %H:%M:%S', time.localtime(time.time())), tag), end='')
#             res = dict([[k,0.] for k in eval_metrics.keys()])
#             num_valid = 0
#             for input_data, y_true in genfun:
#                 y_pred = model.predict(input_data, batch_size=len(y_true))
#                 if issubclass(type(generator), inputs.list_generator.ListBasicGenerator):
#                     list_counts = input_data['list_counts']
#                     for k, eval_func in eval_metrics.items():
#                         for lc_idx in range(len(list_counts)-1):
#                             pre = list_counts[lc_idx]
#                             suf = list_counts[lc_idx+1]
#                             res[k] += eval_func(y_true = y_true[pre:suf], y_pred = y_pred[pre:suf])
#                     num_valid += len(list_counts) - 1
#                 else:
#                     for k, eval_func in eval_metrics.items():
#                         res[k] += eval_func(y_true = y_true, y_pred = y_pred)
#                     num_valid += 1
#             generator.reset()
#             print('Iter:%d\t%s' % (i_e, '\t'.join(['%s=%f'%(k,v/num_valid) for k, v in res.items()])), end='\n')
#             sys.stdout.flush()
        if (i_e+1) % save_weights_iters == 0:
            model.save_weights(weights_file % (i_e+1))

In [9]:
config = {
  "net_name": "MYMODEL",
  "global":{
      "model_type": "PY",
      "weights_file": "../examples/pinfo/weights/mymodel_gru.pinfo.weights",
      "save_weights_iters": 10,
      "num_iters": 1000,
      "display_interval": 10,
      "test_weights_iters": 1000,
      "optimizer": "adadelta",
      "learning_rate": 2.0
  },
  "inputs": {
    "share": {
        "text1_corpus": "../data/pinfo/corpus_preprocessed.txt",
        "text2_corpus": "../data/pinfo/corpus_preprocessed.txt",
        "use_dpool": False,
        "embed_size": 50,
        "embed_path": "../data/pinfo/embed_glove_d50",
        "vocab_size": 17601,
        "train_embed": False,
        "target_mode": "ranking",
        "text1_maxlen": 15,
        "text2_maxlen": 300
    },
    "train": {
        "input_type": "PairGenerator", 
        "phase": "TRAIN",
        "use_iter": False,
        "query_per_iter": 2,
        "batch_per_iter": 1,
        "batch_size": 20,
        "relation_file": "../data/pinfo/relation_train.txt"
    },
    "valid": {
        "input_type": "ListGenerator", 
        "phase": "EVAL",
        "batch_list": 10,
        "relation_file": "../data/pinfo/relation_valid.txt"
    },
    "test": {
        "input_type": "ListGenerator", 
        "phase": "EVAL",
        "batch_list": 10,
        "relation_file": "../data/pinfo/relation_test.txt"
    },
    "predict": {
        "input_type": "ListGenerator", 
        "phase": "PREDICT",
        "batch_list": 10,
        "relation_file": "../data/pinfo/relation_test.txt"
    }
  },
  "outputs": {
    "predict": {
      "save_format": "TREC",
      "save_path": "predict.test.mymodel_gru.pinfo.txt"
    }
  },
  "model": {
    "model_path": "./matchzoo/models/",
    "model_py": "mymodel_gru.MYMODEL",
    "setting": {
        "hidden_size": 150,
        "topk": 100,
        "dropout_rate": 0.2
    }
  },
  "losses": [ 
    {
       "object_name": "rank_hinge_loss",
       "object_params": {
            "margin": 1.0
       }
    },
    {
        "object_name": "hinge_loss_over_sentence" ,
        "object_params": {
            "margin": 20.0
        }
    }
  ],
  "metrics": [ "ndcg@3", "ndcg@5", "ndcg@10", "map", "recall@1", "recall@3", "recall@5"]
}



In [10]:
phase = 'train'
# model_file = '../examples/wikiqa/config/mvlstm_wikiqa.config'
# with open(model_file, 'r') as f:
#     config = json.load(f)
    
if phase == 'train':
    train(config)
elif phase == 'predict':
    predict(config)
else:
    print('Phase Error.', end='\n')

{
  "net_name": "MYMODEL",
  "global": {
    "model_type": "PY",
    "weights_file": "../examples/pinfo/weights/mymodel_gru.pinfo.weights",
    "save_weights_iters": 10,
    "num_iters": 1000,
    "display_interval": 10,
    "test_weights_iters": 1000,
    "optimizer": "adadelta",
    "learning_rate": 2.0
  },
  "inputs": {
    "share": {
      "text1_corpus": "../data/pinfo/corpus_preprocessed.txt",
      "text2_corpus": "../data/pinfo/corpus_preprocessed.txt",
      "use_dpool": false,
      "embed_size": 50,
      "embed_path": "../data/pinfo/embed_glove_d50",
      "vocab_size": 17601,
      "train_embed": false,
      "target_mode": "ranking",
      "text1_maxlen": 15,
      "text2_maxlen": 300
    },
    "train": {
      "input_type": "PairGenerator",
      "phase": "TRAIN",
      "use_iter": false,
      "query_per_iter": 2,
      "batch_per_iter": 1,
      "batch_size": 20,
      "relation_file": "../data/pinfo/relation_train.txt"
    },
    "valid": {
      "input_type": "List

TypeError: unsupported operand type(s) for -: 'dict' and 'int'