diff --git a/._download.sh b/._download.sh
new file mode 100755
index 0000000..c830264
Binary files /dev/null and b/._download.sh differ
diff --git a/.gitignore b/.gitignore
index 1a31245..f3cd12a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -103,4 +103,7 @@ ENV/
 code/models/*
 code/logs/*
 data/*
-
+*/models/
+models/
+*.log
+*.DS_Store
diff --git a/README.md b/README.md
index 2192901..d1b5f93 100755
--- a/README.md
+++ b/README.md
@@ -9,7 +9,11 @@ WikiQA, TrecQA, InsuranceQA
 
 #### data preprocess on WikiQA
 
-`run preprocess_wiki.ipynb`
+
+```
+bash download.sh
+python preprocess_wiki.py
+```
 
 ### Pointwise Style
 
@@ -17,6 +21,8 @@ WikiQA, TrecQA, InsuranceQA
 This model is a simple implementation of a Siamese NN QA model in a pointwise way.
 
+[To this repo](./siamese_nn)
+
 ##### train model
 
 `python siamese.py --train --model NN`
 
@@ -29,6 +35,8 @@ This model is a simple complementation of a Siamese CNN QA model with a pointwise
 This model is a simple implementation of a Siamese CNN QA model in a pointwise way.
 
+[To this repo](./siamese_cnn)
+
 ##### train model
 
 `python siamese.py --train --model CNN`
 
@@ -41,6 +49,8 @@ This model is a simple complementation of a Siamese CNN QA model with a pointwis
 This model is a simple implementation of a Siamese RNN/LSTM/GRU QA model in a pointwise way.
 
+[To this repo](./siamese_rnn)
+
 ##### train model
 
 `python siamese.py --train --model RNN`
 
@@ -60,6 +70,10 @@ All these three models above are based on the vanilla siamese structure. You can
 Given a question, a positive answer and a negative answer, this pairwise model learns to rank the positive answer above the negative one.
 
+Refer to *Applying Deep Learning to Answer Selection: A Study and an Open Task*
+
+[To this repo](./qacnn)
+
 ##### train model
 
 `python qacnn.py --train`
 
@@ -68,11 +82,49 @@ Given a question, a positive answer and a negative answer, this pairwise model c
 `python qacnn.py --test`
 
-### Listwise Style
+### Listwise Style (can also be transformed to pointwise style)
 
-#### Compare-Aggregate model
+#### Decomposable Attention Model
 
-To be done
+Refer to *A Decomposable Attention Model for Natural Language Inference*
+
+[To this repo](./decomposable_att_model)
+
+##### train model
+
+`python decomp_att.py --train`
+
+##### test model
+
+`python decomp_att.py --test`
+
+#### Compare-Aggregate Model with Multi-Compare
+
+Refer to *A Compare-Aggregate Model for Matching Text Sequences*
+
+[To this repo](./seq_match_seq)
+
+##### train model
+
+`python seq_match_seq.py --train`
+
+##### test model
+
+`python seq_match_seq.py --test`
+
+#### BiMPM
+
+Refer to *Bilateral Multi-Perspective Matching for Natural Language Sentences*
+
+[To this repo](./bimpm)
+
+##### train model
+
+`python bimpm.py --train`
+
+##### test model
+
+`python bimpm.py --test`
 
 ## Machine Reading Comprehension
 
@@ -104,6 +156,12 @@ SQuAD, MS MARCO
 To be done
 
+#### QANet
+
+Refer to *QANet: Combining Local Convolution with Global Self-Attention for Reading Comprehension*
+
+[To this repo](./QANet)
+
 ### Answer Selection Style
 
 #### Dataset
 
@@ -112,4 +170,4 @@ RACE dataset
 
 ## Information
 
-For more information, please visit http://skyhigh233.com/blog/2018/04/26/cqa-intro/.
\ No newline at end of file
+For more information, please visit http://skyhigh233.com/blog/2018/04/26/cqa-intro/.
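The pointwise Siamese models above score a single question–answer pair with a contrastive loss over the cosine similarity of the two encodings, while the pairwise QACNN scores (question, positive answer, negative answer) triples with a max-margin ranking loss; both objectives can be seen in the (removed) `code/models.py` further down in this diff. The following is a minimal NumPy sketch of the two objectives, not part of the patch itself; `pos_weight` and `margin` stand in for the repository's `config.pos_weight` and `config.m`, and the default values used here are placeholders.

```
import numpy as np

def contrastive_loss(cos_qa, labels, pos_weight=0.25):
    # Pointwise objective: pull correct Q-A pairs toward cosine 1,
    # push incorrect pairs toward cosine <= 0.
    # pos_weight=0.25 is a placeholder for config.pos_weight.
    l_pos = pos_weight * np.square(1.0 - cos_qa)
    l_neg = np.square(np.maximum(cos_qa, 0.0))
    return np.mean(labels * l_pos + (1.0 - labels) * l_neg)

def pairwise_hinge_loss(cos_q_pos, cos_q_neg, margin=0.1):
    # Pairwise (QACNN-style) objective: the positive answer should beat
    # the negative answer by at least `margin` in cosine similarity.
    # margin=0.1 is a placeholder for config.m.
    return np.sum(np.maximum(0.0, margin - cos_q_pos + cos_q_neg))

# Toy batch of cosine similarities.
cos_qa = np.array([0.9, 0.2])
labels = np.array([1.0, 0.0])
print(contrastive_loss(cos_qa, labels))                        # small loss: both pairs scored correctly
print(pairwise_hinge_loss(np.array([0.9]), np.array([0.2])))   # 0.0: margin already satisfied
```

The listwise models added in this patch (Decomposable Attention, Compare-Aggregate, BiMPM) reuse the same data pipeline but predict a per-candidate relevance score with a softmax cross-entropy head, which is why the README notes they can also be trained in a pointwise fashion.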
diff --git a/bimpm/README.me b/bimpm/README.me new file mode 100755 index 0000000..edc608a --- /dev/null +++ b/bimpm/README.me @@ -0,0 +1,23 @@ +# 复现《Bilateral Multi-Perspective Matching for Natural Language Sentences》中的模型完成问答任务 + +## 准备 + +#### 下载词向量文件[glove](../download.sh)。 + +``` +cd .. +bash download.sh +``` + +#### 预处理wiki数据 + +``` +cd .. +python preprocess_wiki.py +``` + +## 运行 + +``` +bash run.sh +``` diff --git a/bimpm/bimpm.py b/bimpm/bimpm.py new file mode 100755 index 0000000..f135df1 --- /dev/null +++ b/bimpm/bimpm.py @@ -0,0 +1,175 @@ +# -*- encoding:utf8 -*- +import tensorflow as tf +import numpy as np +import os +import sys +from copy import deepcopy +stdout = sys.stdout +reload(sys) +sys.stdout = stdout + +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + +import cPickle as pkl +from utils import * +from models import BiMPM + + +class BiMPMConfig(object): + def __init__(self, vocab_size, embeddings=None): + # 输入问题(句子)长度 + self.max_q_length = 200 + # 输入答案长度 + self.max_a_length = 200 + # 循环数 + self.num_epochs = 100 + # batch大小 + self.batch_size = 128 + # 词表大小 + self.vocab_size = vocab_size + # 词向量大小 + self.embeddings = embeddings + self.embedding_size = 100 + if self.embeddings is not None: + self.embedding_size = embeddings.shape[1] + # keep_prob=1-dropout + self.keep_prob = 0.6 + # 学习率 + self.lr = 0.0003 + self.grad_clip = 1 + + self.reg = 0 + self.mem_dim = 128 + self.cov_dim = 128 + self.filter_sizes = [2, 3, 4, 5] + self.comp_type = 'mul' + + self.cf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) + self.cf.gpu_options.per_process_gpu_memory_fraction = 0.2 + + +def train(train_corpus, config, val_corpus, eval_train_corpus=None): + iterator = Iterator(train_corpus) + + with tf.Session(config=config.cf) as sess: + model = BiMPM(config) + saver = tf.train.Saver() + sess.run(tf.initialize_all_variables()) + for epoch in xrange(config.num_epochs): + count = 0 + for batch_x in iterator.next(config.batch_size, shuffle=True): + batch_qids, batch_q, batch_aids, batch_ap, labels = zip(*batch_x) + batch_q = np.asarray(batch_q) + batch_ap = np.asarray(batch_ap) + labels = np.asarray(labels).astype(np.int32) + _, loss = sess.run([model.train_op, model.total_loss], + feed_dict={model.q:batch_q, + model.a:batch_ap, + model.y:labels, + model.keep_prob:config.keep_prob}) + count += 1 + if count % 10 == 0: + print('[epoch {}, batch {}]Loss:{}'.format(epoch, count, loss)) + saver.save(sess,'{}/my_model'.format(model_path), global_step=epoch) + if eval_train_corpus is not None: + train_res = evaluate(sess, model, eval_train_corpus, config) + print('[train] ' + train_res) + if val_corpus is not None: + val_res = evaluate(sess, model, val_corpus, config) + print('[eval] ' + val_res) + + +def evaluate(sess, model, corpus, config): + iterator = Iterator(corpus) + + count = 0 + total_qids = [] + total_aids = [] + total_pred = [] + total_labels = [] + total_loss = 0. 
+ for batch_x in iterator.next(config.batch_size, shuffle=False): + batch_qids, batch_q, batch_aids, batch_ap, labels = zip(*batch_x) + batch_q = np.asarray(batch_q) + batch_ap = np.asarray(batch_ap) + y_hat, loss = sess.run([model.y_hat, model.total_loss], + feed_dict={model.q:batch_q, + model.a:batch_ap, + model.y:labels, + model.keep_prob:1.}) + y_hat = np.argmax(y_hat, axis=-1) + total_loss += loss + count += 1 + total_qids.append(batch_qids) + total_aids.append(batch_aids) + total_pred.append(y_hat) + total_labels.append(labels) + # print(batch_qids[0], [id2word[_] for _ in batch_q[0]], + # batch_aids[0], [id2word[_] for _ in batch_ap[0]]) + total_qids = np.concatenate(total_qids, axis=0) + total_aids = np.concatenate(total_aids, axis=0) + total_pred = np.concatenate(total_pred, axis=0) + total_labels = np.concatenate(total_labels, axis=0) + MAP, MRR = eval_map_mrr(total_qids, total_aids, total_pred, total_labels) + # print('Eval loss:{}'.format(total_loss / count)) + return 'MAP:{}, MRR:{}'.format(MAP, MRR) + + +def test(corpus, config): + with tf.Session(config=config.cf) as sess: + model = BiMPM(config) + saver = tf.train.Saver() + saver.restore(sess, tf.train.latest_checkpoint(model_path)) + print('[test] ' + evaluate(sess, model, corpus, config)) + + +def main(args): + max_q_length = 25 + max_a_length = 90 + + with open(os.path.join(processed_data_path, 'pointwise_corpus.pkl'), 'r') as fr: + train_corpus, val_corpus, test_corpus = pkl.load(fr) + + embeddings = build_embedding(embedding_path, word2id) + + train_qids, train_q, train_aids, train_ap, train_labels = zip(*train_corpus) + train_q = padding(train_q, max_q_length) + train_ap = padding(train_ap, max_a_length) + train_corpus = zip(train_qids, train_q, train_aids, train_ap, train_labels) + + + val_qids, val_q, val_aids, val_ap, labels = zip(*val_corpus) + val_q = padding(val_q, max_q_length) + val_ap = padding(val_ap, max_a_length) + val_corpus = zip(val_qids, val_q, val_aids, val_ap, labels) + + + test_qids, test_q, test_aids, test_ap, labels = zip(*test_corpus) + test_q = padding(test_q, max_q_length) + test_ap = padding(test_ap, max_a_length) + test_corpus = zip(test_qids, test_q, test_aids, test_ap, labels) + + config = BiMPMConfig(max(word2id.values()) + 1, embeddings=embeddings) + config.max_q_length = max_q_length + config.max_a_length = max_a_length + if args.train: + train(deepcopy(train_corpus), config, val_corpus, deepcopy(train_corpus)) + elif args.test: + test(test_corpus, config) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--train", help="whether to train", action='store_true') + parser.add_argument("--test", help="whether to test", action='store_true') + args = parser.parse_args() + + raw_data_path = '../data/WikiQA/raw' + processed_data_path = '../data/WikiQA/processed' + embedding_path = '../data/embedding/glove.6B.300d.txt' + model_path = 'models' + + with open(os.path.join(processed_data_path, 'vocab.pkl'), 'r') as fr: + word2id, id2word = pkl.load(fr) + main(args) diff --git a/bimpm/models.py b/bimpm/models.py new file mode 100755 index 0000000..804d6a1 --- /dev/null +++ b/bimpm/models.py @@ -0,0 +1,754 @@ +# -*- encoding:utf-8 -*- +import tensorflow as tf +from tensorflow.python.ops import nn_ops +import numpy as np + + +class BiMPM(object): + def __init__(self, config): + self.config = config + # 输入 + self.add_placeholders() + # [batch_size, sequence_size, embed_size] + q_embed, a_embed = self.add_embeddings() + # 上下文编码 + q_encode, 
a_encode = self.context_encoding(q_embed, a_embed) + # attention层 + h_a = self.attend(q_encode, a_encode) + # compose层 + t = self.compare(a_encode, h_a) + # aggregate层 + agg_out = self.aggregate(t) + pred = self.soft_out(agg_out) + # 预测概率分布与损失 + self.y_hat, self.total_loss = self.add_loss_op(pred) + # 训练节点 + self.train_op = self.add_train_op(self.total_loss) + + def add_placeholders(self): + # 问题 + self.q = tf.placeholder(tf.int32, + shape=[None, self.config.max_q_length], + name='Question') + # 回答 + self.a = tf.placeholder(tf.int32, + shape=[None, self.config.max_a_length], + name='Ans') + self.y = tf.placeholder(tf.int32, shape=[None, ], name='label') + # drop_out + self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') + self.batch_size = tf.shape(self.q)[0] + + def add_embeddings(self): + with tf.variable_scope('embedding'): + if self.config.embeddings is not None: + embeddings = tf.Variable(self.config.embeddings, + name="embeddings", trainable=False) + else: + embeddings = tf.get_variable('embeddings', + shape=[self.config.vocab_size, self.config.embedding_size], + initializer=tf.uniform_unit_scaling_initializer()) + q_embed = tf.nn.embedding_lookup(embeddings, self.q) + a_embed = tf.nn.embedding_lookup(embeddings, self.a) + return q_embed, a_embed + + def context_encoding(self, q, a): + """ + q: [batch_size, q_length, embedding_dim] + a: [batch_size, a_length, embedding_dim] + """ + with tf.variable_scope('context_encoding') as scope: + q_encode = self.proj_layer(q, 'proj_layer', reuse=None) + a_encode = self.proj_layer(a, 'proj_layer', reuse=True) + return q_encode, a_encode + + + def attend(self, q, a): + """ + q: [batch_size, q_length, represent_dim] + a: [batch_size, a_length, represent_dim] + """ + q_proj = self.mlp(q, self.config.mem_dim, 1, None, + 'att_q_proj', reuse=None) + # [batch_size, q_length, a_length] + att_inner_product = tf.matmul(q_proj, tf.transpose(a, (0, 2, 1))) + # [batch_size, a_length, q_length] + q_weights = tf.nn.softmax( + tf.transpose( + att_inner_product, (0, 2, 1)), dim=-1) + output_a = tf.matmul(q_weights, q) + return output_a + + def compare(self, a, h_a): + """ + a: [batch_size, a_length, mem_dim] + a_att: [batch_size, a_length, mem_dim] + """ + if self.config.comp_type == 'mul': + out = a * h_a + else: + raise ValueError('{} method is not implemented!'.format( + self.config.comp_type)) + + return out + + def aggregate(self, t): + """ + t: [batch_size, a_length, mem_dim] + """ + pool_t = [] + for i, filter_size in enumerate(self.config.filter_sizes): + with tf.variable_scope('filter{}'.format(filter_size)): + # 卷积 + out_t = tf.layers.Conv1D(self.config.cov_dim, + filter_size, + strides=1, + padding='valid', + activation=tf.nn.relu, name='conv')(t) + # 池化 + out_t = tf.layers.MaxPooling1D( + self.config.max_a_length - filter_size + 1, + 1, name='max_pool')(out_t) + out_t = tf.reshape(out_t, + (tf.shape(out_t)[0], out_t.get_shape().as_list()[2])) + pool_t.append(out_t) + # [batch_size, n * mem_dim] + out = tf.concat(pool_t, axis=-1) + # [batch_size, mem_dim] + out = self.mlp(out, self.config.mem_dim, 1, + tf.nn.tanh, 'pre_out', use_dropout=False, reuse=None) + return out + + def soft_out(self, x): + out = self.mlp(x, 2, 1, None, + 'soft_out', use_dropout=False, reuse=None) + return out + + def mlp(self, bottom, size, layer_num, activation, name, use_dropout=True, reuse=None): + """ + bottom: 上层输入 + size: 神经元大小 + layer_num: 神经网络层数 + name: mlp的名称 + reuse: 是否复用层 + """ + now = bottom + if use_dropout: + now = tf.nn.dropout(now, 
keep_prob=self.keep_prob) + for i in xrange(layer_num): + now = tf.layers.dense(now, size, + activation=activation, + name=name + '_{}'.format(i), + reuse=reuse) + return now + + def proj_layer(self, seq, name, reuse=None): + out1 = self.mlp(seq, self.config.mem_dim, 1, + tf.nn.sigmoid, name + '_sigmoid', reuse=reuse) + out2 = self.mlp(seq, self.config.mem_dim, 1, + tf.nn.tanh, name + '_tanh', reuse=reuse) + out = out1 * out2 + return out + + def add_loss_op(self, pred): + """ + 损失节点 + """ + # [batch_size, 2] + y_hat = tf.nn.softmax(pred, dim=-1) + loss = tf.reduce_mean( + tf.losses.sparse_softmax_cross_entropy(self.y, pred)) + tf.add_to_collection('total_loss', loss) + total_loss = tf.add_n(tf.get_collection('total_loss')) + return y_hat, total_loss + + def add_train_op(self, loss): + """ + 训练节点 + """ + with tf.name_scope('train_op'): + # 记录训练步骤 + self.global_step = tf.Variable(0, + name='global_step', trainable=False) + opt = tf.train.AdamOptimizer(self.config.lr) + # train_op = opt.minimize(loss, self.global_step) + train_variables = tf.trainable_variables() + grads_vars = opt.compute_gradients(loss, train_variables) + for i, (grad, var) in enumerate(grads_vars): + grads_vars[i] = ( + tf.clip_by_norm(grad, self.config.grad_clip), var) + train_op = opt.apply_gradients( + grads_vars, global_step=self.global_step) + return train_op + + + + + + +# 以下代码参考https://github.com/zhiguowang/BiMPM/blob/master/src/layer_utils.py + +def my_lstm_layer(input_reps, lstm_dim, input_lengths=None, scope_name=None, reuse=False, is_training=True, + dropout_rate=0.2, use_cudnn=True): + ''' + :param inputs: [batch_size, seq_len, feature_dim] + :param lstm_dim: + :param scope_name: + :param reuse: + :param is_training: + :param dropout_rate: + :return: + ''' + input_reps = dropout_layer(input_reps, dropout_rate, is_training=is_training) + with tf.variable_scope(scope_name, reuse=reuse): + if use_cudnn: + inputs = tf.transpose(input_reps, [1, 0, 2]) + lstm = tf.contrib.cudnn_rnn.CudnnLSTM(1, lstm_dim, direction="bidirectional", + name="{}_cudnn_bi_lstm".format(scope_name), dropout=dropout_rate if is_training else 0) + outputs, _ = lstm(inputs) + outputs = tf.transpose(outputs, [1, 0, 2]) + f_rep = outputs[:, :, 0:lstm_dim] + b_rep = outputs[:, :, lstm_dim:2*lstm_dim] + else: + context_lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(lstm_dim) + context_lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(lstm_dim) + if is_training: + context_lstm_cell_fw = tf.nn.rnn_cell.DropoutWrapper(context_lstm_cell_fw, output_keep_prob=(1 - dropout_rate)) + context_lstm_cell_bw = tf.nn.rnn_cell.DropoutWrapper(context_lstm_cell_bw, output_keep_prob=(1 - dropout_rate)) + context_lstm_cell_fw = tf.nn.rnn_cell.MultiRNNCell([context_lstm_cell_fw]) + context_lstm_cell_bw = tf.nn.rnn_cell.MultiRNNCell([context_lstm_cell_bw]) + + (f_rep, b_rep), _ = tf.nn.bidirectional_dynamic_rnn( + context_lstm_cell_fw, context_lstm_cell_bw, input_reps, dtype=tf.float32, + sequence_length=input_lengths) # [batch_size, question_len, context_lstm_dim] + outputs = tf.concat(axis=2, values=[f_rep, b_rep]) + return (f_rep,b_rep, outputs) + +def dropout_layer(input_reps, dropout_rate, is_training=True): + if is_training: + output_repr = tf.nn.dropout(input_reps, (1 - dropout_rate)) + else: + output_repr = input_reps + return output_repr + +def cosine_distance(y1,y2, cosine_norm=True, eps=1e-6): + # cosine_norm = True + # y1 [....,a, 1, d] + # y2 [....,1, b, d] + cosine_numerator = tf.reduce_sum(tf.multiply(y1, y2), axis=-1) + if not cosine_norm: + return 
tf.tanh(cosine_numerator) + y1_norm = tf.sqrt(tf.maximum(tf.reduce_sum(tf.square(y1), axis=-1), eps)) + y2_norm = tf.sqrt(tf.maximum(tf.reduce_sum(tf.square(y2), axis=-1), eps)) + return cosine_numerator / y1_norm / y2_norm + +def euclidean_distance(y1, y2, eps=1e-6): + distance = tf.sqrt(tf.maximum(tf.reduce_sum(tf.square(y1 - y2), axis=-1), eps)) + return distance + +def cross_entropy(logits, truth, mask=None): + # logits: [batch_size, passage_len] + # truth: [batch_size, passage_len] + # mask: [batch_size, passage_len] + if mask is not None: logits = tf.multiply(logits, mask) + xdev = tf.subtract(logits, tf.expand_dims(tf.reduce_max(logits, 1), -1)) + log_predictions = tf.subtract(xdev, tf.expand_dims(tf.log(tf.reduce_sum(tf.exp(xdev),-1)),-1)) + result = tf.multiply(truth, log_predictions) # [batch_size, passage_len] + if mask is not None: result = tf.multiply(result, mask) # [batch_size, passage_len] + return tf.multiply(-1.0,tf.reduce_sum(result, -1)) # [batch_size] + +def projection_layer(in_val, input_size, output_size, activation_func=tf.tanh, scope=None): + # in_val: [batch_size, passage_len, dim] + input_shape = tf.shape(in_val) + batch_size = input_shape[0] + passage_len = input_shape[1] +# feat_dim = input_shape[2] + in_val = tf.reshape(in_val, [batch_size * passage_len, input_size]) + with tf.variable_scope(scope or "projection_layer"): + full_w = tf.get_variable("full_w", [input_size, output_size], dtype=tf.float32) + full_b = tf.get_variable("full_b", [output_size], dtype=tf.float32) + outputs = activation_func(tf.nn.xw_plus_b(in_val, full_w, full_b)) + outputs = tf.reshape(outputs, [batch_size, passage_len, output_size]) + return outputs # [batch_size, passage_len, output_size] + +def highway_layer(in_val, output_size, activation_func=tf.tanh, scope=None): + # in_val: [batch_size, passage_len, dim] + input_shape = tf.shape(in_val) + batch_size = input_shape[0] + passage_len = input_shape[1] +# feat_dim = input_shape[2] + in_val = tf.reshape(in_val, [batch_size * passage_len, output_size]) + with tf.variable_scope(scope or "highway_layer"): + highway_w = tf.get_variable("highway_w", [output_size, output_size], dtype=tf.float32) + highway_b = tf.get_variable("highway_b", [output_size], dtype=tf.float32) + full_w = tf.get_variable("full_w", [output_size, output_size], dtype=tf.float32) + full_b = tf.get_variable("full_b", [output_size], dtype=tf.float32) + trans = activation_func(tf.nn.xw_plus_b(in_val, full_w, full_b)) + gate = tf.nn.sigmoid(tf.nn.xw_plus_b(in_val, highway_w, highway_b)) + outputs = tf.add(tf.multiply(trans, gate), tf.multiply(in_val, tf.subtract(1.0, gate)), "y") + outputs = tf.reshape(outputs, [batch_size, passage_len, output_size]) + return outputs + +def multi_highway_layer(in_val, output_size, num_layers, activation_func=tf.tanh, scope_name=None, reuse=False): + with tf.variable_scope(scope_name, reuse=reuse): + for i in xrange(num_layers): + cur_scope_name = scope_name + "-{}".format(i) + in_val = highway_layer(in_val, output_size,activation_func=activation_func, scope=cur_scope_name) + return in_val + +def collect_representation(representation, positions): + # representation: [batch_size, node_num, feature_dim] + # positions: [batch_size, neigh_num] + return collect_probs(representation, positions) + +def collect_final_step_of_lstm(lstm_representation, lengths): + # lstm_representation: [batch_size, passsage_length, dim] + # lengths: [batch_size] + lengths = tf.maximum(lengths, tf.zeros_like(lengths, dtype=tf.int32)) + + batch_size = 
tf.shape(lengths)[0] + batch_nums = tf.range(0, limit=batch_size) # shape (batch_size) + indices = tf.stack((batch_nums, lengths), axis=1) # shape (batch_size, 2) + result = tf.gather_nd(lstm_representation, indices, name='last-forwar-lstm') + return result # [batch_size, dim] + +def collect_probs(probs, positions): + # probs [batch_size, chunks_size] + # positions [batch_size, pair_size] + batch_size = tf.shape(probs)[0] + pair_size = tf.shape(positions)[1] + batch_nums = tf.range(0, limit=batch_size) # shape (batch_size) + batch_nums = tf.reshape(batch_nums, shape=[-1, 1]) # [batch_size, 1] + batch_nums = tf.tile(batch_nums, multiples=[1, pair_size]) # [batch_size, pair_size] + + indices = tf.stack((batch_nums, positions), axis=2) # shape (batch_size, pair_size, 2) + pair_probs = tf.gather_nd(probs, indices) + # pair_probs = tf.reshape(pair_probs, shape=[batch_size, pair_size]) + return pair_probs + + +def calcuate_attention(in_value_1, in_value_2, feature_dim1, feature_dim2, scope_name='att', + att_type='symmetric', att_dim=20, remove_diagnoal=False, mask1=None, mask2=None, is_training=False, dropout_rate=0.2): + input_shape = tf.shape(in_value_1) + batch_size = input_shape[0] + len_1 = input_shape[1] + len_2 = tf.shape(in_value_2)[1] + + in_value_1 = dropout_layer(in_value_1, dropout_rate, is_training=is_training) + in_value_2 = dropout_layer(in_value_2, dropout_rate, is_training=is_training) + with tf.variable_scope(scope_name): + # calculate attention ==> a: [batch_size, len_1, len_2] + atten_w1 = tf.get_variable("atten_w1", [feature_dim1, att_dim], dtype=tf.float32) + if feature_dim1 == feature_dim2: atten_w2 = atten_w1 + else: atten_w2 = tf.get_variable("atten_w2", [feature_dim2, att_dim], dtype=tf.float32) + atten_value_1 = tf.matmul(tf.reshape(in_value_1, [batch_size * len_1, feature_dim1]), atten_w1) # [batch_size*len_1, feature_dim] + atten_value_1 = tf.reshape(atten_value_1, [batch_size, len_1, att_dim]) + atten_value_2 = tf.matmul(tf.reshape(in_value_2, [batch_size * len_2, feature_dim2]), atten_w2) # [batch_size*len_2, feature_dim] + atten_value_2 = tf.reshape(atten_value_2, [batch_size, len_2, att_dim]) + + + if att_type == 'additive': + atten_b = tf.get_variable("atten_b", [att_dim], dtype=tf.float32) + atten_v = tf.get_variable("atten_v", [1, att_dim], dtype=tf.float32) + atten_value_1 = tf.expand_dims(atten_value_1, axis=2, name="atten_value_1") # [batch_size, len_1, 'x', feature_dim] + atten_value_2 = tf.expand_dims(atten_value_2, axis=1, name="atten_value_2") # [batch_size, 'x', len_2, feature_dim] + atten_value = atten_value_1 + atten_value_2 # + tf.expand_dims(tf.expand_dims(tf.expand_dims(atten_b, axis=0), axis=0), axis=0) + atten_value = nn_ops.bias_add(atten_value, atten_b) + atten_value = tf.tanh(atten_value) # [batch_size, len_1, len_2, feature_dim] + atten_value = tf.reshape(atten_value, [-1, att_dim]) * atten_v # tf.expand_dims(atten_v, axis=0) # [batch_size*len_1*len_2, feature_dim] + atten_value = tf.reduce_sum(atten_value, axis=-1) + atten_value = tf.reshape(atten_value, [batch_size, len_1, len_2]) + else: + atten_value_1 = tf.tanh(atten_value_1) + # atten_value_1 = tf.nn.relu(atten_value_1) + atten_value_2 = tf.tanh(atten_value_2) + # atten_value_2 = tf.nn.relu(atten_value_2) + diagnoal_params = tf.get_variable("diagnoal_params", [1, 1, att_dim], dtype=tf.float32) + atten_value_1 = atten_value_1 * diagnoal_params + atten_value = tf.matmul(atten_value_1, atten_value_2, transpose_b=True) # [batch_size, len_1, len_2] + + # normalize + if remove_diagnoal: + 
diagnoal = tf.ones([len_1], tf.float32) # [len1] + diagnoal = 1.0 - tf.diag(diagnoal) # [len1, len1] + diagnoal = tf.expand_dims(diagnoal, axis=0) # ['x', len1, len1] + atten_value = atten_value * diagnoal + if mask1 is not None: atten_value = tf.multiply(atten_value, tf.expand_dims(mask1, axis=-1)) + if mask2 is not None: atten_value = tf.multiply(atten_value, tf.expand_dims(mask2, axis=1)) + atten_value = tf.nn.softmax(atten_value, name='atten_value') # [batch_size, len_1, len_2] + if remove_diagnoal: atten_value = atten_value * diagnoal + if mask1 is not None: atten_value = tf.multiply(atten_value, tf.expand_dims(mask1, axis=-1)) + if mask2 is not None: atten_value = tf.multiply(atten_value, tf.expand_dims(mask2, axis=1)) + + return atten_value + +def weighted_sum(atten_scores, in_values): + ''' + :param atten_scores: # [batch_size, len1, len2] + :param in_values: [batch_size, len2, dim] + :return: + ''' + return tf.matmul(atten_scores, in_values) + +def cal_relevancy_matrix(in_question_repres, in_passage_repres): + in_question_repres_tmp = tf.expand_dims(in_question_repres, 1) # [batch_size, 1, question_len, dim] + in_passage_repres_tmp = tf.expand_dims(in_passage_repres, 2) # [batch_size, passage_len, 1, dim] + relevancy_matrix = cosine_distance(in_question_repres_tmp,in_passage_repres_tmp) # [batch_size, passage_len, question_len] + return relevancy_matrix + +def mask_relevancy_matrix(relevancy_matrix, question_mask, passage_mask): + # relevancy_matrix: [batch_size, passage_len, question_len] + # question_mask: [batch_size, question_len] + # passage_mask: [batch_size, passsage_len] + if question_mask is not None: + relevancy_matrix = tf.multiply(relevancy_matrix, tf.expand_dims(question_mask, 1)) + relevancy_matrix = tf.multiply(relevancy_matrix, tf.expand_dims(passage_mask, 2)) + return relevancy_matrix + +def compute_gradients(tensor, var_list): + grads = tf.gradients(tensor, var_list) + return [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(var_list, grads)] + + + + + +# 以下代码参考https://github.com/zhiguowang/BiMPM/blob/master/src/match_utils.py + +eps = 1e-6 +def cosine_distance(y1,y2): + # y1 [....,a, 1, d] + # y2 [....,1, b, d] + cosine_numerator = tf.reduce_sum(tf.multiply(y1, y2), axis=-1) + y1_norm = tf.sqrt(tf.maximum(tf.reduce_sum(tf.square(y1), axis=-1), eps)) + y2_norm = tf.sqrt(tf.maximum(tf.reduce_sum(tf.square(y2), axis=-1), eps)) + return cosine_numerator / y1_norm / y2_norm + +def cal_relevancy_matrix(in_question_repres, in_passage_repres): + in_question_repres_tmp = tf.expand_dims(in_question_repres, 1) # [batch_size, 1, question_len, dim] + in_passage_repres_tmp = tf.expand_dims(in_passage_repres, 2) # [batch_size, passage_len, 1, dim] + relevancy_matrix = cosine_distance(in_question_repres_tmp,in_passage_repres_tmp) # [batch_size, passage_len, question_len] + return relevancy_matrix + +def mask_relevancy_matrix(relevancy_matrix, question_mask, passage_mask): + # relevancy_matrix: [batch_size, passage_len, question_len] + # question_mask: [batch_size, question_len] + # passage_mask: [batch_size, passsage_len] + relevancy_matrix = tf.multiply(relevancy_matrix, tf.expand_dims(question_mask, 1)) + relevancy_matrix = tf.multiply(relevancy_matrix, tf.expand_dims(passage_mask, 2)) + return relevancy_matrix + +def multi_perspective_expand_for_3D(in_tensor, decompose_params): + in_tensor = tf.expand_dims(in_tensor, axis=2) #[batch_size, passage_len, 'x', dim] + decompose_params = tf.expand_dims(tf.expand_dims(decompose_params, axis=0), axis=0) # [1, 1, 
decompse_dim, dim] + return tf.multiply(in_tensor, decompose_params)#[batch_size, passage_len, decompse_dim, dim] + +def multi_perspective_expand_for_2D(in_tensor, decompose_params): + in_tensor = tf.expand_dims(in_tensor, axis=1) #[batch_size, 'x', dim] + decompose_params = tf.expand_dims(decompose_params, axis=0) # [1, decompse_dim, dim] + return tf.multiply(in_tensor, decompose_params) # [batch_size, decompse_dim, dim] + + +def cal_maxpooling_matching(passage_rep, question_rep, decompose_params): + # passage_representation: [batch_size, passage_len, dim] + # qusetion_representation: [batch_size, question_len, dim] + # decompose_params: [decompose_dim, dim] + + def singel_instance(x): + p = x[0] + q = x[1] + # p: [pasasge_len, dim], q: [question_len, dim] + p = multi_perspective_expand_for_2D(p, decompose_params) # [pasasge_len, decompose_dim, dim] + q = multi_perspective_expand_for_2D(q, decompose_params) # [question_len, decompose_dim, dim] + p = tf.expand_dims(p, 1) # [pasasge_len, 1, decompose_dim, dim] + q = tf.expand_dims(q, 0) # [1, question_len, decompose_dim, dim] + return cosine_distance(p, q) # [passage_len, question_len, decompose] + elems = (passage_rep, question_rep) + matching_matrix = tf.map_fn(singel_instance, elems, dtype=tf.float32) # [batch_size, passage_len, question_len, decompse_dim] + return tf.concat(axis=2, values=[tf.reduce_max(matching_matrix, axis=2), tf.reduce_mean(matching_matrix, axis=2)])# [batch_size, passage_len, 2*decompse_dim] + +def cross_entropy(logits, truth, mask): + # logits: [batch_size, passage_len] + # truth: [batch_size, passage_len] + # mask: [batch_size, passage_len] + +# xdev = x - x.max() +# return xdev - T.log(T.sum(T.exp(xdev))) + logits = tf.multiply(logits, mask) + xdev = tf.sub(logits, tf.expand_dims(tf.reduce_max(logits, 1), -1)) + log_predictions = tf.sub(xdev, tf.expand_dims(tf.log(tf.reduce_sum(tf.exp(xdev),-1)),-1)) +# return -T.sum(targets * log_predictions) + result = tf.multiply(tf.multiply(truth, log_predictions), mask) # [batch_size, passage_len] + return tf.multiply(-1.0,tf.reduce_sum(result, -1)) # [batch_size] + +def highway_layer(in_val, output_size, scope=None): + # in_val: [batch_size, passage_len, dim] + input_shape = tf.shape(in_val) + batch_size = input_shape[0] + passage_len = input_shape[1] +# feat_dim = input_shape[2] + in_val = tf.reshape(in_val, [batch_size * passage_len, output_size]) + with tf.variable_scope(scope or "highway_layer"): + highway_w = tf.get_variable("highway_w", [output_size, output_size], dtype=tf.float32) + highway_b = tf.get_variable("highway_b", [output_size], dtype=tf.float32) + full_w = tf.get_variable("full_w", [output_size, output_size], dtype=tf.float32) + full_b = tf.get_variable("full_b", [output_size], dtype=tf.float32) + trans = tf.nn.tanh(tf.nn.xw_plus_b(in_val, full_w, full_b)) + gate = tf.nn.sigmoid(tf.nn.xw_plus_b(in_val, highway_w, highway_b)) + outputs = trans * gate + in_val * (1.0 - gate) + outputs = tf.reshape(outputs, [batch_size, passage_len, output_size]) + return outputs + +def multi_highway_layer(in_val, output_size, num_layers, scope=None): + scope_name = 'highway_layer' + if scope is not None: scope_name = scope + for i in xrange(num_layers): + cur_scope_name = scope_name + "-{}".format(i) + in_val = highway_layer(in_val, output_size, scope=cur_scope_name) + return in_val + +def cal_max_question_representation(question_representation, atten_scores): + atten_positions = tf.argmax(atten_scores, axis=2, output_type=tf.int32) # [batch_size, passage_len] + 
max_question_reps = layer_utils.collect_representation(question_representation, atten_positions) + return max_question_reps + +def multi_perspective_match(feature_dim, repres1, repres2, is_training=True, dropout_rate=0.2, + options=None, scope_name='mp-match', reuse=False): + ''' + :param repres1: [batch_size, len, feature_dim] + :param repres2: [batch_size, len, feature_dim] + :return: + ''' + input_shape = tf.shape(repres1) + batch_size = input_shape[0] + seq_length = input_shape[1] + matching_result = [] + with tf.variable_scope(scope_name, reuse=reuse): + match_dim = 0 + if options.with_cosine: + cosine_value = layer_utils.cosine_distance(repres1, repres2, cosine_norm=False) + cosine_value = tf.reshape(cosine_value, [batch_size, seq_length, 1]) + matching_result.append(cosine_value) + match_dim += 1 + + if options.with_mp_cosine: + mp_cosine_params = tf.get_variable("mp_cosine", shape=[options.cosine_MP_dim, feature_dim], dtype=tf.float32) + mp_cosine_params = tf.expand_dims(mp_cosine_params, axis=0) + mp_cosine_params = tf.expand_dims(mp_cosine_params, axis=0) + repres1_flat = tf.expand_dims(repres1, axis=2) + repres2_flat = tf.expand_dims(repres2, axis=2) + mp_cosine_matching = layer_utils.cosine_distance(tf.multiply(repres1_flat, mp_cosine_params), + repres2_flat,cosine_norm=False) + matching_result.append(mp_cosine_matching) + match_dim += options.cosine_MP_dim + + matching_result = tf.concat(axis=2, values=matching_result) + return (matching_result, match_dim) + + +def match_passage_with_question(passage_reps, question_reps, passage_mask, question_mask, passage_lengths, question_lengths, + context_lstm_dim, scope=None, + with_full_match=True, with_maxpool_match=True, with_attentive_match=True, with_max_attentive_match=True, + is_training=True, options=None, dropout_rate=0, forward=True): + passage_reps = tf.multiply(passage_reps, tf.expand_dims(passage_mask,-1)) + question_reps = tf.multiply(question_reps, tf.expand_dims(question_mask,-1)) + all_question_aware_representatins = [] + dim = 0 + with tf.variable_scope(scope or "match_passage_with_question"): + relevancy_matrix = cal_relevancy_matrix(question_reps, passage_reps) + relevancy_matrix = mask_relevancy_matrix(relevancy_matrix, question_mask, passage_mask) + # relevancy_matrix = layer_utils.calcuate_attention(passage_reps, question_reps, context_lstm_dim, context_lstm_dim, + # scope_name="fw_attention", att_type=options.att_type, att_dim=options.att_dim, + # remove_diagnoal=False, mask1=passage_mask, mask2=question_mask, is_training=is_training, dropout_rate=dropout_rate) + + all_question_aware_representatins.append(tf.reduce_max(relevancy_matrix, axis=2,keep_dims=True)) + all_question_aware_representatins.append(tf.reduce_mean(relevancy_matrix, axis=2,keep_dims=True)) + dim += 2 + if with_full_match: + if forward: + question_full_rep = layer_utils.collect_final_step_of_lstm(question_reps, question_lengths - 1) + else: + question_full_rep = question_reps[:,0,:] + + passage_len = tf.shape(passage_reps)[1] + question_full_rep = tf.expand_dims(question_full_rep, axis=1) + question_full_rep = tf.tile(question_full_rep, [1, passage_len, 1]) # [batch_size, pasasge_len, feature_dim] + + (attentive_rep, match_dim) = multi_perspective_match(context_lstm_dim, + passage_reps, question_full_rep, is_training=is_training, dropout_rate=options.dropout_rate, + options=options, scope_name='mp-match-full-match') + all_question_aware_representatins.append(attentive_rep) + dim += match_dim + + if with_maxpool_match: + maxpooling_decomp_params = 
tf.get_variable("maxpooling_matching_decomp", + shape=[options.cosine_MP_dim, context_lstm_dim], dtype=tf.float32) + maxpooling_rep = cal_maxpooling_matching(passage_reps, question_reps, maxpooling_decomp_params) + all_question_aware_representatins.append(maxpooling_rep) + dim += 2*options.cosine_MP_dim + + if with_attentive_match: + atten_scores = layer_utils.calcuate_attention(passage_reps, question_reps, context_lstm_dim, context_lstm_dim, + scope_name="attention", att_type=options.att_type, att_dim=options.att_dim, + remove_diagnoal=False, mask1=passage_mask, mask2=question_mask, is_training=is_training, dropout_rate=dropout_rate) + att_question_contexts = tf.matmul(atten_scores, question_reps) + (attentive_rep, match_dim) = multi_perspective_match(context_lstm_dim, + passage_reps, att_question_contexts, is_training=is_training, dropout_rate=options.dropout_rate, + options=options, scope_name='mp-match-att_question') + all_question_aware_representatins.append(attentive_rep) + dim += match_dim + + if with_max_attentive_match: + max_att = cal_max_question_representation(question_reps, relevancy_matrix) + (max_attentive_rep, match_dim) = multi_perspective_match(context_lstm_dim, + passage_reps, max_att, is_training=is_training, dropout_rate=options.dropout_rate, + options=options, scope_name='mp-match-max-att') + all_question_aware_representatins.append(max_attentive_rep) + dim += match_dim + + all_question_aware_representatins = tf.concat(axis=2, values=all_question_aware_representatins) + return (all_question_aware_representatins, dim) + +def bilateral_match_func(in_question_repres, in_passage_repres, + question_lengths, passage_lengths, question_mask, passage_mask, input_dim, is_training, options=None): + + question_aware_representatins = [] + question_aware_dim = 0 + passage_aware_representatins = [] + passage_aware_dim = 0 + + # ====word level matching====== + (match_reps, match_dim) = match_passage_with_question(in_passage_repres, in_question_repres, passage_mask, question_mask, passage_lengths, + question_lengths, input_dim, scope="word_match_forward", + with_full_match=False, with_maxpool_match=options.with_maxpool_match, + with_attentive_match=options.with_attentive_match, + with_max_attentive_match=options.with_max_attentive_match, + is_training=is_training, options=options, dropout_rate=options.dropout_rate, forward=True) + question_aware_representatins.append(match_reps) + question_aware_dim += match_dim + + (match_reps, match_dim) = match_passage_with_question(in_question_repres, in_passage_repres, question_mask, passage_mask, question_lengths, + passage_lengths, input_dim, scope="word_match_backward", + with_full_match=False, with_maxpool_match=options.with_maxpool_match, + with_attentive_match=options.with_attentive_match, + with_max_attentive_match=options.with_max_attentive_match, + is_training=is_training, options=options, dropout_rate=options.dropout_rate, forward=False) + passage_aware_representatins.append(match_reps) + passage_aware_dim += match_dim + + with tf.variable_scope('context_MP_matching'): + for i in xrange(options.context_layer_num): # support multiple context layer + with tf.variable_scope('layer-{}'.format(i)): + # contextual lstm for both passage and question + in_question_repres = tf.multiply(in_question_repres, tf.expand_dims(question_mask, axis=-1)) + in_passage_repres = tf.multiply(in_passage_repres, tf.expand_dims(passage_mask, axis=-1)) + (question_context_representation_fw, question_context_representation_bw, + in_question_repres) = 
layer_utils.my_lstm_layer( + in_question_repres, options.context_lstm_dim, input_lengths= question_lengths,scope_name="context_represent", + reuse=False, is_training=is_training, dropout_rate=options.dropout_rate, use_cudnn=options.use_cudnn) + (passage_context_representation_fw, passage_context_representation_bw, + in_passage_repres) = layer_utils.my_lstm_layer( + in_passage_repres, options.context_lstm_dim, input_lengths=passage_lengths, scope_name="context_represent", + reuse=True, is_training=is_training, dropout_rate=options.dropout_rate, use_cudnn=options.use_cudnn) + + # Multi-perspective matching + with tf.variable_scope('left_MP_matching'): + (match_reps, match_dim) = match_passage_with_question(passage_context_representation_fw, + question_context_representation_fw, passage_mask, question_mask, passage_lengths, + question_lengths, options.context_lstm_dim, scope="forward_match", + with_full_match=options.with_full_match, with_maxpool_match=options.with_maxpool_match, + with_attentive_match=options.with_attentive_match, + with_max_attentive_match=options.with_max_attentive_match, + is_training=is_training, options=options, dropout_rate=options.dropout_rate, forward=True) + question_aware_representatins.append(match_reps) + question_aware_dim += match_dim + (match_reps, match_dim) = match_passage_with_question(passage_context_representation_bw, + question_context_representation_bw, passage_mask, question_mask, passage_lengths, + question_lengths, options.context_lstm_dim, scope="backward_match", + with_full_match=options.with_full_match, with_maxpool_match=options.with_maxpool_match, + with_attentive_match=options.with_attentive_match, + with_max_attentive_match=options.with_max_attentive_match, + is_training=is_training, options=options, dropout_rate=options.dropout_rate, forward=False) + question_aware_representatins.append(match_reps) + question_aware_dim += match_dim + + with tf.variable_scope('right_MP_matching'): + (match_reps, match_dim) = match_passage_with_question(question_context_representation_fw, + passage_context_representation_fw, question_mask, passage_mask, question_lengths, + passage_lengths, options.context_lstm_dim, scope="forward_match", + with_full_match=options.with_full_match, with_maxpool_match=options.with_maxpool_match, + with_attentive_match=options.with_attentive_match, + with_max_attentive_match=options.with_max_attentive_match, + is_training=is_training, options=options, dropout_rate=options.dropout_rate, forward=True) + passage_aware_representatins.append(match_reps) + passage_aware_dim += match_dim + (match_reps, match_dim) = match_passage_with_question(question_context_representation_bw, + passage_context_representation_bw, question_mask, passage_mask, question_lengths, + passage_lengths, options.context_lstm_dim, scope="backward_match", + with_full_match=options.with_full_match, with_maxpool_match=options.with_maxpool_match, + with_attentive_match=options.with_attentive_match, + with_max_attentive_match=options.with_max_attentive_match, + is_training=is_training, options=options, dropout_rate=options.dropout_rate, forward=False) + passage_aware_representatins.append(match_reps) + passage_aware_dim += match_dim + + question_aware_representatins = tf.concat(axis=2, values=question_aware_representatins) # [batch_size, passage_len, question_aware_dim] + passage_aware_representatins = tf.concat(axis=2, values=passage_aware_representatins) # [batch_size, question_len, question_aware_dim] + + if is_training: + question_aware_representatins = 
tf.nn.dropout(question_aware_representatins, (1 - options.dropout_rate)) + passage_aware_representatins = tf.nn.dropout(passage_aware_representatins, (1 - options.dropout_rate)) + + # ======Highway layer====== + if options.with_match_highway: + with tf.variable_scope("left_matching_highway"): + question_aware_representatins = multi_highway_layer(question_aware_representatins, question_aware_dim, + options.highway_layer_num) + with tf.variable_scope("right_matching_highway"): + passage_aware_representatins = multi_highway_layer(passage_aware_representatins, passage_aware_dim, + options.highway_layer_num) + + #========Aggregation Layer====== + aggregation_representation = [] + aggregation_dim = 0 + + qa_aggregation_input = question_aware_representatins + pa_aggregation_input = passage_aware_representatins + with tf.variable_scope('aggregation_layer'): + for i in xrange(options.aggregation_layer_num): # support multiple aggregation layer + qa_aggregation_input = tf.multiply(qa_aggregation_input, tf.expand_dims(passage_mask, axis=-1)) + (fw_rep, bw_rep, cur_aggregation_representation) = layer_utils.my_lstm_layer( + qa_aggregation_input, options.aggregation_lstm_dim, input_lengths=passage_lengths, scope_name='left_layer-{}'.format(i), + reuse=False, is_training=is_training, dropout_rate=options.dropout_rate,use_cudnn=options.use_cudnn) + fw_rep = layer_utils.collect_final_step_of_lstm(fw_rep, passage_lengths - 1) + bw_rep = bw_rep[:, 0, :] + aggregation_representation.append(fw_rep) + aggregation_representation.append(bw_rep) + aggregation_dim += 2* options.aggregation_lstm_dim + qa_aggregation_input = cur_aggregation_representation# [batch_size, passage_len, 2*aggregation_lstm_dim] + + pa_aggregation_input = tf.multiply(pa_aggregation_input, tf.expand_dims(question_mask, axis=-1)) + (fw_rep, bw_rep, cur_aggregation_representation) = layer_utils.my_lstm_layer( + pa_aggregation_input, options.aggregation_lstm_dim, + input_lengths=question_lengths, scope_name='right_layer-{}'.format(i), + reuse=False, is_training=is_training, dropout_rate=options.dropout_rate, use_cudnn=options.use_cudnn) + fw_rep = layer_utils.collect_final_step_of_lstm(fw_rep, question_lengths - 1) + bw_rep = bw_rep[:, 0, :] + aggregation_representation.append(fw_rep) + aggregation_representation.append(bw_rep) + aggregation_dim += 2* options.aggregation_lstm_dim + pa_aggregation_input = cur_aggregation_representation# [batch_size, passage_len, 2*aggregation_lstm_dim] + + aggregation_representation = tf.concat(axis=1, values=aggregation_representation) # [batch_size, aggregation_dim] + + # ======Highway layer====== + if options.with_aggregation_highway: + with tf.variable_scope("aggregation_highway"): + agg_shape = tf.shape(aggregation_representation) + batch_size = agg_shape[0] + aggregation_representation = tf.reshape(aggregation_representation, [1, batch_size, aggregation_dim]) + aggregation_representation = multi_highway_layer(aggregation_representation, aggregation_dim, options.highway_layer_num) + aggregation_representation = tf.reshape(aggregation_representation, [batch_size, aggregation_dim]) + + return (aggregation_representation, aggregation_dim) + diff --git a/bimpm/run.sh b/bimpm/run.sh new file mode 100755 index 0000000..66f1f68 --- /dev/null +++ b/bimpm/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash + + +echo "train model" +python bimpm.py --train + + +echo "test model" +python bimpm.py --test diff --git a/code/utils.py b/bimpm/utils.py similarity index 100% rename from code/utils.py rename to bimpm/utils.py diff --git 
a/code/models.py b/code/models.py deleted file mode 100755 index ebb652f..0000000 --- a/code/models.py +++ /dev/null @@ -1,488 +0,0 @@ -# -*- encoding:utf-8 -*- -import tensorflow as tf -import numpy as np - -class SiameseNN(object): - def __init__(self, config): - self.config = config - # 输入 - self.add_placeholders() - # [batch_size, sequence_size, embed_size] - q_embed, a_embed = self.add_embeddings() - with tf.variable_scope('siamese') as scope: - self.q_trans = self.network(q_embed) - scope.reuse_variables() - self.a_trans = self.network(a_embed) - # 损失和精确度 - self.total_loss = self.add_loss_op(self.q_trans, self.a_trans) - # 训练节点 - self.train_op = self.add_train_op(self.total_loss) - - # 输入 - def add_placeholders(self): - # 问题 - self.q = tf.placeholder(tf.int32, - shape=[None, self.config.max_q_length], - name='Question') - # 回答 - self.a = tf.placeholder(tf.int32, - shape=[None, self.config.max_a_length], - name='Ans') - self.y = tf.placeholder(tf.float32, shape=[None, ], name='label') - # drop_out - self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') - self.batch_size = tf.shape(self.q)[0] - - # word embeddings - def add_embeddings(self): - with tf.variable_scope('embedding'): - if self.config.embeddings is not None: - embeddings = tf.Variable(self.config.embeddings, name="embeddings", trainable=False) - else: - embeddings = tf.get_variable('embeddings', shape=[self.config.vocab_size, self.config.embedding_size], initializer=tf.uniform_unit_scaling_initializer()) - q_embed = tf.nn.embedding_lookup(embeddings, self.q) - a_embed = tf.nn.embedding_lookup(embeddings, self.a) - q_embed = tf.nn.dropout(q_embed, keep_prob=self.keep_prob) - a_embed = tf.nn.dropout(a_embed, keep_prob=self.keep_prob) - return q_embed, a_embed - - def network(self, x): - # (batch_size * max_len, embed_size) - max_len = tf.shape(x)[1] - x = tf.reshape(x, (-1, x.get_shape()[-1])) - fc1 = self.fc_layer(x, self.config.hidden_size, "fc1") - ac1 = tf.nn.relu(fc1) - fc2 = self.fc_layer(ac1, self.config.hidden_size, "fc2") - ac2 = tf.nn.relu(fc2) - # (batch_size, max_len, embed_size) - ac3 = tf.reshape(ac2, (self.batch_size, max_len, ac2.get_shape()[1])) - # (batch_size, embed_size) - ac3 = tf.reduce_mean(ac3, axis=1) - fc3 = self.fc_layer(ac3, self.config.output_size, "fc3") - return fc3 - - def fc_layer(self, bottom, n_weight, name): - assert len(bottom.get_shape()) == 2 - n_prev_weight = bottom.get_shape()[1] - initer = tf.truncated_normal_initializer(stddev=0.01) - W = tf.get_variable(name+'W', dtype=tf.float32, shape=[n_prev_weight, n_weight], initializer=initer) - b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.01, shape=[n_weight], dtype=tf.float32)) - fc = tf.nn.bias_add(tf.matmul(bottom, W), b) - return fc - - # 损失节点 - def add_loss_op(self, o1, o2): - # 此处用cos距离 - norm_o1 = tf.nn.l2_normalize(o1, dim=1) - norm_o2 = tf.nn.l2_normalize(o2, dim=1) - self.q_a_cosine = tf.reduce_sum(tf.multiply(o1, o2), 1) - - loss = self.contrastive_loss(self.q_a_cosine, self.y) - tf.add_to_collection('total_loss', loss) - total_loss = tf.add_n(tf.get_collection('total_loss')) - return total_loss - - def contrastive_loss(self, Ew, y): - l_1 = self.config.pos_weight * tf.square(1 - Ew) - l_0 = tf.square(tf.maximum(Ew, 0)) - loss = tf.reduce_mean(y * l_1 + (1 - y) * l_0) - return loss - - # 训练节点 - def add_train_op(self, loss): - with tf.name_scope('train_op'): - # 记录训练步骤 - self.global_step = tf.Variable(0, name='global_step', trainable=False) - opt = tf.train.AdamOptimizer(self.config.lr) - 
train_op = opt.minimize(loss, self.global_step) - return train_op - - -class SiameseCNN(object): - def __init__(self, config): - self.config = config - # 输入 - self.add_placeholders() - # [batch_size, sequence_size, embed_size] - q_embed, a_embed = self.add_embeddings() - with tf.variable_scope('siamese') as scope: - self.q_trans = self.network(q_embed, reuse=False) - scope.reuse_variables() - self.a_trans = self.network(a_embed, reuse=True) - # 损失和精确度 - self.total_loss = self.add_loss_op(self.q_trans, self.a_trans) - # 训练节点 - self.train_op = self.add_train_op(self.total_loss) - - # 输入 - def add_placeholders(self): - # 问题 - self.q = tf.placeholder(tf.int32, - shape=[None, self.config.max_q_length], - name='Question') - # 回答 - self.a = tf.placeholder(tf.int32, - shape=[None, self.config.max_a_length], - name='Ans') - self.y = tf.placeholder(tf.float32, shape=[None, ], name='label') - # drop_out - self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') - self.batch_size = tf.shape(self.q)[0] - - # word embeddings - def add_embeddings(self): - with tf.variable_scope('embedding'): - if self.config.embeddings is not None: - embeddings = tf.Variable(self.config.embeddings, name="embeddings", trainable=False) - else: - embeddings = tf.get_variable('embeddings', shape=[self.config.vocab_size, self.config.embedding_size], initializer=tf.uniform_unit_scaling_initializer()) - q_embed = tf.nn.embedding_lookup(embeddings, self.q) - a_embed = tf.nn.embedding_lookup(embeddings, self.a) - q_embed = tf.nn.dropout(q_embed, keep_prob=self.keep_prob) - a_embed = tf.nn.dropout(a_embed, keep_prob=self.keep_prob) - return q_embed, a_embed - - def network(self, x, reuse=False): - # (batch_size, conv_size) - conv1 = self.conv_layer(x, reuse=reuse) - # (batch_size, hidden_size) - fc1 = self.fc_layer(conv1, self.config.hidden_size, "fc1") - ac1 = tf.nn.relu(fc1) - # (batch_size, output_size) - fc2 = self.fc_layer(ac1, self.config.output_size, "fc2") - return fc2 - - def fc_layer(self, bottom, n_weight, name): - assert len(bottom.get_shape()) == 2 - n_prev_weight = bottom.get_shape()[1] - initer = tf.truncated_normal_initializer(stddev=0.01) - W = tf.get_variable(name+'W', dtype=tf.float32, shape=[n_prev_weight, n_weight], initializer=initer) - b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.0, shape=[n_weight], dtype=tf.float32)) - fc = tf.nn.bias_add(tf.matmul(bottom, W), b) - return fc - - def conv_layer(self, h, reuse=False): - pool = list() - max_len = h.get_shape()[1] - h = tf.reshape(h, [-1, max_len, h.get_shape()[2], 1]) - for i, filter_size in enumerate(self.config.filter_sizes): - with tf.variable_scope('filter{}'.format(filter_size)): - conv1_W = tf.get_variable('conv_W', shape=[filter_size, self.config.embedding_size, 1, self.config.num_filters], initializer=tf.truncated_normal_initializer(.0, .01)) - conv1_b = tf.get_variable('conv_b', initializer=tf.constant(0.0, shape=[self.config.num_filters])) - # pooling层的bias,Q和A分开 - pool_b = tf.get_variable('pool_b', initializer=tf.constant(0.0, shape=[self.config.num_filters])) - # 卷积 - out = tf.nn.relu((tf.nn.conv2d(h, conv1_W, [1,1,1,1], padding='VALID')+conv1_b)) - # 池化 - out = tf.nn.max_pool(out, [1,max_len-filter_size+1,1,1], [1,1,1,1], padding='VALID') - out = tf.nn.tanh(out+pool_b) - pool.append(out) - # 加入正则项 - if not reuse: - tf.add_to_collection('total_loss', 0.5 * self.config.l2_reg_lambda * tf.nn.l2_loss(conv1_W)) - - total_channels = len(self.config.filter_sizes) * self.config.num_filters - real_pool = 
tf.reshape(tf.concat(pool, 3), [self.batch_size, total_channels]) - return real_pool - - # 损失节点 - def add_loss_op(self, o1, o2): - # 此处用cos距离 - norm_o1 = tf.nn.l2_normalize(o1, dim=1) - norm_o2 = tf.nn.l2_normalize(o2, dim=1) - self.q_a_cosine = tf.reduce_sum(tf.multiply(o1, o2), 1) - - loss = self.contrastive_loss(self.q_a_cosine, self.y) - tf.add_to_collection('total_loss', loss) - total_loss = tf.add_n(tf.get_collection('total_loss')) - return total_loss - - def contrastive_loss(self, Ew, y): - l_1 = self.config.pos_weight * tf.square(1 - Ew) - l_0 = tf.square(tf.maximum(Ew, 0)) - loss = tf.reduce_mean(y * l_1 + (1 - y) * l_0) - return loss - - # 训练节点 - def add_train_op(self, loss): - with tf.name_scope('train_op'): - # 记录训练步骤 - self.global_step = tf.Variable(0, name='global_step', trainable=False) - opt = tf.train.AdamOptimizer(self.config.lr) - train_op = opt.minimize(loss, self.global_step) - return train_op - - -class SiameseRNN(object): - def __init__(self, config): - self.config = config - # 输入 - self.add_placeholders() - # [batch_size, sequence_size, embed_size] - q_embed, a_embed = self.add_embeddings() - with tf.variable_scope('siamese') as scope: - self.q_trans = self.network(q_embed) - tf.get_variable_scope().reuse_variables() - self.a_trans = self.network(a_embed) - # 损失和精确度 - self.total_loss = self.add_loss_op(self.q_trans, self.a_trans) - # 训练节点 - self.train_op = self.add_train_op(self.total_loss) - - # 输入 - def add_placeholders(self): - # 问题 - self.q = tf.placeholder(tf.int32, - shape=[None, self.config.max_q_length], - name='Question') - # 回答 - self.a = tf.placeholder(tf.int32, - shape=[None, self.config.max_a_length], - name='Ans') - self.y = tf.placeholder(tf.float32, shape=[None, ], name='label') - # drop_out - self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') - self.batch_size = tf.shape(self.q)[0] - - # word embeddings - def add_embeddings(self): - with tf.variable_scope('embedding'): - if self.config.embeddings is not None: - embeddings = tf.Variable(self.config.embeddings, name="embeddings", trainable=False) - else: - embeddings = tf.get_variable('embeddings', shape=[self.config.vocab_size, self.config.embedding_size], initializer=tf.uniform_unit_scaling_initializer()) - q_embed = tf.nn.embedding_lookup(embeddings, self.q) - a_embed = tf.nn.embedding_lookup(embeddings, self.a) - q_embed = tf.nn.dropout(q_embed, keep_prob=self.keep_prob) - a_embed = tf.nn.dropout(a_embed, keep_prob=self.keep_prob) - return q_embed, a_embed - - def network(self, x): - sequence_length = x.get_shape()[1] - # (batch_size, time_step, embed_size) -> (time_step, batch_size, embed_size) - inputs = tf.transpose(x, [1, 0, 2]) - inputs = tf.reshape(inputs, [-1, self.config.embedding_size]) - inputs = tf.split(inputs, sequence_length, 0) - # (batch_size, rnn_output_size) - rnn1 = self.rnn_layer(inputs) - # (batch_size, hidden_size) - fc1 = self.fc_layer(rnn1, self.config.hidden_size, "fc1") - ac1 = tf.nn.relu(fc1) - # (batch_size, output_size) - fc2 = self.fc_layer(ac1, self.config.output_size, "fc2") - return fc2 - - def fc_layer(self, bottom, n_weight, name): - assert len(bottom.get_shape()) == 2 - n_prev_weight = bottom.get_shape()[1] - initer = tf.truncated_normal_initializer(stddev=0.01) - W = tf.get_variable(name+'W', dtype=tf.float32, shape=[n_prev_weight, n_weight], initializer=initer) - b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.01, shape=[n_weight], dtype=tf.float32)) - fc = tf.nn.bias_add(tf.matmul(bottom, W), b) - return fc - - def 
rnn_layer(self, h): - if self.config.cell_type == 'lstm': - birnn_fw, birnn_bw = self.bi_lstm(self.config.rnn_size, self.config.layer_size, self.config.keep_prob) - else: - birnn_fw, birnn_bw = self.bi_gru(self.config.rnn_size, self.config.layer_size, self.config.keep_prob) - outputs_x1, _, _ = tf.contrib.rnn.static_bidirectional_rnn(birnn_fw, birnn_bw, h, dtype=tf.float32) - # (time_step, batch_size, 2*rnn_size) -> (batch_size, 2*rnn_size) - output_x1 = tf.reduce_mean(outputs_x1, 0) - return output_x1 - - def bi_lstm(self, rnn_size, layer_size, keep_prob): - - # forward rnn - with tf.name_scope('fw_rnn'), tf.variable_scope('fw_rnn'): - lstm_fw_cell_list = [tf.contrib.rnn.LSTMCell(rnn_size) for _ in xrange(layer_size)] - lstm_fw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(lstm_fw_cell_list), output_keep_prob=keep_prob) - - # backward rnn - with tf.name_scope('bw_rnn'), tf.variable_scope('bw_rnn'): - lstm_bw_cell_list = [tf.contrib.rnn.LSTMCell(rnn_size) for _ in xrange(layer_size)] - lstm_bw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(lstm_fw_cell_list), output_keep_prob=keep_prob) - - return lstm_fw_cell_m, lstm_bw_cell_m - - def bi_gru(self, rnn_size, layer_size, keep_prob): - - # forward rnn - with tf.name_scope('fw_rnn'), tf.variable_scope('fw_rnn'): - gru_fw_cell_list = [tf.contrib.rnn.GRUCell(rnn_size) for _ in xrange(layer_size)] - gru_fw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(gru_fw_cell_list), output_keep_prob=keep_prob) - - # backward rnn - with tf.name_scope('bw_rnn'), tf.variable_scope('bw_rnn'): - gru_bw_cell_list = [tf.contrib.rnn.GRUCell(rnn_size) for _ in xrange(layer_size)] - gru_bw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(gru_bw_cell_list), output_keep_prob=keep_prob) - - return gru_fw_cell_m, gru_bw_cell_m - - # 损失节点 - def add_loss_op(self, o1, o2): - # 此处用cos距离 - norm_o1 = tf.nn.l2_normalize(o1, dim=1) - norm_o2 = tf.nn.l2_normalize(o2, dim=1) - self.q_a_cosine = tf.reduce_sum(tf.multiply(o1, o2), 1) - - loss = self.contrastive_loss(self.q_a_cosine, self.y) - tf.add_to_collection('total_loss', loss) - total_loss = tf.add_n(tf.get_collection('total_loss')) - return total_loss - - def contrastive_loss(self, Ew, y): - l_1 = self.config.pos_weight * tf.square(1 - Ew) - l_0 = tf.square(tf.maximum(Ew, 0)) - loss = tf.reduce_mean(y * l_1 + (1 - y) * l_0) - return loss - - # 训练节点 - def add_train_op(self, loss): - with tf.name_scope('train_op'): - # 记录训练步骤 - self.global_step = tf.Variable(0, name='global_step', trainable=False) - opt = tf.train.AdamOptimizer(self.config.lr) - train_op = opt.minimize(loss, self.global_step) - return train_op - - -class QACNN(object): - """ - pairwise学习模型 - """ - def __init__(self, config): - self.config = config - # 输入 - self.add_placeholders() - # [batch_size, sequence_size, embed_size] - q_embed, aplus_embed, aminus_embed = self.add_embeddings() - # [batch_size, sequence_size, hidden_size, 1] - self.h_q, self.h_ap, self.h_am = self.add_hl(q_embed, aplus_embed, aminus_embed) - # [batch_size, total_channels] - real_pool_q, real_pool_ap, real_pool_am = self.add_model(q_embed, aplus_embed, aminus_embed) - # [batch_size, 1] - self.q_ap_cosine, self.q_am_cosine = self.calc_cosine(real_pool_q, real_pool_ap, real_pool_am) - # 损失和精确度 - self.total_loss, self.loss, self.accu = self.add_loss_op(self.q_ap_cosine, self.q_am_cosine) - # 训练节点 - self.train_op = self.add_train_op(self.total_loss) - - - # 输入 - def add_placeholders(self): - # 问题 - self.q = 
tf.placeholder(tf.int32, - shape=[None, self.config.max_q_length], - name='Question') - # 正向回答 - self.aplus = tf.placeholder(tf.int32, - shape=[None, self.config.max_a_length], - name='PosAns') - # 负向回答 - self.aminus = tf.placeholder(tf.int32, - shape=[None, self.config.max_a_length], - name='NegAns') - # drop_out - self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') - self.batch_size = tf.shape(self.q)[0] - - # word embeddings - def add_embeddings(self): - with tf.variable_scope('embedding'): - if self.config.embeddings is not None: - embeddings = tf.Variable(self.config.embeddings, name="embeddings", trainable=False) - else: - embeddings = tf.get_variable('embeddings', shape=[self.config.vocab_size, self.config.embedding_size], initializer=tf.uniform_unit_scaling_initializer()) - q_embed = tf.nn.embedding_lookup(embeddings, self.q) - aplus_embed = tf.nn.embedding_lookup(embeddings, self.aplus) - aminus_embed = tf.nn.embedding_lookup(embeddings, self.aminus) - q_embed = tf.nn.dropout(q_embed, keep_prob=self.keep_prob) - aplus_embed = tf.nn.dropout(aplus_embed, keep_prob=self.keep_prob) - aminus_embed = tf.nn.dropout(aminus_embed, keep_prob=self.keep_prob) - return q_embed, aplus_embed, aminus_embed - - # Hidden Layer - def add_hl(self, q_embed, aplus_embed, aminus_embed): - with tf.variable_scope('HL'): - W = tf.get_variable('weights', shape=[self.config.embedding_size, self.config.hidden_size], initializer=tf.uniform_unit_scaling_initializer()) - b = tf.get_variable('biases', initializer=tf.constant(0.1, shape=[self.config.hidden_size])) - h_q = tf.reshape(tf.nn.tanh(tf.matmul(tf.reshape(q_embed, [-1, self.config.embedding_size]), W)+b), [-1, self.config.max_q_length, self.config.hidden_size]) - h_ap = tf.reshape(tf.nn.tanh(tf.matmul(tf.reshape(aplus_embed, [-1, self.config.embedding_size]), W)+b), [-1, self.config.max_a_length, self.config.hidden_size]) - h_am = tf.reshape(tf.nn.tanh(tf.matmul(tf.reshape(aminus_embed, [-1, self.config.embedding_size]), W)+b), [-1, self.config.max_a_length, self.config.hidden_size]) - tf.add_to_collection('total_loss', 0.5*self.config.l2_reg_lambda*tf.nn.l2_loss(W)) - return h_q, h_ap, h_am - - # CNN层 - def add_model(self, h_q, h_ap, h_am): - pool_q = list() - pool_ap = list() - pool_am = list() - h_q = tf.reshape(h_q, [-1, self.config.max_q_length, self.config.embedding_size, 1]) - h_ap = tf.reshape(h_ap, [-1, self.config.max_a_length, self.config.embedding_size, 1]) - h_am = tf.reshape(h_am, [-1, self.config.max_a_length, self.config.embedding_size, 1]) - for i, filter_size in enumerate(self.config.filter_sizes): - with tf.variable_scope('filter{}'.format(filter_size)): - conv1_W = tf.get_variable('W_q', shape=[filter_size, self.config.embedding_size, 1, self.config.num_filters], initializer=tf.truncated_normal_initializer(.0, .1)) - conv2_W = tf.get_variable('W_a', shape=[filter_size, self.config.embedding_size, 1, self.config.num_filters], initializer=tf.truncated_normal_initializer(.0, .1)) - conv1_b = tf.get_variable('conv_qb', initializer=tf.constant(0.1, shape=[self.config.num_filters])) - conv2_b = tf.get_variable('conv_ab', initializer=tf.constant(0.1, shape=[self.config.num_filters])) - # pooling层的bias,Q和A分开 - pool_qb = tf.get_variable('pool_qb', initializer=tf.constant(0.1, shape=[self.config.num_filters])) - pool_ab = tf.get_variable('pool_ab', initializer=tf.constant(0.1, shape=[self.config.num_filters])) - # 卷积 - out_q = tf.nn.relu((tf.nn.conv2d(h_q, conv1_W, [1,1,1,1], padding='VALID')+conv1_b)) - # 池化 - out_q = 
tf.nn.max_pool(out_q, [1,self.config.max_q_length-filter_size+1,1,1], [1,1,1,1], padding='VALID') - out_q = tf.nn.tanh(out_q+pool_qb) - pool_q.append(out_q) - - out_ap = tf.nn.relu((tf.nn.conv2d(h_ap, conv2_W, [1,1,1,1], padding='VALID')+conv2_b)) - out_ap = tf.nn.max_pool(out_ap, [1,self.config.max_a_length-filter_size+1,1,1], [1,1,1,1], padding='VALID') - out_ap = tf.nn.tanh(out_ap+pool_ab) - pool_ap.append(out_ap) - - out_am = tf.nn.relu((tf.nn.conv2d(h_am, conv2_W, [1,1,1,1], padding='VALID')+conv2_b)) - out_am = tf.nn.max_pool(out_am, [1,self.config.max_a_length-filter_size+1,1,1], [1,1,1,1], padding='VALID') - out_am = tf.nn.tanh(out_am+pool_ab) - pool_am.append(out_am) - - # 加入正则项 - tf.add_to_collection('total_loss', 0.5*self.config.l2_reg_lambda*tf.nn.l2_loss(conv1_W)) - tf.add_to_collection('total_loss', 0.5*self.config.l2_reg_lambda*tf.nn.l2_loss(conv2_W)) - - total_channels = len(self.config.filter_sizes)*self.config.num_filters - real_pool_q = tf.reshape(tf.concat(pool_q, 3), [-1, total_channels]) - real_pool_ap = tf.reshape(tf.concat(pool_ap, 3), [-1, total_channels]) - real_pool_am = tf.reshape(tf.concat(pool_am, 3), [-1, total_channels]) - - return real_pool_q, real_pool_ap, real_pool_am - - # 计算cosine - def calc_cosine(self, real_pool_q, real_pool_ap, real_pool_am): - normalized_q_h_pool = tf.nn.l2_normalize(real_pool_q, dim=1) - normalized_pos_h_pool = tf.nn.l2_normalize(real_pool_ap, dim=1) - normalized_neg_h_pool = tf.nn.l2_normalize(real_pool_am, dim=1) - q_ap_cosine = tf.reduce_sum(tf.multiply(normalized_q_h_pool, normalized_pos_h_pool), 1) - q_am_cosine = tf.reduce_sum(tf.multiply(normalized_q_h_pool, normalized_neg_h_pool), 1) - - return q_ap_cosine, q_am_cosine - - # 损失节点 - def add_loss_op(self, q_ap_cosine, q_am_cosine): - original_loss = self.config.m - q_ap_cosine + q_am_cosine - l = tf.maximum(tf.zeros_like(original_loss), original_loss) - loss = tf.reduce_sum(l) - tf.add_to_collection('total_loss', loss) - total_loss = tf.add_n(tf.get_collection('total_loss')) - accu = tf.reduce_mean(tf.cast(tf.equal(0., l), tf.float32)) - return total_loss, loss, accu - - # 训练节点 - def add_train_op(self, loss): - with tf.name_scope('train_op'): - # 记录训练步骤 - self.global_step = tf.Variable(0, name='global_step', trainable=False) - opt = tf.train.AdamOptimizer(self.config.lr) - train_op = opt.minimize(loss, self.global_step) - return train_op diff --git a/decomposable_att_model/README.me b/decomposable_att_model/README.me new file mode 100755 index 0000000..bd58fa0 --- /dev/null +++ b/decomposable_att_model/README.me @@ -0,0 +1,23 @@ +# 复现《A Decomposable Attention Model for Natural Language Inference》中的模型完成问答任务 + +## 准备 + +#### 下载词向量文件[glove](../download.sh)。 + +``` +cd .. +bash download.sh +``` + +#### 预处理wiki数据 + +``` +cd .. 
+python preprocess_wiki.py +``` + +## 运行 + +``` +bash run.sh +``` diff --git a/decomposable_att_model/decomp_att.py b/decomposable_att_model/decomp_att.py new file mode 100755 index 0000000..bf3ea38 --- /dev/null +++ b/decomposable_att_model/decomp_att.py @@ -0,0 +1,176 @@ +# -*- encoding:utf8 -*- +import tensorflow as tf +import numpy as np +import os +import sys +from copy import deepcopy +stdout = sys.stdout +reload(sys) +sys.stdout = stdout + +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + +import cPickle as pkl +from utils import * +from models import DecompAtt + + +class DecompAttConfig(object): + def __init__(self, vocab_size, embeddings=None): + # 输入问题(句子)长度 + self.max_q_length = 200 + # 输入答案长度 + self.max_a_length = 200 + # 循环数 + self.num_epochs = 100 + # batch大小 + self.batch_size = 128 + # 词表大小 + self.vocab_size = vocab_size + # 词向量大小 + self.embeddings = embeddings + self.embedding_size = 100 + if self.embeddings is not None: + self.embedding_size = embeddings.shape[1] + # RNN单元类型和大小与堆叠层数 + self.cell_type = 'GRU' + self.rnn_size = 128 + self.layer_size = 1 + # 隐层大小 + self.hidden_size = 128 + self.output_size = 128 + # keep_prob=1-dropout + self.keep_prob = 0.6 + # 学习率 + self.lr = 0.0003 + self.grad_clip = 1. + + self.cf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) + self.cf.gpu_options.per_process_gpu_memory_fraction = 0.2 + + +def train(train_corpus, config, val_corpus, eval_train_corpus=None): + iterator = Iterator(train_corpus) + + with tf.Session(config=config.cf) as sess: + model = DecompAtt(config) + saver = tf.train.Saver() + sess.run(tf.initialize_all_variables()) + for epoch in xrange(config.num_epochs): + count = 0 + for batch_x in iterator.next(config.batch_size, shuffle=True): + batch_qids, batch_q, batch_aids, batch_ap, labels = zip(*batch_x) + batch_q = np.asarray(batch_q) + batch_ap = np.asarray(batch_ap) + labels = np.asarray(labels).astype(np.int32) + _, loss = sess.run([model.train_op, model.total_loss], + feed_dict={model.q:batch_q, + model.a:batch_ap, + model.y:labels, + model.keep_prob:config.keep_prob}) + count += 1 + if count % 10 == 0: + print('[epoch {}, batch {}]Loss:{}'.format(epoch, count, loss)) + saver.save(sess,'{}/my_model'.format(model_path), global_step=epoch) + if eval_train_corpus is not None: + train_res = evaluate(sess, model, eval_train_corpus, config) + print('[train] ' + train_res) + if val_corpus is not None: + val_res = evaluate(sess, model, val_corpus, config) + print('[eval] ' + val_res) + + +def evaluate(sess, model, corpus, config): + iterator = Iterator(corpus) + + count = 0 + total_qids = [] + total_aids = [] + total_pred = [] + total_labels = [] + total_loss = 0. 
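# --- Illustrative sketch (not part of the original patch) ------------------
# The loop below collects (qid, aid, prediction, label) tuples batch by batch
# and finally scores them with eval_map_mrr() from utils.py. For reference,
# this is a minimal, self-contained way to compute AP and MRR for a single
# question's ranked answers; the helper name _sketch_ap_mrr and its argument
# are invented for this sketch and do not exist in the repository.
def _sketch_ap_mrr(score_label_pairs):
    # score_label_pairs: [(model_score, gold_label), ...] for one question
    ranked = sorted(score_label_pairs, key=lambda p: p[0], reverse=True)
    labels = [lab for _, lab in ranked]
    if sum(labels) == 0:
        return None, None  # questions without a positive answer are skipped
    # MRR contribution: reciprocal rank of the first relevant answer
    first_hit = next(i for i, lab in enumerate(labels) if lab == 1)
    mrr = 1.0 / (first_hit + 1)
    # AP: average of precision@k taken at every relevant position
    hits, precisions = 0, []
    for k, lab in enumerate(labels, start=1):
        if lab == 1:
            hits += 1
            precisions.append(float(hits) / k)
    ap = sum(precisions) / hits
    return ap, mrr
# Averaging ap / mrr over all questions that have at least one positive answer
# yields the MAP / MRR figures printed by evaluate().
# ----------------------------------------------------------------------------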
+ for batch_x in iterator.next(config.batch_size, shuffle=False): + batch_qids, batch_q, batch_aids, batch_ap, labels = zip(*batch_x) + batch_q = np.asarray(batch_q) + batch_ap = np.asarray(batch_ap) + y_hat, loss = sess.run([model.y_hat, model.total_loss], + feed_dict={model.q:batch_q, + model.a:batch_ap, + model.y:labels, + model.keep_prob:1.}) + y_hat = np.argmax(y_hat, axis=-1) + total_loss += loss + count += 1 + total_qids.append(batch_qids) + total_aids.append(batch_aids) + total_pred.append(y_hat) + total_labels.append(labels) + # print(batch_qids[0], [id2word[_] for _ in batch_q[0]], + # batch_aids[0], [id2word[_] for _ in batch_ap[0]]) + total_qids = np.concatenate(total_qids, axis=0) + total_aids = np.concatenate(total_aids, axis=0) + total_pred = np.concatenate(total_pred, axis=0) + total_labels = np.concatenate(total_labels, axis=0) + MAP, MRR = eval_map_mrr(total_qids, total_aids, total_pred, total_labels) + # print('Eval loss:{}'.format(total_loss / count)) + return 'MAP:{}, MRR:{}'.format(MAP, MRR) + + +def test(corpus, config): + with tf.Session(config=config.cf) as sess: + model = DecompAtt(config) + saver = tf.train.Saver() + saver.restore(sess, tf.train.latest_checkpoint(model_path)) + print('[test] ' + evaluate(sess, model, corpus, config)) + + +def main(args): + max_q_length = 25 + max_a_length = 90 + + with open(os.path.join(processed_data_path, 'pointwise_corpus.pkl'), 'r') as fr: + train_corpus, val_corpus, test_corpus = pkl.load(fr) + + embeddings = build_embedding(embedding_path, word2id) + + train_qids, train_q, train_aids, train_ap, train_labels = zip(*train_corpus) + train_q = padding(train_q, max_q_length) + train_ap = padding(train_ap, max_a_length) + train_corpus = zip(train_qids, train_q, train_aids, train_ap, train_labels) + + + val_qids, val_q, val_aids, val_ap, labels = zip(*val_corpus) + val_q = padding(val_q, max_q_length) + val_ap = padding(val_ap, max_a_length) + val_corpus = zip(val_qids, val_q, val_aids, val_ap, labels) + + + test_qids, test_q, test_aids, test_ap, labels = zip(*test_corpus) + test_q = padding(test_q, max_q_length) + test_ap = padding(test_ap, max_a_length) + test_corpus = zip(test_qids, test_q, test_aids, test_ap, labels) + + config = DecompAttConfig(max(word2id.values()) + 1, embeddings=embeddings) + config.max_q_length = max_q_length + config.max_a_length = max_a_length + if args.train: + train(deepcopy(train_corpus), config, val_corpus, deepcopy(train_corpus)) + elif args.test: + test(test_corpus, config) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--train", help="whether to train", action='store_true') + parser.add_argument("--test", help="whether to test", action='store_true') + args = parser.parse_args() + + raw_data_path = '../data/WikiQA/raw' + processed_data_path = '../data/WikiQA/processed' + embedding_path = '../data/embedding/glove.6B.300d.txt' + model_path = 'models' + + with open(os.path.join(processed_data_path, 'vocab.pkl'), 'r') as fr: + word2id, id2word = pkl.load(fr) + main(args) diff --git a/decomposable_att_model/models.py b/decomposable_att_model/models.py new file mode 100755 index 0000000..e9fac8b --- /dev/null +++ b/decomposable_att_model/models.py @@ -0,0 +1,203 @@ +# -*- encoding:utf-8 -*- +import tensorflow as tf +import numpy as np + + +class DecompAtt(object): + def __init__(self, config): + self.config = config + # 输入 + self.add_placeholders() + # [batch_size, sequence_size, embed_size] + q_embed, a_embed = self.add_embeddings() + # 
上下文编码 + q_encode, a_encode = self.context_encoding(q_embed, a_embed) + # attention层 + q_attend, a_attend = self.attend(q_encode, a_encode) + # compose层 + q_comp, a_comp = self.compare(q_encode, a_encode, q_attend, a_attend) + # aggregate层 + pred = self.aggregate(q_comp, a_comp) + # 预测概率分布与损失 + self.y_hat, self.total_loss = self.add_loss_op(pred) + # 训练节点 + self.train_op = self.add_train_op(self.total_loss) + + + def attend(self, q, a): + """ + q: [batch_size, q_length, represent_dim] + a: [batch_size, a_length, represent_dim] + """ + + q = tf.nn.dropout(q, keep_prob=self.keep_prob) + a = tf.nn.dropout(a, keep_prob=self.keep_prob) + q_map = tf.layers.dense(q, 128, activation=tf.nn.relu, name='embed_map') + a_map = tf.layers.dense(a, 128, activation=tf.nn.relu, name='embed_map', reuse=True) + # [batch_size, q_length, a_length] + att_inner_product = tf.matmul( + q_map, + tf.transpose(a_map, [0, 2, 1])) + # [batch_size, a_length, q_length] + q_weights = tf.nn.softmax( + tf.transpose( + att_inner_product, (0, 2, 1)), dim=-1) + # [batch_size, q_length, a_length] + a_weights = tf.nn.softmax(att_inner_product, dim=-1) + + output_a = tf.matmul(q_weights, q) + output_q = tf.matmul(a_weights, a) + + return output_q, output_a + + def compare(self, q, a, q_att, a_att): + """ + q: [batch_size, q_length, represent_dim] + a: [batch_size, a_length, represent_dim] + q_att: [batch_size, q_length, represent_dim] + a_att: [batch_size, a_length, represent_dim] + """ + q_combine = tf.concat([q, q_att], axis=-1) + a_combine = tf.concat([a, a_att], axis=-1) + q_combine = tf.nn.dropout(q_combine, keep_prob=self.keep_prob) + a_combine = tf.nn.dropout(a_combine, keep_prob=self.keep_prob) + q_map = self.mlp(q_combine, self.config.hidden_size, 2, 'embed_compare') + a_map = self.mlp(a_combine, self.config.hidden_size, 2, 'embed_compare', reuse=True) + return q_map, a_map + + def aggregate(self, q, a): + """ + q: [batch_size, q_length, represent_dim] + a: [batch_size, a_length, represent_dim] + """ + # 输出shape为[batch_size, represent_dim] + q_sum = tf.reduce_sum(q, 1) + a_sum = tf.reduce_sum(a, 1) + q_sum = tf.nn.dropout(q_sum, keep_prob=self.keep_prob) + a_sum = tf.nn.dropout(a_sum, keep_prob=self.keep_prob) + q_a_rep = tf.concat([q_sum, a_sum], axis=-1) + pred = self.mlp(q_a_rep, self.config.output_size, 2, 'embed_aggregate') + pred = tf.layers.dense(pred, 2, activation=None, name='prediction') + return pred + + def add_placeholders(self): + # 问题 + self.q = tf.placeholder(tf.int32, + shape=[None, self.config.max_q_length], + name='Question') + # 回答 + self.a = tf.placeholder(tf.int32, + shape=[None, self.config.max_a_length], + name='Ans') + self.y = tf.placeholder(tf.int32, shape=[None, ], name='label') + # drop_out + self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') + self.batch_size = tf.shape(self.q)[0] + + def add_embeddings(self): + with tf.variable_scope('embedding'): + if self.config.embeddings is not None: + embeddings = tf.Variable(self.config.embeddings, name="embeddings", trainable=False) + else: + embeddings = tf.get_variable('embeddings', shape=[self.config.vocab_size, self.config.embedding_size], initializer=tf.uniform_unit_scaling_initializer()) + q_embed = tf.nn.embedding_lookup(embeddings, self.q) + a_embed = tf.nn.embedding_lookup(embeddings, self.a) + return q_embed, a_embed + + def context_encoding(self, q, a): + """ + q: [batch_size, q_length, embedding_dim] + a: [batch_size, a_length, embedding_dim] + """ + with tf.variable_scope('context_encoding') as scope: + q = tf.nn.dropout(q, 
keep_prob=self.keep_prob) + a = tf.nn.dropout(a, keep_prob=self.keep_prob) + q_encode = self.rnn_layer(q) + tf.get_variable_scope().reuse_variables() + a_encode = self.rnn_layer(a) + return q_encode, a_encode + + def mlp(self, bottom, size, layer_num, name, reuse=None): + """ + bottom: 上层输入 + size: 神经元大小 + layer_num: 神经网络层数 + name: mlp的名称 + reuse: 是否复用层 + """ + now = bottom + for i in xrange(layer_num): + now = tf.layers.dense(now, 128, + activation=tf.nn.relu, + name=name + '_{}'.format(i), + reuse=reuse) + return now + + def rnn_layer(self, h): + sequence_length = h.get_shape()[1] + # (batch_size, time_step, embed_size) -> (time_step, batch_size, embed_size) + inputs = tf.transpose(h, [1, 0, 2]) + inputs = tf.reshape(inputs, [-1, self.config.embedding_size]) + inputs = tf.split(inputs, sequence_length, 0) + + if self.config.cell_type == 'lstm': + birnn_fw, birnn_bw = self.bi_lstm(self.config.rnn_size, self.config.layer_size, self.config.keep_prob) + else: + birnn_fw, birnn_bw = self.bi_gru(self.config.rnn_size, self.config.layer_size, self.config.keep_prob) + outputs_x1, _, _ = tf.contrib.rnn.static_bidirectional_rnn(birnn_fw, birnn_bw, inputs, dtype=tf.float32) + # (time_step, batch_size, 2*rnn_size) -> (batch_size, time_step, 2*rnn_size) + output_x1 = tf.transpose(outputs_x1, (1, 0, 2)) + return output_x1 + + def bi_lstm(self, rnn_size, layer_size, keep_prob): + # forward rnn + with tf.name_scope('fw_rnn'), tf.variable_scope('fw_rnn'): + lstm_fw_cell_list = [tf.contrib.rnn.LSTMCell(rnn_size) for _ in xrange(layer_size)] + lstm_fw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(lstm_fw_cell_list), output_keep_prob=keep_prob) + + # backward rnn + with tf.name_scope('bw_rnn'), tf.variable_scope('bw_rnn'): + lstm_bw_cell_list = [tf.contrib.rnn.LSTMCell(rnn_size) for _ in xrange(layer_size)] + lstm_bw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(lstm_fw_cell_list), output_keep_prob=keep_prob) + + return lstm_fw_cell_m, lstm_bw_cell_m + + def bi_gru(self, rnn_size, layer_size, keep_prob): + # forward rnn + with tf.name_scope('fw_rnn'), tf.variable_scope('fw_rnn'): + gru_fw_cell_list = [tf.contrib.rnn.GRUCell(rnn_size) for _ in xrange(layer_size)] + gru_fw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(gru_fw_cell_list), output_keep_prob=keep_prob) + + # backward rnn + with tf.name_scope('bw_rnn'), tf.variable_scope('bw_rnn'): + gru_bw_cell_list = [tf.contrib.rnn.GRUCell(rnn_size) for _ in xrange(layer_size)] + gru_bw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(gru_bw_cell_list), output_keep_prob=keep_prob) + + return gru_fw_cell_m, gru_bw_cell_m + + def add_loss_op(self, pred): + """ + 损失节点 + """ + # [batch_size, 2] + y_hat = tf.nn.softmax(pred, dim=-1) + loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(self.y, pred)) + tf.add_to_collection('total_loss', loss) + total_loss = tf.add_n(tf.get_collection('total_loss')) + return y_hat, total_loss + + def add_train_op(self, loss): + """ + 训练节点 + """ + with tf.name_scope('train_op'): + # 记录训练步骤 + self.global_step = tf.Variable(0, name='global_step', trainable=False) + opt = tf.train.AdamOptimizer(self.config.lr) + # train_op = opt.minimize(loss, self.global_step) + train_variables = tf.trainable_variables() + grads_vars = opt.compute_gradients(loss, train_variables) + for i, (grad, var) in enumerate(grads_vars): + grads_vars[i] = (tf.clip_by_norm(grad, self.config.grad_clip), var) + train_op = opt.apply_gradients(grads_vars, 
global_step=self.global_step) + return train_op diff --git a/decomposable_att_model/run.sh b/decomposable_att_model/run.sh new file mode 100755 index 0000000..26df3c0 --- /dev/null +++ b/decomposable_att_model/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash + + +echo "train model" +python decomp_att.py --train + + +echo "test model" +python decomp_att.py --test diff --git a/decomposable_att_model/utils.py b/decomposable_att_model/utils.py new file mode 100755 index 0000000..8b28136 --- /dev/null +++ b/decomposable_att_model/utils.py @@ -0,0 +1,125 @@ +# -*- encoding:utf-8 -*- +import numpy as np +import tensorflow as tf + + +def padding(data, max_len): + return tf.keras.preprocessing.sequence.pad_sequences(data, max_len, padding='post', truncating='post') + +def eval_map_mrr(qids, aids, preds, labels): + # 衡量map指标和mrr指标 + dic = dict() + pre_dic = dict() + for qid, aid, pred, label in zip(qids, aids, preds, labels): + pre_dic.setdefault(qid, []) + pre_dic[qid].append([aid, pred, label]) + for qid in pre_dic: + dic[qid] = sorted(pre_dic[qid], key=lambda k: k[1], reverse=True) + aid2rank = {aid:[label, rank] for (rank, (aid, pred, label)) in enumerate(dic[qid])} + dic[qid] = aid2rank + # correct = 0 + # total = 0 + # for qid in dic: + # cur_correct = 0 + # for aid in dic[qid]: + # if dic[qid][aid][0] == 1: + # cur_correct += 1 + # if cur_correct > 0: + # correct += 1 + # total += 1 + # print(correct * 1. / total) + + MAP = 0.0 + MRR = 0.0 + useful_q_len = 0 + for q_id in dic: + sort_rank = sorted(dic[q_id].items(), key=lambda k: k[1][1], reverse=False) + correct = 0 + total = 0 + AP = 0.0 + mrr_mark = False + for i in range(len(sort_rank)): + if sort_rank[i][1][0] == 1: + correct += 1 + if correct == 0: + continue + useful_q_len += 1 + correct = 0 + for i in range(len(sort_rank)): + # compute MRR + if sort_rank[i][1][0] == 1 and mrr_mark == False: + MRR += 1.0 / float(i + 1) + mrr_mark = True + # compute MAP + total += 1 + if sort_rank[i][1][0] == 1: + correct += 1 + AP += float(correct) / float(total) + + AP /= float(correct) + MAP += AP + + MAP /= useful_q_len + MRR /= useful_q_len + return MAP, MRR + +def build_embedding(in_file, word_dict): + # 构建预训练的embedding矩阵 + num_words = max(word_dict.values()) + 1 + dim = int(in_file.split('.')[-2][:-1]) + embeddings = np.zeros((num_words, dim)) + + if in_file is not None: + pre_trained = 0 + initialized = {} + avg_sigma = 0 + avg_mu = 0 + for line in open(in_file).readlines(): + sp = line.split() + assert len(sp) == dim + 1 + if sp[0] in word_dict: + initialized[sp[0]] = True + pre_trained += 1 + embeddings[word_dict[sp[0]]] = [float(x) for x in sp[1:]] + mu = embeddings[word_dict[sp[0]]].mean() + #print embeddings[word_dict[sp[0]]] + sigma = np.std(embeddings[word_dict[sp[0]]]) + avg_mu += mu + avg_sigma += sigma + avg_sigma /= 1. * pre_trained + avg_mu /= 1. 
* pre_trained + for w in word_dict: + if w not in initialized: + embeddings[word_dict[w]] = np.random.normal(avg_mu, avg_sigma, (dim,)) + print('Pre-trained: %d (%.2f%%)' % + (pre_trained, pre_trained * 100.0 / num_words)) + return embeddings.astype(np.float32) + + +class Iterator(object): + """ + 数据迭代器 + """ + def __init__(self, x): + self.x = x + self.sample_num = len(self.x) + + def next_batch(self, batch_size, shuffle=True): + # produce X, Y_out, Y_in, X_len, Y_in_len, Y_out_len + if shuffle: + np.random.shuffle(self.x) + l = np.random.randint(0, self.sample_num - batch_size + 1) + r = l + batch_size + x_part = self.x[l:r] + return x_part + + def next(self, batch_size, shuffle=False): + if shuffle: + np.random.shuffle(self.x) + l = 0 + while l < self.sample_num: + r = min(l + batch_size, self.sample_num) + batch_size = r - l + x_part = self.x[l:r] + l += batch_size + yield x_part diff --git a/download.sh b/download.sh new file mode 100755 index 0000000..26915c2 --- /dev/null +++ b/download.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +PWD=$(pwd) + +# Download GloVe +GLOVE_DIR=$PWD/data/embedding +mkdir -p $GLOVE_DIR +wget http://nlp.stanford.edu/data/glove.6B.300d.zip -O $GLOVE_DIR/glove.6B.300d.zip +unzip $GLOVE_DIR/glove.6B.300d.zip -d $GLOVE_DIR + +# Download Glove Character Embedding +# wget https://raw.githubusercontent.com/minimaxir/char-embeddings/master/glove.840B.300d-char.txt -O $GLOVE_DIR/glove.840B.300d-char.txt + +# Download fasttext +# FASTTEXT_DIR=~/data/fasttext +# mkdir -p $FASTTEXT_DIR +# wget https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki-news-300d-1M.vec.zip -O $FASTTEXT_DIR/wiki-news-300d-1M.vec.zip +# unzip $FASTTEXT_DIR/wiki-news-300d-1M.vec.zip -d $FASTTEXT_DIR \ No newline at end of file diff --git a/code/preprocess_wiki.ipynb b/preprocess_wiki.ipynb old mode 100644 new mode 100755 similarity index 100% rename from code/preprocess_wiki.ipynb rename to preprocess_wiki.ipynb diff --git a/preprocess_wiki.py b/preprocess_wiki.py new file mode 100755 index 0000000..e7aafea --- /dev/null +++ b/preprocess_wiki.py @@ -0,0 +1,165 @@ + +# coding: utf-8 + +# In[1]: + + +import os +import sys +stdout = sys.stdout +reload(sys) +sys.stdout = stdout + +import cPickle as pkl + +from collections import Counter +from nltk import sent_tokenize, word_tokenize +from nltk.corpus import stopwords, wordnet +from nltk.stem import WordNetLemmatizer +import jieba +# jieba.enable_parallel(8) +lemma = WordNetLemmatizer() + +raw_data_path = './data/WikiQA/raw' +processed_data_path = './data/WikiQA/processed' + +if not os.path.exists(processed_data_path): + os.mkdir(processed_data_path) + + +# In[8]: + + +# 分词、词干化处理 +def segment(filename, use_lemma=True): + processed_qa = [] + count = 0 + with open(os.path.join(raw_data_path, filename), 'r') as fr: + fr.readline() + for line in fr: + items = line.strip().split('\t') + qid, q, aid, a, label = items[0], items[1], items[4], items[5], items[6] + if use_lemma: + q = ' '.join([lemma.lemmatize(_) for _ in jieba.cut(q)]).lower() + a = ' '.join([lemma.lemmatize(_) for _ in jieba.cut(a)]).lower() + else: + q = ' '.join(jieba.cut(q)).lower() + q = ' '.join(jieba.cut(a)).lower() + processed_qa.append('\t'.join([qid, q, aid, a, label])) + count += 1 + if count % 1000 == 0: + print('Finished {}'.format(count)) + return processed_qa + +# 构建词典 +def build_vocab(corpus, topk=None): + vocab = Counter() + for line in corpus: + qid, q, aid, a, label = line.strip().split('\t') + vocab.update(q.split()) + vocab.update(a.split()) + if topk: + vocab = 
vocab.most_common(topk) + else: + vocab = dict(vocab.most_common()).keys() + vocab = {_ : i+2 for i, _ in enumerate(vocab)} + vocab[''] = 0 + vocab[''] = 1 + reverse_vocab = dict(zip(vocab.values(), vocab.keys())) + return vocab, reverse_vocab + +# 将每个词映射为词典中的id +def transform(corpus, word2id, unk_id=1): + transformed_corpus = [] + for line in corpus: + qid, q, aid, a, label = line.strip().split('\t') + q = [word2id.get(w, unk_id) for w in q.split()] + a = [word2id.get(w, unk_id) for w in a.split()] + transformed_corpus.append([qid, q, aid, a, int(label)]) + return transformed_corpus + +# 得到pointwise形式的数据,即(Q, A, label) +def pointwise_data(corpus, keep_ids=False): + # (q, a, label) + pointwise_corpus = [] + for sample in corpus: + qid, q, aid, a, label = sample + if keep_ids: + pointwise_corpus.append((qid, q, aid, a, label)) + else: + pointwise_corpus.append((q, a, label)) + return pointwise_corpus + +# 得到pairwise形式的数据,即(Q, positive A, negative A) +def pairwise_data(corpus): + # (q, a_pos, a_neg), two answers must from the same q + # once a question contains no positive answers, we discard this sample. + pairwise_corpus = dict() + for sample in corpus: + qid, q, aid, a, label = sample + pairwise_corpus.setdefault(qid, dict()) + pairwise_corpus[qid].setdefault('pos', list()) + pairwise_corpus[qid].setdefault('neg', list()) + pairwise_corpus[qid]['q'] = q + if label == 0: + pairwise_corpus[qid]['neg'].append(a) + else: + pairwise_corpus[qid]['pos'].append(a) + real_pairwise_corpus = [] + for qid in pairwise_corpus: + q = pairwise_corpus[qid]['q'] + for pos in pairwise_corpus[qid]['pos']: + for neg in pairwise_corpus[qid]['neg']: + real_pairwise_corpus.append((q, pos, neg)) + return real_pairwise_corpus + +# 得到listwise形式的数据,即(Q, All answers related to this Q) +def listwise_data(corpus): + # (q, a_list) + listwise_corpus = dict() + for sample in corpus: + qid, q, aid, a, label = sample + listwise_corpus.setdefault(qid, dict()) + listwise_corpus[qid].setdefault('a', list()) + listwise_corpus[qid]['q'] = q + listwise_corpus[qid]['a'].append(a) + real_listwise_corpus = [] + for qid in listwise_corpus: + q = listwise_corpus[qid]['q'] + alist = listwise_corpus[qid]['a'] + real_listwise_corpus.append((q, alist)) + return real_listwise_corpus + + +train_processed_qa = segment('WikiQA-train.tsv') +val_processed_qa = segment('WikiQA-dev.tsv') +test_processed_qa = segment('WikiQA-test.tsv') +word2id, id2word = build_vocab(train_processed_qa) + +transformed_train_corpus = transform(train_processed_qa, word2id) +pointwise_train_corpus = pointwise_data(transformed_train_corpus, keep_ids=True) +pairwise_train_corpus = pairwise_data(transformed_train_corpus) +listwise_train_corpus = listwise_data(transformed_train_corpus) + +transformed_val_corpus = transform(val_processed_qa, word2id) +pointwise_val_corpus = pointwise_data(transformed_val_corpus, keep_ids=True) +pairwise_val_corpus = pointwise_data(transformed_val_corpus, keep_ids=True) +listwise_val_corpus = listwise_data(transformed_val_corpus) + +transformed_test_corpus = transform(test_processed_qa, word2id) +pointwise_test_corpus = pointwise_data(transformed_test_corpus, keep_ids=True) +pairwise_test_corpus = pointwise_data(transformed_test_corpus, keep_ids=True) +listwise_test_corpus = listwise_data(transformed_test_corpus) + + +with open(os.path.join(processed_data_path, 'vocab.pkl'), 'w') as fw: + pkl.dump([word2id, id2word], fw) +with open(os.path.join(processed_data_path, 'pointwise_corpus.pkl'), 'w') as fw: + pkl.dump([pointwise_train_corpus, 
pointwise_val_corpus, pointwise_test_corpus], fw) +with open(os.path.join(processed_data_path, 'pairwise_corpus.pkl'), 'w') as fw: + pkl.dump([pairwise_train_corpus, pairwise_val_corpus, pairwise_test_corpus], fw) +with open(os.path.join(processed_data_path, 'listwise_corpus.pkl'), 'w') as fw: + pkl.dump([listwise_train_corpus, listwise_val_corpus, listwise_test_corpus], fw) + +print('done!') + diff --git a/qacnn/._models.py b/qacnn/._models.py new file mode 100755 index 0000000..772354b Binary files /dev/null and b/qacnn/._models.py differ diff --git a/qacnn/README.me b/qacnn/README.me new file mode 100755 index 0000000..95b06ab --- /dev/null +++ b/qacnn/README.me @@ -0,0 +1,23 @@ +# 使用pairwise形式的QACNN网络实现问答任务 + +## 准备 + +#### 下载词向量文件[glove](../download.sh)。 + +``` +cd .. +bash download.sh +``` + +#### 预处理wiki数据 + +``` +cd .. +python preprocess_wiki.py +``` + +## 运行 + +``` +bash run.sh +``` diff --git a/qacnn/models.py b/qacnn/models.py new file mode 100755 index 0000000..0be58b7 --- /dev/null +++ b/qacnn/models.py @@ -0,0 +1,143 @@ +# -*- encoding:utf-8 -*- +import tensorflow as tf +import numpy as np + +class QACNN(object): + """ + pairwise学习模型 + """ + def __init__(self, config): + self.config = config + # 输入 + self.add_placeholders() + # [batch_size, sequence_size, embed_size] + q_embed, aplus_embed, aminus_embed = self.add_embeddings() + # [batch_size, sequence_size, hidden_size, 1] + self.h_q, self.h_ap, self.h_am = self.add_hl(q_embed, aplus_embed, aminus_embed) + # [batch_size, total_channels] + real_pool_q, real_pool_ap, real_pool_am = self.add_model(q_embed, aplus_embed, aminus_embed) + # [batch_size, 1] + self.q_ap_cosine, self.q_am_cosine = self.calc_cosine(real_pool_q, real_pool_ap, real_pool_am) + # 损失和精确度 + self.total_loss, self.loss, self.accu = self.add_loss_op(self.q_ap_cosine, self.q_am_cosine) + # 训练节点 + self.train_op = self.add_train_op(self.total_loss) + + + # 输入 + def add_placeholders(self): + # 问题 + self.q = tf.placeholder(tf.int32, + shape=[None, self.config.max_q_length], + name='Question') + # 正向回答 + self.aplus = tf.placeholder(tf.int32, + shape=[None, self.config.max_a_length], + name='PosAns') + # 负向回答 + self.aminus = tf.placeholder(tf.int32, + shape=[None, self.config.max_a_length], + name='NegAns') + # drop_out + self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') + self.batch_size = tf.shape(self.q)[0] + + # word embeddings + def add_embeddings(self): + with tf.variable_scope('embedding'): + if self.config.embeddings is not None: + embeddings = tf.Variable(self.config.embeddings, name="embeddings", trainable=False) + else: + embeddings = tf.get_variable('embeddings', shape=[self.config.vocab_size, self.config.embedding_size], initializer=tf.uniform_unit_scaling_initializer()) + q_embed = tf.nn.embedding_lookup(embeddings, self.q) + aplus_embed = tf.nn.embedding_lookup(embeddings, self.aplus) + aminus_embed = tf.nn.embedding_lookup(embeddings, self.aminus) + q_embed = tf.nn.dropout(q_embed, keep_prob=self.keep_prob) + aplus_embed = tf.nn.dropout(aplus_embed, keep_prob=self.keep_prob) + aminus_embed = tf.nn.dropout(aminus_embed, keep_prob=self.keep_prob) + return q_embed, aplus_embed, aminus_embed + + # Hidden Layer + def add_hl(self, q_embed, aplus_embed, aminus_embed): + with tf.variable_scope('HL'): + W = tf.get_variable('weights', shape=[self.config.embedding_size, self.config.hidden_size], initializer=tf.uniform_unit_scaling_initializer()) + b = tf.get_variable('biases', initializer=tf.constant(0.1, shape=[self.config.hidden_size])) + h_q = 
tf.reshape(tf.nn.tanh(tf.matmul(tf.reshape(q_embed, [-1, self.config.embedding_size]), W)+b), [-1, self.config.max_q_length, self.config.hidden_size]) + h_ap = tf.reshape(tf.nn.tanh(tf.matmul(tf.reshape(aplus_embed, [-1, self.config.embedding_size]), W)+b), [-1, self.config.max_a_length, self.config.hidden_size]) + h_am = tf.reshape(tf.nn.tanh(tf.matmul(tf.reshape(aminus_embed, [-1, self.config.embedding_size]), W)+b), [-1, self.config.max_a_length, self.config.hidden_size]) + tf.add_to_collection('total_loss', 0.5*self.config.l2_reg_lambda*tf.nn.l2_loss(W)) + return h_q, h_ap, h_am + + # CNN层 + def add_model(self, h_q, h_ap, h_am): + pool_q = list() + pool_ap = list() + pool_am = list() + h_q = tf.reshape(h_q, [-1, self.config.max_q_length, self.config.embedding_size, 1]) + h_ap = tf.reshape(h_ap, [-1, self.config.max_a_length, self.config.embedding_size, 1]) + h_am = tf.reshape(h_am, [-1, self.config.max_a_length, self.config.embedding_size, 1]) + for i, filter_size in enumerate(self.config.filter_sizes): + with tf.variable_scope('filter{}'.format(filter_size)): + conv1_W = tf.get_variable('W_q', shape=[filter_size, self.config.embedding_size, 1, self.config.num_filters], initializer=tf.truncated_normal_initializer(.0, .1)) + conv2_W = tf.get_variable('W_a', shape=[filter_size, self.config.embedding_size, 1, self.config.num_filters], initializer=tf.truncated_normal_initializer(.0, .1)) + conv1_b = tf.get_variable('conv_qb', initializer=tf.constant(0.1, shape=[self.config.num_filters])) + conv2_b = tf.get_variable('conv_ab', initializer=tf.constant(0.1, shape=[self.config.num_filters])) + # pooling层的bias,Q和A分开 + pool_qb = tf.get_variable('pool_qb', initializer=tf.constant(0.1, shape=[self.config.num_filters])) + pool_ab = tf.get_variable('pool_ab', initializer=tf.constant(0.1, shape=[self.config.num_filters])) + # 卷积 + out_q = tf.nn.relu((tf.nn.conv2d(h_q, conv1_W, [1,1,1,1], padding='VALID')+conv1_b)) + # 池化 + out_q = tf.nn.max_pool(out_q, [1,self.config.max_q_length-filter_size+1,1,1], [1,1,1,1], padding='VALID') + out_q = tf.nn.tanh(out_q+pool_qb) + pool_q.append(out_q) + + out_ap = tf.nn.relu((tf.nn.conv2d(h_ap, conv2_W, [1,1,1,1], padding='VALID')+conv2_b)) + out_ap = tf.nn.max_pool(out_ap, [1,self.config.max_a_length-filter_size+1,1,1], [1,1,1,1], padding='VALID') + out_ap = tf.nn.tanh(out_ap+pool_ab) + pool_ap.append(out_ap) + + out_am = tf.nn.relu((tf.nn.conv2d(h_am, conv2_W, [1,1,1,1], padding='VALID')+conv2_b)) + out_am = tf.nn.max_pool(out_am, [1,self.config.max_a_length-filter_size+1,1,1], [1,1,1,1], padding='VALID') + out_am = tf.nn.tanh(out_am+pool_ab) + pool_am.append(out_am) + + # 加入正则项 + tf.add_to_collection('total_loss', 0.5*self.config.l2_reg_lambda*tf.nn.l2_loss(conv1_W)) + tf.add_to_collection('total_loss', 0.5*self.config.l2_reg_lambda*tf.nn.l2_loss(conv2_W)) + + total_channels = len(self.config.filter_sizes)*self.config.num_filters + real_pool_q = tf.reshape(tf.concat(pool_q, 3), [-1, total_channels]) + real_pool_ap = tf.reshape(tf.concat(pool_ap, 3), [-1, total_channels]) + real_pool_am = tf.reshape(tf.concat(pool_am, 3), [-1, total_channels]) + + return real_pool_q, real_pool_ap, real_pool_am + + # 计算cosine + def calc_cosine(self, real_pool_q, real_pool_ap, real_pool_am): + normalized_q_h_pool = tf.nn.l2_normalize(real_pool_q, dim=1) + normalized_pos_h_pool = tf.nn.l2_normalize(real_pool_ap, dim=1) + normalized_neg_h_pool = tf.nn.l2_normalize(real_pool_am, dim=1) + q_ap_cosine = tf.reduce_sum(tf.multiply(normalized_q_h_pool, normalized_pos_h_pool), 1) + 
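# --- Illustrative sketch (not part of the original patch) ------------------
# calc_cosine() l2-normalises the pooled question/answer vectors and takes
# their inner product, which is exactly the cosine similarity. The two scores
# (q vs. positive answer above, q vs. negative answer below) then feed the
# pairwise hinge loss max(0, m - cos(q, a+) + cos(q, a-)) built in
# add_loss_op(). A NumPy equivalent, with helper names and the margin value
# invented only for this sketch:
import numpy as np

def cosine_rows(x, y, eps=1e-12):
    # row-wise cosine similarity for two batches of vectors, shape [batch, dim]
    x = x / (np.linalg.norm(x, axis=1, keepdims=True) + eps)
    y = y / (np.linalg.norm(y, axis=1, keepdims=True) + eps)
    return np.sum(x * y, axis=1)  # shape [batch]

def pairwise_hinge_loss(cos_pos, cos_neg, margin=0.05):
    # max(0, margin - cos(q, a+) + cos(q, a-)), summed over the batch
    # (add_loss_op() sums the per-sample losses with tf.reduce_sum)
    return np.maximum(0.0, margin - cos_pos + cos_neg).sum()
# ----------------------------------------------------------------------------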
q_am_cosine = tf.reduce_sum(tf.multiply(normalized_q_h_pool, normalized_neg_h_pool), 1) + + return q_ap_cosine, q_am_cosine + + # 损失节点 + def add_loss_op(self, q_ap_cosine, q_am_cosine): + original_loss = self.config.m - q_ap_cosine + q_am_cosine + l = tf.maximum(tf.zeros_like(original_loss), original_loss) + loss = tf.reduce_sum(l) + tf.add_to_collection('total_loss', loss) + total_loss = tf.add_n(tf.get_collection('total_loss')) + accu = tf.reduce_mean(tf.cast(tf.equal(0., l), tf.float32)) + return total_loss, loss, accu + + # 训练节点 + def add_train_op(self, loss): + with tf.name_scope('train_op'): + # 记录训练步骤 + self.global_step = tf.Variable(0, name='global_step', trainable=False) + opt = tf.train.AdamOptimizer(self.config.lr) + train_op = opt.minimize(loss, self.global_step) + return train_op \ No newline at end of file diff --git a/code/qacnn.py b/qacnn/qacnn.py similarity index 96% rename from code/qacnn.py rename to qacnn/qacnn.py index f4a4bf8..f25550c 100755 --- a/code/qacnn.py +++ b/qacnn/qacnn.py @@ -7,14 +7,13 @@ reload(sys) sys.stdout = stdout -os.environ["CUDA_VISIBLE_DEVICES"] = "1" +os.environ["CUDA_VISIBLE_DEVICES"] = "0" import cPickle as pkl - from utils import * - from models import QACNN + class QACNNConfig(object): def __init__(self, vocab_size, embeddings=None): # 输入问题(句子)长度 @@ -49,8 +48,6 @@ def __init__(self, vocab_size, embeddings=None): self.cf.gpu_options.per_process_gpu_memory_fraction = 0.2 - - def train(train_corpus, config, val_corpus, eval_train_corpus=None): iterator = Iterator(train_corpus) @@ -73,7 +70,7 @@ def train(train_corpus, config, val_corpus, eval_train_corpus=None): count += 1 if count % 10 == 0: print('[epoch {}, batch {}]Loss:{}, Accuracy:{}'.format(epoch, count, loss, accu)) - saver.save(sess,'models/qacnn/my_model', global_step=epoch) + saver.save(sess,'{}/my_model'.format(model_path), global_step=epoch) if eval_train_corpus is not None: train_res = evaluate(sess, model, eval_train_corpus, config) print('[train] ' + train_res) @@ -82,7 +79,6 @@ def train(train_corpus, config, val_corpus, eval_train_corpus=None): print('[eval] ' + val_res) - def evaluate(sess, model, corpus, config): iterator = Iterator(corpus) @@ -122,7 +118,7 @@ def test(corpus, config): with tf.Session(config=config.cf) as sess: model = QACNN(config) saver = tf.train.Saver() - saver.restore(sess, tf.train.latest_checkpoint('models/qacnn')) + saver.restore(sess, tf.train.latest_checkpoint(model_path)) print('[test] ' + evaluate(sess, model, corpus, config)) @@ -178,6 +174,7 @@ def main(args): raw_data_path = '../data/WikiQA/raw' processed_data_path = '../data/WikiQA/processed' embedding_path = '../data/embedding/glove.6B.300d.txt' + model_path = 'models' with open(os.path.join(processed_data_path, 'vocab.pkl'), 'r') as fr: word2id, id2word = pkl.load(fr) diff --git a/qacnn/run.sh b/qacnn/run.sh new file mode 100755 index 0000000..10734f6 --- /dev/null +++ b/qacnn/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash + + +echo "train model" +python qacnn.py --train + + +echo "test model" +python qacnn.py --test diff --git a/qacnn/utils.py b/qacnn/utils.py new file mode 100755 index 0000000..8b28136 --- /dev/null +++ b/qacnn/utils.py @@ -0,0 +1,125 @@ +# -*- encoding:utf-8 -*- +import numpy as np +import tensorflow as tf + + +def padding(data, max_len): + return tf.keras.preprocessing.sequence.pad_sequences(data, max_len, padding='post', truncating='post') + +def eval_map_mrr(qids, aids, preds, labels): + # 衡量map指标和mrr指标 + dic = dict() + pre_dic = dict() + for qid, aid, pred, label in zip(qids, 
aids, preds, labels): + pre_dic.setdefault(qid, []) + pre_dic[qid].append([aid, pred, label]) + for qid in pre_dic: + dic[qid] = sorted(pre_dic[qid], key=lambda k: k[1], reverse=True) + aid2rank = {aid:[label, rank] for (rank, (aid, pred, label)) in enumerate(dic[qid])} + dic[qid] = aid2rank + # correct = 0 + # total = 0 + # for qid in dic: + # cur_correct = 0 + # for aid in dic[qid]: + # if dic[qid][aid][0] == 1: + # cur_correct += 1 + # if cur_correct > 0: + # correct += 1 + # total += 1 + # print(correct * 1. / total) + + MAP = 0.0 + MRR = 0.0 + useful_q_len = 0 + for q_id in dic: + sort_rank = sorted(dic[q_id].items(), key=lambda k: k[1][1], reverse=False) + correct = 0 + total = 0 + AP = 0.0 + mrr_mark = False + for i in range(len(sort_rank)): + if sort_rank[i][1][0] == 1: + correct += 1 + if correct == 0: + continue + useful_q_len += 1 + correct = 0 + for i in range(len(sort_rank)): + # compute MRR + if sort_rank[i][1][0] == 1 and mrr_mark == False: + MRR += 1.0 / float(i + 1) + mrr_mark = True + # compute MAP + total += 1 + if sort_rank[i][1][0] == 1: + correct += 1 + AP += float(correct) / float(total) + + AP /= float(correct) + MAP += AP + + MAP /= useful_q_len + MRR /= useful_q_len + return MAP, MRR + +def build_embedding(in_file, word_dict): + # 构建预训练的embedding矩阵 + num_words = max(word_dict.values()) + 1 + dim = int(in_file.split('.')[-2][:-1]) + embeddings = np.zeros((num_words, dim)) + + if in_file is not None: + pre_trained = 0 + initialized = {} + avg_sigma = 0 + avg_mu = 0 + for line in open(in_file).readlines(): + sp = line.split() + assert len(sp) == dim + 1 + if sp[0] in word_dict: + initialized[sp[0]] = True + pre_trained += 1 + embeddings[word_dict[sp[0]]] = [float(x) for x in sp[1:]] + mu = embeddings[word_dict[sp[0]]].mean() + #print embeddings[word_dict[sp[0]]] + sigma = np.std(embeddings[word_dict[sp[0]]]) + avg_mu += mu + avg_sigma += sigma + avg_sigma /= 1. * pre_trained + avg_mu /= 1. * pre_trained + for w in word_dict: + if w not in initialized: + embeddings[word_dict[w]] = np.random.normal(avg_mu, avg_sigma, (dim,)) + print('Pre-trained: %d (%.2f%%)' % + (pre_trained, pre_trained * 100.0 / num_words)) + return embeddings.astype(np.float32) + + +class Iterator(object): + """ + 数据迭代器 + """ + def __init__(self, x): + self.x = x + self.sample_num = len(self.x) + + def next_batch(self, batch_size, shuffle=True): + # produce X, Y_out, Y_in, X_len, Y_in_len, Y_out_len + if shuffle: + np.random.shuffle(self.x) + l = np.random.randint(0, self.sample_num - batch_size + 1) + r = l + batch_size + x_part = self.x[l:r] + return x_part + + def next(self, batch_size, shuffle=False): + if shuffle: + np.random.shuffle(self.x) + l = 0 + while l < self.sample_num: + r = min(l + batch_size, self.sample_num) + batch_size = r - l + x_part = self.x[l:r] + l += batch_size + yield x_part diff --git a/seq_match_seq/README.me b/seq_match_seq/README.me new file mode 100755 index 0000000..09a0923 --- /dev/null +++ b/seq_match_seq/README.me @@ -0,0 +1,23 @@ +# 复现《A COMPARE-AGGREGATE MODEL FOR MATCHING TEXT SEQUENCES》中的模型完成问答任务 + +## 准备 + +#### 下载词向量文件[glove](../download.sh)。 + +``` +cd .. +bash download.sh +``` + +#### 预处理wiki数据 + +``` +cd .. 
+python preprocess_wiki.py +``` + +## 运行 + +``` +bash run.sh +``` diff --git a/seq_match_seq/models.py b/seq_match_seq/models.py new file mode 100755 index 0000000..78c5687 --- /dev/null +++ b/seq_match_seq/models.py @@ -0,0 +1,181 @@ +# -*- encoding:utf-8 -*- +import tensorflow as tf +import numpy as np + + +class SeqMatchSeq(object): + def __init__(self, config): + self.config = config + # 输入 + self.add_placeholders() + # [batch_size, sequence_size, embed_size] + q_embed, a_embed = self.add_embeddings() + # 上下文编码 + q_encode, a_encode = self.context_encoding(q_embed, a_embed) + # attention层 + h_a = self.attend(q_encode, a_encode) + # compose层 + t = self.compare(a_encode, h_a) + # aggregate层 + agg_out = self.aggregate(t) + pred = self.soft_out(agg_out) + # 预测概率分布与损失 + self.y_hat, self.total_loss = self.add_loss_op(pred) + # 训练节点 + self.train_op = self.add_train_op(self.total_loss) + + def add_placeholders(self): + # 问题 + self.q = tf.placeholder(tf.int32, + shape=[None, self.config.max_q_length], + name='Question') + # 回答 + self.a = tf.placeholder(tf.int32, + shape=[None, self.config.max_a_length], + name='Ans') + self.y = tf.placeholder(tf.int32, shape=[None, ], name='label') + # drop_out + self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') + self.batch_size = tf.shape(self.q)[0] + + def add_embeddings(self): + with tf.variable_scope('embedding'): + if self.config.embeddings is not None: + embeddings = tf.Variable(self.config.embeddings, + name="embeddings", trainable=False) + else: + embeddings = tf.get_variable('embeddings', + shape=[self.config.vocab_size, self.config.embedding_size], + initializer=tf.uniform_unit_scaling_initializer()) + q_embed = tf.nn.embedding_lookup(embeddings, self.q) + a_embed = tf.nn.embedding_lookup(embeddings, self.a) + return q_embed, a_embed + + def context_encoding(self, q, a): + """ + q: [batch_size, q_length, embedding_dim] + a: [batch_size, a_length, embedding_dim] + """ + with tf.variable_scope('context_encoding') as scope: + q_encode = self.proj_layer(q, 'proj_layer', reuse=None) + a_encode = self.proj_layer(a, 'proj_layer', reuse=True) + return q_encode, a_encode + + + def attend(self, q, a): + """ + q: [batch_size, q_length, represent_dim] + a: [batch_size, a_length, represent_dim] + """ + q_proj = self.mlp(q, self.config.mem_dim, 1, None, + 'att_q_proj', reuse=None) + # [batch_size, q_length, a_length] + att_inner_product = tf.matmul(q_proj, tf.transpose(a, (0, 2, 1))) + # [batch_size, a_length, q_length] + q_weights = tf.nn.softmax( + tf.transpose( + att_inner_product, (0, 2, 1)), dim=-1) + output_a = tf.matmul(q_weights, q) + return output_a + + def compare(self, a, h_a): + """ + a: [batch_size, a_length, mem_dim] + a_att: [batch_size, a_length, mem_dim] + """ + if self.config.comp_type == 'mul': + out = a * h_a + else: + raise ValueError('{} method is not implemented!'.format( + self.config.comp_type)) + + return out + + def aggregate(self, t): + """ + t: [batch_size, a_length, mem_dim] + """ + pool_t = [] + for i, filter_size in enumerate(self.config.filter_sizes): + with tf.variable_scope('filter{}'.format(filter_size)): + # 卷积 + out_t = tf.layers.Conv1D(self.config.cov_dim, + filter_size, + strides=1, + padding='valid', + activation=tf.nn.relu, name='conv')(t) + # 池化 + out_t = tf.layers.MaxPooling1D( + self.config.max_a_length - filter_size + 1, + 1, name='max_pool')(out_t) + out_t = tf.reshape(out_t, + (tf.shape(out_t)[0], out_t.get_shape().as_list()[2])) + pool_t.append(out_t) + # [batch_size, n * mem_dim] + out = 
tf.concat(pool_t, axis=-1) + # [batch_size, mem_dim] + out = self.mlp(out, self.config.mem_dim, 1, + tf.nn.tanh, 'pre_out', use_dropout=False, reuse=None) + return out + + def soft_out(self, x): + out = self.mlp(x, 2, 1, None, + 'soft_out', use_dropout=False, reuse=None) + return out + + def mlp(self, bottom, size, layer_num, activation, name, use_dropout=True, reuse=None): + """ + bottom: 上层输入 + size: 神经元大小 + layer_num: 神经网络层数 + name: mlp的名称 + reuse: 是否复用层 + """ + now = bottom + if use_dropout: + now = tf.nn.dropout(now, keep_prob=self.keep_prob) + for i in xrange(layer_num): + now = tf.layers.dense(now, size, + activation=activation, + name=name + '_{}'.format(i), + reuse=reuse) + return now + + def proj_layer(self, seq, name, reuse=None): + out1 = self.mlp(seq, self.config.mem_dim, 1, + tf.nn.sigmoid, name + '_sigmoid', reuse=reuse) + out2 = self.mlp(seq, self.config.mem_dim, 1, + tf.nn.tanh, name + '_tanh', reuse=reuse) + out = out1 * out2 + return out + + def add_loss_op(self, pred): + """ + 损失节点 + """ + # [batch_size, 2] + y_hat = tf.nn.softmax(pred, dim=-1) + loss = tf.reduce_mean( + tf.losses.sparse_softmax_cross_entropy(self.y, pred)) + tf.add_to_collection('total_loss', loss) + total_loss = tf.add_n(tf.get_collection('total_loss')) + return y_hat, total_loss + + def add_train_op(self, loss): + """ + 训练节点 + """ + with tf.name_scope('train_op'): + # 记录训练步骤 + self.global_step = tf.Variable(0, + name='global_step', trainable=False) + opt = tf.train.AdamOptimizer(self.config.lr) + # train_op = opt.minimize(loss, self.global_step) + train_variables = tf.trainable_variables() + grads_vars = opt.compute_gradients(loss, train_variables) + for i, (grad, var) in enumerate(grads_vars): + grads_vars[i] = ( + tf.clip_by_norm(grad, self.config.grad_clip), var) + train_op = opt.apply_gradients( + grads_vars, global_step=self.global_step) + return train_op diff --git a/seq_match_seq/run.sh b/seq_match_seq/run.sh new file mode 100755 index 0000000..061e6ff --- /dev/null +++ b/seq_match_seq/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash + + +echo "train model" +python seq_match_seq.py --train + + +echo "test model" +python seq_match_seq.py --test diff --git a/seq_match_seq/seq_match_seq.py b/seq_match_seq/seq_match_seq.py new file mode 100755 index 0000000..3d4549d --- /dev/null +++ b/seq_match_seq/seq_match_seq.py @@ -0,0 +1,175 @@ +# -*- encoding:utf8 -*- +import tensorflow as tf +import numpy as np +import os +import sys +from copy import deepcopy +stdout = sys.stdout +reload(sys) +sys.stdout = stdout + +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + +import cPickle as pkl +from utils import * +from models import SeqMatchSeq + + +class SeqMatchSeqConfig(object): + def __init__(self, vocab_size, embeddings=None): + # 输入问题(句子)长度 + self.max_q_length = 200 + # 输入答案长度 + self.max_a_length = 200 + # 循环数 + self.num_epochs = 100 + # batch大小 + self.batch_size = 128 + # 词表大小 + self.vocab_size = vocab_size + # 词向量大小 + self.embeddings = embeddings + self.embedding_size = 100 + if self.embeddings is not None: + self.embedding_size = embeddings.shape[1] + # keep_prob=1-dropout + self.keep_prob = 0.6 + # 学习率 + self.lr = 0.0003 + self.grad_clip = 1 + + self.reg = 0 + self.mem_dim = 128 + self.cov_dim = 128 + self.filter_sizes = [2, 3, 4, 5] + self.comp_type = 'mul' + + self.cf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) + self.cf.gpu_options.per_process_gpu_memory_fraction = 0.2 + + +def train(train_corpus, config, val_corpus, eval_train_corpus=None): + iterator = Iterator(train_corpus) + + with 
tf.Session(config=config.cf) as sess: + model = SeqMatchSeq(config) + saver = tf.train.Saver() + sess.run(tf.initialize_all_variables()) + for epoch in xrange(config.num_epochs): + count = 0 + for batch_x in iterator.next(config.batch_size, shuffle=True): + batch_qids, batch_q, batch_aids, batch_ap, labels = zip(*batch_x) + batch_q = np.asarray(batch_q) + batch_ap = np.asarray(batch_ap) + labels = np.asarray(labels).astype(np.int32) + _, loss = sess.run([model.train_op, model.total_loss], + feed_dict={model.q:batch_q, + model.a:batch_ap, + model.y:labels, + model.keep_prob:config.keep_prob}) + count += 1 + if count % 10 == 0: + print('[epoch {}, batch {}]Loss:{}'.format(epoch, count, loss)) + saver.save(sess,'{}/my_model'.format(model_path), global_step=epoch) + if eval_train_corpus is not None: + train_res = evaluate(sess, model, eval_train_corpus, config) + print('[train] ' + train_res) + if val_corpus is not None: + val_res = evaluate(sess, model, val_corpus, config) + print('[eval] ' + val_res) + + +def evaluate(sess, model, corpus, config): + iterator = Iterator(corpus) + + count = 0 + total_qids = [] + total_aids = [] + total_pred = [] + total_labels = [] + total_loss = 0. + for batch_x in iterator.next(config.batch_size, shuffle=False): + batch_qids, batch_q, batch_aids, batch_ap, labels = zip(*batch_x) + batch_q = np.asarray(batch_q) + batch_ap = np.asarray(batch_ap) + y_hat, loss = sess.run([model.y_hat, model.total_loss], + feed_dict={model.q:batch_q, + model.a:batch_ap, + model.y:labels, + model.keep_prob:1.}) + y_hat = np.argmax(y_hat, axis=-1) + total_loss += loss + count += 1 + total_qids.append(batch_qids) + total_aids.append(batch_aids) + total_pred.append(y_hat) + total_labels.append(labels) + # print(batch_qids[0], [id2word[_] for _ in batch_q[0]], + # batch_aids[0], [id2word[_] for _ in batch_ap[0]]) + total_qids = np.concatenate(total_qids, axis=0) + total_aids = np.concatenate(total_aids, axis=0) + total_pred = np.concatenate(total_pred, axis=0) + total_labels = np.concatenate(total_labels, axis=0) + MAP, MRR = eval_map_mrr(total_qids, total_aids, total_pred, total_labels) + # print('Eval loss:{}'.format(total_loss / count)) + return 'MAP:{}, MRR:{}'.format(MAP, MRR) + + +def test(corpus, config): + with tf.Session(config=config.cf) as sess: + model = SeqMatchSeq(config) + saver = tf.train.Saver() + saver.restore(sess, tf.train.latest_checkpoint(model_path)) + print('[test] ' + evaluate(sess, model, corpus, config)) + + +def main(args): + max_q_length = 30 + max_a_length = 100 + + with open(os.path.join(processed_data_path, 'pointwise_corpus.pkl'), 'r') as fr: + train_corpus, val_corpus, test_corpus = pkl.load(fr) + + embeddings = build_embedding(embedding_path, word2id) + + train_qids, train_q, train_aids, train_ap, train_labels = zip(*train_corpus) + train_q = padding(train_q, max_q_length) + train_ap = padding(train_ap, max_a_length) + train_corpus = zip(train_qids, train_q, train_aids, train_ap, train_labels) + + + val_qids, val_q, val_aids, val_ap, labels = zip(*val_corpus) + val_q = padding(val_q, max_q_length) + val_ap = padding(val_ap, max_a_length) + val_corpus = zip(val_qids, val_q, val_aids, val_ap, labels) + + + test_qids, test_q, test_aids, test_ap, labels = zip(*test_corpus) + test_q = padding(test_q, max_q_length) + test_ap = padding(test_ap, max_a_length) + test_corpus = zip(test_qids, test_q, test_aids, test_ap, labels) + + config = SeqMatchSeqConfig(max(word2id.values()) + 1, embeddings=embeddings) + config.max_q_length = max_q_length + 
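# --- Illustrative sketch (not part of the original patch) ------------------
# padding() in utils.py wraps tf.keras.preprocessing.sequence.pad_sequences
# with padding='post' and truncating='post': every id sequence used above is
# truncated or zero-padded at the END to exactly max_len, so the model always
# receives fixed-size [batch, max_q_length] / [batch, max_a_length] inputs.
# A plain-Python equivalent (pad_to is a name invented for this sketch):
def pad_to(seqs, max_len, pad_id=0):
    out = []
    for s in seqs:
        s = list(s)[:max_len]                          # cut off the tail
        out.append(s + [pad_id] * (max_len - len(s)))  # pad the tail with 0s
    return out

# e.g. pad_to([[3, 7, 9], [5]], 4) -> [[3, 7, 9, 0], [5, 0, 0, 0]]
# ----------------------------------------------------------------------------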
config.max_a_length = max_a_length + if args.train: + train(deepcopy(train_corpus), config, val_corpus, deepcopy(train_corpus)) + elif args.test: + test(test_corpus, config) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--train", help="whether to train", action='store_true') + parser.add_argument("--test", help="whether to test", action='store_true') + args = parser.parse_args() + + raw_data_path = '../data/WikiQA/raw' + processed_data_path = '../data/WikiQA/processed' + embedding_path = '../data/embedding/glove.6B.300d.txt' + model_path = 'models' + + with open(os.path.join(processed_data_path, 'vocab.pkl'), 'r') as fr: + word2id, id2word = pkl.load(fr) + main(args) diff --git a/seq_match_seq/utils.py b/seq_match_seq/utils.py new file mode 100755 index 0000000..8b28136 --- /dev/null +++ b/seq_match_seq/utils.py @@ -0,0 +1,125 @@ +# -*- encoding:utf-8 -*- +import numpy as np +import tensorflow as tf + + +def padding(data, max_len): + return tf.keras.preprocessing.sequence.pad_sequences(data, max_len, padding='post', truncating='post') + +def eval_map_mrr(qids, aids, preds, labels): + # 衡量map指标和mrr指标 + dic = dict() + pre_dic = dict() + for qid, aid, pred, label in zip(qids, aids, preds, labels): + pre_dic.setdefault(qid, []) + pre_dic[qid].append([aid, pred, label]) + for qid in pre_dic: + dic[qid] = sorted(pre_dic[qid], key=lambda k: k[1], reverse=True) + aid2rank = {aid:[label, rank] for (rank, (aid, pred, label)) in enumerate(dic[qid])} + dic[qid] = aid2rank + # correct = 0 + # total = 0 + # for qid in dic: + # cur_correct = 0 + # for aid in dic[qid]: + # if dic[qid][aid][0] == 1: + # cur_correct += 1 + # if cur_correct > 0: + # correct += 1 + # total += 1 + # print(correct * 1. / total) + + MAP = 0.0 + MRR = 0.0 + useful_q_len = 0 + for q_id in dic: + sort_rank = sorted(dic[q_id].items(), key=lambda k: k[1][1], reverse=False) + correct = 0 + total = 0 + AP = 0.0 + mrr_mark = False + for i in range(len(sort_rank)): + if sort_rank[i][1][0] == 1: + correct += 1 + if correct == 0: + continue + useful_q_len += 1 + correct = 0 + for i in range(len(sort_rank)): + # compute MRR + if sort_rank[i][1][0] == 1 and mrr_mark == False: + MRR += 1.0 / float(i + 1) + mrr_mark = True + # compute MAP + total += 1 + if sort_rank[i][1][0] == 1: + correct += 1 + AP += float(correct) / float(total) + + AP /= float(correct) + MAP += AP + + MAP /= useful_q_len + MRR /= useful_q_len + return MAP, MRR + +def build_embedding(in_file, word_dict): + # 构建预训练的embedding矩阵 + num_words = max(word_dict.values()) + 1 + dim = int(in_file.split('.')[-2][:-1]) + embeddings = np.zeros((num_words, dim)) + + if in_file is not None: + pre_trained = 0 + initialized = {} + avg_sigma = 0 + avg_mu = 0 + for line in open(in_file).readlines(): + sp = line.split() + assert len(sp) == dim + 1 + if sp[0] in word_dict: + initialized[sp[0]] = True + pre_trained += 1 + embeddings[word_dict[sp[0]]] = [float(x) for x in sp[1:]] + mu = embeddings[word_dict[sp[0]]].mean() + #print embeddings[word_dict[sp[0]]] + sigma = np.std(embeddings[word_dict[sp[0]]]) + avg_mu += mu + avg_sigma += sigma + avg_sigma /= 1. * pre_trained + avg_mu /= 1. 
* pre_trained + for w in word_dict: + if w not in initialized: + embeddings[word_dict[w]] = np.random.normal(avg_mu, avg_sigma, (dim,)) + print('Pre-trained: %d (%.2f%%)' % + (pre_trained, pre_trained * 100.0 / num_words)) + return embeddings.astype(np.float32) + + +class Iterator(object): + """ + 数据迭代器 + """ + def __init__(self, x): + self.x = x + self.sample_num = len(self.x) + + def next_batch(self, batch_size, shuffle=True): + # produce X, Y_out, Y_in, X_len, Y_in_len, Y_out_len + if shuffle: + np.random.shuffle(self.x) + l = np.random.randint(0, self.sample_num - batch_size + 1) + r = l + batch_size + x_part = self.x[l:r] + return x_part + + def next(self, batch_size, shuffle=False): + if shuffle: + np.random.shuffle(self.x) + l = 0 + while l < self.sample_num: + r = min(l + batch_size, self.sample_num) + batch_size = r - l + x_part = self.x[l:r] + l += batch_size + yield x_part diff --git a/siamese_cnn/README.me b/siamese_cnn/README.me new file mode 100755 index 0000000..84f28b8 --- /dev/null +++ b/siamese_cnn/README.me @@ -0,0 +1,23 @@ +# 使用CNN网络实现问答任务 + +## 准备 + +#### 下载词向量文件[glove](../download.sh)。 + +``` +cd .. +bash download.sh +``` + +#### 预处理wiki数据 + +``` +cd .. +python preprocess_wiki.py +``` + +## 运行 + +``` +bash run.sh +``` diff --git a/siamese_cnn/models.py b/siamese_cnn/models.py new file mode 100755 index 0000000..b584a40 --- /dev/null +++ b/siamese_cnn/models.py @@ -0,0 +1,118 @@ +# -*- encoding:utf-8 -*- +import tensorflow as tf +import numpy as np + + +class SiameseCNN(object): + def __init__(self, config): + self.config = config + # 输入 + self.add_placeholders() + # [batch_size, sequence_size, embed_size] + q_embed, a_embed = self.add_embeddings() + with tf.variable_scope('siamese') as scope: + self.q_trans = self.network(q_embed, reuse=False) + scope.reuse_variables() + self.a_trans = self.network(a_embed, reuse=True) + # 损失和精确度 + self.total_loss = self.add_loss_op(self.q_trans, self.a_trans) + # 训练节点 + self.train_op = self.add_train_op(self.total_loss) + + # 输入 + def add_placeholders(self): + # 问题 + self.q = tf.placeholder(tf.int32, + shape=[None, self.config.max_q_length], + name='Question') + # 回答 + self.a = tf.placeholder(tf.int32, + shape=[None, self.config.max_a_length], + name='Ans') + self.y = tf.placeholder(tf.float32, shape=[None, ], name='label') + # drop_out + self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') + self.batch_size = tf.shape(self.q)[0] + + # word embeddings + def add_embeddings(self): + with tf.variable_scope('embedding'): + if self.config.embeddings is not None: + embeddings = tf.Variable(self.config.embeddings, name="embeddings", trainable=False) + else: + embeddings = tf.get_variable('embeddings', shape=[self.config.vocab_size, self.config.embedding_size], initializer=tf.uniform_unit_scaling_initializer()) + q_embed = tf.nn.embedding_lookup(embeddings, self.q) + a_embed = tf.nn.embedding_lookup(embeddings, self.a) + q_embed = tf.nn.dropout(q_embed, keep_prob=self.keep_prob) + a_embed = tf.nn.dropout(a_embed, keep_prob=self.keep_prob) + return q_embed, a_embed + + def network(self, x, reuse=False): + # (batch_size, conv_size) + conv1 = self.conv_layer(x, reuse=reuse) + # (batch_size, hidden_size) + fc1 = self.fc_layer(conv1, self.config.hidden_size, "fc1") + ac1 = tf.nn.relu(fc1) + # (batch_size, output_size) + fc2 = self.fc_layer(ac1, self.config.output_size, "fc2") + return fc2 + + def fc_layer(self, bottom, n_weight, name): + assert len(bottom.get_shape()) == 2 + n_prev_weight = bottom.get_shape()[1] + initer = 
tf.truncated_normal_initializer(stddev=0.01) + W = tf.get_variable(name+'W', dtype=tf.float32, shape=[n_prev_weight, n_weight], initializer=initer) + b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.0, shape=[n_weight], dtype=tf.float32)) + fc = tf.nn.bias_add(tf.matmul(bottom, W), b) + return fc + + def conv_layer(self, h, reuse=False): + pool = list() + max_len = h.get_shape()[1] + h = tf.reshape(h, [-1, max_len, h.get_shape()[2], 1]) + for i, filter_size in enumerate(self.config.filter_sizes): + with tf.variable_scope('filter{}'.format(filter_size)): + conv1_W = tf.get_variable('conv_W', shape=[filter_size, self.config.embedding_size, 1, self.config.num_filters], initializer=tf.truncated_normal_initializer(.0, .01)) + conv1_b = tf.get_variable('conv_b', initializer=tf.constant(0.0, shape=[self.config.num_filters])) + # pooling层的bias,Q和A分开 + pool_b = tf.get_variable('pool_b', initializer=tf.constant(0.0, shape=[self.config.num_filters])) + # 卷积 + out = tf.nn.relu((tf.nn.conv2d(h, conv1_W, [1,1,1,1], padding='VALID')+conv1_b)) + # 池化 + out = tf.nn.max_pool(out, [1,max_len-filter_size+1,1,1], [1,1,1,1], padding='VALID') + out = tf.nn.tanh(out+pool_b) + pool.append(out) + # 加入正则项 + if not reuse: + tf.add_to_collection('total_loss', 0.5 * self.config.l2_reg_lambda * tf.nn.l2_loss(conv1_W)) + + total_channels = len(self.config.filter_sizes) * self.config.num_filters + real_pool = tf.reshape(tf.concat(pool, 3), [self.batch_size, total_channels]) + return real_pool + + # 损失节点 + def add_loss_op(self, o1, o2): + # 此处用cos距离 + norm_o1 = tf.nn.l2_normalize(o1, dim=1) + norm_o2 = tf.nn.l2_normalize(o2, dim=1) + self.q_a_cosine = tf.reduce_sum(tf.multiply(o1, o2), 1) + + loss = self.contrastive_loss(self.q_a_cosine, self.y) + tf.add_to_collection('total_loss', loss) + total_loss = tf.add_n(tf.get_collection('total_loss')) + return total_loss + + def contrastive_loss(self, Ew, y): + l_1 = self.config.pos_weight * tf.square(1 - Ew) + l_0 = tf.square(tf.maximum(Ew, 0)) + loss = tf.reduce_mean(y * l_1 + (1 - y) * l_0) + return loss + + # 训练节点 + def add_train_op(self, loss): + with tf.name_scope('train_op'): + # 记录训练步骤 + self.global_step = tf.Variable(0, name='global_step', trainable=False) + opt = tf.train.AdamOptimizer(self.config.lr) + train_op = opt.minimize(loss, self.global_step) + return train_op diff --git a/siamese_cnn/run.sh b/siamese_cnn/run.sh new file mode 100755 index 0000000..535c05c --- /dev/null +++ b/siamese_cnn/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash + + +echo "train model" +python siamese_cnn.py --train + + +echo "test model" +python siamese_cnn.py --test diff --git a/code/siamese.py b/siamese_cnn/siamese_cnn.py similarity index 65% rename from code/siamese.py rename to siamese_cnn/siamese_cnn.py index 9626531..1c3dfe0 100755 --- a/code/siamese.py +++ b/siamese_cnn/siamese_cnn.py @@ -8,41 +8,11 @@ reload(sys) sys.stdout = stdout -os.environ["CUDA_VISIBLE_DEVICES"] = "1" +os.environ["CUDA_VISIBLE_DEVICES"] = "0" import cPickle as pkl - from utils import * - -from models import SiameseNN, SiameseCNN, SiameseRNN - -class NNConfig(object): - def __init__(self, vocab_size, embeddings=None): - # 输入问题(句子)长度 - self.max_q_length = 200 - # 输入答案长度 - self.max_a_length = 200 - # 循环数 - self.num_epochs = 100 - # batch大小 - self.batch_size = 128 - # 词表大小 - self.vocab_size = vocab_size - self.hidden_size = 256 - self.output_size = 128 - self.keep_prob = 0.6 - # 词向量大小 - self.embeddings = embeddings - self.embedding_size = 100 - if self.embeddings is not None: - self.embedding_size = 
embeddings.shape[1] - # 学习率 - self.lr = 0.001 - # contrasive loss 中的 positive loss部分的权重 - self.pos_weight = 0.25 - - self.cf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) - self.cf.gpu_options.per_process_gpu_memory_fraction = 0.2 +from models import SiameseCNN class CNNConfig(object): @@ -80,52 +50,12 @@ def __init__(self, vocab_size, embeddings=None): self.cf.gpu_options.per_process_gpu_memory_fraction = 0.2 -class RNNConfig(object): - def __init__(self, vocab_size, embeddings=None): - # 输入问题(句子)长度 - self.max_q_length = 200 - # 输入答案长度 - self.max_a_length = 200 - # 循环数 - self.num_epochs = 100 - # batch大小 - self.batch_size = 128 - # 词表大小 - self.vocab_size = vocab_size - # 词向量大小 - self.embeddings = embeddings - self.embedding_size = 100 - if self.embeddings is not None: - self.embedding_size = embeddings.shape[1] - # RNN单元类型和大小与堆叠层数 - self.cell_type = 'GRU' - self.rnn_size = 128 - self.layer_size = 2 - # 隐层大小 - self.hidden_size = 256 - self.output_size = 128 - # 每种filter的数量 - self.num_filters = 128 - self.keep_prob = 0.6 - # 学习率 - self.lr = 0.001 - # contrasive loss 中的 positive loss部分的权重 - self.pos_weight = 0.5 - - self.cf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) - self.cf.gpu_options.per_process_gpu_memory_fraction = 0.2 - def train(train_corpus, config, val_corpus, eval_train_corpus=None): iterator = Iterator(train_corpus) with tf.Session(config=config.cf) as sess: - if args.model == 'NN': - model = SiameseNN(config) - elif args.model == 'CNN': - model = SiameseCNN(config) - else: - model = SiameseRNN(config) + model = SiameseCNN(config) saver = tf.train.Saver() sess.run(tf.initialize_all_variables()) for epoch in xrange(config.num_epochs): @@ -142,7 +72,7 @@ def train(train_corpus, config, val_corpus, eval_train_corpus=None): count += 1 if count % 10 == 0: print('[epoch {}, batch {}]Loss:{}'.format(epoch, count, loss)) - saver.save(sess,'models/siamese_{}/my_model'.format(args.model.lower()), global_step=epoch) + saver.save(sess,'{}/my_model'.format(model_path), global_step=epoch) if eval_train_corpus is not None: train_res = evaluate(sess, model, eval_train_corpus, config) print('[train] ' + train_res) @@ -188,14 +118,9 @@ def evaluate(sess, model, corpus, config): def test(corpus, config): with tf.Session(config=config.cf) as sess: - if args.model == 'NN': - model = SiameseNN(config) - elif args.model == 'CNN': - model = SiameseCNN(config) - else: - model = SiameseRNN(config) + model = SiameseCNN(config) saver = tf.train.Saver() - saver.restore(sess, tf.train.latest_checkpoint('models/siamese_{}'.format(args.model))) + saver.restore(sess, tf.train.latest_checkpoint(model_path)) print('[test] ' + evaluate(sess, model, corpus, config)) @@ -225,12 +150,7 @@ def main(args): test_ap = padding(test_ap, max_a_length) test_corpus = zip(test_qids, test_q, test_aids, test_ap, labels) - if args.model == 'NN': - config = NNConfig(max(word2id.values()) + 1, embeddings=embeddings) - elif args.model == 'CNN': - config = CNNConfig(max(word2id.values()) + 1, embeddings=embeddings) - else: - config = RNNConfig(max(word2id.values()) + 1, embeddings=embeddings) + config = CNNConfig(max(word2id.values()) + 1, embeddings=embeddings) config.max_q_length = max_q_length config.max_a_length = max_a_length if args.train: @@ -244,12 +164,12 @@ def main(args): parser = argparse.ArgumentParser() parser.add_argument("--train", help="whether to train", action='store_true') parser.add_argument("--test", help="whether to test", action='store_true') - 
parser.add_argument("--model", help="choose models from nn, cnn, rnn", type=str, default='NN') args = parser.parse_args() raw_data_path = '../data/WikiQA/raw' processed_data_path = '../data/WikiQA/processed' embedding_path = '../data/embedding/glove.6B.300d.txt' + model_path = 'models' with open(os.path.join(processed_data_path, 'vocab.pkl'), 'r') as fr: word2id, id2word = pkl.load(fr) diff --git a/siamese_cnn/utils.py b/siamese_cnn/utils.py new file mode 100755 index 0000000..8b28136 --- /dev/null +++ b/siamese_cnn/utils.py @@ -0,0 +1,125 @@ +# -*- encoding:utf-8 -*- +import numpy as np +import tensorflow as tf + + +def padding(data, max_len): + return tf.keras.preprocessing.sequence.pad_sequences(data, max_len, padding='post', truncating='post') + +def eval_map_mrr(qids, aids, preds, labels): + # 衡量map指标和mrr指标 + dic = dict() + pre_dic = dict() + for qid, aid, pred, label in zip(qids, aids, preds, labels): + pre_dic.setdefault(qid, []) + pre_dic[qid].append([aid, pred, label]) + for qid in pre_dic: + dic[qid] = sorted(pre_dic[qid], key=lambda k: k[1], reverse=True) + aid2rank = {aid:[label, rank] for (rank, (aid, pred, label)) in enumerate(dic[qid])} + dic[qid] = aid2rank + # correct = 0 + # total = 0 + # for qid in dic: + # cur_correct = 0 + # for aid in dic[qid]: + # if dic[qid][aid][0] == 1: + # cur_correct += 1 + # if cur_correct > 0: + # correct += 1 + # total += 1 + # print(correct * 1. / total) + + MAP = 0.0 + MRR = 0.0 + useful_q_len = 0 + for q_id in dic: + sort_rank = sorted(dic[q_id].items(), key=lambda k: k[1][1], reverse=False) + correct = 0 + total = 0 + AP = 0.0 + mrr_mark = False + for i in range(len(sort_rank)): + if sort_rank[i][1][0] == 1: + correct += 1 + if correct == 0: + continue + useful_q_len += 1 + correct = 0 + for i in range(len(sort_rank)): + # compute MRR + if sort_rank[i][1][0] == 1 and mrr_mark == False: + MRR += 1.0 / float(i + 1) + mrr_mark = True + # compute MAP + total += 1 + if sort_rank[i][1][0] == 1: + correct += 1 + AP += float(correct) / float(total) + + AP /= float(correct) + MAP += AP + + MAP /= useful_q_len + MRR /= useful_q_len + return MAP, MRR + +def build_embedding(in_file, word_dict): + # 构建预训练的embedding矩阵 + num_words = max(word_dict.values()) + 1 + dim = int(in_file.split('.')[-2][:-1]) + embeddings = np.zeros((num_words, dim)) + + if in_file is not None: + pre_trained = 0 + initialized = {} + avg_sigma = 0 + avg_mu = 0 + for line in open(in_file).readlines(): + sp = line.split() + assert len(sp) == dim + 1 + if sp[0] in word_dict: + initialized[sp[0]] = True + pre_trained += 1 + embeddings[word_dict[sp[0]]] = [float(x) for x in sp[1:]] + mu = embeddings[word_dict[sp[0]]].mean() + #print embeddings[word_dict[sp[0]]] + sigma = np.std(embeddings[word_dict[sp[0]]]) + avg_mu += mu + avg_sigma += sigma + avg_sigma /= 1. * pre_trained + avg_mu /= 1. 
* pre_trained + for w in word_dict: + if w not in initialized: + embeddings[word_dict[w]] = np.random.normal(avg_mu, avg_sigma, (dim,)) + print('Pre-trained: %d (%.2f%%)' % + (pre_trained, pre_trained * 100.0 / num_words)) + return embeddings.astype(np.float32) + + +class Iterator(object): + """ + 数据迭代器 + """ + def __init__(self, x): + self.x = x + self.sample_num = len(self.x) + + def next_batch(self, batch_size, shuffle=True): + # produce X, Y_out, Y_in, X_len, Y_in_len, Y_out_len + if shuffle: + np.random.shuffle(self.x) + l = np.random.randint(0, self.sample_num - batch_size + 1) + r = l + batch_size + x_part = self.x[l:r] + return x_part + + def next(self, batch_size, shuffle=False): + if shuffle: + np.random.shuffle(self.x) + l = 0 + while l < self.sample_num: + r = min(l + batch_size, self.sample_num) + batch_size = r - l + x_part = self.x[l:r] + l += batch_size + yield x_part diff --git a/siamese_nn/._README.me b/siamese_nn/._README.me new file mode 100755 index 0000000..1cebd06 Binary files /dev/null and b/siamese_nn/._README.me differ diff --git a/siamese_nn/._run.sh b/siamese_nn/._run.sh new file mode 100755 index 0000000..77fd951 Binary files /dev/null and b/siamese_nn/._run.sh differ diff --git a/siamese_nn/README.me b/siamese_nn/README.me new file mode 100755 index 0000000..d1f5bb8 --- /dev/null +++ b/siamese_nn/README.me @@ -0,0 +1,23 @@ +# 使用NN网络实现问答任务 + +## 准备 + +#### 下载词向量文件[glove](../download.sh)。 + +``` +cd .. +bash download.sh +``` + +#### 预处理wiki数据 + +``` +cd .. +python preprocess_wiki.py +``` + +## 运行 + +``` +bash run.sh +``` diff --git a/siamese_nn/models.py b/siamese_nn/models.py new file mode 100755 index 0000000..d818667 --- /dev/null +++ b/siamese_nn/models.py @@ -0,0 +1,98 @@ +# -*- encoding:utf-8 -*- +import tensorflow as tf +import numpy as np + +class SiameseNN(object): + def __init__(self, config): + self.config = config + # 输入 + self.add_placeholders() + # [batch_size, sequence_size, embed_size] + q_embed, a_embed = self.add_embeddings() + with tf.variable_scope('siamese') as scope: + self.q_trans = self.network(q_embed) + scope.reuse_variables() + self.a_trans = self.network(a_embed) + # 损失和精确度 + self.total_loss = self.add_loss_op(self.q_trans, self.a_trans) + # 训练节点 + self.train_op = self.add_train_op(self.total_loss) + + # 输入 + def add_placeholders(self): + # 问题 + self.q = tf.placeholder(tf.int32, + shape=[None, self.config.max_q_length], + name='Question') + # 回答 + self.a = tf.placeholder(tf.int32, + shape=[None, self.config.max_a_length], + name='Ans') + self.y = tf.placeholder(tf.float32, shape=[None, ], name='label') + # drop_out + self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') + self.batch_size = tf.shape(self.q)[0] + + # word embeddings + def add_embeddings(self): + with tf.variable_scope('embedding'): + if self.config.embeddings is not None: + embeddings = tf.Variable(self.config.embeddings, name="embeddings", trainable=False) + else: + embeddings = tf.get_variable('embeddings', shape=[self.config.vocab_size, self.config.embedding_size], initializer=tf.uniform_unit_scaling_initializer()) + q_embed = tf.nn.embedding_lookup(embeddings, self.q) + a_embed = tf.nn.embedding_lookup(embeddings, self.a) + q_embed = tf.nn.dropout(q_embed, keep_prob=self.keep_prob) + a_embed = tf.nn.dropout(a_embed, keep_prob=self.keep_prob) + return q_embed, a_embed + + def network(self, x): + # (batch_size * max_len, embed_size) + max_len = tf.shape(x)[1] + x = tf.reshape(x, (-1, x.get_shape()[-1])) + fc1 = self.fc_layer(x, self.config.hidden_size, "fc1") 
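+ # fc1/fc2 are applied token-wise to the flattened (batch*max_len, embed) matrix;
+ # the reshape + reduce_mean below average the token features into a single
+ # vector per sentence before the final projection fc3. Question and answer both
+ # pass through this network with shared weights (scope.reuse_variables() in __init__).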
+ ac1 = tf.nn.relu(fc1) + fc2 = self.fc_layer(ac1, self.config.hidden_size, "fc2") + ac2 = tf.nn.relu(fc2) + # (batch_size, max_len, embed_size) + ac3 = tf.reshape(ac2, (self.batch_size, max_len, ac2.get_shape()[1])) + # (batch_size, embed_size) + ac3 = tf.reduce_mean(ac3, axis=1) + fc3 = self.fc_layer(ac3, self.config.output_size, "fc3") + return fc3 + + def fc_layer(self, bottom, n_weight, name): + assert len(bottom.get_shape()) == 2 + n_prev_weight = bottom.get_shape()[1] + initer = tf.truncated_normal_initializer(stddev=0.01) + W = tf.get_variable(name+'W', dtype=tf.float32, shape=[n_prev_weight, n_weight], initializer=initer) + b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.01, shape=[n_weight], dtype=tf.float32)) + fc = tf.nn.bias_add(tf.matmul(bottom, W), b) + return fc + + # 损失节点 + def add_loss_op(self, o1, o2): + # 此处用cos距离 + norm_o1 = tf.nn.l2_normalize(o1, dim=1) + norm_o2 = tf.nn.l2_normalize(o2, dim=1) + self.q_a_cosine = tf.reduce_sum(tf.multiply(o1, o2), 1) + + loss = self.contrastive_loss(self.q_a_cosine, self.y) + tf.add_to_collection('total_loss', loss) + total_loss = tf.add_n(tf.get_collection('total_loss')) + return total_loss + + def contrastive_loss(self, Ew, y): + l_1 = self.config.pos_weight * tf.square(1 - Ew) + l_0 = tf.square(tf.maximum(Ew, 0)) + loss = tf.reduce_mean(y * l_1 + (1 - y) * l_0) + return loss + + # 训练节点 + def add_train_op(self, loss): + with tf.name_scope('train_op'): + # 记录训练步骤 + self.global_step = tf.Variable(0, name='global_step', trainable=False) + opt = tf.train.AdamOptimizer(self.config.lr) + train_op = opt.minimize(loss, self.global_step) + return train_op \ No newline at end of file diff --git a/siamese_nn/run.sh b/siamese_nn/run.sh new file mode 100755 index 0000000..40dc941 --- /dev/null +++ b/siamese_nn/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash + + +echo "train model" +python siamese_nn.py --train + + +echo "test model" +python siamese_nn.py --test \ No newline at end of file diff --git a/siamese_nn/siamese_nn.py b/siamese_nn/siamese_nn.py new file mode 100755 index 0000000..365a3ab --- /dev/null +++ b/siamese_nn/siamese_nn.py @@ -0,0 +1,170 @@ +# -*- encoding:utf8 -*- +import tensorflow as tf +import numpy as np +import os +import sys +from copy import deepcopy +stdout = sys.stdout +reload(sys) +sys.stdout = stdout + +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + +import cPickle as pkl +from utils import * +from models import SiameseNN + + +class NNConfig(object): + def __init__(self, vocab_size, embeddings=None): + # 输入问题(句子)长度 + self.max_q_length = 200 + # 输入答案长度 + self.max_a_length = 200 + # 循环数 + self.num_epochs = 100 + # batch大小 + self.batch_size = 128 + # 词表大小 + self.vocab_size = vocab_size + self.hidden_size = 256 + self.output_size = 128 + self.keep_prob = 0.6 + # 词向量大小 + self.embeddings = embeddings + self.embedding_size = 100 + if self.embeddings is not None: + self.embedding_size = embeddings.shape[1] + # 学习率 + self.lr = 0.001 + # contrasive loss 中的 positive loss部分的权重 + self.pos_weight = 0.25 + + self.cf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) + self.cf.gpu_options.per_process_gpu_memory_fraction = 0.2 + + +def train(train_corpus, config, val_corpus, eval_train_corpus=None): + iterator = Iterator(train_corpus) + if os.path.exists(model_path): + os.mkdir(model_path) + with tf.Session(config=config.cf) as sess: + model = SiameseNN(config) + saver = tf.train.Saver() + sess.run(tf.initialize_all_variables()) + for epoch in xrange(config.num_epochs): + count = 0 + for batch_x in 
iterator.next(config.batch_size, shuffle=True): + batch_qids, batch_q, batch_aids, batch_ap, labels = zip(*batch_x) + batch_q = np.asarray(batch_q) + batch_ap = np.asarray(batch_ap) + _, loss = sess.run([model.train_op, model.total_loss], + feed_dict={model.q:batch_q, + model.a:batch_ap, + model.y:labels, + model.keep_prob:config.keep_prob}) + count += 1 + if count % 10 == 0: + print('[epoch {}, batch {}]Loss:{}'.format(epoch, count, loss)) + saver.save(sess,'{}/my_model'.format(model_path), global_step=epoch) + if eval_train_corpus is not None: + train_res = evaluate(sess, model, eval_train_corpus, config) + print('[train] ' + train_res) + if val_corpus is not None: + val_res = evaluate(sess, model, val_corpus, config) + print('[eval] ' + val_res) + + +def evaluate(sess, model, corpus, config): + iterator = Iterator(corpus) + + count = 0 + total_qids = [] + total_aids = [] + total_pred = [] + total_labels = [] + total_loss = 0. + for batch_x in iterator.next(config.batch_size, shuffle=False): + batch_qids, batch_q, batch_aids, batch_ap, labels = zip(*batch_x) + batch_q = np.asarray(batch_q) + batch_ap = np.asarray(batch_ap) + q_ap_cosine, loss = sess.run([model.q_a_cosine, model.total_loss], + feed_dict={model.q:batch_q, + model.a:batch_ap, + model.y:labels, + model.keep_prob:1.}) + total_loss += loss + count += 1 + total_qids.append(batch_qids) + total_aids.append(batch_aids) + total_pred.append(q_ap_cosine) + total_labels.append(labels) + # print(batch_qids[0], [id2word[_] for _ in batch_q[0]], + # batch_aids[0], [id2word[_] for _ in batch_ap[0]]) + total_qids = np.concatenate(total_qids, axis=0) + total_aids = np.concatenate(total_aids, axis=0) + total_pred = np.concatenate(total_pred, axis=0) + total_labels = np.concatenate(total_labels, axis=0) + MAP, MRR = eval_map_mrr(total_qids, total_aids, total_pred, total_labels) + # print('Eval loss:{}'.format(total_loss / count)) + return 'MAP:{}, MRR:{}'.format(MAP, MRR) + + +def test(corpus, config): + with tf.Session(config=config.cf) as sess: + model = SiameseNN(config) + saver = tf.train.Saver() + saver.restore(sess, tf.train.latest_checkpoint(model_path)) + print('[test] ' + evaluate(sess, model, corpus, config)) + + +def main(args): + max_q_length = 25 + max_a_length = 90 + + with open(os.path.join(processed_data_path, 'pointwise_corpus.pkl'), 'r') as fr: + train_corpus, val_corpus, test_corpus = pkl.load(fr) + + embeddings = build_embedding(embedding_path, word2id) + + train_qids, train_q, train_aids, train_ap, train_labels = zip(*train_corpus) + train_q = padding(train_q, max_q_length) + train_ap = padding(train_ap, max_a_length) + train_corpus = zip(train_qids, train_q, train_aids, train_ap, train_labels) + + + val_qids, val_q, val_aids, val_ap, labels = zip(*val_corpus) + val_q = padding(val_q, max_q_length) + val_ap = padding(val_ap, max_a_length) + val_corpus = zip(val_qids, val_q, val_aids, val_ap, labels) + + + test_qids, test_q, test_aids, test_ap, labels = zip(*test_corpus) + test_q = padding(test_q, max_q_length) + test_ap = padding(test_ap, max_a_length) + test_corpus = zip(test_qids, test_q, test_aids, test_ap, labels) + + config = NNConfig(max(word2id.values()) + 1, embeddings=embeddings) + config.max_q_length = max_q_length + config.max_a_length = max_a_length + if args.train: + train(deepcopy(train_corpus), config, val_corpus, deepcopy(train_corpus)) + elif args.test: + test(test_corpus, config) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--train", 
help="whether to train", action='store_true') + parser.add_argument("--test", help="whether to test", action='store_true') + args = parser.parse_args() + + model_path = 'models' + raw_data_path = '../data/WikiQA/raw' + processed_data_path = '../data/WikiQA/processed' + embedding_path = '../data/embedding/glove.6B.300d.txt' + + with open(os.path.join(processed_data_path, 'vocab.pkl'), 'r') as fr: + word2id, id2word = pkl.load(fr) + main(args) diff --git a/siamese_nn/utils.py b/siamese_nn/utils.py new file mode 100755 index 0000000..8b28136 --- /dev/null +++ b/siamese_nn/utils.py @@ -0,0 +1,125 @@ +# -*- encoding:utf-8 -*- +import numpy as np +import tensorflow as tf + + +def padding(data, max_len): + return tf.keras.preprocessing.sequence.pad_sequences(data, max_len, padding='post', truncating='post') + +def eval_map_mrr(qids, aids, preds, labels): + # 衡量map指标和mrr指标 + dic = dict() + pre_dic = dict() + for qid, aid, pred, label in zip(qids, aids, preds, labels): + pre_dic.setdefault(qid, []) + pre_dic[qid].append([aid, pred, label]) + for qid in pre_dic: + dic[qid] = sorted(pre_dic[qid], key=lambda k: k[1], reverse=True) + aid2rank = {aid:[label, rank] for (rank, (aid, pred, label)) in enumerate(dic[qid])} + dic[qid] = aid2rank + # correct = 0 + # total = 0 + # for qid in dic: + # cur_correct = 0 + # for aid in dic[qid]: + # if dic[qid][aid][0] == 1: + # cur_correct += 1 + # if cur_correct > 0: + # correct += 1 + # total += 1 + # print(correct * 1. / total) + + MAP = 0.0 + MRR = 0.0 + useful_q_len = 0 + for q_id in dic: + sort_rank = sorted(dic[q_id].items(), key=lambda k: k[1][1], reverse=False) + correct = 0 + total = 0 + AP = 0.0 + mrr_mark = False + for i in range(len(sort_rank)): + if sort_rank[i][1][0] == 1: + correct += 1 + if correct == 0: + continue + useful_q_len += 1 + correct = 0 + for i in range(len(sort_rank)): + # compute MRR + if sort_rank[i][1][0] == 1 and mrr_mark == False: + MRR += 1.0 / float(i + 1) + mrr_mark = True + # compute MAP + total += 1 + if sort_rank[i][1][0] == 1: + correct += 1 + AP += float(correct) / float(total) + + AP /= float(correct) + MAP += AP + + MAP /= useful_q_len + MRR /= useful_q_len + return MAP, MRR + +def build_embedding(in_file, word_dict): + # 构建预训练的embedding矩阵 + num_words = max(word_dict.values()) + 1 + dim = int(in_file.split('.')[-2][:-1]) + embeddings = np.zeros((num_words, dim)) + + if in_file is not None: + pre_trained = 0 + initialized = {} + avg_sigma = 0 + avg_mu = 0 + for line in open(in_file).readlines(): + sp = line.split() + assert len(sp) == dim + 1 + if sp[0] in word_dict: + initialized[sp[0]] = True + pre_trained += 1 + embeddings[word_dict[sp[0]]] = [float(x) for x in sp[1:]] + mu = embeddings[word_dict[sp[0]]].mean() + #print embeddings[word_dict[sp[0]]] + sigma = np.std(embeddings[word_dict[sp[0]]]) + avg_mu += mu + avg_sigma += sigma + avg_sigma /= 1. * pre_trained + avg_mu /= 1. 
* pre_trained + for w in word_dict: + if w not in initialized: + embeddings[word_dict[w]] = np.random.normal(avg_mu, avg_sigma, (dim,)) + print('Pre-trained: %d (%.2f%%)' % + (pre_trained, pre_trained * 100.0 / num_words)) + return embeddings.astype(np.float32) + + +class Iterator(object): + """ + 数据迭代器 + """ + def __init__(self, x): + self.x = x + self.sample_num = len(self.x) + + def next_batch(self, batch_size, shuffle=True): + # produce X, Y_out, Y_in, X_len, Y_in_len, Y_out_len + if shuffle: + np.random.shuffle(self.x) + l = np.random.randint(0, self.sample_num - batch_size + 1) + r = l + batch_size + x_part = self.x[l:r] + return x_part + + def next(self, batch_size, shuffle=False): + if shuffle: + np.random.shuffle(self.x) + l = 0 + while l < self.sample_num: + r = min(l + batch_size, self.sample_num) + batch_size = r - l + x_part = self.x[l:r] + l += batch_size + yield x_part diff --git a/siamese_rnn/README.me b/siamese_rnn/README.me new file mode 100755 index 0000000..0982db0 --- /dev/null +++ b/siamese_rnn/README.me @@ -0,0 +1,23 @@ +# 使用RNN网络实现问答任务 + +## 准备 + +#### 下载词向量文件[glove](../download.sh)。 + +``` +cd .. +bash download.sh +``` + +#### 预处理wiki数据 + +``` +cd .. +python preprocess_wiki.py +``` + +## 运行 + +``` +bash run.sh +``` diff --git a/siamese_rnn/models.py b/siamese_rnn/models.py new file mode 100755 index 0000000..918eb9a --- /dev/null +++ b/siamese_rnn/models.py @@ -0,0 +1,137 @@ +# -*- encoding:utf-8 -*- +import tensorflow as tf +import numpy as np + + +class SiameseRNN(object): + def __init__(self, config): + self.config = config + # 输入 + self.add_placeholders() + # [batch_size, sequence_size, embed_size] + q_embed, a_embed = self.add_embeddings() + with tf.variable_scope('siamese') as scope: + self.q_trans = self.network(q_embed) + tf.get_variable_scope().reuse_variables() + self.a_trans = self.network(a_embed) + # 损失和精确度 + self.total_loss = self.add_loss_op(self.q_trans, self.a_trans) + # 训练节点 + self.train_op = self.add_train_op(self.total_loss) + + # 输入 + def add_placeholders(self): + # 问题 + self.q = tf.placeholder(tf.int32, + shape=[None, self.config.max_q_length], + name='Question') + # 回答 + self.a = tf.placeholder(tf.int32, + shape=[None, self.config.max_a_length], + name='Ans') + self.y = tf.placeholder(tf.float32, shape=[None, ], name='label') + # drop_out + self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') + self.batch_size = tf.shape(self.q)[0] + + # word embeddings + def add_embeddings(self): + with tf.variable_scope('embedding'): + if self.config.embeddings is not None: + embeddings = tf.Variable(self.config.embeddings, name="embeddings", trainable=False) + else: + embeddings = tf.get_variable('embeddings', shape=[self.config.vocab_size, self.config.embedding_size], initializer=tf.uniform_unit_scaling_initializer()) + q_embed = tf.nn.embedding_lookup(embeddings, self.q) + a_embed = tf.nn.embedding_lookup(embeddings, self.a) + q_embed = tf.nn.dropout(q_embed, keep_prob=self.keep_prob) + a_embed = tf.nn.dropout(a_embed, keep_prob=self.keep_prob) + return q_embed, a_embed + + def network(self, x): + sequence_length = x.get_shape()[1] + # (batch_size, time_step, embed_size) -> (time_step, batch_size, embed_size) + inputs = tf.transpose(x, [1, 0, 2]) + inputs = tf.reshape(inputs, [-1, self.config.embedding_size]) + inputs = tf.split(inputs, sequence_length, 0) + # (batch_size, rnn_output_size) + rnn1 = self.rnn_layer(inputs) + # (batch_size, hidden_size) + fc1 = self.fc_layer(rnn1, self.config.hidden_size, "fc1") + ac1 = tf.nn.relu(fc1) + # 
(batch_size, output_size) + fc2 = self.fc_layer(ac1, self.config.output_size, "fc2") + return fc2 + + def fc_layer(self, bottom, n_weight, name): + assert len(bottom.get_shape()) == 2 + n_prev_weight = bottom.get_shape()[1] + initer = tf.truncated_normal_initializer(stddev=0.01) + W = tf.get_variable(name+'W', dtype=tf.float32, shape=[n_prev_weight, n_weight], initializer=initer) + b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.01, shape=[n_weight], dtype=tf.float32)) + fc = tf.nn.bias_add(tf.matmul(bottom, W), b) + return fc + + def rnn_layer(self, h): + if self.config.cell_type == 'lstm': + birnn_fw, birnn_bw = self.bi_lstm(self.config.rnn_size, self.config.layer_size, self.config.keep_prob) + else: + birnn_fw, birnn_bw = self.bi_gru(self.config.rnn_size, self.config.layer_size, self.config.keep_prob) + outputs_x1, _, _ = tf.contrib.rnn.static_bidirectional_rnn(birnn_fw, birnn_bw, h, dtype=tf.float32) + # (time_step, batch_size, 2*rnn_size) -> (batch_size, 2*rnn_size) + output_x1 = tf.reduce_mean(outputs_x1, 0) + return output_x1 + + def bi_lstm(self, rnn_size, layer_size, keep_prob): + + # forward rnn + with tf.name_scope('fw_rnn'), tf.variable_scope('fw_rnn'): + lstm_fw_cell_list = [tf.contrib.rnn.LSTMCell(rnn_size) for _ in xrange(layer_size)] + lstm_fw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(lstm_fw_cell_list), output_keep_prob=keep_prob) + + # backward rnn + with tf.name_scope('bw_rnn'), tf.variable_scope('bw_rnn'): + lstm_bw_cell_list = [tf.contrib.rnn.LSTMCell(rnn_size) for _ in xrange(layer_size)] + lstm_bw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(lstm_fw_cell_list), output_keep_prob=keep_prob) + + return lstm_fw_cell_m, lstm_bw_cell_m + + def bi_gru(self, rnn_size, layer_size, keep_prob): + + # forward rnn + with tf.name_scope('fw_rnn'), tf.variable_scope('fw_rnn'): + gru_fw_cell_list = [tf.contrib.rnn.GRUCell(rnn_size) for _ in xrange(layer_size)] + gru_fw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(gru_fw_cell_list), output_keep_prob=keep_prob) + + # backward rnn + with tf.name_scope('bw_rnn'), tf.variable_scope('bw_rnn'): + gru_bw_cell_list = [tf.contrib.rnn.GRUCell(rnn_size) for _ in xrange(layer_size)] + gru_bw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(gru_bw_cell_list), output_keep_prob=keep_prob) + + return gru_fw_cell_m, gru_bw_cell_m + + # 损失节点 + def add_loss_op(self, o1, o2): + # 此处用cos距离 + norm_o1 = tf.nn.l2_normalize(o1, dim=1) + norm_o2 = tf.nn.l2_normalize(o2, dim=1) + self.q_a_cosine = tf.reduce_sum(tf.multiply(o1, o2), 1) + + loss = self.contrastive_loss(self.q_a_cosine, self.y) + tf.add_to_collection('total_loss', loss) + total_loss = tf.add_n(tf.get_collection('total_loss')) + return total_loss + + def contrastive_loss(self, Ew, y): + l_1 = self.config.pos_weight * tf.square(1 - Ew) + l_0 = tf.square(tf.maximum(Ew, 0)) + loss = tf.reduce_mean(y * l_1 + (1 - y) * l_0) + return loss + + # 训练节点 + def add_train_op(self, loss): + with tf.name_scope('train_op'): + # 记录训练步骤 + self.global_step = tf.Variable(0, name='global_step', trainable=False) + opt = tf.train.AdamOptimizer(self.config.lr) + train_op = opt.minimize(loss, self.global_step) + return train_op diff --git a/siamese_rnn/run.sh b/siamese_rnn/run.sh new file mode 100755 index 0000000..082d607 --- /dev/null +++ b/siamese_rnn/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash + + +echo "train model" +python siamese_rnn.py --train + + +echo "test model" +python siamese_rnn.py --test diff --git 
a/siamese_rnn/siamese_rnn.py b/siamese_rnn/siamese_rnn.py new file mode 100755 index 0000000..eb35259 --- /dev/null +++ b/siamese_rnn/siamese_rnn.py @@ -0,0 +1,176 @@ +# -*- encoding:utf8 -*- +import tensorflow as tf +import numpy as np +import os +import sys +from copy import deepcopy +stdout = sys.stdout +reload(sys) +sys.stdout = stdout + +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + +import cPickle as pkl +from utils import * +from models import SiameseRNN + + +class RNNConfig(object): + def __init__(self, vocab_size, embeddings=None): + # 输入问题(句子)长度 + self.max_q_length = 200 + # 输入答案长度 + self.max_a_length = 200 + # 循环数 + self.num_epochs = 100 + # batch大小 + self.batch_size = 128 + # 词表大小 + self.vocab_size = vocab_size + # 词向量大小 + self.embeddings = embeddings + self.embedding_size = 100 + if self.embeddings is not None: + self.embedding_size = embeddings.shape[1] + # RNN单元类型和大小与堆叠层数 + self.cell_type = 'GRU' + self.rnn_size = 128 + self.layer_size = 2 + # 隐层大小 + self.hidden_size = 256 + self.output_size = 128 + # 每种filter的数量 + self.num_filters = 128 + self.keep_prob = 0.6 + # 学习率 + self.lr = 0.001 + # contrasive loss 中的 positive loss部分的权重 + self.pos_weight = 0.5 + + self.cf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) + self.cf.gpu_options.per_process_gpu_memory_fraction = 0.2 + + +def train(train_corpus, config, val_corpus, eval_train_corpus=None): + iterator = Iterator(train_corpus) + + with tf.Session(config=config.cf) as sess: + model = SiameseRNN(config) + saver = tf.train.Saver() + sess.run(tf.initialize_all_variables()) + for epoch in xrange(config.num_epochs): + count = 0 + for batch_x in iterator.next(config.batch_size, shuffle=True): + batch_qids, batch_q, batch_aids, batch_ap, labels = zip(*batch_x) + batch_q = np.asarray(batch_q) + batch_ap = np.asarray(batch_ap) + _, loss = sess.run([model.train_op, model.total_loss], + feed_dict={model.q:batch_q, + model.a:batch_ap, + model.y:labels, + model.keep_prob:config.keep_prob}) + count += 1 + if count % 10 == 0: + print('[epoch {}, batch {}]Loss:{}'.format(epoch, count, loss)) + saver.save(sess,'{}/my_model'.format(model_path), global_step=epoch) + if eval_train_corpus is not None: + train_res = evaluate(sess, model, eval_train_corpus, config) + print('[train] ' + train_res) + if val_corpus is not None: + val_res = evaluate(sess, model, val_corpus, config) + print('[eval] ' + val_res) + + +def evaluate(sess, model, corpus, config): + iterator = Iterator(corpus) + + count = 0 + total_qids = [] + total_aids = [] + total_pred = [] + total_labels = [] + total_loss = 0. 
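+ # Evaluation feeds keep_prob=1.0 (dropout off) and ranks answers by the
+ # q_a_cosine score, which models.py computes as a dot product of the two
+ # sentence vectors (the l2-normalized copies are created but not used).
+ # Per-batch qids/aids/scores/labels are concatenated below and passed to
+ # eval_map_mrr to obtain MAP and MRR per question.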
+ for batch_x in iterator.next(config.batch_size, shuffle=False): + batch_qids, batch_q, batch_aids, batch_ap, labels = zip(*batch_x) + batch_q = np.asarray(batch_q) + batch_ap = np.asarray(batch_ap) + q_ap_cosine, loss = sess.run([model.q_a_cosine, model.total_loss], + feed_dict={model.q:batch_q, + model.a:batch_ap, + model.y:labels, + model.keep_prob:1.}) + total_loss += loss + count += 1 + total_qids.append(batch_qids) + total_aids.append(batch_aids) + total_pred.append(q_ap_cosine) + total_labels.append(labels) + # print(batch_qids[0], [id2word[_] for _ in batch_q[0]], + # batch_aids[0], [id2word[_] for _ in batch_ap[0]]) + total_qids = np.concatenate(total_qids, axis=0) + total_aids = np.concatenate(total_aids, axis=0) + total_pred = np.concatenate(total_pred, axis=0) + total_labels = np.concatenate(total_labels, axis=0) + MAP, MRR = eval_map_mrr(total_qids, total_aids, total_pred, total_labels) + # print('Eval loss:{}'.format(total_loss / count)) + return 'MAP:{}, MRR:{}'.format(MAP, MRR) + + +def test(corpus, config): + with tf.Session(config=config.cf) as sess: + model = SiameseRNN(config) + saver = tf.train.Saver() + saver.restore(sess, tf.train.latest_checkpoint(model_path)) + print('[test] ' + evaluate(sess, model, corpus, config)) + + +def main(args): + max_q_length = 25 + max_a_length = 90 + + with open(os.path.join(processed_data_path, 'pointwise_corpus.pkl'), 'r') as fr: + train_corpus, val_corpus, test_corpus = pkl.load(fr) + + embeddings = build_embedding(embedding_path, word2id) + + train_qids, train_q, train_aids, train_ap, train_labels = zip(*train_corpus) + train_q = padding(train_q, max_q_length) + train_ap = padding(train_ap, max_a_length) + train_corpus = zip(train_qids, train_q, train_aids, train_ap, train_labels) + + + val_qids, val_q, val_aids, val_ap, labels = zip(*val_corpus) + val_q = padding(val_q, max_q_length) + val_ap = padding(val_ap, max_a_length) + val_corpus = zip(val_qids, val_q, val_aids, val_ap, labels) + + + test_qids, test_q, test_aids, test_ap, labels = zip(*test_corpus) + test_q = padding(test_q, max_q_length) + test_ap = padding(test_ap, max_a_length) + test_corpus = zip(test_qids, test_q, test_aids, test_ap, labels) + + config = RNNConfig(max(word2id.values()) + 1, embeddings=embeddings) + config.max_q_length = max_q_length + config.max_a_length = max_a_length + if args.train: + train(deepcopy(train_corpus), config, val_corpus, deepcopy(train_corpus)) + elif args.test: + test(test_corpus, config) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--train", help="whether to train", action='store_true') + parser.add_argument("--test", help="whether to test", action='store_true') + args = parser.parse_args() + + raw_data_path = '../data/WikiQA/raw' + processed_data_path = '../data/WikiQA/processed' + embedding_path = '../data/embedding/glove.6B.300d.txt' + model_path = 'models' + + with open(os.path.join(processed_data_path, 'vocab.pkl'), 'r') as fr: + word2id, id2word = pkl.load(fr) + main(args) diff --git a/siamese_rnn/utils.py b/siamese_rnn/utils.py new file mode 100755 index 0000000..8b28136 --- /dev/null +++ b/siamese_rnn/utils.py @@ -0,0 +1,125 @@ +# -*- encoding:utf-8 -*- +import numpy as np +import tensorflow as tf + + +def padding(data, max_len): + return tf.keras.preprocessing.sequence.pad_sequences(data, max_len, padding='post', truncating='post') + +def eval_map_mrr(qids, aids, preds, labels): + # 衡量map指标和mrr指标 + dic = dict() + pre_dic = dict() + for qid, aid, pred, label in 
zip(qids, aids, preds, labels): + pre_dic.setdefault(qid, []) + pre_dic[qid].append([aid, pred, label]) + for qid in pre_dic: + dic[qid] = sorted(pre_dic[qid], key=lambda k: k[1], reverse=True) + aid2rank = {aid:[label, rank] for (rank, (aid, pred, label)) in enumerate(dic[qid])} + dic[qid] = aid2rank + # correct = 0 + # total = 0 + # for qid in dic: + # cur_correct = 0 + # for aid in dic[qid]: + # if dic[qid][aid][0] == 1: + # cur_correct += 1 + # if cur_correct > 0: + # correct += 1 + # total += 1 + # print(correct * 1. / total) + + MAP = 0.0 + MRR = 0.0 + useful_q_len = 0 + for q_id in dic: + sort_rank = sorted(dic[q_id].items(), key=lambda k: k[1][1], reverse=False) + correct = 0 + total = 0 + AP = 0.0 + mrr_mark = False + for i in range(len(sort_rank)): + if sort_rank[i][1][0] == 1: + correct += 1 + if correct == 0: + continue + useful_q_len += 1 + correct = 0 + for i in range(len(sort_rank)): + # compute MRR + if sort_rank[i][1][0] == 1 and mrr_mark == False: + MRR += 1.0 / float(i + 1) + mrr_mark = True + # compute MAP + total += 1 + if sort_rank[i][1][0] == 1: + correct += 1 + AP += float(correct) / float(total) + + AP /= float(correct) + MAP += AP + + MAP /= useful_q_len + MRR /= useful_q_len + return MAP, MRR + +def build_embedding(in_file, word_dict): + # 构建预训练的embedding矩阵 + num_words = max(word_dict.values()) + 1 + dim = int(in_file.split('.')[-2][:-1]) + embeddings = np.zeros((num_words, dim)) + + if in_file is not None: + pre_trained = 0 + initialized = {} + avg_sigma = 0 + avg_mu = 0 + for line in open(in_file).readlines(): + sp = line.split() + assert len(sp) == dim + 1 + if sp[0] in word_dict: + initialized[sp[0]] = True + pre_trained += 1 + embeddings[word_dict[sp[0]]] = [float(x) for x in sp[1:]] + mu = embeddings[word_dict[sp[0]]].mean() + #print embeddings[word_dict[sp[0]]] + sigma = np.std(embeddings[word_dict[sp[0]]]) + avg_mu += mu + avg_sigma += sigma + avg_sigma /= 1. * pre_trained + avg_mu /= 1. * pre_trained + for w in word_dict: + if w not in initialized: + embeddings[word_dict[w]] = np.random.normal(avg_mu, avg_sigma, (dim,)) + print('Pre-trained: %d (%.2f%%)' % + (pre_trained, pre_trained * 100.0 / num_words)) + return embeddings.astype(np.float32) + + +class Iterator(object): + """ + 数据迭代器 + """ + def __init__(self, x): + self.x = x + self.sample_num = len(self.x) + + def next_batch(self, batch_size, shuffle=True): + # produce X, Y_out, Y_in, X_len, Y_in_len, Y_out_len + if shuffle: + np.random.shuffle(self.x) + l = np.random.randint(0, self.sample_num - batch_size + 1) + r = l + batch_size + x_part = self.x[l:r] + return x_part + + def next(self, batch_size, shuffle=False): + if shuffle: + np.random.shuffle(self.x) + l = 0 + while l < self.sample_num: + r = min(l + batch_size, self.sample_num) + batch_size = r - l + x_part = self.x[l:r] + l += batch_size + yield x_part
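For reference, the snippet below is a minimal, hypothetical usage sketch of the shared helpers added above (padding(), Iterator, eval_map_mrr()). The ids and scores are invented for illustration, and it assumes it is run next to one of the utils.py copies so the import resolves.

```
import numpy as np
from utils import padding, eval_map_mrr, Iterator

# Toy corpus: 2 questions with 2 candidate answers each, already id-encoded.
corpus = [
    # (qid, question_ids, aid, answer_ids, label)
    (0, [3, 7, 9], 0, [4, 4, 1, 8],    1),
    (0, [3, 7, 9], 1, [2, 5],          0),
    (1, [6, 2],    0, [9, 9, 9],       0),
    (1, [6, 2],    1, [1, 3, 5, 7, 2], 1),
]
qids, qs, aids, ans, labels = zip(*corpus)
qs = padding(qs, 5)    # (4, 5) array, zero-padded/truncated at the tail
ans = padding(ans, 6)  # (4, 6)

it = Iterator(list(zip(qids, qs, aids, ans, labels)))
for batch in it.next(2, shuffle=False):
    print(len(batch))  # prints 2 twice

# Fabricated model scores: higher means "more likely the right answer".
preds = np.array([0.9, 0.1, 0.8, 0.4])
MAP, MRR = eval_map_mrr(qids, aids, preds, labels)
print('MAP:{}, MRR:{}'.format(MAP, MRR))  # MAP:0.75, MRR:0.75
```

eval_map_mrr only depends on the relative order of the scores within each question, so any monotone rescaling of a model's outputs leaves MAP and MRR unchanged.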