
Commit

Update directory structure; add reproductions of multiple models
l11x0m7 committed Jun 13, 2018
1 parent f726884 commit e0d801d
Showing 45 changed files with 3,743 additions and 590 deletions.
Binary file added ._download.sh
5 changes: 4 additions & 1 deletion .gitignore
@@ -103,4 +103,7 @@ ENV/
code/models/*
code/logs/*
data/*

*/models/
models/
*.log
*.DS_Store
68 changes: 63 additions & 5 deletions README.md
@@ -9,14 +9,20 @@ WikiQA, TrecQA, InsuranceQA

#### data preprocess on WikiQA

```
bash download.sh
python preprocess_wiki.py
```

### Pointwise Style

#### Siamese-NN model

This model is a simple implementation of a Siamese NN QA model trained in a pointwise manner.

[To this repo](./siamese_nn)
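
A minimal numpy sketch of the pointwise idea (illustrative only, not the repo's TensorFlow code; the encoder outputs below are random stand-ins). The same setup applies to the CNN and RNN variants that follow, with only the encoder swapped:

```
import numpy as np

def cosine(u, v):
    # cosine similarity between two sentence encodings
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v) + 1e-8)

# stand-ins for the Siamese encoder applied to the question and the answer
q_vec = np.random.randn(128)
a_vec = np.random.randn(128)
label = 1.0  # 1 = correct answer, 0 = wrong answer

# squash the similarity to (0, 1) and train with binary cross-entropy
score = 1.0 / (1.0 + np.exp(-cosine(q_vec, a_vec)))
loss = -(label * np.log(score) + (1 - label) * np.log(1 - score))
```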

##### train model

`python siamese.py --train --model NN`
@@ -29,6 +35,8 @@ This model is a simple implementation of a Siamese NN QA model trained in a pointwise

This model is a simple implementation of a Siamese CNN QA model trained in a pointwise manner.

[To this repo](./siamese_cnn)

##### train model

`python siamese.py --train --model CNN`
@@ -41,6 +49,8 @@ This model is a simple implementation of a Siamese CNN QA model trained in a pointwise

This model is a simple implementation of a Siamese RNN/LSTM/GRU QA model trained in a pointwise manner.

[To this repo](./siamese_rnn)

##### train model

`python siamese.py --train --model RNN`
@@ -60,6 +70,10 @@ All three models above are based on the vanilla Siamese structure. You can

Given a question, a positive answer, and a negative answer, this pairwise model learns to rank the two answers so that the correct one scores higher.

Refer to "Applying Deep Learning to Answer Selection: A Study and an Open Task".

[To this repo](./qacnn)
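
A minimal sketch of the pairwise (hinge) loss, assuming cosine scoring between QACNN encodings; the margin value is an assumption, not the repo's setting:

```
import numpy as np

def cosine(u, v):
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v) + 1e-8)

# hypothetical QACNN encodings of the question and the two candidate answers
q = np.random.randn(128)
a_pos = np.random.randn(128)
a_neg = np.random.randn(128)
margin = 0.1  # assumed value; tuned on dev data in practice

# hinge loss: the positive answer must beat the negative one by at least `margin`
loss = max(0.0, margin - cosine(q, a_pos) + cosine(q, a_neg))
```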

##### train model

`python qacnn.py --train`
@@ -68,11 +82,49 @@ Given a question, a positive answer, and a negative answer, this pairwise model can

`python qacnn.py --test`

### Listwise Style (can also be reduced to pointwise style)
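
The two styles differ only in the loss. A minimal numpy sketch of the contrast (scores and labels are illustrative):

```
import numpy as np

scores = np.array([2.1, 0.3, -1.0, 0.8])  # model scores for one question's candidates
labels = np.array([1.0, 0.0, 0.0, 1.0])   # 1 marks a correct answer

# listwise: normalize over the whole candidate list, minimize cross-entropy
probs = np.exp(scores - scores.max())
probs /= probs.sum()
listwise_loss = -np.sum(labels / labels.sum() * np.log(probs))

# pointwise reduction: score each (question, answer) pair independently
sigm = 1.0 / (1.0 + np.exp(-scores))
pointwise_loss = -np.sum(labels * np.log(sigm) + (1 - labels) * np.log(1 - sigm))
```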

#### Decomposable Attention Model

Refer to "A Decomposable Attention Model for Natural Language Inference".

[To this repo](./decomposable_att_model)
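
The model's attend-compare-aggregate flow in a minimal numpy sketch (the feed-forward nets F, G, H from the paper are omitted; shapes are illustrative):

```
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

# hypothetical token embeddings: 5-token question, 7-token answer, dim 100
Q = np.random.randn(5, 100)
A = np.random.randn(7, 100)

# Attend: soft-align each token of one sentence with the other sentence
scores = np.dot(Q, A.T)                       # (5, 7) alignment matrix
beta = np.dot(softmax(scores, axis=1), A)     # answer phrase aligned to each question token
alpha = np.dot(softmax(scores, axis=0).T, Q)  # question phrase aligned to each answer token

# Compare: pair each token with its aligned phrase (G would transform these)
v1 = np.concatenate([Q, beta], axis=1)        # (5, 200)
v2 = np.concatenate([A, alpha], axis=1)       # (7, 200)

# Aggregate: pool over tokens and classify from the pooled pair (via H)
pooled = np.concatenate([v1.sum(axis=0), v2.sum(axis=0)])  # (400,)
```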

##### train model

`python decomp_att.py --train`

##### test model

`python decomp_att.py --test`

#### Compare-Aggregate Model with Multi-Compare

Refer to "A Compare-Aggregate Model for Matching Text Sequences".

[To this repo](./seq_match_seq)
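
A minimal sketch of the compare step with the element-wise multiplication comparison function (the repo's `comp_type = 'mul'` suggests this variant); the paper's CNN aggregation is replaced by max-pooling for brevity:

```
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

# hypothetical token encodings: 5-token question, 7-token answer, dim 100
Q = np.random.randn(5, 100)
A = np.random.randn(7, 100)

# attention: a weighted summary of the question for each answer token
H = np.dot(softmax(np.dot(A, Q.T), axis=1), Q)  # (7, 100)

# compare: element-wise multiplication, one of the paper's comparison functions
T = A * H                                       # (7, 100)

# aggregate: the paper uses a CNN over T; max-pooling stands in here
features = T.max(axis=0)                        # (100,)
```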

##### train model

`python seq_match_seq.py --train`

##### test model

`python seq_match_seq.py --test`

#### BiMPM

Refer to "Bilateral Multi-Perspective Matching for Natural Language Sentences".

[To this repo](./bimpm)
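
The core of BiMPM is the multi-perspective cosine match: each perspective reweights the two vectors before the cosine. A minimal numpy sketch (dimensions and the perspective count are illustrative):

```
import numpy as np

def multi_perspective_match(v1, v2, W):
    # one matching value per perspective: cosine(W_k * v1, W_k * v2)
    p1 = W * v1                                  # (num_perspectives, dim)
    p2 = W * v2
    num = (p1 * p2).sum(axis=1)
    den = np.linalg.norm(p1, axis=1) * np.linalg.norm(p2, axis=1) + 1e-8
    return num / den                             # (num_perspectives,)

dim, l = 100, 20                                 # l perspectives (a hyperparameter)
W = np.random.randn(l, dim)                      # trainable weights in the real model
v1 = np.random.randn(dim)                        # e.g. a BiLSTM state of the question
v2 = np.random.randn(dim)                        # and of the answer
print(multi_perspective_match(v1, v2, W).shape)  # (20,)
```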

##### train model

`python bimpm.py --train`

##### test model

`python bimpm.py --test`

## Machine Reading Comprehension

@@ -104,6 +156,12 @@ SQuAD, MS MARCO

To be done

#### QANet

Refer to "QANet: Combining Local Convolution with Global Self-Attention for Reading Comprehension".

[To this repo](./QANet)

### Answer Selection Style

#### Dataset
@@ -112,4 +170,4 @@ RACE dataset

## Information

For more information, please visit http://skyhigh233.com/blog/2018/04/26/cqa-intro/.
23 changes: 23 additions & 0 deletions bimpm/README.me
@@ -0,0 +1,23 @@
# Reproducing the model from "Bilateral Multi-Perspective Matching for Natural Language Sentences" for question answering

## Preparation

#### Download the word-vector file [GloVe](../download.sh).

```
cd ..
bash download.sh
```

#### Preprocess the WikiQA data

```
cd ..
python preprocess_wiki.py
```

## Run

```
bash run.sh
```
175 changes: 175 additions & 0 deletions bimpm/bimpm.py
@@ -0,0 +1,175 @@
# -*- encoding:utf8 -*-
import tensorflow as tf
import numpy as np
import os
import sys
from copy import deepcopy

# Python 2 idiom: reload(sys) re-exposes sys.setdefaultencoding so UTF-8 data
# can be handled; stdout is saved and restored because reload() resets it.
stdout = sys.stdout
reload(sys)
sys.setdefaultencoding('utf8')
sys.stdout = stdout

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import cPickle as pkl
from utils import *
from models import BiMPM


class BiMPMConfig(object):
    def __init__(self, vocab_size, embeddings=None):
        # maximum question (sentence) length
        self.max_q_length = 200
        # maximum answer length
        self.max_a_length = 200
        # number of training epochs
        self.num_epochs = 100
        # batch size
        self.batch_size = 128
        # vocabulary size
        self.vocab_size = vocab_size
        # pre-trained word embeddings and their dimensionality
        self.embeddings = embeddings
        self.embedding_size = 100
        if self.embeddings is not None:
            self.embedding_size = embeddings.shape[1]
        # keep_prob = 1 - dropout
        self.keep_prob = 0.6
        # learning rate
        self.lr = 0.0003
        self.grad_clip = 1

        self.reg = 0
        self.mem_dim = 128
        self.cov_dim = 128
        self.filter_sizes = [2, 3, 4, 5]
        self.comp_type = 'mul'

        self.cf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        self.cf.gpu_options.per_process_gpu_memory_fraction = 0.2


def train(train_corpus, config, val_corpus, eval_train_corpus=None):
    iterator = Iterator(train_corpus)

    with tf.Session(config=config.cf) as sess:
        model = BiMPM(config)
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        for epoch in xrange(config.num_epochs):
            count = 0
            for batch_x in iterator.next(config.batch_size, shuffle=True):
                batch_qids, batch_q, batch_aids, batch_ap, labels = zip(*batch_x)
                batch_q = np.asarray(batch_q)
                batch_ap = np.asarray(batch_ap)
                labels = np.asarray(labels).astype(np.int32)
                _, loss = sess.run([model.train_op, model.total_loss],
                                   feed_dict={model.q: batch_q,
                                              model.a: batch_ap,
                                              model.y: labels,
                                              model.keep_prob: config.keep_prob})
                count += 1
                if count % 10 == 0:
                    print('[epoch {}, batch {}]Loss:{}'.format(epoch, count, loss))
            # checkpoint once per epoch
            saver.save(sess, '{}/my_model'.format(model_path), global_step=epoch)
            if eval_train_corpus is not None:
                train_res = evaluate(sess, model, eval_train_corpus, config)
                print('[train] ' + train_res)
            if val_corpus is not None:
                val_res = evaluate(sess, model, val_corpus, config)
                print('[eval] ' + val_res)


def evaluate(sess, model, corpus, config):
    iterator = Iterator(corpus)

    count = 0
    total_qids = []
    total_aids = []
    total_pred = []
    total_labels = []
    total_loss = 0.
    for batch_x in iterator.next(config.batch_size, shuffle=False):
        batch_qids, batch_q, batch_aids, batch_ap, labels = zip(*batch_x)
        batch_q = np.asarray(batch_q)
        batch_ap = np.asarray(batch_ap)
        y_hat, loss = sess.run([model.y_hat, model.total_loss],
                               feed_dict={model.q: batch_q,
                                          model.a: batch_ap,
                                          model.y: labels,
                                          model.keep_prob: 1.})
        y_hat = np.argmax(y_hat, axis=-1)
        total_loss += loss
        count += 1
        total_qids.append(batch_qids)
        total_aids.append(batch_aids)
        total_pred.append(y_hat)
        total_labels.append(labels)
        # print(batch_qids[0], [id2word[_] for _ in batch_q[0]],
        #       batch_aids[0], [id2word[_] for _ in batch_ap[0]])
    total_qids = np.concatenate(total_qids, axis=0)
    total_aids = np.concatenate(total_aids, axis=0)
    total_pred = np.concatenate(total_pred, axis=0)
    total_labels = np.concatenate(total_labels, axis=0)
    MAP, MRR = eval_map_mrr(total_qids, total_aids, total_pred, total_labels)
    # print('Eval loss:{}'.format(total_loss / count))
    return 'MAP:{}, MRR:{}'.format(MAP, MRR)


def test(corpus, config):
    with tf.Session(config=config.cf) as sess:
        model = BiMPM(config)
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(model_path))
        print('[test] ' + evaluate(sess, model, corpus, config))


def main(args):
    max_q_length = 25
    max_a_length = 90

    with open(os.path.join(processed_data_path, 'pointwise_corpus.pkl'), 'rb') as fr:
        train_corpus, val_corpus, test_corpus = pkl.load(fr)

    embeddings = build_embedding(embedding_path, word2id)

    train_qids, train_q, train_aids, train_ap, train_labels = zip(*train_corpus)
    train_q = padding(train_q, max_q_length)
    train_ap = padding(train_ap, max_a_length)
    train_corpus = zip(train_qids, train_q, train_aids, train_ap, train_labels)

    val_qids, val_q, val_aids, val_ap, labels = zip(*val_corpus)
    val_q = padding(val_q, max_q_length)
    val_ap = padding(val_ap, max_a_length)
    val_corpus = zip(val_qids, val_q, val_aids, val_ap, labels)

    test_qids, test_q, test_aids, test_ap, labels = zip(*test_corpus)
    test_q = padding(test_q, max_q_length)
    test_ap = padding(test_ap, max_a_length)
    test_corpus = zip(test_qids, test_q, test_aids, test_ap, labels)

    config = BiMPMConfig(max(word2id.values()) + 1, embeddings=embeddings)
    config.max_q_length = max_q_length
    config.max_a_length = max_a_length
    if args.train:
        train(deepcopy(train_corpus), config, val_corpus, deepcopy(train_corpus))
    elif args.test:
        test(test_corpus, config)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--train", help="whether to train", action='store_true')
    parser.add_argument("--test", help="whether to test", action='store_true')
    args = parser.parse_args()

    raw_data_path = '../data/WikiQA/raw'
    processed_data_path = '../data/WikiQA/processed'
    embedding_path = '../data/embedding/glove.6B.300d.txt'
    model_path = 'models'

    with open(os.path.join(processed_data_path, 'vocab.pkl'), 'rb') as fr:
        word2id, id2word = pkl.load(fr)
    main(args)
