
run on CRC
add twacg
change -must_appear_in_src to default False
memray committed Oct 1, 2018
1 parent 1d1cf41 commit 7f748b8
Showing 8 changed files with 282 additions and 152 deletions.
9 changes: 7 additions & 2 deletions config.py
@@ -68,6 +68,8 @@ def init_opt(description):
opt.test_dataset_names = ['inspec', 'nus', 'semeval', 'krapivin', 'kp20k', 'duc']
elif opt.exp.startswith('stackexchange'):
opt.test_dataset_names = ['stackexchange']
elif opt.exp.startswith('twacg'):
opt.test_dataset_names = ['twacg']
else:
raise Exception('Unsupported training data')

@@ -430,10 +432,13 @@ def train_opts(parser):
help='Maximum sentence length.')

def predict_opts(parser):
parser.add_argument('-must_appear_in_src', action="store_true", default="True",
parser.add_argument('-must_appear_in_src', action='store_true', default=False,
help='whether the predicted sequences must appear in the source text')

parser.add_argument('-report_score_names', type=str, nargs='+', default=['f_score@5_exact', 'f_score@5_soft', 'f_score@10_exact', 'f_score@10_soft'], help="""Default measure to report""")
parser.add_argument('-report_score_names', type=str, nargs='+',
# default=['f_score@5_exact', 'f_score@10_exact', 'f_score@5_soft', 'f_score@10_soft'],
default=['f_score@5_exact', 'f_score@10_exact'],
help="""Default measure to report""")

parser.add_argument('-test_dataset_root_path', type=str, default="data/")
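Note that the old default was more than a cosmetic issue: with action="store_true", the string default "True" is always truthy, so -must_appear_in_src could never actually be switched off. A minimal standalone sketch of the before/after behavior (illustrative only, not part of this commit):

import argparse

# old behavior: the default is the *string* "True", which is truthy even when the flag is absent
old = argparse.ArgumentParser()
old.add_argument('-must_appear_in_src', action="store_true", default="True")
print(bool(old.parse_args([]).must_appear_in_src))                      # True

# new behavior: off unless the flag is explicitly passed
new = argparse.ArgumentParser()
new.add_argument('-must_appear_in_src', action='store_true', default=False)
print(new.parse_args([]).must_appear_in_src)                            # False
print(new.parse_args(['-must_appear_in_src']).must_appear_in_src)       # True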

42 changes: 30 additions & 12 deletions evaluate.py
@@ -157,26 +157,41 @@ def if_present_duplicate_phrases(src_str, trgs_str, do_stemming=True, check_dupl


def evaluate_multiple_datasets(generator, data_loaders, opt, title='', epoch=1, predict_save_path=None):
score_dict_list = []
# return the scores of all examples in multiple datasets
datasets_score_dict = {}
for dataset_name, data_loader in zip(opt.test_dataset_names, data_loaders):
logging.getLogger().info('Evaluating %s' % dataset_name)
score_dict = evaluate_beam_search(generator, data_loader, opt,
title=title + '_' + dataset_name, epoch=epoch,
title=dataset_name + '.' + title, epoch=epoch,
predict_save_path=os.path.join(predict_save_path, dataset_name))
score_dict_list.append(score_dict)

# concatenate the scores of all examples in multiple datasets
# write the scores into file
score_json_path = os.path.join(predict_save_path, dataset_name, '%s.%s.detailed_score.json' % (dataset_name, title))
with open(score_json_path, 'w') as score_json:
score_json.write(json.dumps(score_dict))

# return a dict, key is dataset name and value is another dict of scores
datasets_score_dict[dataset_name] = score_dict

# empty dataset to free memory
data_loader.dataset.offload_dataset()

# create a new tuple (key='all_datasets') by merging all results
merged_score_dict = {}
for k in score_dict_list[0].keys():
merged_score_dict[k] = np.concatenate([d[k] for d in score_dict_list])
for dataset_name, score_dict in datasets_score_dict.items():
for score_name, score_values in score_dict.items():
merged_score_values = merged_score_dict.get(score_name, [])
merged_score_values.extend(score_values)
merged_score_dict[score_name] = merged_score_values
datasets_score_dict['all_datasets'] = merged_score_dict

return merged_score_dict
return datasets_score_dict
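For reference, a hedged sketch (not part of the commit) of how a caller could summarize the per-dataset dictionary that evaluate_multiple_datasets now returns: keys are the dataset names plus 'all_datasets', and each value maps a score name to a list of per-example values. The score names below mirror the -report_score_names default:

import numpy as np

def summarize_scores(datasets_score_dict, score_names=('f_score@5_exact', 'f_score@10_exact')):
    # datasets_score_dict: {dataset_name: {score_name: [per-example scores]}}
    for dataset_name, score_dict in datasets_score_dict.items():
        for score_name in score_names:
            values = score_dict.get(score_name, [])
            avg = float(np.mean(values)) if len(values) > 0 else 0.0
            print('%s\t%s\t#=%d\tavg=%.4f' % (dataset_name, score_name, len(values), avg))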


def evaluate_beam_search(generator, data_loader, opt, title='', epoch=1, predict_save_path=None):
logger = config.init_logging(title, predict_save_path + '/%s.log' % title, redirect_to_stdout=False)
progbar = Progbar(logger=logger, title=title, target=len(data_loader.dataset.examples), batch_size=data_loader.batch_size,
total_examples=len(data_loader.dataset.examples))
progbar = Progbar(logger=logger, title=title, target=len(data_loader), batch_size=data_loader.batch_size,
total_examples=len(data_loader.dataset))

topk_range = [5, 10]
score_names = ['precision', 'recall', 'f_score']
@@ -236,20 +251,21 @@ def evaluate_beam_search(generator, data_loader, opt, title='', epoch=1, predict
filtered_trg_str_seqs = np.asarray(trg_str_seqs)[trg_str_is_present_flags]
else:
pred_is_present_flags = [True] * len(processed_pred_str_seqs)
filtered_trg_str_seqs = trg_str_seqs

valid_and_present = np.asarray(pred_is_valid_flags) * np.asarray(pred_is_present_flags)
match_list = get_match_result(true_seqs=filtered_trg_str_seqs, pred_seqs=processed_pred_str_seqs)
print_out += '[PREDICTION] #(valid)=%d, #(present)=%d, #(retained&present)=%d, #(all)=%d\n' % (sum(pred_is_valid_flags), sum(pred_is_present_flags), sum(valid_and_present), len(pred_seq))
print_out += ''

'''
Print and export predictions
Iterate every prediction, print and export predictions
'''
preds_out = ''
for p_id, (seq, word, score, match, is_valid, is_present) in enumerate(
zip(processed_pred_seqs, processed_pred_str_seqs, processed_pred_score, match_list, pred_is_valid_flags, pred_is_present_flags)):
# if p_id > 5:
# break

preds_out += '%s\n' % (' '.join(word))
if is_present:
print_phrase = '[%s]' % ' '.join(word)
@@ -327,8 +343,10 @@ def evaluate_beam_search(generator, data_loader, opt, title='', epoch=1, predict
if predict_save_path:
if not os.path.exists(os.path.join(predict_save_path, title + '_detail')):
os.makedirs(os.path.join(predict_save_path, title + '_detail'))
# write print-out
with open(os.path.join(predict_save_path, title + '_detail', str(example_idx) + '_print.txt'), 'w') as f_:
f_.write(print_out)
# write original predictions
with open(os.path.join(predict_save_path, title + '_detail', str(example_idx) + '_prediction.txt'), 'w') as f_:
f_.write(preds_out)

@@ -365,8 +383,8 @@ def evaluate_beam_search(generator, data_loader, opt, title='', epoch=1, predict
logger.info('#(f_score@5_exact)=%d, sum=%f' % (len(score_dict['f_score@5_exact']), sum(score_dict['f_score@5_exact'])))
logger.info('#(f_score@10_exact)=%d, sum=%f' % (len(score_dict['f_score@10_exact']), sum(score_dict['f_score@10_exact'])))

# Write score summary to disk. Each row is scores (precision, recall and f-score)
if predict_save_path:
# export scores. Each row is scores (precision, recall and f-score) of different way of filtering predictions (how many one-word predictions to keep)
with open(predict_save_path + os.path.sep + title + '_result.csv', 'w') as result_csv:
csv_lines = []
for mode in ["exact", "soft"]:
87 changes: 22 additions & 65 deletions predict.py
@@ -1,22 +1,15 @@
# -*- coding: utf-8 -*-
import os
import sys
import argparse
from evaluate import evaluate_beam_search
from evaluate import evaluate_beam_search, evaluate_multiple_datasets
import logging
import numpy as np

import config
import utils

import torch
import torch.nn as nn
from torch import cuda

from beam_search import SequenceGenerator
from pykp.dataloader import KeyphraseDataLoader
from train import load_data_vocab, init_model, init_optimizer_criterion
from utils import Progbar, plot_learning_curve_and_write_csv
from train import init_model, load_vocab_and_datasets_for_testing

import pykp
from pykp.io import KeyphraseDatasetTorchText, KeyphraseDataset
@@ -26,56 +19,6 @@

logger = logging.getLogger()

def load_vocab_and_datasets(dataset_names, type, opt):
'''
Load additional datasets from disk
For now seven datasets are included: 'inspec', 'nus', 'semeval', 'krapivin', 'kp20k', 'duc', 'stackexchange'
Only 'kp20k', 'stackexchange' provide train/valid/test data.
The others have only train/test, and the train is mostly used for validation.
:param type:
:param opt:
:return:
'''
assert type == 'test' or type == 'valid'

logger.info("Loading vocab from disk: %s" % (opt.vocab_path))
word2id, id2word, vocab = torch.load(opt.vocab_path, 'rb')
logger.info('#(vocab)=%d' % len(vocab))

pin_memory = torch.cuda.is_available()
one2many_loaders = []

for dataset_name in dataset_names:
logger.info("Loading test dataset %s" % dataset_name)
if type == 'test':
dataset_path = os.path.join(opt.test_dataset_root_path, dataset_name, dataset_name + '.test.one2many.pt')
elif type == 'valid' and dataset_name in ['kp20k', 'stackexchange']:
dataset_path = os.path.join(opt.test_dataset_root_path, dataset_name, dataset_name + '.valid.one2many.pt')
elif type == 'valid' and dataset_name in ['inspec', 'nus', 'semeval', 'krapivin', 'duc']:
dataset_path = os.path.join(opt.test_dataset_root_path, dataset_name, dataset_name + '.train.one2many.pt')
else:
raise Exception('Unsupported dataset: %s, type=%s' % (dataset_name, type))

one2many = torch.load(dataset_path, 'wb')
one2many_dataset = KeyphraseDataset(one2many, word2id=word2id, id2word=id2word, type='one2many', include_original=True)
one2many_loader = KeyphraseDataLoader(dataset=one2many_dataset,
collate_fn=one2many_dataset.collate_fn_one2many,
num_workers=opt.batch_workers,
max_batch_example=opt.beam_search_batch_example,
max_batch_pair=opt.beam_search_batch_size,
pin_memory=pin_memory,
shuffle=False)

one2many_loaders.append(one2many_loader)

logger.info('#(%s data size: #(one2many pair)=%d, #(one2one pair)=%d, #(batch)=%d' %
(type, len(one2many_loader.dataset),
one2many_loader.one2one_number(),
len(one2many_loader)))
logger.info('*' * 50)

return one2many_loaders, word2id, id2word, vocab


def main():
opt = config.init_opt(description='predict.py')
@@ -95,19 +38,33 @@ def main():
logger.info('Running on CPU!')

try:
test_data_loaders, word2id, id2word, vocab = load_vocab_and_datasets(opt)
valid_data_loaders, word2id, id2word, vocab = load_vocab_and_datasets_for_testing(dataset_names=opt.test_dataset_names, type='valid', opt=opt)
test_data_loaders, _, _, _ = load_vocab_and_datasets_for_testing(dataset_names=opt.test_dataset_names, type='test', opt=opt)

opt.word2id = word2id
opt.id2word = id2word
opt.vocab = vocab

model = init_model(opt)
generator = SequenceGenerator(model,
eos_id=opt.word2id[pykp.io.EOS_WORD],
beam_size=opt.beam_size,
max_sequence_length=opt.max_sent_length
)

for testset_name, test_data_loader in zip(opt.test_dataset_names, test_data_loaders):
logger.info('Evaluating %s' % testset_name)
evaluate_beam_search(generator, test_data_loader, opt,
title='test_%s' % testset_name,
predict_save_path=opt.pred_path + '/%s_test_result/' % (testset_name))
valid_score_dict = evaluate_multiple_datasets(generator, valid_data_loaders, opt,
title='valid',
predict_save_path=opt.pred_path)
test_score_dict = evaluate_multiple_datasets(generator, test_data_loaders, opt,
title='test',
predict_save_path=opt.pred_path)

# test_data_loaders, word2id, id2word, vocab = load_vocab_and_datasets(opt)
# for testset_name, test_data_loader in zip(opt.test_dataset_names, test_data_loaders):
# logger.info('Evaluating %s' % testset_name)
# evaluate_beam_search(generator, test_data_loader, opt,
# title='test_%s' % testset_name,
# predict_save_path=opt.pred_path + '/%s_test_result/' % (testset_name))

except Exception as e:
logger.error(e, exc_info=True)
13 changes: 9 additions & 4 deletions preprocess.py
@@ -44,9 +44,15 @@ def main():
if opt.dataset_name == 'kp20k':
src_fields = ['title', 'abstract']
trg_fields = ['keyword']
valid_check=True
elif opt.dataset_name == 'stackexchange':
src_fields = ['title', 'question']
trg_fields = ['tags']
valid_check=True
elif opt.dataset_name == 'twacg':
src_fields = ['observation']
trg_fields = ['admissible_commands']
valid_check=False
else:
raise Exception('Unsupported dataset name=%s' % opt.dataset_name)
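For context, a hypothetical example of one 'twacg' input record that the new branch above appears to expect — the field names observation and admissible_commands come from the diff, but the values are invented for illustration:

# hypothetical twacg record; field names from the diff, values invented
example = {
    "observation": "You are in the kitchen . There is a fridge and a closed door to the west .",
    "admissible_commands": ["open fridge", "open door", "go west"],
}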

@@ -56,21 +62,21 @@
src_fields=src_fields,
trg_fields=trg_fields,
opt=opt,
valid_check=True)
valid_check=valid_check)

tokenized_valid_pairs = pykp.io.load_src_trgs_pairs(source_json_path=opt.source_valid_file,
dataset_name=opt.dataset_name,
src_fields=src_fields,
trg_fields=trg_fields,
opt=opt,
valid_check=False)
valid_check=valid_check)

tokenized_test_pairs = pykp.io.load_src_trgs_pairs(source_json_path=opt.source_test_file,
dataset_name=opt.dataset_name,
src_fields=src_fields,
trg_fields=trg_fields,
opt=opt,
valid_check=False)
valid_check=valid_check)

print("Building Vocab...")
word2id, id2word, vocab = pykp.io.build_vocab(tokenized_train_pairs, opt)
@@ -108,7 +114,6 @@ def main():
data_type='test',
include_original=True)


print("Exporting complete dataset to %s" % opt.output_path)
pykp.io.process_and_export_dataset(tokenized_train_pairs,
word2id, id2word,
2 changes: 0 additions & 2 deletions pykp/data/test_dataset_producer.py
@@ -337,7 +337,6 @@ def load_testset_from_json_and_add_pos_tag():
json_path = os.path.join(basedir, dataset_name, dataset_name + '_testing_postag.json')
with open(json_path, 'w') as json_file:
# postag title/abstract and insert into data example
postag_dataset_dict_list = []
for e_id, example_dict in enumerate(dataset_dict_list):
print('=' * 50)
print(e_id)
@@ -358,7 +357,6 @@ def load_testset_from_json_and_add_pos_tag():
# print('#(abstract token)=%d : %s' % (len(abstract_postag_tokens), str(abstract_postag_tokens)))
example_dict['title_postag'] = ' '.join([str(t[0])+'_'+str(t[1]) for t in title_postag_tokens])
example_dict['abstract_postag'] = ' '.join([str(t[0])+'_'+str(t[1]) for t in abstract_postag_tokens])
postag_dataset_dict_list.append(example_dict)

# for example_dict in postag_dataset_dict_list:
json_file.write(json.dumps(example_dict) + '\n')
4 changes: 2 additions & 2 deletions pykp/dataloader.py
@@ -292,9 +292,9 @@ class KeyphraseDataLoader(object):

def __init__(self, dataset, max_batch_example=5, max_batch_pair=1, shuffle=False, sampler=None, batch_sampler=None,
num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False):
self.dataset = dataset
self.dataset = dataset
# used for generating one2many batches
self.num_trgs = [len(e['trg']) for e in dataset.examples]
self.num_trgs = [len(e['trg']) for e in dataset.get_examples()]
self.batch_size = max_batch_pair
self.max_example_number = max_batch_example
self.num_workers = num_workers
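The change from dataset.examples to dataset.get_examples() pairs with the data_loader.dataset.offload_dataset() call added in evaluate.py, suggesting a dataset can now release its examples after evaluation. A minimal sketch of that pattern, assuming examples are simply dropped and reloaded from disk on demand (the method bodies below are an assumption, not the actual pykp.io.KeyphraseDataset code):

import torch

class LazyExampleStore(object):
    # assumed shape of the lazy-load / offload pattern implied by get_examples()/offload_dataset()
    def __init__(self, examples_path):
        self.examples_path = examples_path
        self._examples = None

    def get_examples(self):
        # load examples lazily on first access (or again after an offload)
        if self._examples is None:
            self._examples = torch.load(self.examples_path)
        return self._examples

    def offload_dataset(self):
        # free the in-memory examples once this dataset has been evaluated
        self._examples = None

    def __len__(self):
        return len(self.get_examples())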
