In [1]:
import logging
import math
import os
import random
import sys
import time

import numpy as np
import tensorflow as tf

import bigru_model
import data_util


In [2]:
# Static configuration for this notebook.
# NOTE(review): the visible cells below re-assign `model` (In [8]) and `ckpt`
# (In [9]) and do not read any of the other names defined here.

# Checkpoint identity as [name, global step]; the checkpoint path is built from it.
model = ['model', '140000']
ckpt = '../model/%s.ckpt-%s' % tuple(model)

# Evaluation sets; `geneos` holds one boolean per entry of `datasets`.
datasets = ["giga", "duc2003", "duc2004"]
geneos = [True, False, False]
beam_searchs = [1, 10]

test_params = {"--decode": True, "--fast_decode": True}

# Path templates for test inputs and generated outputs.
data_pattern = "data/test.{}.txt"
OUTPUT_DIR = "../output/"
OUTPUT_PATTERN = "%s{dataset}.{description}.txt" % OUTPUT_DIR

In [3]:
# Load the document-side and summary-side dictionaries.
# Later cells index `doc_dict[1]` / `sum_dict[1]` by token id, so a missing
# dictionary only produces a warning here and will fail further down.
doc_dict, sum_dict = (
    data_util.load_dict("../data/%s_dict.txt" % side)
    for side in ("doc", "sum"))

if any(loaded is None for loaded in (doc_dict, sum_dict)):
    logging.warning("Dict not found.")

In [4]:
# Read the test documents as lists of token ids.
# NOTE(review): "udc2004" looks like a transposition of "duc2004" (the spelling
# used in `datasets`) -- confirm against the file on disk before renaming.
test_file = '../data/test.udc2004.txt'
data = data_util.load_test_data(test_file, doc_dict)

# Preview the first three documents, mapping every id back through doc_dict[1].
for sample in (data[pos] for pos in range(3)):
    print('%s\n' % ' '.join(doc_dict[1][tok] for tok in sample))

cambodian leader hun sen on friday rejected opposition parties ' demands for talks outside the country , accusing them of trying to `` <UNK> '' the political crisis .

king norodom sihanouk has declined requests to chair a summit of cambodia 's top political leaders , saying the meeting would not bring any progress in deadlocked negotiations to form a government .

cambodia 's <UNK> opposition asked the asian development bank monday to stop providing loans to the incumbent government , which it calls illegal .



In [5]:
# Hyper-parameters and paths, registered through TensorFlow's flag module and
# read back through `FLAGS` in the cells below.
# NOTE(review): the flag registry is process-wide, so re-running this cell in a
# live kernel may raise a duplicate-definition error -- confirm for the
# installed TensorFlow version.
flags = tf.app.flags

# Model and optimisation settings.
flags.DEFINE_float("learning_rate", 1., "Learning rate.")
flags.DEFINE_integer("size", 400, "Size of hidden layers.")
flags.DEFINE_integer("embsize", 200, "Size of embedding.")
flags.DEFINE_integer("num_layers", 1, "Number of layers in the model.")

# Files and directories.
flags.DEFINE_string("data_dir", "data", "Data directory")
flags.DEFINE_string("test_file", "", "Test filename.")
flags.DEFINE_string("test_output", "output.txt", "Test output.")
flags.DEFINE_string("train_dir", "model", "Training directory.")
flags.DEFINE_string("tfboard", "tfboard", "Tensorboard log directory.")

# Decoding switches.
# NOTE(review): the help text of "geneos" reads as a negation while its default
# is True and it is passed as `geneos=` to step_beam -- confirm the intended
# meaning in bigru_model.
flags.DEFINE_boolean("decode", False, "Set to True for testing.")
flags.DEFINE_boolean("geneos", True, "Do not generate EOS. ")

# Training limits, vocabulary sizes and schedule.
flags.DEFINE_float("max_gradient", 1.0, "Clip gradients l2 norm to this range.")
flags.DEFINE_integer("batch_size", 10, "Batch size in training / beam size in testing.")
flags.DEFINE_integer("doc_vocab_size", 30000, "Document vocabulary size.")
flags.DEFINE_integer("sum_vocab_size", 30000, "Summary vocabulary size.")
flags.DEFINE_integer("max_train", 0, "Limit on the size of training data (0: no limit).")
flags.DEFINE_integer("max_iter", 1000000, "Maximum training iterations.")
flags.DEFINE_integer("steps_per_validation", 1000, "Training steps between validations.")
flags.DEFINE_integer("steps_per_checkpoint", 10000, "Training steps between checkpoints.")
flags.DEFINE_string("checkpoint", "", "Checkpoint to load (use up-to-date if not set)")

FLAGS = flags.FLAGS


In [6]:
# Bucket sizes handed to the model constructor.
# Presumably (max document length, max summary length) pairs -- TODO confirm
# against bigru_model.
_buckets = [
    (30, 10),
    (50, 20),
    (70, 20),
    (100, 20),
    (200, 30),
]

layer_summary = "Creating %d layers of %d units." % (
    FLAGS.num_layers, FLAGS.size)
logging.info(layer_summary)

In [7]:
# Start from an empty default graph so that re-running the notebook does not
# stack a second copy of the model onto a graph left by an earlier run, then
# open the session that the restore and decoding cells below pass around as
# `sess`.
tf.reset_default_graph()
sess = tf.InteractiveSession()

In [8]:
# Build the BiGRU summarization model from the flag values defined above.
# NOTE: this rebinds `model`, which cell In [2] bound to a list.
dtype = tf.float32
# forward_only=True -- presumably skips building the training/backward ops;
# confirm in bigru_model.
forward_only = True
model = bigru_model.BiGRUModel(
    FLAGS.doc_vocab_size,
    FLAGS.sum_vocab_size,
    _buckets,
    FLAGS.size,
    FLAGS.num_layers,
    FLAGS.embsize,
    FLAGS.max_gradient,
    FLAGS.batch_size,  # per the flag's help text: beam size when testing
    FLAGS.learning_rate,
    forward_only=forward_only,
    dtype=dtype)

In [9]:
# Restore the saved parameters from the checkpoint into the current session.
# The log message is Chinese for "Reading model parameters from %s".
# NOTE(review): `ckpt` repeats the value already assigned in cell In [2].
ckpt = '../model/model.ckpt-140000'
logging.info("从 %s 读入模型参数" % ckpt)
model.saver.restore(sess, ckpt)

INFO:tensorflow:Restoring parameters from ../model/model.ckpt-140000


INFO:tensorflow:Restoring parameters from ../model/model.ckpt-140000


In [10]:
# Print the first three test documents as space-joined tokens
# (token ids are mapped back to words through doc_dict[1]).
for idx in range(3):
    words = (doc_dict[1][tok] for tok in data[idx])
    print('%s\n' % ' '.join(words))

cambodian leader hun sen on friday rejected opposition parties ' demands for talks outside the country , accusing them of trying to `` <UNK> '' the political crisis .

king norodom sihanouk has declined requests to chair a summit of cambodia 's top political leaders , saying the meeting would not bring any progress in deadlocked negotiations to form a government .

cambodia 's <UNK> opposition asked the asian development bank monday to stop providing loans to the incumbent government , which it calls illegal .



cambodian leader hun sen on friday rejected opposition parties ' demands for talks outside the country , accusing them of trying to `<UNK>` the political crisis .

柬埔寨领导人星期五拒绝了反对党在国外进行谈判的要求，指责他们试图`<UNK>`政治危机。

king norodom sihanouk has declined requests to chair a summit of cambodia 's top political leaders , saying the meeting would not bring any progress in deadlocked negotiations to form a government .

国王诺罗蒙·西哈努克国王拒绝主持首脑会议的柬埔寨最高政治领导人的话，说会议不会在僵局谈判中取得任何进展，形成政府。

cambodia 's `<UNK>` opposition asked the asian development bank monday to stop providing loans to the incumbent government , which it calls illegal .

柬埔寨的反对派要求亚洲开发银行星期一停止向现任政府提供贷款，这称为非法活动。

In [11]:
# Source token ids and the matching decoded summaries, kept in parallel lists.
inputs, outputs = [], []

# Decode every 10th document among the first 100 (ten samples in total).
for idx in range(0, 100, 10):
    token_ids = data[idx]

    # Wrap the sentence as the only entry of bucket 0; the target side is just
    # the [GO, EOS] pair because only the encoder inputs are used below.
    single_bucket = {0: [(token_ids, [data_util.ID_GO, data_util.ID_EOS])]}
    batch = model.get_batch(single_bucket, 0)
    encoder_inputs, decoder_inputs, encoder_len, decoder_len = batch

    inputs.append(token_ids)
    summary_ids = model.step_beam(
        sess, encoder_inputs, encoder_len, geneos=FLAGS.geneos)
    outputs.append(summary_ids)

In [12]:
# Print every source sentence followed by its generated summary.
# Both sides are mapped from ids to tokens with data_util.sen_map2tok and then
# passed through data_util.sen_postprocess before printing.
for input_, output_ in zip(inputs, outputs):
    gen_inp = " ".join(data_util.sen_map2tok(input_, doc_dict[1]))
    gen_inp = data_util.sen_postprocess(gen_inp)
    print('%s\n' % gen_inp)

    # If there is an EOS symbol in the output, cut it at that point. Only the
    # truncation is conditional: an output without EOS is printed in full
    # instead of being silently skipped together with its input.
    if data_util.ID_EOS in output_:
        output_ = output_[:output_.index(data_util.ID_EOS)]
    gen_sum = " ".join(data_util.sen_map2tok(output_, sum_dict[1]))
    gen_sum = data_util.sen_postprocess(gen_sum)
    print('%s\n\n' % gen_sum)

cambodian leader hun sen on friday rejected opposition parties ' demands for talks outside the country , accusing them of trying to `` <UNK> '' the political crisis .

cambodian leader rejects opposition demands for talks


honduras braced for potential catastrophe tuesday as hurricane mitch roared through the northwest caribbean , churning up high waves and intense rain that sent coastal residents scurrying for safer ground .

honduras braces for potential catastrophe


cuban president fidel castro said sunday he disagreed with the arrest in london of former chilean dictator augusto pinochet , calling it a case of `` international meddling . ''

castro says he disagrees with arrest of pinochet


u.s. prosecutors have asked for a ##-day extension to provide germany with paperwork necessary to extradite a top lieutenant of saudi terrorist suspect osama bin laden , officials said saturday .

prosecutors ask for extension to extradite bin laden suspect


in a critical ruling for the north

1. 
    * cambodian leader hun sen on friday rejected opposition parties ' demands for talks outside the country , accusing them of trying to `<UNK>` the political crisis .
    * 柬埔寨领导人星期五拒绝了反对党在国外进行谈判的要求，指责他们试图“拒绝”政治危机。
    * cambodian leader rejects opposition demands for talks
    * 柬埔寨领导人反对反对的谈判要求

2. 
    * honduras braced for potential catastrophe tuesday as hurricane mitch roared through the northwest caribbean , churning up high waves and intense rain that sent coastal residents scurrying for safer ground .
    * 洪都拉斯抵抗潜在的灾难，因为飓风米奇咆哮着通过西北加勒比地区，搅动了高波浪和强烈的下雨，使沿海居民为更安全的地面而起飞。
    * honduras braces for potential catastrophe
    * 洪都拉斯支配潜在的灾难

3. 
    * cuban president fidel castro said sunday he disagreed with the arrest in london of former chilean dictator augusto pinochet , calling it a case of '' international meddling . ''
    * 古巴总统费德尔·卡斯特罗说，他星期天不同意在前司令奥古斯丁·皮诺切特在伦敦被捕，称之为“国际干涉”。 “”
    * castro says he disagrees with arrest of pinochet
    * 卡斯特罗说，他不同意逮捕轻罪

4. 
    * u.s. prosecutors have asked for a ##-day extension to provide germany with paperwork necessary to extradite a top lieutenant of saudi terrorist suspect osama bin laden , officials said saturday .
    * 我们。官员星期六说，检察官已经要求延长日期，为德国提供必要的文书工作，以引渡沙特恐怖嫌疑人奥萨马·本·拉登的顶尖中尉。
    * prosecutors ask for extension to extradite bin laden suspect
    * 检察机关要求延长引渡人员的嫌犯

5. 
    * in a critical ruling for the north american national basketball association and the players ' union , arbitrator john `<UNK>` decides monday whether more than ### players with guaranteed contracts should be paid during the lockout .
    * 在北美国家篮球协会和球员工会，仲裁员约翰·`<UNK>`关键裁决决定是否周一超过###的球员保障合同应该在停摆期间支付。
    * `<UNK>` wants more than ### players
    * `<UNK>`要超过###的球员

6. 
    * rebels attacked a village in western uganda and killed six civilians before soldiers drove them off , a military spokesman said thursday .
    * 一名军事发言人星期四说，反叛分子袭击了乌干达西部的一个村庄，造成六名平民死亡，士兵们将他们赶走。
    * rebels attack village in western uganda
    * 叛乱分子袭击乌干达西部的村庄

7. 
    * indonesian president `<UNK>` , habibie finds attending a summit of asia-pacific leaders '' difficult '' because of his concerns about the arrest of malaysia 's former deputy prime minister , a thai newspaper reported sunday .
    * 印度尼西亚总统伊斯兰教，哈比比发现参加亚太地区领导人首脑会议“困难”，因为他担心马来西亚前副总理被捕，一名泰国报纸上周日报道。
    * indonesian president says `<UNK>` summit difficult
    * 印尼总统称`<UNK>`峰会难

8. 
    * israel 's cabinet announced within hours of a market bombing friday that it will put off a vote indefinitely on whether to ratify the `<UNK>` river accord until palestinians crack down further on terrorism .
    * 以色列内阁市场轰炸小时内周五宣布，它将推迟表决无限期，直到巴勒斯坦人进一步打击恐怖主义是否批准`<UNK>`河流一致。
    * israeli cabinet to put off vote
    * 以色列内阁推迟投票

9. 
    * bruises on the face of jailed dissident anwar ibrahim , splashed on newspaper front pages for two days and downloaded from the internet , are `<UNK>` the image of malaysian police .
    * 面对被监禁的持不同政见者伊娃的脸上的瘀伤，在报纸头版上飞溅了两天，并从互联网上下载，是马来西亚警察的形象。
    * malaysia 's anwar `<UNK>` 's image
    * 马来西亚的安华`<UNK>`的图像

10. 
    * u.s. special envoy richard holbrooke said monday the military situation in kosovo was as bad now as two weeks ago .
    * 我们。特使理查德·霍尔布鲁克星期一说，科索沃的军事状况现在在两个星期前还是糟糕的。
    * holbrooke says situation in kosovo
    * 霍尔布鲁克说，科索沃的局势

cambodian opposition asks adb to stop loans to government

柬埔寨反对党要求阿布政府停止贷款