In [3]:
from brnn_model_2 import *  # wildcard first: the explicit imports below win on name clashes

import subprocess

import numpy as np
import tensorflow as tf

import reader

In [4]:
# ---------------------------------------------------------------------------
# Global settings
#
# Each tunable is defined once under its documented name; the names that the
# rest of the notebook actually reads (model_type, global_*) are derived from
# it, so changing a documented value really changes the run.
# ---------------------------------------------------------------------------

# Model can be "test", "small", "medium", "large"
model_select = "test"
# Name consulted by get_config() to pick the configuration class.
model_type = model_select

# Path handed to reader.ptb_raw_data() when the corpus is loaded.
data_path = "../data/"
# Directory used as the Supervisor logdir and as the checkpoint prefix.
save_path = "./saved_model"

# NOTE(review): dat_path and sav_path are not read by any cell visible in this
# notebook and hold different values from data_path / save_path; they are kept
# unchanged in case other code relies on them.
#Put the path to the data here
dat_path = "../data"
#Put the path to where you want to save the training data
sav_path = "tensorboard/"

# The mixing degree for the prior gaussian mixture
# As in Fortunato they report scanning
# mix_pi \in { 1/4, 1/2, 3/4 }
mixing_pi = 0.25
global_prior_pi = mixing_pi

# As in Fortunato they report scanning
# log sigma1 \in { 0, -1, -2 }
# log sigma2 \in { -6, -7, -8 }
prior_log_sigma1 = -1.0
prior_log_sigma2 = -7.0
global_log_sigma1 = prior_log_sigma1
global_log_sigma2 = prior_log_sigma2

# Seed intended for change_random_seed(); that call is currently commented out.
global_random_seed = 12
# More than one GPU switches on soft placement and util.auto_parallel().
global_num_gpus = 0


class SmallConfig(object):
    """Hyper-parameter set selected when the model type is "small"."""

    # -- initialisation and optimisation --
    init_scale = 0.1        # bound of the uniform weight initializer
    learning_rate = 1.0     # base rate, multiplied by the decay factor per epoch
    lr_decay = 0.5          # decay base, applied once the epoch exceeds max_epoch
    max_grad_norm = 5
    keep_prob = 1.0

    # -- training schedule --
    max_epoch = 4           # last epoch trained at the undecayed learning rate
    max_max_epoch = 13      # total number of training epochs

    # -- network and input dimensions (consumed by PTBInput / PTBModel) --
    num_layers = 2
    hidden_size = 200
    num_steps = 20
    batch_size = 20
    vocab_size = 10000
    X_dim = 200             # Size of the embedding

class MediumConfig(object):
    """Hyper-parameter set selected when the model type is "medium".

    Slightly modified according to email.
    """

    # -- initialisation and optimisation --
    init_scale = 0.05       # bound of the uniform weight initializer
    learning_rate = 1.0     # base rate, multiplied by the decay factor per epoch
    lr_decay = 0.9          # decay base, applied once the epoch exceeds max_epoch
    max_grad_norm = 5
    keep_prob = 1.0

    # -- training schedule --
    max_epoch = 20          # last epoch trained at the undecayed learning rate
    max_max_epoch = 70      # total number of training epochs

    # -- network and input dimensions (consumed by PTBInput / PTBModel) --
    num_layers = 2
    hidden_size = 650
    num_steps = 35
    batch_size = 20
    vocab_size = 10000
    X_dim = 50              # Size of the embedding
    
class LargeConfig(object):
    """Hyper-parameter set selected when the model type is "large"."""

    # -- initialisation and optimisation --
    init_scale = 0.04       # bound of the uniform weight initializer
    learning_rate = 1.0     # base rate, multiplied by the decay factor per epoch
    lr_decay = 1 / 1.15     # decay base, applied once the epoch exceeds max_epoch
    max_grad_norm = 10
    keep_prob = 0.35

    # -- training schedule --
    max_epoch = 14          # last epoch trained at the undecayed learning rate
    max_max_epoch = 55      # total number of training epochs

    # -- network and input dimensions (consumed by PTBInput / PTBModel) --
    num_layers = 2
    hidden_size = 1500
    num_steps = 35
    batch_size = 20
    vocab_size = 10000
    X_dim = 100             # Size of the embedding
    
class TestConfig(object):
    """Deliberately tiny hyper-parameter set, used for quick test runs."""

    # -- initialisation and optimisation --
    init_scale = 0.1        # bound of the uniform weight initializer
    learning_rate = 1.0     # base rate, multiplied by the decay factor per epoch
    lr_decay = 0.5          # decay base, applied once the epoch exceeds max_epoch
    max_grad_norm = 1
    keep_prob = 1.0

    # -- training schedule --
    max_epoch = 1           # last epoch trained at the undecayed learning rate
    max_max_epoch = 1       # total number of training epochs

    # -- network and input dimensions (consumed by PTBInput / PTBModel) --
    num_layers = 2
    hidden_size = 15
    num_steps = 20
    batch_size = 20
    vocab_size = 10000
    X_dim = 19              # Size of the embedding


#    global_random_seed = set_random_seed
    
def get_config():
    """Build the hyper-parameter configuration selected by ``model_type``.

    The module-level ``model_type`` string picks one of the configuration
    classes; a new instance of it is created on every call, so callers may
    modify the returned object without affecting each other. The prior
    settings ``global_prior_pi``, ``global_log_sigma1`` and
    ``global_log_sigma2`` are copied onto the instance as ``prior_pi``,
    ``log_sigma1`` and ``log_sigma2``.

    Returns:
        An instance of SmallConfig, MediumConfig, LargeConfig or TestConfig.

    Raises:
        ValueError: if ``model_type`` is not "small", "medium", "large" or
            "test".
    """
    config_classes = {
        "small": SmallConfig,
        "medium": MediumConfig,
        "large": LargeConfig,
        "test": TestConfig,
    }
    config_class = config_classes.get(model_type)
    if config_class is None:
        # Format the message explicitly: passing model_type as a second
        # argument to ValueError would leave the "%s" placeholder unfilled.
        raise ValueError("Invalid model: %s" % (model_type,))
    config = config_class()

    print ("Model Type")
    print (model_type)
    config.prior_pi = global_prior_pi
    config.log_sigma1 = global_log_sigma1
    config.log_sigma2 = global_log_sigma2

    return config

In [6]:

# Build the Train / Valid / Test graphs once and export them as a meta graph,
# which the training cell re-imports into a fresh tf.Graph.

# Seeding is currently disabled:
#    change_random_seed(global_random_seed)
raw_data = reader.ptb_raw_data(data_path)
train_data, valid_data, test_data, _ = raw_data

print (model_type)

# Two separate instances so the evaluation settings can diverge from the
# training ones (see the commented overrides below).
config = get_config()
eval_config = get_config()
#eval_config.batch_size = 1
#eval_config.num_steps = 1

# Keep the handle so the TensorBoard server can be stopped later with
# tensorboard_process.terminate(); re-running this cell starts another server.
# NOTE(review): TensorBoard reads the "tensorboard" directory while the
# Supervisor in the training cell logs to save_path - confirm that summaries
# really end up where TensorBoard is looking.
tensorboard_process = subprocess.Popen(["tensorboard", "--logdir=tensorboard"])

with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    with tf.name_scope("Train"):
        train_input = PTBInput(config=config, data=train_data, name="TrainInput")
        # reuse=None: this first model creates the variables under "Model".
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config, input_=train_input)
        tf.summary.scalar("Training_Loss", m.cost)
        tf.summary.scalar("Learning_Rate", m.lr)
        # Summary names must not contain spaces: TensorFlow logged
        # "Summary name KL Loss is illegal; using KL_Loss instead", so the
        # sanitized names are spelled out directly (the resulting tags are
        # the same).
        tf.summary.scalar("KL_Loss", m.kl_loss)
        tf.summary.scalar("Total_Loss", m.total_loss)

    with tf.name_scope("Valid"):
        valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
        # reuse=True: share the variables created by the training model.
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
        tf.summary.scalar("Validation_Loss", mvalid.cost)

    with tf.name_scope("Test"):
        test_input = PTBInput(
            config=eval_config, data=test_data, name="TestInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mtest = PTBModel(is_training=False, config=eval_config,
                             input_=test_input)

    models = {"Train": m, "Valid": mvalid, "Test": mtest}
    for name, model in models.items():
        model.export_ops(name)
    metagraph = tf.train.export_meta_graph()

    # Soft placement and graph parallelisation only apply to multi-GPU runs.
    soft_placement = global_num_gpus > 1
    if soft_placement:
        util.auto_parallel(metagraph, m)




test
Model Type
test
Model Type
test
INFO:tensorflow:Summary name KL Loss is illegal; using KL_Loss instead.
INFO:tensorflow:Summary name Total Loss is illegal; using Total_Loss instead.


In [7]:

## Training !
# Re-imports the exported meta graph, trains for config.max_max_epoch epochs,
# reports train / validation / test perplexity, saves a checkpoint and finally
# collects the test model's input and output for the first test batch.
with tf.Graph().as_default():
    tf.train.import_meta_graph(metagraph)
    for model in models.values():
        model.import_ops()
    sv = tf.train.Supervisor(logdir=save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    with sv.managed_session(config=config_proto) as session:

        for epoch in range(config.max_max_epoch):
            # The learning rate is constant for the first config.max_epoch
            # epochs and is then multiplied by config.lr_decay every epoch.
            lr_decay = config.lr_decay ** max(epoch + 1 - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (epoch + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, eval_op=m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (epoch + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid)
            print("Epoch: %d Valid Perplexity: %.3f" % (epoch + 1, valid_perplexity))

        test_perplexity = run_epoch(session, mtest)
        print("Test Perplexity: %.3f" % test_perplexity)

        if save_path:
            print("Saving model to %s." % save_path)
            sv.saver.save(session, save_path, global_step=sv.global_step)

        print ("----------------------------------------------------------------")
        print ("------------------ Prediction of Sentences ---------------------")

       #  inputs, predicted = fetch_output(session, mtest)

        # Use the test model explicitly. The original code read the leftover
        # loop variable `model` from the import loop above, i.e. whichever
        # model happened to come last in the dict's iteration order.
        state = session.run(mtest.initial_state)

        inputs = []
        outputs = []
        fetches = {
            "final_state": mtest.final_state,
            "output": mtest.output,
            "input": mtest.input_data
        }

        for step in range(mtest.input.epoch_size):
            # Feed the previous batch's final recurrent state back in as the
            # initial state of every layer.
            feed_dict = {}
            for layer, (c, h) in enumerate(mtest.initial_state):
                feed_dict[c] = state[layer].c
                feed_dict[h] = state[layer].h

            print ("Computing batch %i/%i"%(step, mtest.input.epoch_size))
            vals = session.run(fetches, feed_dict)
            state = vals["final_state"]
            output = vals["output"]
            input_i = vals["input"]
            outputs.append(output)
            inputs.append(input_i)

            # Only the first batch is collected; the inspection cells below
            # look at inputs[0] / outputs[0] only.
            break

INFO:tensorflow:Restoring parameters from ./saved_model/model.ckpt-0
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Saving checkpoint to path ./saved_model/model.ckpt
INFO:tensorflow:Starting queue runners.
INFO:tensorflow:Model/global_step/sec: 0
INFO:tensorflow:Recording summary at step 0.
Epoch: 1 Learning rate: 1.000
0.000 perplexity: 10137.709 speed: 727 wps
KL is 9.611169815063477
0.004 perplexity: 5492.541 speed: 2879 wps
KL is 9.61058521270752
0.104 perplexity: 973.709 speed: 3973 wps
KL is 9.64439868927002
0.204 perplexity: 879.184 speed: 3859 wps
KL is 9.647159576416016
0.304 perplexity: 836.946 speed: 3734 wps
KL is 9.664589881896973
0.404 perplexity: 814.747 speed: 3700 wps
KL is 9.670531272888184
INFO:tensorflow:Model/global_step/sec: 9.14941
INFO:tensorflow:Recording summary at step 1098.
0.504 perplexity: 793.601 speed: 3675 wps
KL is 9.686573028564453
0.604 perplexity: 756.858 speed: 3730 wps
KL is 9.691993713378906
0.703 perplexity: 722.835 speed: 3682 wps

In [25]:
## Testing
# NOTE: this cell is disabled. Everything between the triple quotes below is a
# bare string literal, so running the cell executes none of it.
# The quoted code repeats the test-perplexity and prediction steps found at the
# end of the training cell. The lines that would create `sv` and `config_proto`
# are commented out inside the string, so if it were re-enabled it would rely
# on the objects left behind by the training cell.
"""
print ("Testing")
predicted = []   # Variable to store predictions
with tf.Graph().as_default():
    #tf.train.import_meta_graph(metagraph)
    #for model in models.values():
    #    model.import_ops()
    #sv = tf.train.Supervisor(logdir=save_path)
    #config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    with sv.managed_session(config=config_proto) as session:
        
       # session = tf.Session()
    
        test_perplexity = run_epoch(session, mtest)
        print("Test Perplexity: %.3f" % test_perplexity)

        print ("----------------------------------------------------------------")
        print ("------------------ Prediction of Sentences ---------------------")

       #  inputs, predicted = fetch_output(session, mtest)

        costs = 0.0
        state = session.run(model.initial_state)

        inputs = []
        outputs = []
        fetches = {
            "final_state": model.final_state,
            "output": model.output,
            "input": model.input_data
        }

        for step in range(model.input.epoch_size):
            feed_dict = {}
            for i, (c, h) in enumerate(model.initial_state):
                feed_dict[c] = state[i].c
                feed_dict[h] = state[i].h

            print ("Computing batch %i/%i"%(step, model.input.epoch_size))
            vals = session.run(fetches, feed_dict)
            state = vals["final_state"]
            output = vals["output"]
            input_i = vals["input"]
            outputs.append(output)
            inputs.append(input_i)
            
            break;
"""

Testing
INFO:tensorflow:Restoring parameters from ./saved_model/model.ckpt-0
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Saving checkpoint to path ./saved_model/model.ckpt
INFO:tensorflow:Starting queue runners.
INFO:tensorflow:Recording summary at step 0.
Test Perplexity: 10029.646
----------------------------------------------------------------
------------------ Prediction of Sentences ---------------------
Computing batch 0/206


In [8]:
# Show the token ids fed to the model and the matching model output for the
# first sequence ("chain") of the first collected batch.
print ("Input and output of the first chain of the first batch")
first_chain_input = inputs[0][0]
print (first_chain_input)
first_chain_output = outputs[0][0]
print (first_chain_output)

Input and output of the first chain of the first batch
[ 102   14   24   32  752  381    2   29  120    0   35   92   60  111  143
   32  616 3148  282   19]
[[  1.32543338e-03   2.65783314e-02   1.48836093e-03 ...,   6.07002657e-05
    6.44467800e-05   6.00701715e-05]
 [  8.04906420e-04   4.08028252e-02   7.98701122e-03 ...,   3.82030084e-05
    4.23077945e-05   3.96406213e-05]
 [  9.04081017e-03   5.55069894e-02   7.93239661e-03 ...,   3.27371308e-05
    3.17894919e-05   3.10358882e-05]
 ..., 
 [  4.10661921e-02   2.57064831e-02   9.40526500e-02 ...,   7.39673669e-06
    6.65309335e-06   6.96957613e-06]
 [  4.57760431e-02   3.04534305e-02   8.08057263e-02 ...,   7.61877936e-06
    6.88802356e-06   7.21738206e-06]
 [  2.11501971e-01   4.91102785e-02   4.78676893e-03 ...,   1.52480552e-05
    1.25552988e-05   1.29912460e-05]]


In [9]:
# For every row of the first chain's output, take the column index holding the
# largest value. The recorded run shows an output of shape (20, 10000), so this
# yields one index per row.
# `np` is now imported explicitly at the top of the notebook instead of being
# picked up through the wildcard import.
first_chain_scores = outputs[0][0]
selected_words = np.argmax(first_chain_scores, axis=1)
print (first_chain_scores.shape)
print (selected_words)

(20, 10000)
[ 1  1  1  1  1  2  0  0  0  1  1  1 13  1  1 25  5  2  2  0]
