In [0]:
# Standard library
import os
import pickle
import warnings

# Set before TensorFlow is imported so the native logging layer already sees
# the level when it initialises ("3" keeps only fatal messages).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "3"

from google.colab import drive

# Mount Google Drive and move into the project directory.
# The target is built from the absolute mount point rather than "./gdrive/...",
# so re-running this cell (when the working directory has already been changed)
# no longer fails with FileNotFoundError.
DRIVE_MOUNT_POINT = '/content/gdrive/'
PROJECT_DIR = os.path.join(DRIVE_MOUNT_POINT, "My Drive/NLU/Projects/project 1/rok/")

drive.mount(DRIVE_MOUNT_POINT)
os.chdir(PROJECT_DIR)

# Third-party
import tensorflow as tf
import numpy as np

# Silences every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

print("tf_version:\t" + tf.__version__)

# Project-local modules; imported only after the chdir above because they are
# resolved relative to the project directory.
from model import Model
from load_embedding import load_embedding
import utils

# Shared helpers used by the following cells.
logger = utils.Logger("./logs/")
timer = utils.Timer()

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).
tf_version:	1.13.1


In [0]:
# -------------------------------------------------------------------------------------------------------------------- #
# PATHS
# Directory names end with "/" because file names are appended to them with
# plain string concatenation.
DATA_DIR = "./data/"
RESULTS_DIR = "./results/"
MODEL_DIR = "./model/"

# INPUT FILE NAMES
# Only WORD_EMBEDDINGS_FILE is referenced by the code visible in this notebook
# chunk; the sentence files are presumably consumed by a preprocessing step.
WORD_EMBEDDINGS_FILE = "wordembeddings-dim100.word2vec"
SENTENCES_TRAIN_FILE = "sentences.train"
SENTENCES_EVAL_FILE = "sentences.eval"
SENTENCES_TEST_FILE = "sentences_test.txt"
SENTENCES_CONTINUATION_FILE = "sentences.continuation"

# MODEL SHAPE (forwarded to Model(...) as keyword arguments)
VOCABULARY_SIZE = 20000
EMBEDDING_DIM = 100        # matches the "dim100" embedding file above
STATE_DIM = 512            # overridden later for experiment "C"
DOWN_STATE_DIM = 512
SENT_DIM = 30              # presumably the padded sentence length -- TODO confirm
CONT_DIM = 20              # presumably the number of generated tokens -- TODO confirm

# OPTIMISATION SETTINGS
# Of these, only NUM_EPOCHS is passed to Model in the visible code.
NUM_EPOCHS = 1
BATCH_SIZE = 64
LEARNING_RATE = 0.001
MAX_GRAD_NORM = 5.0

# VOCABULARY
# The pickle holds a 3-tuple: the vocabulary plus the two lookup tables.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# vocabulary files produced by this project.
vocabulary_path = RESULTS_DIR + "vocabulary.pkl"
with open(vocabulary_path, "rb") as vocabulary_file:
    vocabulary_bundle = pickle.load(vocabulary_file)
vocabulary, word_to_idx, idx_to_word = vocabulary_bundle

# -------------------------------------------------------------------------------------------------------------------- #
# GRAPH SETUP
# Start from an empty default graph and pin both the TensorFlow and the NumPy
# random seeds so repeated runs of this cell build the same graph.
tf.reset_default_graph()
seed = 12345
tf.set_random_seed(seed)
np.random.seed(seed)

# RUN SELECTORS
# MODE "E" runs training + evaluation, MODE "G" runs continuation generation
# (see the session code further down). RESTORE chooses between loading the
# saved checkpoint and initialising fresh variables in MODE "E".
EXPERIMENT, MODE, RESTORE = "C", "G", True

# Experiment "C" uses a larger state size than the default configured above.
STATE_DIM = 1024 if EXPERIMENT == "C" else STATE_DIM

# MODEL
model_kwargs = dict(
    experiment=EXPERIMENT,
    mode=MODE,
    vocabulary_size=VOCABULARY_SIZE,
    embedding_dim=EMBEDDING_DIM,
    state_dim=STATE_DIM,
    down_state_dim=DOWN_STATE_DIM,
    sent_dim=SENT_DIM,
    cont_dim=CONT_DIM,
    initializer=tf.contrib.layers.xavier_initializer(),
    pad_idx=word_to_idx["<pad>"],
    eos_idx=word_to_idx["<eos>"],
    num_epochs=NUM_EPOCHS,
)
model = Model(**model_kwargs)

# The Saver is created after the model so that it covers the model's variables.
saver = tf.train.Saver()

# Start timing; the matching timer.__exit__() call is at the end of the cell.
timer.__enter__()

# -------------------------------------------------------------------------------------------------------------------- #
# SESSION: train + evaluate (MODE "E") or generate continuations (MODE "G")

# Single source of truth for the checkpoint location. os.path.join avoids the
# doubled separator ("./model//experimentC/...") that string concatenation
# produced because MODEL_DIR already ends with "/".
CHECKPOINT_PATH = os.path.join(MODEL_DIR, "experiment" + EXPERIMENT,
                               "experiment" + EXPERIMENT + ".ckpt")


def _run_until_exhausted(session, fetches):
    """Yield ``session.run(fetches)`` repeatedly until the input runs out.

    Args:
        session: the active ``tf.Session``.
        fetches: tensor/op, or list of them, evaluated on every step.

    Yields:
        The result of each successful ``session.run(fetches)`` call, in order.

    Iteration ends when ``session.run`` raises ``tf.errors.OutOfRangeError``;
    any other exception propagates to the caller.
    """
    while True:
        try:
            result = session.run(fetches)
        except tf.errors.OutOfRangeError:
            return
        yield result


def _decode_continuation(token_ids, idx_to_word, eos_idx):
    """Turn one row of predicted token ids into a space-separated sentence.

    Args:
        token_ids: list of integer token ids for a single sentence.
        idx_to_word: lookup from token id to word string.
        eos_idx: id of the end-of-sentence token.

    Returns:
        The words before the first ``eos_idx`` joined by single spaces. The
        end-of-sentence token itself is not emitted, and when it never occurs
        the whole row is used. No trailing padding spaces are produced.
    """
    try:
        end = token_ids.index(eos_idx)
    except ValueError:
        # list.index raises ValueError when <eos> is absent: keep every token.
        end = len(token_ids)
    return " ".join(idx_to_word[token_id] for token_id in token_ids[:end])


with tf.Session() as session:
    if MODE == "E":
        if RESTORE:
            saver.restore(session, CHECKPOINT_PATH)
            print("Model restored.")
        else:
            session.run(tf.global_variables_initializer())

            # LOAD EMBEDDING
            if EXPERIMENT == "B":
                load_embedding(session, word_to_idx, model.embedding_weight,
                               DATA_DIR + WORD_EMBEDDINGS_FILE, EMBEDDING_DIM,
                               VOCABULARY_SIZE)

        # TRAINING
        session.run(model.iterator_op,
                    {model.sentences_file: RESULTS_DIR + "X_train.ids"})

        # The loop never advances the epoch counter, so the progress line always
        # reports epoch 1. NOTE(review): presumably the input pipeline inside
        # Model repeats the data NUM_EPOCHS times -- confirm.
        epoch = 1
        # Only used in the progress message; it is not a stop condition.
        total_batch = 60000
        train_fetches = [model.loss, model.perplexity, model.optimize_op]
        for batch_count, (batch_loss, batch_perplexity, _) in enumerate(
                _run_until_exhausted(session, train_fetches)):
            if batch_count % 100 == 0:
                print("epoch: {}/{:<6}batch: {:>5}/{:<10}loss = {:<13.2f}perp = {:<13.2f}".format(epoch, NUM_EPOCHS,
                                                                                                      batch_count + 1,
                                                                                                      total_batch,
                                                                                                      batch_loss,
                                                                                                      batch_perplexity))

        save_path = saver.save(session, CHECKPOINT_PATH)
        print("Model saved in path: %s" % save_path)

        # EVALUATION
        session.run(model.iterator_op, {model.sentences_file: RESULTS_DIR + "X_eval.ids"})
        # Collect the per-batch results and concatenate once instead of calling
        # np.append per batch (which copies the whole array every time). The
        # leading empty float32 array keeps the result defined, and its dtype
        # promotion unchanged, even when there are no batches.
        perplexity_batches = [np.array([], dtype=np.float32)]
        for batch_perplexities in _run_until_exhausted(session, model.perplexities):
            perplexity_batches.append(np.ravel(batch_perplexities))
        eval_perplexities = np.concatenate(perplexity_batches)
        print("Evaluation finished.")

        # One perplexity per line, three decimals.
        with open(RESULTS_DIR + "groupXX.perplexity" + EXPERIMENT, "w") as f:
            for perplexity in eval_perplexities:
                f.write("%0.3f" % perplexity + "\n")

    elif MODE == "G":
        saver.restore(session, CHECKPOINT_PATH)
        print("Model restored.")

        session.run(model.iterator_op, {model.sentences_file: RESULTS_DIR + "X_cont.ids"})

        continuation_batches = []
        for batch_count, batch_predictions in enumerate(
                _run_until_exhausted(session, model.predictions), start=1):
            continuation_batches.append(batch_predictions)
            # "\r" rewrites the same output line, giving a running batch counter.
            print(batch_count, end="\r")

        continuation_ids = np.concatenate(continuation_batches, axis=0)
        print(continuation_ids.shape)

        eos_idx = int(word_to_idx["<eos>"])
        with open(RESULTS_DIR + "groupXX.continuation", "w") as f:
            # Column 0 of every row is skipped; presumably it holds the
            # start-of-sentence token -- TODO confirm against Model.predictions.
            for token_ids in continuation_ids[:, 1:].tolist():
                f.write(_decode_continuation(token_ids, idx_to_word, eos_idx) + "\n")

# NOTE(review): the timer is driven by explicit __enter__/__exit__ calls. If
# utils.Timer.__exit__ accepts the standard exception arguments this pair could
# become a `with timer:` block -- confirm before changing.
timer.__exit__()

INFO:tensorflow:Restoring parameters from ./model//experimentC/experimentC.ckpt
Model restored.
(10000, 21)
Elapsed: 17.0077223777771s
