In [None]:
import random
import torch
import numpy as np
import gensim
import gensim.downloader as glove_api
import os

from matplotlib import pyplot as pl
import pickle

from ZorkGym.text_utils.text_parser import BagOfWords, Word2Vec, TextParser, tokenizer
from agents.OMP_DDPG import OMPDDPG

In [None]:
# The CUDA branch below is commented out, so `device` is always the CPU.
# if torch.cuda.is_available():
#     device = torch.device('cuda')
#     torch.backends.cudnn.enabled = False
# else:
device = torch.device('cpu')

In [None]:
def word2vec_padding(list_of_embeddings, length, embedding_length):
    """Pad with zero vectors, or truncate, so the result has exactly ``length`` entries.

    The caller's list is never modified: a new list is built and returned. Every
    padded slot receives its own zero vector, so writing into one padded entry
    cannot change another.

    :param list_of_embeddings: Sequence of embedding vectors.
    :param length: Number of entries the returned list must have.
    :param embedding_length: Size of each zero vector used for padding.
    :return: New list holding the first ``length`` embeddings followed by as many
        zero vectors as are needed to reach ``length``.
    """
    # Slicing copies, so the caller's list stays untouched.
    padded = list(list_of_embeddings[:length])
    missing = length - len(padded)
    padded.extend(np.zeros(embedding_length) for _ in range(missing))
    return padded


def word2vec_sum(list_of_embeddings, embedding_length):
    """Add up all embeddings element-wise into a single vector.

    :param list_of_embeddings: Iterable of vectors to be summed.
    :param embedding_length: Size of the zero-initialised accumulator.
    :return: Accumulated vector; all zeros when no embeddings are given.
    """
    total = np.zeros(embedding_length)
    for vector in list_of_embeddings:
        # In-place accumulation into the zero-initialised accumulator.
        np.add(total, vector, out=total)
    return total

class OneHotParser(TextParser):
    """Parser that encodes every tokenized sentence as a one-hot row over a fixed vocabulary."""

    def __init__(self, vocabulary, type_func):
        """
        Remember the vocabulary and hand ``type_func`` to ``TextParser.__init__``.

        :param vocabulary: List of strings representing the vocabulary.
        :param type_func: Function which converts the output to the desired type, e.g. np.array.
        """
        self.vocab = vocabulary
        self.vocab_size = len(vocabulary)
        TextParser.__init__(self, type_func)

    def __call__(self, x):
        """
        Encode a batch of token lists.

        :param x: Sequence of token lists; each list is joined with single spaces and the
            resulting sentence is looked up in the vocabulary.
        :return: ``self.convert_type`` applied to an array of shape ``(len(x), vocab_size)``
            holding a single 1 per row.
        :raises ValueError: When the vocabulary is a list and a joined sentence is not in it.
        """
        encoded = np.zeros((len(x), self.vocab_size))
        for row, tokens in enumerate(x):
            column = self.vocab.index(' '.join(tokens))
            encoded[row, column] = 1

        return self.convert_type(encoded)

def load_list_from_file(file_path):
    """Read a text file and return its non-blank lines with surrounding whitespace removed.

    :param file_path: Path of the text file to read.
    :return: List of stripped lines in file order; lines that are empty after
        stripping are left out.
    """
    with open(file_path) as handle:
        stripped_lines = [raw_line.strip() for raw_line in handle]
    return [line for line in stripped_lines if line]

In [None]:
# Words that create_actions() places first when it pairs them with another word.
verbs = ['go', 'take', 'open', 'grab', 'run', 'walk', 'climb']
# Non-empty, stripped lines of ./data/vocabulary.txt (only used by commented-out code in the
# visible part of this notebook).
vocabulary = load_list_from_file('./data/vocabulary.txt')

#basic_actions = ['open', 'egg', 'east', 'west', 'north', 'south', 'go', 'up', 'down', 'look', 'take']
# Action words that create_actions() always includes.
basic_actions = ['open', 'egg', 'north', 'climb', 'tree', 'take']

# Added to the action set by create_actions() only when task == 1.
extended_actions = ['grab', 'run', 'climb', 'walk', 'go', 'south', 'east', 'west']

# NOTE(review): the remaining lists are not referenced anywhere else in the visible part of
# this notebook.
basic_objects = ['egg', 'door', 'tree', 'leaves', 'nest']

obj_ext1 = ['bag', 'bottle', 'rope', 'sword', 'lantern', 'knife', 'mat', 'mailbox',
            'rug', 'case', 'axe', 'diamond', 'leaflet', 'news', 'brick']
action_ext1 = ['enter', 'open the window', 'turn lamp on', 'move rug', 'open trap door', 'hit troll with sword']

random_words = ['bring', 'wait', 'test', 'heave', 'squat', 'garbage', 'you', 'no', 'year']

def create_actions():
    """Build the action set, its two-word commands and the matching embedding vocabulary.

    Reads the module-level names ``task``, ``basic_actions``, ``extended_actions``,
    ``verbs`` and ``word2vec_model`` and uses ``tokenizer`` to split text into words.

    :return: Tuple ``(actions, action_vocabulary, embedding_size, words, sentences)``:
        ``actions`` is the list of action strings selected by ``task``;
        ``action_vocabulary`` maps every word (plus ``''``) to its vector;
        ``embedding_size`` is the length of the vector stored for ``'open'``;
        ``words`` is the set of tokens found in ``sentences``;
        ``sentences`` is the list of generated word pairs.
    """
    if task == 1:
        actions = list(basic_actions) + list(extended_actions)
    else:
        actions = list(basic_actions)

    # Unique tokens in first-seen order, starting with the empty word.
    unique_tokens = ['']
    for action in actions:
        for token in tokenizer(action):
            if token not in unique_tokens:
                unique_tokens.append(token)

    # Each pair of distinct tokens is emitted once. A token listed in `verbs` goes first;
    # otherwise the later token is placed in front of the earlier one. Pairs with the empty
    # word therefore yield a single word followed by a space.
    sentences = [
        (first + ' ' + second) if first in verbs else (second + ' ' + first)
        for position, first in enumerate(unique_tokens)
        for second in unique_tokens[position + 1:]
    ]

    words = {token for sentence in sentences for token in tokenizer(sentence)}

    use_word2vec = True  # action_w2v
    if use_word2vec:
        action_vocabulary = {word: word2vec_model[word] for word in words}
        # The empty word is represented by a plain list of zeros of the embedding length.
        action_vocabulary[''] = [0] * len(action_vocabulary['open'])
    else:
        # One-hot alternative (currently unreachable because the flag above is fixed to True).
        action_vocabulary = {}
        words.add('')
        for idx, word in enumerate(words):
            one_hot = np.zeros(len(words))
            one_hot[idx] = 1.0
            action_vocabulary[word] = one_hot

    embedding_size = len(action_vocabulary['open'])

    return actions, action_vocabulary, embedding_size, words, sentences

In [None]:
# vocab_size = len(vocabulary)
# bow_parser = BagOfWords(vocabulary=vocabulary,
#                         type_func=lambda x: torch.FloatTensor(x).to(device).unsqueeze(1))


def _to_state_tensor(x):
    """Turn parser output into a float tensor on ``device`` with an extra leading dimension."""
    return torch.FloatTensor(x).to(device).unsqueeze(0)


def _pad_state_embeddings(x):
    """Pad or truncate to 65 embeddings; ``embedding_size`` is looked up at call time."""
    return word2vec_padding(x, 65, embedding_size)


# Pre-trained GloVe vectors fetched through the gensim downloader.
word2vec_model = glove_api.load('glove-wiki-gigaword-50')
embedding_size = word2vec_model.vector_size
word2vec_parser = Word2Vec(type_func=_to_state_tensor,
                           word2vec_model=word2vec_model,
                           return_func=_pad_state_embeddings)
# onehot_parser = OneHotParser(type_func=lambda x: torch.FloatTensor(x).to(device).unsqueeze(1),
#                              vocabulary=actions)

In [None]:
def test(additional_prints, test_iterations):
    """Run evaluation episodes with the module-level ``agent`` and return the mean reward.

    An episode that raises ``EnvironmentError`` is reported, discarded and retried, so
    exactly ``test_iterations`` completed episodes contribute to the average. Also reads
    the module-level ``number_of_neighbors``.

    :param additional_prints: When truthy, render the environment and print the chosen
        text command, the action and its Q-value at every step, and render once more
        when the episode ends.
    :param test_iterations: Number of completed episodes to average over.
    :return: Sum of the episode rewards divided by ``test_iterations``.
    """
    reward_sum = 0
    completed = 0
    with torch.no_grad():
        while completed < test_iterations:
            try:
                raw_obs = agent.env.reset()

                history = torch.zeros(
                    (agent.history_size, 2, agent.input_width, agent.input_length),
                    dtype=torch.float32,
                ).to(agent.device)

                episode_reward = 0
                done = False
                while not done:
                    parsed_obs = agent._parse_state(raw_obs).view(2, agent.input_width, agent.input_length)
                    # Shift the history window by one step and store the newest observation last.
                    history[:agent.history_size - 1] = history[1:]
                    history[-1] = parsed_obs

                    # Always greedy and silent here; printing is handled below.
                    action, text_command = agent._get_action(
                        history.unsqueeze(0),
                        tau=0,
                        eps=0,
                        test=True,
                        additional_prints=False,
                        number_of_neighbors=number_of_neighbors,
                    )
                    if additional_prints:
                        agent.env.render()
                        print(text_command)
                        print(action)
                        q_value = agent._get_q_value(agent.network, history.unsqueeze(0), action)
                        print(q_value)

                    raw_obs, reward, done, _has_won = agent.env.step(text_command)
                    episode_reward += reward

                if additional_prints:
                    agent.env.render()
            except EnvironmentError:
                print('There was some issue with the Zork test env.')
            else:
                # Only episodes that finished without an environment error are counted.
                reward_sum += episode_reward
                completed += 1

    return reward_sum * 1.0 / test_iterations

# Default Agent

In [None]:
# Evaluation settings for the default agent.
task = 1  # create_actions() adds extended_actions to the action set when task == 1
# Directory that holds one numbered sub-directory per saved checkpoint. The default is the
# original location; set the ZORK_RESULTS_DIR environment variable to point elsewhere
# instead of editing this cell.
# NOTE(review): the default path and the results key used later refer to neighbors=-1 while
# number_of_neighbors is 1 here - confirm this is intended.
path = os.path.join(
    os.environ.get(
        'ZORK_RESULTS_DIR',
        '/home/deep/ZorkDiscreteDDPG/egg_quest_baby_actions/ompddpg_cnn/neighbors=-1/w2v/omp_ddpg_50_-1_mse_loss/0/',
    ),
    '',  # joining with '' guarantees the trailing separator that `path + str(...)` relies on
)
number_of_neighbors = 1

actions, action_vocabulary, embedding_size, words, sentences = create_actions()

In [None]:
# Debug dump: print every action word followed by the vector word2vec_model stores for it.
for word in words:
    print(word)
    vector = word2vec_model[word]
    print(vector)

In [None]:
# Build the agent evaluated in this section from the action vocabulary returned by
# create_actions() and the word2vec state parser.
# NOTE(review): input_width=65 equals the padding length passed to word2vec_padding when
# word2vec_parser was created - presumably the two must stay in sync; confirm.
agent = OMPDDPG(actions=action_vocabulary,
                state_parser=word2vec_parser,
                embedding_size=embedding_size,
                input_length=embedding_size,
                input_width=65,
                history_size=1,
                model_type='CNN',
                device=device,
                pomdp_mode=False,
                loss_weighting=1.0,
                linear=False,
                improved_omp=False)

## Calculate coherence

In [None]:
# NOTE(review): calc_coherence is neither defined nor imported in the visible part of this
# notebook; on a fresh kernel this call raises NameError unless it is defined elsewhere.
calc_coherence()

### Load data + simple plot to make sure data is OK

In [None]:
# Collect the numbered checkpoint sub-directories in ascending numeric order.
# Entries whose name is not an integer (e.g. hidden or stray files) are skipped; previously
# a single such entry made int() raise ValueError and aborted the cell.
sub_dirs = []
for entry in os.listdir(path):
    try:
        sub_dirs.append(int(entry))
    except ValueError:
        continue
sub_dirs.sort()

In [None]:
# Load the rewards recorded in the highest-numbered sub-directory.
# NOTE: pickle.load can execute arbitrary code - only open result files you trust.
latest_results_path = path + str(sub_dirs[-1]) + '/results'
with open(latest_results_path, 'rb') as results_file:
    collected_results = pickle.load(results_file)
baseline = np.array(collected_results['rewards']['nn=-1'])

In [None]:
f, axarr = pl.subplots(1, 1, figsize=(6, 3))

# Sanity-check plot: column 0 of `baseline` on the x axis, column 1 on the y axis.
axarr.plot(baseline[:, 0], baseline[:, 1])

axarr.set_xlabel('Time step')
axarr.set_ylabel('Reward')
pl.show()

In [None]:
# Evaluate every saved checkpoint with one test episode each.
# map_location=device keeps checkpoints that were saved from a GPU loadable on the device
# this notebook runs on; without it torch.load tries to restore the tensors to the device
# they were saved from.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
baseline_results = []
for sub_dir in sub_dirs:
    checkpoint_dir = path + str(sub_dir)
    agent.network[0].load_state_dict(torch.load(checkpoint_dir + '/actor', map_location=device))
    agent.network[1].load_state_dict(torch.load(checkpoint_dir + '/critic', map_location=device))
    baseline_results.append(test(False, 1))

In [None]:
f, axarr = pl.subplots(1, 1, figsize=(6, 3))

# Rewards stored in the results file versus rewards measured by re-running each checkpoint.
pl.plot(baseline[:,0], baseline[:,1], label='Baseline (collected)')
pl.plot(baseline[:,0], baseline_results, label='Baseline (tested)')

leg = pl.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), shadow=True, ncol=2, fontsize=10)
# Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and later removed the old
# name; use whichever attribute this installation provides.
legend_handles = getattr(leg, 'legend_handles', None)
if legend_handles is None:
    legend_handles = leg.legendHandles
for legobj in legend_handles:
    legobj.set_linewidth(3.0)

pl.xlabel('Time step')
pl.ylabel('Reward')

pl.show()

# Dictionary with synonyms

In [None]:
# Settings for the synonyms experiment.
# NOTE(review): task and number_of_neighbors have the same values as in the "Default Agent"
# section, so create_actions() is called with identical inputs - confirm whether this
# section was meant to use a different configuration.
task = 1
number_of_neighbors=1

actions, action_vocabulary, embedding_size, words, sentences = create_actions()

In [None]:
# Rebuild the agent from the action vocabulary returned by the create_actions() call of this
# section; the constructor arguments are the same as for the default agent.
# NOTE(review): input_width=65 equals the padding length passed to word2vec_padding when
# word2vec_parser was created - presumably the two must stay in sync; confirm.
agent = OMPDDPG(actions=action_vocabulary,
                state_parser=word2vec_parser,
                embedding_size=embedding_size,
                input_length=embedding_size,
                input_width=65,
                history_size=1,
                model_type='CNN',
                device=device,
                pomdp_mode=False,
                loss_weighting=1.0,
                linear=False,
                improved_omp=False)

In [None]:
# NOTE(review): calc_coherence is neither defined nor imported in the visible part of this
# notebook; on a fresh kernel this call raises NameError unless it is defined elsewhere.
calc_coherence()

In [None]:
# Evaluate every saved checkpoint with one test episode each, using the current agent.
# map_location=device keeps checkpoints that were saved from a GPU loadable on the device
# this notebook runs on; without it torch.load tries to restore the tensors to the device
# they were saved from.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
results = []
for sub_dir in sub_dirs:
    checkpoint_dir = path + str(sub_dir)
    agent.network[0].load_state_dict(torch.load(checkpoint_dir + '/actor', map_location=device))
    agent.network[1].load_state_dict(torch.load(checkpoint_dir + '/critic', map_location=device))
    results.append(test(False, 1))

In [None]:
f, axarr = pl.subplots(1, 1, figsize=(6, 3))

# `baseline_results` holds the rewards measured by re-running the checkpoints, so it is
# labelled "tested" (the label previously said "collected", which is the data in `baseline`).
pl.plot(baseline[:,0], baseline_results, label='Baseline (tested)')
pl.plot(baseline[:,0], results, label='Synonyms')

leg = pl.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), shadow=True, ncol=2, fontsize=10)
# Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and later removed the old
# name; use whichever attribute this installation provides.
legend_handles = getattr(leg, 'legend_handles', None)
if legend_handles is None:
    legend_handles = leg.legendHandles
for legobj in legend_handles:
    legobj.set_linewidth(3.0)

pl.xlabel('Time step')
pl.ylabel('Reward')

pl.show()