In [1]:
import pickle
import random
import torch
from torch.nn import functional as F
import numpy as np
import gensim
import gensim.downloader as glove_api
import os

from ZorkGym.text_utils.text_parser import BagOfWords, Word2Vec, TextParser, tokenizer
from agents.OMP_DDPG import OMPDDPG

In [2]:
if torch.cuda.is_available():
    device = torch.device('cuda')
    torch.backends.cudnn.enabled = False
else:
    device = torch.device('cpu')

In [3]:
def word2vec_padding(list_of_embeddings, length, embedding_length):
    zero_vec = np.zeros(embedding_length)
    for _ in range(length - len(list_of_embeddings)):
        list_of_embeddings.append(zero_vec)
    return list_of_embeddings[:length]


def word2vec_sum(list_of_embeddings, embedding_length):
    ret_value = np.zeros(embedding_length)
    for embedding in list_of_embeddings:
        ret_value += embedding
    return ret_value

In [4]:
word2vec_model = glove_api.load('glove-wiki-gigaword-50')
embedding_size = word2vec_model.vector_size
word2vec_parser = Word2Vec(type_func=lambda x: torch.FloatTensor(x).to(device).unsqueeze(0),
                           word2vec_model=word2vec_model,
                           return_func=lambda x: word2vec_padding(x, 65, embedding_size))

# Generate Data

In [2]:
with open(os.getcwd() + '/data/zork_walkthrough.txt', 'rb') as f:
    data = pickle.load(f)

In [6]:
states = [word2vec_parser(state) for state in data['states']]
raw_actions = data['actions']
actions = []

for action in raw_actions:
    vect = 0
    for token in tokenizer(action):
        vect += word2vec_model[token]
    actions.append(torch.Tensor(vect).to(device))

  return f(*args, **kwds)
  return f(*args, **kwds)


In [3]:
for idx in range(len(data['states'])):
    print(data['states'][idx])
    print(data['actions'][idx])

[['west', 'of', 'house', 'you', 'are', 'standing', 'in', 'an', 'open', 'field', 'west', 'of', 'a', 'white', 'house', 'with', 'a', 'boarded', 'front', 'door', 'there', 'is', 'a', 'small', 'mailbox', 'here'], []]
north
[['north', 'of', 'house', 'you', 'are', 'facing', 'the', 'north', 'side', 'of', 'a', 'white', 'house', 'there', 'is', 'no', 'door', 'here', 'and', 'all', 'the', 'windows', 'are', 'boarded', 'up', 'to', 'the', 'north', 'a', 'narrow', 'path', 'winds', 'through', 'the', 'trees'], []]
north
[['forest', 'path', 'this', 'is', 'a', 'path', 'winding', 'through', 'a', 'dimly', 'lit', 'forest', 'the', 'path', 'heads', 'north', 'south', 'here', 'one', 'particularly', 'large', 'tree', 'with', 'some', 'low', 'branches', 'stands', 'at', 'the', 'edge', 'of', 'the', 'path'], []]
up
[['up', 'a', 'tree', 'you', 'are', 'about', 'feet', 'above', 'the', 'ground', 'nestled', 'among', 'some', 'large', 'branches', 'the', 'nearest', 'branch', 'above', 'you', 'is', 'above', 'your', 'reach', 'beside

IndexError: list index out of range

create vocabluary

In [None]:
words = set()
for action in raw_actions:
    for word in tokenizer(action):
        words.add(word)

# Train Agent using Imitation Learning

In [None]:
action_vocabulary = {}
for word in words:
    action_vocabulary[word] = word2vec_model[word]
action_vocabulary[''] = [0 for _ in range(len(action_vocabulary['open']))]

embedding_size = len(action_vocabulary['open'])

train_params = {
    'seed': 12,
    'number_of_neighbors': -1
}
test_params = {
    'nn=-1': {'number_of_neighbors': -1},
    'nn=1': {'number_of_neighbors': 1},
    'nn=3': {'number_of_neighbors': 3},
    'nn=11': {'number_of_neighbors': 11},
}

agent = OMPDDPG(actions=action_vocabulary,
                state_parser=word2vec_parser,
                embedding_size=embedding_size,
                input_length=embedding_size,
                input_width=65,
                history_size=1,
                model_type='CNN',
                device=device,
                pomdp_mode=True,
                loss_weighting=1.0,
                linear=False,
                improved_omp=False)

In [None]:
optimizer = agent._create_optimizer(lr=0.0001)
batch_size = 32
num_iters = 1000000

In [None]:
for iteration in range(num_iters):
    indices = np.random.randint(0, len(actions), batch_size)
    obs_batch = []
    action_batch = []
    
    for idx in indices:
        obs_batch.append(states[idx])
        action_batch.append(actions[idx])
        
    obs_batch = torch.stack(obs_batch).detach()
    action_batch = torch.stack(action_batch).detach()
    
    optimizer[0].zero_grad()
    predicted_actions = agent.network[0](obs_batch)
    loss = F.mse_loss(predicted_actions, action_batch.view(batch_size, -1).detach())
    loss.backward()
    print('Iteration: ' + str(loss.item()))
    optimizer[0].step()