In [1]:
import os
import re
import random
from typing import List, Dict, Any, Optional
from collections import defaultdict
from time import time
from glob import glob
import numpy as np
import gym
from textworld import EnvInfos
import textworld.gym

# for text similarity
import spacy
import wmd
import en_core_web_md

#import torch

#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
gamefiles = []
GAMES_PATH = "./sample_games/"
for file in os.listdir(GAMES_PATH):
    if file.endswith('.ulx'):
        gamefiles.append(GAMES_PATH + file)
        
MAX_STEPS = 50

## Play Function
- create gym environment to let the agent play textworld

In [3]:
def play(agent, path, max_step= MAX_STEPS, nb_episodes=10, verbose=True):
    request_infos = agent.select_additional_infos
    request_infos.max_score = True  # Needed to normalize the scores.

    gamefiles = [path]
    if os.path.isdir(path):
        gamefiles = glob(os.path.join(path, "*.ulx"))

    env_id = textworld.gym.register_games(gamefiles,
                                          request_infos=request_infos,
                                          max_episode_steps=max_step)
    env = gym.make(env_id)  # Create a Gym environment to play the text game.
    if verbose:
        if os.path.isdir(path):
            print(os.path.dirname(path), end="")
        else:
            print(os.path.basename(path), end="")

    # Collect some statistics: nb_steps, final reward.
    avg_moves, avg_scores, avg_norm_scores = [], [], []
    for no_episode in range(nb_episodes):
        obs, infos = env.reset()  # Start new episode.

        score = 0
        done = False
        nb_moves = 0
        while not done:
            # the agent chooses an action
            command = agent.act(obs, score, done, infos)
            # carry out action and update parameters
            obs, score, done, infos = env.step(command)
            nb_moves += 1

        # Let the agent know the game is done.
        agent.act(obs, score, done, infos)

        if verbose:
            print(".", end="")
        avg_moves.append(nb_moves)
        avg_scores.append(score)
        avg_norm_scores.append(score / infos["max_score"])

    env.close()
    msg = "  \tavg. steps: {:5.1f}; avg. score: {:4.1f} / {}."
    if verbose:
        if os.path.isdir(path):
            print(msg.format(np.mean(avg_moves), np.mean(avg_norm_scores), 1))
        else:
            print(msg.format(np.mean(avg_moves), np.mean(
                avg_scores), infos["max_score"]))

In [4]:
# get spacy package & word movers distance for spacy

nlp = en_core_web_md.load()
nlp.add_pipe(wmd.WMD.SpacySimilarityHook(nlp), last=True)

## Custom Agent
#### Handicap
- uses Handicap 5 (admissible commands from EnvInfos)
- uses Handicap 4 (recipe from EnvInfos)
- uses Handicap 2 (command templates from EnvInfos)
- uses Handicap 1 and 0 

-------------------

#### What does the agent learn?
- saves ranking for command templates
    - so far simplification: only use first word (verb) of template, don't differentiate between types of objects
    
    
- template ranking gets updated while the agent is playing depending on scores
        - calculate the reward by subtracting last round's score from this round's score
        - if the command led to a win reward each word an extra +5
            - in this case also reward last 3 commands instead of only last
        - if the command led to a defeat reward each word an extra -5
        - if the command did not get any reward, add a penalty of -1
            - necessary to avoid using the same command over and over again
    
----------------
    
#### How does the agent choose a command
- choose best command based on command template ranking + similarity between command and recipe
- if stuck using same commands use second best command
- penalize commands that caused GAME OVER in last x games

In [50]:
class CustomAgent():
    
    def __init__(self) -> None:
        self._initialized = False
        self._epsiode_has_started = False
        self.mode = "test"
        self.command_history = []
        self.history_counter = 0
        self.nlp = nlp
        
        # read in vocab.txt and map to id
        with open("./vocab.txt") as f:
            self.word_vocab = f.read().split("\n")
        self.word2id = {} #dictionary for converting words to ids
        self.id2word = [] #list for converting ids to words
        for i, w in enumerate(self.word_vocab):
            self.word2id[w] = i
            self.id2word.append(w)
    
        self.cmdvalues = [] #scores for command types
        self.cmdtoks = [] #ids of (so far only first word of) command type
        
        self.max_fail_memory = 10
        self.recipe = ""
        self.inventory = []
        self.found = 0


    def train(self) -> None:
        self.mode = "train"
        self.transitions = []
        self.failedcommands = []
        self.last_score = 0
        self.no_train_step = 0
        self.moves = 0
        self.command_history = []
        self.history_counter = 0

    def test(self) -> None:
        self.mode = "test"

    @property
    def select_additional_infos(self) -> EnvInfos:
        """
        Returns what additional information should be made available at each game step.

        Requested information will be included within the `infos` dictionary
        passed to `CustomAgent.act()`. To request specific information, create a
        :py:class:`textworld.EnvInfos <textworld.envs.wrappers.filter.EnvInfos>`
        and set the appropriate attributes to `True`. The possible choices are:

        * `description`: text description of the current room, i.e. output of the `look` command;
        * `inventory`: text listing of the player's inventory, i.e. output of the `inventory` command;
        * `max_score`: maximum reachable score of the game;
        * `objective`: objective of the game described in text;
        * `entities`: names of all entities in the game;
        * `verbs`: verbs understood by the the game;
        * `command_templates`: templates for commands understood by the the game;
        * `admissible_commands`: all commands relevant to the current state;

        In addition to the standard information, game specific information
        can be requested by appending corresponding strings to the `extras`
        attribute. For this competition, the possible extras are:

        * `'recipe'`: description of the cookbook;
        * `'walkthrough'`: one possible solution to the game (not guaranteed to be optimal);

        Example:
            Here is an example of how to request information and retrieve it.

            >>> from textworld import EnvInfos
            >>> request_infos = EnvInfos(description=True, inventory=True, extras=["recipe"])
            ...
            >>> env = gym.make(env_id)
            >>> ob, infos = env.reset()
            >>> print(infos["description"])
            >>> print(infos["inventory"])
            >>> print(infos["extra.recipe"])

            Handicap is defined as follows
                max_score, has_won, has_lost,               # Handicap 0
                description, inventory, verbs, objective,   # Handicap 1
                command_templates,                          # Handicap 2
                entities,                                   # Handicap 3
                extras=["recipe"],                          # Handicap 4
                admissible_commands,                        # Handicap 5
        """
        return EnvInfos(description=True, inventory=True, max_score = True, entities = True,
                        admissible_commands=True, has_won=True, has_lost=True, 
                        command_templates = True, extras=["recipe"])
    
    def _get_word_id(self, word):
        """
        look up id of a word in dictionary & add word to dictionaries if unknown.
        """
        if word not in self.word2id:        
            self.id2word.append(word)
            self.word2id[word] = len(self.word2id)
        return self.word2id[word]

    
    def _get_word_by_id(self, id):
        """
        look up word by id in dictionary.
        returns None, if id does not exist.
        """
        if id < len(self.id2word):
            return self.id2word[id]
        else:
            return None
    
    def _tokenize(self, text):
        """
        Simple tokenizer: strip out all non-alphabetic characters.
        """
        text = re.sub("[^a-zA-Z0-9\- ]", " ", text)
        word_ids = list(map(self._get_word_id, text.split()))
        return word_ids
    
    def _init(self, command_templates) -> None:
        """ Initialize the agent. """
        self._initialized = True
        for template in command_templates:
            #only safe first word of template
            initword = self._tokenize(template.split(' ', 1)[0])
            if initword not in self.cmdtoks:
                self.cmdtoks.append(initword)
                self.cmdvalues.append(0)
            
    def _start_episode(self, obs: List[str], infos: Dict[str, List[Any]]) -> None:
        """
        Prepare the agent for the upcoming episode.
        Arguments:
            obs: Initial feedback for each game.
            infos: Additional information for each game.
        """
        if not self._initialized:
            self._init(infos["command_templates"])
        
        # --- if it is a new game ---
        if self.recipe != infos["extra.recipe"]:
            self.recipe = infos["extra.recipe"]
            print(self.recipe)
            self.failedcommands = []
            self.ingredients, self.directions = self.process_recipe()
            self.recipe_tok = self._tokenize(self.recipe)

        self._epsiode_has_started = True


    def _end_episode(self, obs: List[str], scores: List[int], infos: Dict[str, List[Any]]) -> None:
        """
        Tell the agent the episode has terminated.
        Arguments:
            obs: Previous command's feedback for each game.
            score: The score obtained so far for each game.
            infos: Additional information for each game.
        """
        self._epsiode_has_started = False
    
    def tok_to_text(self, tok: List[int]):
        """
        convert ids to phrase e.g. for debug outputs
        """
        text = ""
        for id in tok:
            if id < len(self.id2word):
                text += self._get_word_by_id(id) + " "
            else:
                text += "[UNK] "
        return text
    
    def tok_to_nlp(self, tok1: List[int]):
        """ convert list of word ids into nlp string"""
        text = self.tok_to_text(tok1)
        text = self.nlp(text)
        return text
    
    def process_inventory(self, inv):
        inventory = []
        if ":" in inv:
            inventory = inv.split("\n")[1:-3]
            for i in range(len(inventory)):
                #inventory[i] = self._tokenize(inventory[i])
                inventory[i] = inventory[i].replace("  ","")
                inventory[i] = inventory[i].replace("\n","")
        return inventory
    
    def process_recipe(self):
        splitrecipe = self.recipe.split("\n\n")
        ingredients = splitrecipe[1].split("\n")[1:]
        directions = splitrecipe[2].split("\n")[1:-1]
        for i in range(len(ingredients)):
            #ingredients[i] = self._tokenize(ingredients[i])
            ingredients[i] = ingredients[i].replace("  ","")
        for i in range(len(directions)):
            #directions[i] = self._tokenize(directions[i])
            directions[i] = directions[i].replace("  ","")
        return ingredients, directions
    
    def find_ingredient(self, cmds):
        possiblecmds = []
        for index in range(len(self.ingredients)):
            if all(self.ingredients[index] not in item for item in self.inventory):
                for i in range(len(cmds)):
                    if self.ingredients[index] in self.tok_to_text(cmds[i]):
                        possiblecmds.append(i)
                if possiblecmds:
                    print(possiblecmds)
                    return possiblecmds
        return possiblecmds 
    
    def get_ingredient_from_item(self, item):
        for ingredient in self.ingredients:
            if ingredient in item:
                return ingredient
        return ""
    
    def prepare_ingredient(self,cmds):
        directionslist = []
        for item in self.inventory:
            ingredient = self.get_ingredient_from_item(item)
            if ingredient:
                for direction in self.directions:
                    if ingredient in direction:
                        directionslist.append(direction)
        dirs = ""
        for direction in directionslist:
            dirs += direction + "\n"
        return dirs 
    
    def calc_similarity(self, tok1: List[int], tok2: List[int]) -> float:
        """calculate the similarity between two nlp strings"""
        text1 = self.tok_to_nlp(tok1)
        text2 = self.tok_to_nlp(tok2)
        sim = 0.0
        # sometimes error when cost is negative...
        try:
            # usually 0.0 is best, and the worst i got was about 9
            # so normalized by 10 and subtracted from 1 to get best at 1.0
            sim = 1-text1.similarity(text2)/10
        except:
            pass
        return sim

    def get_index_of_template(self, cmd: List[int]) -> int:
        for i in range(len(self.cmdtoks)):
            if cmd[0] == self.cmdtoks[i][0]:
                return i
        return random.randint(0,len(self.cmdtoks)-1)
    
    def update_cmdvalues(self, cmd: List[int], reward):
        maxindex = self.get_index_of_template(cmd)
        self.cmdvalues[maxindex]  += reward
        #print(self.tok_to_text(cmd))
        #print(self.tok_to_text(self.cmdtoks[maxindex]))
    
    def calc_best_qvalue(self, commands, cmd_indices, recipe_tok, userecipe = True) -> int:
        # calculate qvalues and save in list
        qvalues =  []
        for cmd_index in cmd_indices:
            template_index = self.get_index_of_template(commands[cmd_index])
            if commands[cmd_index] not in self.failedcommands:
                qval = self.cmdvalues[template_index]
                if userecipe:
                    qval += self.calc_similarity(commands[cmd_index],recipe_tok)*10
            else:
                qval = -100000
            #print(str(self.tok_to_nlp(cmd)) + ": " +  str(self.calc_similarity(cmd,self.recipe_tok)))
            qvalues.append(qval)
        
        # save original indices of max commands in list, choose random from commands with max qvalue
        max_value = max(qvalues)
        max_commands = []
        for i in range(len(qvalues)):
            if max_value == qvalues[i]:
                max_commands.append(cmd_indices[i])
        return random.choice(max_commands)

    def choose_best_cmd(self,commands) -> int:
        """
        Gets randomly one of the best rewarded admissible commands.
        Assigns value -1000 to commands that caused GAME OVER in last x games.
        """
        if self.found < len(self.ingredients):
            ingredient_indices = self.find_ingredient(commands)
            if ingredient_indices:
                #ingredient is in room
                return self.calc_best_qvalue(commands,ingredient_indices, self.recipe_tok, userecipe=False)
            else:
                pass # remaining ingredients are hidden (fridge, other room)
        else: #all ingredients found
            print("COOK")
            dirs = self.prepare_ingredient(commands)
            if dirs: 
                dirs_tok = self._tokenize(dirs)
                return self.calc_best_qvalue(commands,range(len(commands)), dirs_tok)
        return self.calc_best_qvalue(commands, range(len(commands)), self.recipe_tok)
    
    def get_alternative_cmd(self, commands, recipe_tok) -> int:
        """
        Idea of an alternative command, if the current is used in the last 3 commands.
        Chooses the second best command.
        """
        qvalues =  []
        #print("alternative")
        for cmd in commands:
            template_index = self.get_index_of_template(cmd)
            if cmd not in self.failedcommands:
                qval = self.cmdvalues[template_index] + self.calc_similarity(cmd,recipe_tok)*10
            else:
                qval = -100000
            qvalues.append(qval)
     
        first_max = max(qvalues[0],qvalues[1]) 
        second_max = min(qvalues[0],qvalues[1]) 

        for i in range(2,len(qvalues)): 
            if qvalues[i] > first_max: 
                second_max = first_max
                first_max=qvalues[i] 
            else: 
                if qvalues[i]>second_max: 
                    second_max=qvalues[i] 

        #print("Second highest number is : ",str(second_max)) 
        return qvalues.index(second_max)
        
                        
    def act(self, obs: str, score: int, done: bool, 
            infos: Dict[str, List[Any]]) -> Optional[List[str]]:
        """
        Acts upon the current list of observations.

        One text command must be returned for each observation.

        Arguments:
            obs: Previous command's feedback for each game.
            scores: The score obtained so far for each game.
            dones: Whether a game is finished.
            infos: Additional information for each game.

        Returns:
            Text commands to be performed (one per observation).
            If episode had ended (e.g. `all(dones)`), the returned
            value is ignored.

        Notes:
            Commands returned for games marked as `done` have no effect.
            The states for finished games are simply copy over until all
            games are done.
        """

        if not self._epsiode_has_started:
            self._start_episode(obs, infos)
        
        print("-------------------------------------\n")
        
        self.inventory = self.process_inventory(infos["inventory"])
        
        self.found = 0
        for item in self.inventory:
            if any(ingredient in item for ingredient in self.ingredients):
                self.found += 1
        
        #choose best command based on "qvalues"
        cmds = []
        for i in range(len(infos["admissible_commands"])):
            cmds.append(self._tokenize(infos["admissible_commands"][i]))
        
        #print(infos["admissible_commands"])
        best_index = self.choose_best_cmd(cmds)
        action = infos["admissible_commands"][best_index]
        
        #building a historylist of commands to get out of a loop of death o_o
        if self.command_history is not None:
            last_commands = self.command_history[-3:]
            
        # compare-counters for comparing actions from the 3 last moves.
        if self.history_counter > self.moves:
            same_command_in_list = True 
            counter_use_same_command = 0
            """ 
            If the current admissible command (action) is one of the last 3 commands, the while-loop
            will be skipped. 
            If the same random chosen command is going 2 times through the while-loop, after tahat, the loops
            will be skipped, too.
            """
            while same_command_in_list is True and counter_use_same_command < 3:
                try:
                    if last_commands.index(action):
                        #best_index = self.choose_best_cmd(cmds)
                        best_index = self.get_alternative_cmd(cmds,  self.recipe_tok)
                        action = infos["admissible_commands"][best_index]
                        counter_use_same_command += 1
                    else:
                        same_command_in_list = False
                except ValueError:
                    same_command_in_list = False
                    #print("last_commands List does not contain value")
        # adds valid action to the command-history
        self.command_history.append(action)
        
        if self.mode == "test":
            return action
        
        #train mode, counter update
        self.no_train_step += 1 
        reward = 0
        self.moves += 1
        self.history_counter += 1
        
        #calculate rewards
        if self.transitions:
            reward = score - self.last_score
            self.last_score = score
            if infos["has_won"]:
                reward += 5
                #if won a game, reward last 3 transitions
                if len(self.transitions) > 1:
                    self.update_cmdvalues(self.transitions[-2],5)
                if len(self.transitions) > 2:
                    self.update_cmdvalues(self.transitions[-3],5)
            if infos["has_lost"]:
                reward -= 5
                #if lost game, save command if steps < MAX_STEPS-1
                if self.moves < MAX_STEPS-1:
                    #memory of failed commands
                    if len(self.failedcommands) < self.max_fail_memory:
                        self.failedcommands.append(self.transitions[-1])
                    else:
                        self.failedcommands.pop(0)
                        self.failedcommands.append(self.transitions[-1])
            if reward == 0:
                reward -= 1
                
            # update rewards for command from last step
            self.update_cmdvalues(self.transitions[-1],reward)

        # Debug output
        if(self.no_train_step % 1 == 0):
            print("")
            #print("Last 3 admissible commands: ", last_commands)
            #print("train step:" + str(self.no_train_step))
            
            print("LAST REWARD:" + str(reward))
            
            #command prints
            command = "LAST COMMAND: "
            if self.transitions:
                for id in self.transitions[-1]:
                    command += " " + self._get_word_by_id(id)
                print(command)
            
            #inventory prints
            print("INVENTORY:")
            print(self.inventory)
            print("found: " + str(self.found))
            
            #fcommand = "last failed command: "
            #if self.failedcommands:
                #for id in self.failedcommands[-1]:
                    #fcommand += " " + self._get_word_by_id(id)
                #print(fcommand)
        
        #save last command in order to calculate rewards in next step
        self.transitions.append(cmds[best_index])
        
        if done:
            if(infos["has_won"]):
                print("-------- WON GAME ----------")
            else:
                print("-------- LOST GAME ----------")
            print(" ")
            self.last_score = 0
            self.moves = 0
            self.transitions = []
            self.command_history = []
            self._end_episode(obs, score, infos)
            
        return action

## Training the Agent
- just training on the easiest game (3.0/3)

In [51]:
agent = CustomAgent()
agent.train()

In [47]:
starttime = time()
for i in range(2):
    print("-------------------------------------")
    print("GAME: " + str(i+1))
    print("-------------------------------------")
    play(agent, gamefiles[5])
print("Trained in {:.2f} secs".format(time() - starttime))


-------------------------------------
GAME: 1
-------------------------------------
tw-cooking-recipe1+take1-11Oeig8bSVdGSp78.ulx
Recipe #1
---------
Gather all following ingredients and follow the directions to prepare this tasty meal.

Ingredients:
  red hot pepper

Directions:
  slice the red hot pepper
  fry the red hot pepper
  prepare meal

-------------------------------------

[7, 15]

LAST REWARD:0
INVENTORY:
[]
found: 0
-------------------------------------

[7, 15]

LAST REWARD:-1
LAST COMMAND:  examine red hot pepper
INVENTORY:
[]
found: 0
-------------------------------------

[7, 15]

LAST REWARD:-1
LAST COMMAND:  examine red hot pepper
INVENTORY:
[]
found: 0
-------------------------------------

COOK


  result = entry_point.load(False)



LAST REWARD:1
LAST COMMAND:  take red hot pepper from counter
INVENTORY:
['a sliced fried red hot pepper']
found: 1
-------------------------------------

[7, 15]

LAST REWARD:-1
LAST COMMAND:  drop red hot pepper
INVENTORY:
[]
found: 0
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take red hot pepper
INVENTORY:
['a sliced fried red hot pepper']
found: 1
-------------------------------------

COOK

LAST REWARD:-5
LAST COMMAND:  eat red hot pepper
INVENTORY:
['a sliced fried red hot pepper']
found: 1
-------- LOST GAME ----------
 
.-------------------------------------

[7, 15]

LAST REWARD:0
INVENTORY:
[]
found: 0
-------------------------------------

COOK

LAST REWARD:1
LAST COMMAND:  take red hot pepper from counter
INVENTORY:
['a sliced fried red hot pepper']
found: 1
-------------------------------------

[7, 15]

LAST REWARD:-1
LAST COMMAND:  put red hot pepper on counter
INVENTORY:
[]
found: 0
-------------------------------------

COOK

LAST REWAR


LAST REWARD:1
LAST COMMAND:  take red hot pepper from counter
INVENTORY:
['a sliced fried red hot pepper']
found: 1
-------------------------------------


LAST REWARD:1
LAST COMMAND:  prepare meal
INVENTORY:
['a meal']
found: 0
-------------------------------------


LAST REWARD:-1
LAST COMMAND:  look
INVENTORY:
['a meal']
found: 0
-------------------------------------


LAST REWARD:-1
LAST COMMAND:  close fridge
INVENTORY:
['a meal']
found: 0
-------------------------------------


LAST REWARD:-1
LAST COMMAND:  open fridge
INVENTORY:
['a meal']
found: 0
-------------------------------------


LAST REWARD:-1
LAST COMMAND:  look
INVENTORY:
['a meal']
found: 0
-------------------------------------


LAST REWARD:6
LAST COMMAND:  eat meal
INVENTORY:
[]
found: 0
-------- WON GAME ----------
 
.-------------------------------------

[7, 15]

LAST REWARD:0
INVENTORY:
[]
found: 0
-------------------------------------

COOK

LAST REWARD:1
LAST COMMAND:  take red hot pepper from counter
INVENT


LAST REWARD:1
LAST COMMAND:  take red hot pepper from counter
INVENTORY:
['a sliced fried red hot pepper']
found: 1
-------------------------------------


LAST REWARD:1
LAST COMMAND:  prepare meal
INVENTORY:
['a meal']
found: 0
-------------------------------------


LAST REWARD:6
LAST COMMAND:  eat meal
INVENTORY:
[]
found: 0
-------- WON GAME ----------
 
.-------------------------------------

[7, 15]

LAST REWARD:0
INVENTORY:
[]
found: 0
-------------------------------------

COOK

LAST REWARD:1
LAST COMMAND:  take red hot pepper from counter
INVENTORY:
['a sliced fried red hot pepper']
found: 1
-------------------------------------


LAST REWARD:1
LAST COMMAND:  prepare meal
INVENTORY:
['a meal']
found: 0
-------------------------------------


LAST REWARD:6
LAST COMMAND:  eat meal
INVENTORY:
[]
found: 0
-------- WON GAME ----------
 
.  	avg. steps:   3.0; avg. score:  3.0 / 3.
Trained in 27.73 secs


In [48]:
for i in range(len(agent.cmdvalues)):
    print( agent.tok_to_text(agent.cmdtoks[i]) + " : " + str(agent.cmdvalues[i]))

chop  : 0
close  : -5
cook  : -5
dice  : 0
drink  : 0
drop  : -6
eat  : 97
examine  : -13
go  : 0
insert  : -5
inventory  : 0
lock  : 0
look  : 4
open  : 0
prepare  : 97
put  : -6
slice  : 0
take  : 79
unlock  : 0


In [16]:
# testing text similarity
tok1= agent._tokenize("Cook a meal.")
tok2= agent._tokenize("Prepare the food.")
tok3= agent._tokenize("Germany is a country.")
sim1 = agent.calc_similarity(tok1,tok1)
sim2 = agent.calc_similarity(tok1,tok2)
sim3 = agent.calc_similarity(tok1,tok3)
print(sim1) #same text
print(sim2) #somewhat similar meaning
print(sim3) # very different

1.0
0.4282114982604981
0.13777608871459956


In [66]:
agent.test()
play(agent, gamefiles[5])

tw-cooking-recipe1+take1-11Oeig8bSVdGSp78.ulx..........  	avg. steps:   3.0; avg. score:  3.0 / 3.


### More difficult game (no navigation)
- pretty bad (2.1/6)

In [52]:
agent.train()
starttime = time()
for i in range(2):
    print("-------------------------------------")
    print("GAME: " + str(i+1))
    print("-------------------------------------")
    play(agent, gamefiles[8])
print("Trained in {:.2f} secs".format(time() - starttime))

-------------------------------------
GAME: 1
-------------------------------------
tw-cooking-recipe2+take2+cut+open-BnYEixa9iJKmFZxO.ulx
Recipe #1
---------
Gather all following ingredients and follow the directions to prepare this tasty meal.

Ingredients:
  yellow bell pepper
  yellow potato

Directions:
  slice the yellow bell pepper
  fry the yellow bell pepper
  dice the yellow potato
  fry the yellow potato
  prepare meal

-------------------------------------

[10, 18]

LAST REWARD:0
INVENTORY:
[]
found: 0
-------------------------------------



  result = entry_point.load(False)



LAST REWARD:1
LAST COMMAND:  take yellow potato from counter
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------


LAST REWARD:-5
LAST COMMAND:  eat yellow potato
INVENTORY:
['a fried yellow potato']
found: 1
-------- LOST GAME ----------
 
.-------------------------------------

[10, 18]

LAST REWARD:0
INVENTORY:
[]
found: 0
-------------------------------------


LAST REWARD:1
LAST COMMAND:  take yellow potato from counter
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------


LAST REWARD:-1
LAST COMMAND:  take red potato from counter
INVENTORY:
['a raw red potato', 'a fried yellow potato']
found: 1
-------------------------------------


LAST REWARD:-1
LAST COMMAND:  take purple potato from counter
INVENTORY:
['a raw purple potato', 'a raw red potato', 'a fried yellow potato']
found: 1
-------------------------------------


LAST REWARD:-1
LAST COMMAND:  take red hot pepper from counter
INVENTORY:
['a red hot pepper', '


LAST REWARD:-1
LAST COMMAND:  close fridge
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

COOK

LAST REWARD:-5
LAST COMMAND:  eat yellow bell pepper
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------- LOST GAME ----------
 
.-------------------------------------

[10, 18]

LAST REWARD:0
INVENTORY:
[]
found: 0
-------------------------------------

[10, 18]

LAST REWARD:-1
LAST COMMAND:  examine yellow potato
INVENTORY:
[]
found: 0
-------------------------------------

[10, 18]

LAST REWARD:-1
LAST COMMAND:  examine yellow potato
INVENTORY:
[]
found: 0
-------------------------------------

[10, 18]

LAST REWARD:-1
LAST COMMAND:  examine yellow potato
INVENTORY:
[]
found: 0
-------------------------------------

[10, 18]

LAST REWARD:-1
LAST COMMAND:  examine yellow potato
INVENTORY:
[]
found: 0
-------------------------------------


LAST REWARD:1
LAST COMMAND:  take yellow potato fro


LAST REWARD:-1
LAST COMMAND:  examine yellow potato
INVENTORY:
['a fried yellow bell pepper']
found: 1
-------------------------------------


LAST REWARD:-1
LAST COMMAND:  close fridge
INVENTORY:
['a fried yellow bell pepper']
found: 1
-------------------------------------


LAST REWARD:-1
LAST COMMAND:  prepare meal
INVENTORY:
['a fried yellow bell pepper']
found: 1
-------------------------------------

[10, 18]

LAST REWARD:-1
LAST COMMAND:  drop yellow bell pepper
INVENTORY:
[]
found: 0
-------------------------------------

[10, 18]

LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
[]
found: 0
-------------------------------------

[13, 23]

LAST REWARD:-1
LAST COMMAND:  open fridge
INVENTORY:
[]
found: 0
-------------------------------------

[17, 31]

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper
INVENTORY:
['a fried yellow bell pepper']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow potato from fridge


LAST REWARD:-1
LAST COMMAND:  take yellow potato from fridge
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  close fridge
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  drop yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[10, 19]

LAST REWARD:-1
LAST COMMAND:  put yellow potato on table
INVENTORY:
[]
found: 0
-------------------------------------

[14, 26]

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper
INVENTORY:
['a fried yellow bell pepper']
found: 1
-------------------------------------

[14, 26]

LAST REWARD:-1
LAST COMMAND:  examine yellow potato
INVENTORY:
['


LAST REWARD:-1
LAST COMMAND:  take yellow potato from fridge
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  close fridge
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  prepare meal
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  drop yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  put yellow bell pepper on table
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[14, 25]

LAST


LAST REWARD:-1
LAST COMMAND:  close fridge
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  prepare meal
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  drop yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  put yellow bell pepper on table
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper from table
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

COOK

LAST


LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper from fridge
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  close fridge
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  drop yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  put yellow bell pepper on table
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

COOK

LAST REWARD:-1



LAST REWARD:-1
LAST COMMAND:  prepare meal
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  look
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  drop yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  put yellow bell pepper on table
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  look
INVENTO


LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  put yellow bell pepper on table
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper from table
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  prepare meal
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  open fridge
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[17, 30]

LAST REWARD:-1
LAST COMMAND:  insert yellow bell pepper into fridge
INVENTORY:
['a fried yellow potato']
found: 1
------------------------------------


LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  put yellow bell pepper on table
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper from table
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  prepare meal
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  open fridge
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[17, 30]

LAST REWARD:-1
LAST COMMAND:  insert yellow bell pepper into fridge
INVENTORY:
['a fried yellow potato']
found: 1
------------------------------------


LAST REWARD:-1
LAST COMMAND:  take yellow potato from fridge
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  close fridge
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  drop yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[10, 19]

LAST REWARD:-1
LAST COMMAND:  put yellow potato on counter
INVENTORY:
[]
found: 0
-------------------------------------

[13, 23]

LAST REWARD:-1
LAST COMMAND:  open fridge
INVENTORY:
[]
found: 0
-------------------------------------

[13, 23]

LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
[]
found: 0
-----------------------


LAST REWARD:-1
LAST COMMAND:  close fridge
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  drop yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  put yellow bell pepper on table
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper from table
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
L


LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
[]
found: 0
-------------------------------------

[13, 23]

LAST REWARD:-1
LAST COMMAND:  look
INVENTORY:
[]
found: 0
-------------------------------------

[17, 31]

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper
INVENTORY:
['a fried yellow bell pepper']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow potato from fridge
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  close fridge
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  put yellow bell pepper on table
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
---


LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[17, 30]

LAST REWARD:-1
LAST COMMAND:  open fridge
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper from table
INVENTORY:
['a fried yellow bell pepper', 'a fried yellow potato']
found: 2
-------------------------------------

[17, 30]

LAST REWARD:-1
LAST COMMAND:  insert yellow bell pepper into fridge
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[17, 30]

LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[13, 23]

LAST REWARD:-1
LAST COMMAND:  insert yellow potato into fridge
INVENTORY:
[]
found: 0
-------------------------------------

[13, 23]

LAST REWARD:-1
LAST COMMAND:  look
INVENTORY:
[]
found: 0
----------------


LAST REWARD:-1
LAST COMMAND:  close fridge
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  drop yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 1
-------------------------------------

[10, 19]

LAST REWARD:-1
LAST COMMAND:  put yellow potato on table
INVENTORY:
[]
found: 0
-------------------------------------

[10, 19]

LAST REWARD:-1
LAST COMMAND:  look
INVENTORY:
[]
found: 0
-------------------------------------

[10, 19]

LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
[]
found: 0
-------------------------------------

[10, 19]

LAST REWARD:-1
LAST COMMAND:  look
INVENTORY:
[]
found: 0
-------------------------------------

[14, 26]

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper
INVE


LAST REWARD:-1
LAST COMMAND:  examine yellow bell pepper
INVENTORY:
[]
found: 0
-------------------------------------

[13, 23]

LAST REWARD:-1
LAST COMMAND:  look
INVENTORY:
[]
found: 0
-------------------------------------

[17, 31]

LAST REWARD:-1
LAST COMMAND:  take yellow bell pepper
INVENTORY:
['a fried yellow bell pepper']
found: 1
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  take yellow potato from counter
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  close fridge
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

COOK

LAST REWARD:-1
LAST COMMAND:  prepare meal
INVENTORY:
['a fried yellow potato', 'a fried yellow bell pepper']
found: 2
-------------------------------------

[14, 25]

LAST REWARD:-1
LAST COMMAND:  drop yellow bell pepper
INVENTORY:
['a fried yellow potato']
found: 

### Game with Navigation 
- really bad (0.2/5)

In [64]:
agent.train()
starttime = time()
for i in range(2):
    print("-------------------------------------")
    print("GAME: " + str(i+1))
    print("-------------------------------------")
    play(agent, gamefiles[1])
print("Trained in {:.2f} secs".format(time() - starttime))

-------------------------------------
GAME: 1
-------------------------------------
tw-cooking-recipe3+cook+cut+drop+go12-B10qS58OTbe0T8Qk.ulx
Recipe #1
---------
Gather all following ingredients and follow the directions to prepare this tasty meal.

Ingredients:
  red hot pepper
  salt
  yellow potato

Directions:
  slice the red hot pepper
  slice the yellow potato
  grill the yellow potato
  prepare meal

['drop red hot pepper', 'drop red onion', 'drop red potato', 'drop salt', 'drop yellow bell pepper', 'drop yellow potato', 'eat red hot pepper', 'eat red onion', 'eat salt', 'eat yellow bell pepper', 'go south', 'look']

train step:1
last reward:0
0
['drop red onion', 'drop red potato', 'drop salt', 'drop yellow bell pepper', 'drop yellow potato', 'eat red onion', 'eat salt', 'eat yellow bell pepper', 'go south', 'look']
eat red hot pepper 
eat 

train step:2
last reward:-5
last command:  eat red hot pepper
last failed command:  eat red hot pepper
1
-------- LOST GAME ----------
 



train step:22
last reward:0
last failed command:  eat salt
4
['close barn door', 'close sliding patio door', 'cook red hot pepper with BBQ', 'cook red onion with BBQ', 'cook red potato with BBQ', 'cook yellow bell pepper with BBQ', 'cook yellow potato with BBQ', 'drop red hot pepper', 'drop red onion', 'drop red potato', 'drop salt', 'drop yellow bell pepper', 'drop yellow potato', 'eat red hot pepper', 'eat red onion', 'eat salt', 'eat yellow bell pepper', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look', 'put red hot pepper on patio chair', 'put red hot pepper on patio table', 'put red onion on patio chair', 'put red onion on patio table', 'put red potato on patio chair', 'put red potato on patio table', 'put salt on patio chair', 'put salt on patio table', 'put yellow bell pepper on patio chair', 'put yellow bell pepper on patio table', 'put yellow potato on patio chair', 'put yellow potato on patio table']
go south 
go 

train 

go north 
go 

train step:41
last reward:-1
last command:  go north
last failed command:  eat salt
5
['close front door', 'drop red hot pepper', 'drop red potato', 'drop salt', 'drop yellow potato', 'eat red hot pepper', 'eat salt', 'examine sofa', 'go east', 'go north', 'go west', 'look', 'put red hot pepper on sofa', 'put red potato on sofa', 'put salt on sofa', 'put yellow potato on sofa']
go south 
go 

train step:42
last reward:-1
last command:  go south
last failed command:  eat salt
5
['close front door', 'examine sofa', 'go east', 'go north', 'go west', 'look']
eat salt 
eat 

train step:43
last reward:-5
last command:  eat salt
last failed command:  eat salt
5
-------- LOST GAME ----------
 
.['drop red hot pepper', 'drop red onion', 'drop red potato', 'drop salt', 'drop yellow bell pepper', 'drop yellow potato', 'eat red hot pepper', 'eat red onion', 'eat salt', 'eat yellow bell pepper', 'go south', 'look']

train step:44
last reward:0
last failed command:  eat salt
5
['close

take red hot pepper 
take 

train step:63
last reward:-1
last command:  take red hot pepper
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'cook red onion with BBQ', 'cook red potato with BBQ', 'cook yellow bell pepper with BBQ', 'cook yellow potato with BBQ', 'drop red onion', 'drop red potato', 'drop salt', 'drop yellow bell pepper', 'drop yellow potato', 'eat red onion', 'eat salt', 'eat yellow bell pepper', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look', 'put red onion on patio chair', 'put red onion on patio table', 'put red potato on patio chair', 'put red potato on patio table', 'put salt on patio chair', 'put salt on patio table', 'put yellow bell pepper on patio chair', 'put yellow bell pepper on patio table', 'put yellow potato on patio chair', 'put yellow potato on patio table']
go south 
go 

train step:64
last reward:-1
last command:  go south
last failed command:  eat red h

take yellow potato 
take 

train step:75
last reward:-1
last command:  take yellow potato
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'cook red onion with BBQ', 'cook yellow bell pepper with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'examine red potato', 'examine yellow potato', 'go east', 'go north', 'go south', 'look', 'take red potato from patio table', 'take yellow potato']
take red potato from patio table 
take 

train step:76
last reward:-1
last command:  take red potato from patio table
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'cook red onion with BBQ', 'cook yellow bell pepper with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'examine red potato', 'examine yellow potato', 'go east', 'go north', 'go south', 'look', 'take red potato from patio table', 'take yellow potato']
take red potato from patio table 
take 

train step:77
last reward

cook yellow bell pepper with BBQ 
cook 

train step:95
last reward:-1
last command:  cook yellow bell pepper with BBQ
last failed command:  eat red hot pepper
5
['close sliding patio door', 'cook red onion with BBQ', 'cook yellow bell pepper with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'examine red potato', 'examine yellow potato', 'go north', 'go south', 'look', 'open barn door', 'take red potato from patio table', 'take yellow potato']
close barn door 
close 

train step:96
last reward:-1
last command:  close barn door
last failed command:  eat red hot pepper
5
['cook red onion with BBQ', 'cook yellow bell pepper with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'examine red potato', 'examine yellow potato', 'go north', 'look', 'open barn door', 'open sliding patio door', 'take red potato from patio table', 'take yellow potato']
close sliding patio door 
close 

train step:97
last reward:-1
last command:  close sliding patio door
last 

go north 
go 

train step:119
last reward:-1
last command:  go north
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'cook red onion with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
cook yellow bell pepper with BBQ 
cook 

train step:120
last reward:-1
last command:  cook yellow bell pepper with BBQ
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
cook red onion with BBQ 
cook 

train step:121
last reward:-1
last command:  cook red onion with BBQ
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
look 
look 

train step:122
last reward:-1
last command:  look
last failed command:  eat red hot pe

go north 
go 

train step:150
last reward:-1
last command:  go north
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
look 
look 

train step:151
last reward:-1
last command:  look
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
examine BBQ 
examine 

train step:152
last reward:-1
last command:  examine BBQ
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
examine patio table 
examine 

train step:153
last reward:-1
last command:  examine patio table
last failed command:  eat red hot pepper
5
-------- LOST GAME ----------
 
.['drop red hot pepper', 'drop r

examine red hot pepper 
examine 

train step:180
last reward:-1
last command:  examine red hot pepper
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook red potato with BBQ', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
go south 
go 

train step:181
last reward:-1
last command:  go south
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook red potato with BBQ', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:182
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['cook red potato with BBQ', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'look', 'open barn door', 'open sliding pati

close barn door 
close 

train step:207
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'look', 'open barn door', 'open sliding patio door']
close sliding patio door 
close 

train step:208
last reward:-1
last command:  close sliding patio door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'look', 'open sliding patio door']
open barn door 
open 

train step:209
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open sliding patio door 
open 

train step:210
last reward:-

take red potato 
take 

train step:236
last reward:-1
last command:  take red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
examine red hot pepper 
examine 

train step:237
last reward:-1
last command:  examine red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
examine red potato 
examine 

train step:238
last reward:-1
last command:  examine red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
look 
look 

train step:239
last reward:-1
last command:  look
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take

close barn door 
close 

train step:265
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'look', 'open barn door', 'open sliding patio door']
close sliding patio door 
close 

train step:266
last reward:-1
last command:  close sliding patio door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'look', 'open sliding patio door']
open barn door 
open 

train step:267
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open sliding patio door 
open 

train step:268
last reward:-

close barn door 
close 

train step:295
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:296
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'go east', 'go north', 'go south', 'go west', 'look']
go south 
go 

train step:297
last reward:-1
last command:  go south
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
go north 
go 

train step:298
last reward:-1
last command:  go north
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook y

close barn door 
close 

train step:323
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:324
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'go east', 'go north', 'go south', 'go west', 'look']
go south 
go 

train step:325
last reward:-1
last command:  go south
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
go north 
go 

train step:326
last reward:-1
last command:  go north
last failed command:  cook yellow potato with BBQ
5
-------- LOST GAME ----------
 
.['d

open barn door 
open 

train step:353
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:354
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:355
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:356
last reward:-1
last command

take red hot pepper 
take 

train step:382
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'go south', 'look', 'take red hot pepper']
examine red hot pepper 
examine 

train step:383
last reward:-1
last command:  examine red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'go south', 'look', 'take red hot pepper']
examine red hot pepper 
examine 

train step:384
last reward:-1
last command:  examine red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'go south', 'look', 'take red hot pepper']
take red hot pepper 
take 

train step:385
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook red potato with BBQ', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', '

go south 
go 

train step:411
last reward:-1
last command:  go south
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
go north 
go 

train step:412
last reward:-1
last command:  go north
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:413
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:414
last reward:-1
last command:  open barn door


take red potato 
take 

train step:440
last reward:-1
last command:  take red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take red potato 
take 

train step:441
last reward:-1
last command:  take red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take red hot pepper 
take 

train step:442
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take red potato 
take 

train step:443
last reward:-1
last command:  take red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']


open barn door 
open 

train step:469
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:470
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:471
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
look 
look 

train step:472
last reward:-1
last command

close barn door 
close 

train step:499
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:500
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'go east', 'go north', 'go south', 'go west', 'look']
go south 
go 

train step:501
last reward:-1
last command:  go south
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
go north 
go 

train step:502
last reward:-1
last command:  go north
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook y

close barn door 
close 

train step:527
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:528
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'go east', 'go north', 'go south', 'go west', 'look']
go south 
go 

train step:529
last reward:-1
last command:  go south
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
go north 
go 

train step:530
last reward:-1
last command:  go north
last failed command:  cook yellow potato with BBQ
5
-------- LOST GAME ----------
 
.['d

open barn door 
open 

train step:557
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:558
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:559
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:560
last reward:-1
last command

take red hot pepper 
take 

train step:586
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'go south', 'look', 'take red hot pepper']
examine red hot pepper 
examine 

train step:587
last reward:-1
last command:  examine red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'go south', 'look', 'take red hot pepper']
examine red hot pepper 
examine 

train step:588
last reward:-1
last command:  examine red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'go south', 'look', 'take red hot pepper']
take red hot pepper 
take 

train step:589
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook red potato with BBQ', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', '

take red hot pepper 
take 

train step:614
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take red potato 
take 

train step:615
last reward:-1
last command:  take red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take red potato 
take 

train step:616
last reward:-1
last command:  take red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take red hot pepper 
take 

train step:617
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red p

In [68]:
for i in range(len(agent.cmdvalues)):
    print( agent.tok_to_text(agent.cmdtoks[i]) + " : " + str(agent.cmdvalues[i]))

chop  : -5
close  : -22
cook  : -22
dice  : -4
drink  : 0
drop  : -19
eat  : -18
examine  : -23
go  : 0
insert  : -8
inventory  : 0
lock  : 0
look  : -22
open  : -17
prepare  : 93
put  : -19
slice  : -6
take  : -15
unlock  : 0


In [13]:
# versuche
wordids = agent._tokenize("cook a meal")
print(wordids)

print(agent._get_word_id("cook"))
print(agent._get_word_by_id(4664))

[4664, 785, 11449]
4664
cook


In [70]:
def play_render(agent,gamefile):
    requested_infos = agent.select_additional_infos
    env_id = textworld.gym.register_games([gamefile], requested_infos)

    env = gym.make(env_id)
    obs, infos = env.reset()

    env.render()  # Print the initial observation.

    score = 0
    done = False
    while not done:
        command = agent.act(obs,score,done,infos)
        ob, score, done, infos = env.step(command)
        env.render()

In [71]:
agent.test()
play_render(agent, gamefiles[5])




                    ________  ________  __    __  ________
                   |        \|        \|  \  |  \|        \
                    \$$$$$$$$| $$$$$$$$| $$  | $$ \$$$$$$$$
                      | $$   | $$__     \$$\/  $$   | $$
                      | $$   | $$  \     >$$  $$    | $$
                      | $$   | $$$$$    /  $$$$\    | $$
                      | $$   | $$_____ |  $$ \$$\   | $$
                      | $$   | $$     \| $$  | $$   | $$
                       \$$    \$$$$$$$$ \$$   \$$    \$$
              __       __   ______   _______   __        _______
             |  \  _  |  \ /      \ |       \ |  \      |       \
             | $$ / \ | $$|  $$$$$$\| $$$$$$$\| $$      | $$$$$$$\
             | $$/  $\| $$| $$  | $$| $$__| $$| $$      | $$  | $$
             | $$  $$$\ $$| $$  | $$| $$    $$| $$      | $$  | $$
             | $$ $$\$$\$$| $$  | $$| $$$$$$$\| $$      | $$  | $$
             | $$$$  \$$$$| $$__/ $$| $$  | $$| $$_____ | $$__/ $$
          