In [1]:
import os
import re
import random
from typing import List, Dict, Any, Optional
from collections import defaultdict
from time import time
from glob import glob
import numpy as np
import gym
from textworld import EnvInfos
import textworld.gym

# for text similarity
import spacy
import wmd
#import en_core_web_md
import substring

# Load English tokenizer, tagger, parser, NER and word vectors
nlp = spacy.load("en_core_web_lg")

#import torch

#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# Directory that holds the compiled TextWorld games (*.ulx).
GAMES_PATH = "./sample_games/"

# Collect the path of every compiled game found directly inside GAMES_PATH.
gamefiles = []
for file in os.listdir(GAMES_PATH):
    if not file.endswith('.ulx'):
        continue
    gamefiles.append(GAMES_PATH + file)

# Upper bound on the number of commands the agent may issue per episode.
MAX_STEPS = 50

## Play Function
- creates a Gym environment and lets the agent play a TextWorld game (or every game in a folder) for a number of episodes

In [3]:
def play(agent, path, max_step= MAX_STEPS, nb_episodes=5, verbose=True):
    """
    Let `agent` play the game(s) found at `path` and print summary statistics.

    Arguments:
        agent: Object exposing `select_additional_infos` and
            `act(obs, score, done, infos)`.
        path: Either a single game file or a directory; for a directory every
            `*.ulx` file inside it is registered.
        max_step: Maximum number of steps per episode passed to the environment.
        nb_episodes: Number of episodes to play.
        verbose: When true, print the game name, one dot per finished episode
            and the averaged statistics.
    """
    is_directory = os.path.isdir(path)

    request_infos = agent.select_additional_infos
    request_infos.max_score = True  # Needed to normalize the scores.

    games = glob(os.path.join(path, "*.ulx")) if is_directory else [path]

    env_id = textworld.gym.register_games(games,
                                          request_infos=request_infos,
                                          max_episode_steps=max_step)
    # Create a Gym environment to play the text game.
    env = gym.make(env_id)
    if verbose:
        label = os.path.dirname(path) if is_directory else os.path.basename(path)
        print(label, end="")

    # Per-episode statistics: number of moves, raw score, normalized score.
    avg_moves = []
    avg_scores = []
    avg_norm_scores = []
    for _ in range(nb_episodes):
        # Start a new episode.
        obs, infos = env.reset()
        score, done, nb_moves = 0, False, 0

        while not done:
            # The agent picks a command, the environment executes it.
            command = agent.act(obs, score, done, infos)
            obs, score, done, infos = env.step(command)
            nb_moves += 1

        # One extra call so the agent sees the final state of the episode.
        agent.act(obs, score, done, infos)

        if verbose:
            print(".", end="")
        avg_moves.append(nb_moves)
        avg_scores.append(score)
        avg_norm_scores.append(score / infos["max_score"])

    env.close()
    msg = "  \tavg. steps: {:5.1f}; avg. score: {:4.1f} / {}."
    if verbose:
        mean_moves = np.mean(avg_moves)
        if is_directory:
            # Several games: report the normalized score out of 1.
            print(msg.format(mean_moves, np.mean(avg_norm_scores), 1))
        else:
            # Single game: report the raw score out of the game's maximum.
            print(msg.format(mean_moves, np.mean(avg_scores), infos["max_score"]))

In [4]:
# Attach the Word Mover's Distance hook from the `wmd` package to the spaCy
# pipeline loaded in the import cell. It is appended as the last component.
# NOTE(review): CustomAgent.calc_similarity treats the value returned by
# `Doc.similarity` as a distance (0.0 = best), which presumably comes from this
# hook -- confirm against the wmd documentation.
# NOTE(review): this cell mutates the shared `nlp` object, so running it twice
# on the same kernel adds the hook twice (or fails, depending on the spaCy
# version) -- run it exactly once after a kernel restart.

#nlp = en_core_web_md.load()
nlp.add_pipe(wmd.WMD.SpacySimilarityHook(nlp), last=True)

## Custom Agent
#### Handicap
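- does not use Handicap 5: `admissible_commands` is not requested from EnvInfos; candidate commands are generated from the recipe, the inventory and the room description instead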
- uses Handicap 4 (recipe from EnvInfos)
- uses Handicap 2 (command templates from EnvInfos)
- uses Handicap 1 and 0 

-------------------

#### What does the agent learn?
- saves ranking for command templates
    - simplification so far: only the first word (the verb) of each template is used; the types of the objects are not differentiated
    
    
- the template ranking gets updated while the agent is playing, depending on the scores
    - the reward is calculated by subtracting last round's score from this round's score
    - if the command led to a win, its template gets an extra reward of +5
        - in this case the two commands before it are rewarded with +5 as well (i.e. the last 3 commands instead of only the last one)
    - if the command led to a defeat, its template gets an extra penalty of -5
    - if the command did not get any reward, a penalty of -1 is added
        - necessary to avoid using the same command over and over again
    
----------------
    
#### How does the agent choose a command
- choose best command based on command template ranking + similarity between command and recipe
- if stuck using same commands use second best command
- penalize commands that caused GAME OVER in last x games

In [49]:
class CustomAgent():
    """
    Ranking-based agent for the TextWorld cooking games.

    The agent keeps one score per command verb (the first word of every
    command template). Candidate commands are produced by
    `AdmissibleCommands` from the recipe, the inventory and the room
    description; the candidate with the best "verb score + similarity to the
    recipe" is played. In training mode the verb scores are updated from the
    score difference observed after each command.
    """

    # Command issued when no candidate command could be generated at all.
    FALLBACK_COMMAND = "look"

    def __init__(self) -> None:
        """Set up the vocabulary (read from ./vocab.txt) and the empty agent state."""
        self._initialized = False
        self._epsiode_has_started = False
        self.mode = "test"
        self.command_history = []
        self.history_counter = 0
        self.nlp = nlp

        # State that train() (re)sets as well. It is initialised here so that
        # act() also works in test mode when train() was never called.
        self.transitions = []
        self.failedcommands = []
        self.last_score = 0
        self.no_train_step = 0
        self.moves = 0

        # read in vocab.txt and map to id
        with open("./vocab.txt") as f:
            self.word_vocab = f.read().split("\n")
        self.word2id = {}  # dictionary for converting words to ids
        self.id2word = []  # list for converting ids to words
        for i, w in enumerate(self.word_vocab):
            self.word2id[w] = i
            self.id2word.append(w)

        self.cmdvalues = []  # scores for command types
        self.cmdtoks = []  # ids of (so far only first word of) command type

        self.max_fail_memory = 5
        self.recipe = ""
        # Derived from the recipe in _start_episode(); defaults keep act()
        # usable even if the very first recipe is an empty string.
        self.recipe_tok = []
        self.ingredients = []
        self.directions = []

    def train(self) -> None:
        """Switch to training mode and reset all per-training state."""
        self.mode = "train"
        self.transitions = []
        self.failedcommands = []
        self.last_score = 0
        self.no_train_step = 0
        self.moves = 0
        self.command_history = []
        self.history_counter = 0

    def test(self) -> None:
        """Switch to test mode (no ranking updates, no debug training output)."""
        self.mode = "test"

    @property
    def select_additional_infos(self) -> "EnvInfos":
        """
        Returns what additional information should be made available at each game step.

        Requested information will be included within the `infos` dictionary
        passed to `CustomAgent.act()`. To request specific information, create a
        :py:class:`textworld.EnvInfos <textworld.envs.wrappers.filter.EnvInfos>`
        and set the appropriate attributes to `True`. The possible choices are:

        * `description`: text description of the current room, i.e. output of the `look` command;
        * `inventory`: text listing of the player's inventory, i.e. output of the `inventory` command;
        * `max_score`: maximum reachable score of the game;
        * `objective`: objective of the game described in text;
        * `entities`: names of all entities in the game;
        * `verbs`: verbs understood by the game;
        * `command_templates`: templates for commands understood by the game;
        * `admissible_commands`: all commands relevant to the current state;

        In addition to the standard information, game specific information
        can be requested by appending corresponding strings to the `extras`
        attribute. For this competition, the possible extras are:

        * `'recipe'`: description of the cookbook;
        * `'walkthrough'`: one possible solution to the game (not guaranteed to be optimal);

        Example:
            Here is an example of how to request information and retrieve it.

            >>> from textworld import EnvInfos
            >>> request_infos = EnvInfos(description=True, inventory=True, extras=["recipe"])
            ...
            >>> env = gym.make(env_id)
            >>> ob, infos = env.reset()
            >>> print(infos["description"])
            >>> print(infos["inventory"])
            >>> print(infos["extra.recipe"])

            Handicap is defined as follows
                max_score, has_won, has_lost,               # Handicap 0
                description, inventory, verbs, objective,   # Handicap 1
                command_templates,                          # Handicap 2
                entities,                                   # Handicap 3
                extras=["recipe"],                          # Handicap 4
                admissible_commands,                        # Handicap 5
        """
        return EnvInfos(description=True, inventory=True, max_score = True, entities = True,
                        admissible_commands=False, verbs=True, has_won=True, has_lost=True,
                        command_templates = True, extras=["recipe"])

    def _get_word_id(self, word):
        """
        Look up the id of a word and register the word if it is unknown.

        The new id is the position the word gets in `id2word`. Using
        `len(self.word2id)` instead would go out of step with `id2word` as
        soon as vocab.txt contains a duplicate line, and the id would then
        translate back to the wrong word.
        """
        if word not in self.word2id:
            self.word2id[word] = len(self.id2word)
            self.id2word.append(word)
        return self.word2id[word]

    def _get_word_by_id(self, id):
        """
        Look up a word by id.
        Returns None if the id does not exist (negative ids included).
        """
        if 0 <= id < len(self.id2word):
            return self.id2word[id]
        return None

    def _tokenize(self, text):
        """
        Simple tokenizer: replace every character that is not a letter, digit,
        hyphen or space by a space, split on whitespace and map each word to
        its id (unknown words are added to the vocabulary).
        """
        text = re.sub(r"[^a-zA-Z0-9\- ]", " ", text)
        return [self._get_word_id(word) for word in text.split()]

    def _init(self, command_templates) -> None:
        """Initialize the agent: create one ranking entry per distinct template verb."""
        self._initialized = True
        for template in command_templates:
            # only save the first word of the template
            initword = self._tokenize(template.split(' ', 1)[0])
            if initword not in self.cmdtoks:
                self.cmdtoks.append(initword)
                self.cmdvalues.append(0)

    def _start_episode(self, obs: str, infos: Dict[str, Any]) -> None:
        """
        Prepare the agent for the upcoming episode.
        Arguments:
            obs: Initial feedback of the game.
            infos: Additional information of the game.
        """
        if not self._initialized:
            self._init(infos["command_templates"])

        # A different recipe means a new game: forget the failed commands of
        # the previous game and re-parse the recipe.
        if self.recipe != infos["extra.recipe"]:
            self.recipe = infos["extra.recipe"]
            print(self.recipe)
            self.failedcommands = []
            self.ingredients, self.directions = self.process_recipe()
            self.recipe_tok = self._tokenize(self.recipe)

        self._epsiode_has_started = True

    def _end_episode(self, obs: str, scores: int, infos: Dict[str, Any]) -> None:
        """
        Tell the agent the episode has terminated.
        Arguments:
            obs: Previous command's feedback.
            scores: The score obtained so far.
            infos: Additional information of the game.
        """
        self._epsiode_has_started = False

    def tok_to_text(self, tok: List[int]):
        """
        Convert ids to a phrase, e.g. for debug outputs.
        Every word is followed by a space; unknown ids become "[UNK]".
        """
        text = ""
        for id in tok:
            word = self._get_word_by_id(id)
            text += (word if word is not None else "[UNK]") + " "
        return text

    def tok_to_nlp(self, tok1: List[int]):
        """Convert a list of word ids into the object returned by `self.nlp`."""
        return self.nlp(self.tok_to_text(tok1))

    def process_inventory(self, inv):
        """
        Extract the carried items from the inventory text.

        Only the multi-line form with a header ending in ":" is parsed; every
        non-blank line after the header is one item. Returns [] otherwise
        (e.g. "You are carrying nothing.").
        """
        if ":" not in inv:
            return []
        # Skip the header line and ignore blank lines instead of relying on a
        # fixed number of trailing newlines.
        items = (line.strip() for line in inv.split("\n")[1:])
        return [item for item in items if item]

    def process_recipe(self):
        """
        Split `self.recipe` into its ingredient and direction lines.

        Expects blank-line separated sections where the second section lists
        the ingredients and the third the directions, each starting with a
        header line. Raises IndexError if the recipe has fewer sections.
        """
        splitrecipe = self.recipe.split("\n\n")
        ingredients = [line.replace("  ", "") for line in splitrecipe[1].split("\n")[1:]]
        directions = [line.replace("  ", "") for line in splitrecipe[2].split("\n")[1:-1]]
        return ingredients, directions

    def calc_similarity(self, tok1: List[int], tok2: List[int]) -> float:
        """
        Calculate the similarity between two token lists.

        The value returned by `similarity` is treated as a distance:
        usually 0.0 is best and the worst observed was about 9, so it is
        divided by 10 and subtracted from 1 to get the best value at 1.0.
        Returns 0.0 when the similarity computation fails.
        """
        text1 = self.tok_to_nlp(tok1)
        text2 = self.tok_to_nlp(tok2)
        try:
            return 1 - text1.similarity(text2) / 10
        except Exception:
            # Best effort: the similarity hook sometimes fails (e.g. when a
            # cost is negative). Treat that as "no similarity" but let
            # KeyboardInterrupt / SystemExit through.
            return 0.0

    def get_index_of_template(self, cmd: List[int]) -> int:
        """
        Return the ranking index whose verb equals the first token of `cmd`.
        If no template matches, a random index is returned.
        """
        if cmd:
            for i, template_tok in enumerate(self.cmdtoks):
                if template_tok and cmd[0] == template_tok[0]:
                    return i
        return random.randint(0, len(self.cmdtoks) - 1)

    def update_cmdvalues(self, cmd: List[int], reward):
        """Add `reward` to the ranking entry of the template that belongs to `cmd`."""
        maxindex = self.get_index_of_template(cmd)
        self.cmdvalues[maxindex] += reward
        print(self.tok_to_text(cmd))
        print(self.tok_to_text(self.cmdtoks[maxindex]))

    def _score_commands(self, commands, recipe_tok) -> List[float]:
        """Return one value per command: template score + 10 * recipe similarity, or -10000 for failed commands."""
        qvalues = []
        for cmd in commands:
            template_index = self.get_index_of_template(cmd)
            if cmd in self.failedcommands:
                qval = -10000
            else:
                qval = self.cmdvalues[template_index] + self.calc_similarity(cmd, recipe_tok) * 10
            qvalues.append(qval)
        return qvalues

    def choose_best_cmd(self, commands, recipe_tok) -> int:
        """
        Return the index of one of the best valued commands (random among ties).
        Commands that caused GAME OVER in the last x games get the value -10000.
        Raises ValueError if `commands` is empty.
        """
        qvalues = self._score_commands(commands, recipe_tok)
        if not qvalues:
            raise ValueError("choose_best_cmd() needs at least one command")
        max_value = max(qvalues)
        return random.choice([i for i, qval in enumerate(qvalues) if qval == max_value])

    def get_alternative_cmd(self, commands, recipe_tok) -> int:
        """
        Return the index of the second best command; used when the best
        command was already played within the last 3 commands.

        With a single command there is no alternative, so index 0 is returned.
        Raises ValueError if `commands` is empty.
        """
        print("get alternative cmd")
        qvalues = self._score_commands(commands, recipe_tok)
        if not qvalues:
            raise ValueError("get_alternative_cmd() needs at least one command")
        if len(qvalues) == 1:
            return 0
        # Second largest value, duplicates counted (e.g. [5, 5, 3] -> 5).
        second_max = sorted(qvalues, reverse=True)[1]
        return qvalues.index(second_max)

    def _avoid_recent_command(self, action, best_index, cmds, adm_cmds, last_commands):
        """
        Replace `action` by the alternative command while it is one of
        `last_commands`; gives up after 3 attempts. Returns (action, index).
        """
        attempts = 0
        # Membership test instead of `last_commands.index(action)`: index()
        # returns 0 (falsy) for a match at the first position, which used to
        # be mistaken for "not in the list".
        while action in last_commands and attempts < 3:
            best_index = self.get_alternative_cmd(cmds, self.recipe_tok)
            action = adm_cmds[best_index]
            attempts += 1
        return action, best_index

    def act(self, obs: str, score: int, done: bool,
            infos: Dict[str, Any]) -> str:
        """
        Choose the next command for the current game state.

        Arguments:
            obs: Previous command's feedback.
            score: The score obtained so far.
            done: Whether the game is finished.
            infos: Additional information of the game; the keys "inventory",
                "description", "extra.recipe", "has_won" and "has_lost" are
                read here, "command_templates" on the first episode.

        Returns:
            The text command to be performed. If the episode has ended the
            returned value is ignored by the caller.

        Notes:
            In training mode the ranking of the previously played command is
            updated from the score difference before the new command is stored.
        """
        if not self._epsiode_has_started:
            self._start_episode(obs, infos)

        print(infos["inventory"])
        print("----------")

        inv = self.process_inventory(infos["inventory"])
        print(inv)
        descr = infos["description"]
        recipe = infos["extra.recipe"]

        # history of the last commands, used to get out of a loop of death
        last_commands = self.command_history[-3:]

        # creating candidate commands from recipe, inventory and description
        ac = AdmissibleCommands()
        nouns_list, op_list = ac.get_nouns_from_recipe(recipe)
        adm_cmds = ac.create_cmds(nouns_list, op_list, inv, descr, last_commands)
        if not adm_cmds:
            # Nothing could be generated: play a harmless command instead of
            # crashing on an empty candidate list.
            adm_cmds = [self.FALLBACK_COMMAND]

        # choose best command based on "qvalues"
        cmds = [self._tokenize(cmd) for cmd in adm_cmds]
        best_index = self.choose_best_cmd(cmds, self.recipe_tok)
        action = adm_cmds[best_index]

        # Repetition check against the last 3 commands.
        # NOTE(review): both counters are only incremented in training mode and
        # `moves` is reset per episode, so this check is inactive during the
        # first training episode and in pure test mode -- confirm intent.
        if self.history_counter > self.moves:
            action, best_index = self._avoid_recent_command(
                action, best_index, cmds, adm_cmds, last_commands)

        # adds the chosen action to the command-history
        self.command_history.append(action)

        if self.mode == "test":
            if done:
                # End the episode in test mode too, otherwise the recipe of the
                # next game would never be read.
                self.command_history = []
                self._end_episode(obs, score, infos)
            return action

        # train mode, counter update
        self.no_train_step += 1
        reward = 0
        self.moves += 1
        self.history_counter += 1

        # calculate rewards
        if self.transitions:
            reward = score - self.last_score
            self.last_score = score
            if infos["has_won"]:
                reward += 5
                # if won a game, reward last 3 transitions
                if len(self.transitions) > 1:
                    self.update_cmdvalues(self.transitions[-2], 5)
                if len(self.transitions) > 2:
                    self.update_cmdvalues(self.transitions[-3], 5)
            if infos["has_lost"]:
                reward -= 5
                # Penalizing the last 3 transitions as well made results worse,
                # so only the last command is remembered as failed, and only if
                # the game was not simply lost by running out of steps.
                if self.moves < MAX_STEPS-1:
                    # bounded memory of failed commands (oldest is dropped)
                    if len(self.failedcommands) >= self.max_fail_memory:
                        self.failedcommands.pop(0)
                    self.failedcommands.append(self.transitions[-1])
            if reward == 0:
                reward -= 1
            # update rewards for command from last step
            self.update_cmdvalues(self.transitions[-1], reward)

        # Debug output
        print("")
        print("OBS: ", obs)
        print("train step:" + str(self.no_train_step))
        print("last reward:" + str(reward))
        if self.transitions:
            command = "last command: "
            for id in self.transitions[-1]:
                word = self._get_word_by_id(id)
                command += " " + (word if word is not None else "[UNK]")
            print(command)
        if self.failedcommands:
            fcommand = "last failed command: "
            for id in self.failedcommands[-1]:
                word = self._get_word_by_id(id)
                fcommand += " " + (word if word is not None else "[UNK]")
            print(fcommand)
        print(len(self.failedcommands))

        # save last command in order to calculate rewards in next step
        self.transitions.append(cmds[best_index])

        if done:
            if infos["has_won"]:
                print("-------- WON GAME ----------")
            else:
                print("-------- LOST GAME ----------")
            print(" ")
            self.last_score = 0
            self.moves = 0
            self.transitions = []
            self.command_history = []
            self._end_episode(obs, score, infos)

        return action

In [63]:
class AdmissibleCommands():
    """
    Builds candidate commands for the cooking games from the recipe text,
    the parsed inventory and the room description, without using the
    admissible commands provided by the environment.
    """

    # Words the noun extraction picks up from descriptions that are not game objects.
    _IGNORED_NOUNS = ("TextWorld", "it")

    def __init__(self) -> None:
        print("Created AdmCmds Obj.")

    def get_nouns_from_description(self, descr):
        """
        Get object candidates from the room description.

        A word is kept when it is the last word of a "."-separated sentence
        AND the root of a noun chunk found by the spaCy pipeline; the last
        words are used because they tend to be the significant ones and some
        of them are not found by the pipeline alone.
        """
        descr_nlp = nlp(descr)

        # last word of every sentence of the description
        last_word_list = [sentence.split(" ")[-1] for sentence in descr.split(".")]

        # roots of all noun chunks, including unnecessary ones
        noun_list = [chunk.root.text for chunk in descr_nlp.noun_chunks if chunk is not None]

        # Filtering (instead of list.remove) does not raise when an ignored
        # word is absent from the description.
        return [word for word in last_word_list
                if word in noun_list and word not in self._IGNORED_NOUNS]

    def get_nouns_from_recipe(self, recipe):
        """
        Get the nouns and the operations from the recipe.

        Only the text after the second ":" of the recipe is analysed.
        Returns (noun_list, op_list). Raises IndexError if the recipe contains
        fewer than two ":".
        """
        rec = recipe.replace("\n", " ").split(":")
        directions = rec[2]  # part of the recipe with the relevant nouns

        recipe_nlp = nlp(directions)
        noun_list = [chunk.text for chunk in recipe_nlp.noun_chunks if chunk is not None]

        noun_list = noun_list[:-1]  # drops meal
        noun_list = list(dict.fromkeys(noun_list))  # drops duplicates
        noun_list = [noun.replace("the ", "") for noun in noun_list]

        print("nouns from recipe:", noun_list)

        op_list = self.get_operation_from_recipe(directions)

        return noun_list, op_list

    def get_operation_from_recipe(self, operations):
        """
        Split the directions text into a list of operations.

        Operations are separated by three spaces in `operations`, e.g.
        "   slice the red apple   prepare meal " gives
        ['slice the red apple', 'prepare meal'].

        Surrounding whitespace and empty entries are removed explicitly, so
        neither a character nor an operation is lost when the expected
        leading/trailing padding is missing.
        """
        pieces = operations.replace("   ", ",").split(",")
        operation_list = [piece.strip() for piece in pieces if piece.strip()]
        print("ops from recipe: ", operation_list)

        return operation_list

    def create_cmds(self, nouns, ops, inv, descr, last_commands):
        """
        Create candidate commands from the recipe operations, the inventory
        and the description.

        Arguments:
            nouns: Ingredient names from the recipe.
            ops: Operations from the recipe (e.g. "slice the red apple").
            inv: List of inventory lines (e.g. "a sliced red onion").
            descr: Room description text.
            last_commands: Most recent commands; "prepare meal"/"eat meal" and
                the per-ingredient commands are only offered once at least one
                command has been played.

        Returns:
            List of distinct commands in creation order (may be empty).
        """
        def carried(noun):
            # Inventory lines are phrases like "a sliced red onion", so the
            # noun has to be searched inside each line; `noun in inv` would
            # only match a line that equals the noun exactly.
            return any(noun in item for item in inv)

        cmds = []

        if len(inv) == 0 and "knife" in descr:  # first take knife.
            cmds.append("take knife")

        if "a knife" in inv:
            for n in nouns:
                if n in descr and not carried(n):
                    # with a knife in hand, pick up what has to be sliced
                    if any(n in o and "slice" in o for o in ops):
                        cmds.append("take " + n)

        if last_commands is not None and len(last_commands) >= 1:
            for n in nouns:
                if n in descr and not carried(n):
                    cmds.append("take " + n)
                else:
                    # one pass over the recipe operations per inventory line
                    # that mentions the ingredient
                    for i in inv:
                        if n in i:
                            cmds.extend(op for op in ops if n in op)

            cmds.append("prepare meal")
            cmds.append("eat meal")
            # TODO: inventory full with knife and 2 ingredients:
            # first finish the tasks that need the knife, then drop it

        cmds = list(dict.fromkeys(cmds))  # delete duplicates
        print("CMDS FROM CREATE_CMDS: ", cmds)

        return cmds

    def create_all_possible_commands(self, verb_list, nouns_list):
        """
        Create all "<verb> <noun>" combinations in addition to a fixed set of
        basic commands.

        Not covered yet: put ... into ..., put ... on ..., take ... from ...
        """
        # Basic commands known without the recipe; they could be derived from
        # the description, but that is another task.
        commands = ['open fridge', 'close fridge', 'take knife', 'drop knife',
                    'go north', 'go east', 'go south', 'go west', 'prepare meal', 'eat meal']

        for verb in verb_list:
            for noun in nouns_list:
                cmd = verb + " " + noun
                if cmd not in commands:
                    commands.append(cmd)

        return commands

    def create_adm_cmds(self, nouns_list, descr, poss_cmds):
        """
        Keep the possible commands that mention a noun which occurs in the
        description.
        """
        adm_cmds = []
        for noun in nouns_list:
            # `descr.find(noun) >= -1` was always true because find() returns
            # -1 for "not found"; test for membership instead.
            if noun in descr:
                adm_cmds.extend(cmd for cmd in poss_cmds if noun in cmd)

        return adm_cmds
    

## Training the Agent
- just training on the easiest game (3.0/3)

In [7]:
# Create the agent (reads ./vocab.txt) and switch it to training mode, which
# resets its transitions, failed-command memory and step counters.
agent = CustomAgent()
agent.train()

In [64]:
# Train the agent by calling play() twice on one game (play() itself runs
# several episodes per call) and report the wall-clock time.
starttime = time()
for i in range(2):
    print("-------------------------------------")
    print("GAME: " + str(i+1))
    print("-------------------------------------")
    # NOTE(review): `gamefiles` is filled from os.listdir(), whose order is not
    # guaranteed, so index 5 may refer to a different game on another machine --
    # confirm which game is meant and consider selecting it by name.
    play(agent, gamefiles[5])
print("Trained in {:.2f} secs".format(time() - starttime))


-------------------------------------
GAME: 1
-------------------------------------
tw-cooking-recipe3+take3+cook+cut+drop-vNZGCrQVhoJdt7Nx.ulxYou are carrying nothing.



----------
[]
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take knife']

train step:10484
last reward:0
0
You are carrying:
  a knife



----------
['a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'take red onion', 'take yellow potato', 'prepare meal', 'eat meal']
take knife 
take 

train step:10485
last reward:-1
last command:  take knife
0
You are carrying:

nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'take red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
slice the yellow potato 
slice 

train step:10498
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced raw yellow potato
  a knife



----------
['a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'take red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
fry the yellow potat

get alternative cmd
fry the yellow potato 
prepare 

train step:10511
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
get alternative cmd
slice the yellow potato 
slice 

train step:10512
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from 

slice the red onion 
slice 

train step:10524
last reward:-1
last command:  slice the red onion
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
slice the yellow potato 
slice 

train step:10525
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recip

get alternative cmd
get alternative cmd
slice the yellow potato 
slice 

train step:10538
last reward:1
last command:  slice the yellow potato
0
You are carrying:
  a sliced raw yellow potato
  a knife



----------
['a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'take red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
fry the yellow potato 
cook 

train step:10539
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a red onion
  a sliced raw yellow potato
  a knife



----------
['a red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recip

fry the yellow potato 
insert 

train step:10551
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
get alternative cmd
slice the yellow potato 
slice 

train step:10552
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple',

get alternative cmd
get alternative cmd
slice the yellow potato 
slice 

train step:10564
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
slice the yellow potato 
slice 

train step:10565
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red appl

last command:  slice the red onion
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
fry the yellow potato 
look 

train step:10578
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red app

fry the yellow potato 
prepare 

train step:10591
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a red onion
  a sliced raw yellow potato
  a knife



----------
['a red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
get alternative cmd
get alternative cmd
take red apple 
take 

train step:10592
last reward:-1
last command:  take red apple
0
You are carrying:
  a red onion
  a sliced raw yellow potato
  a knife



----------
['a red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow pota

get alternative cmd
fry the yellow potato 
prepare 

train step:10604
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
get alternative cmd
get alternative cmd
take red apple 
take 

train step:10605
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from

slice the yellow potato 
slice 

train step:10617
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
fry the yellow potato 
take 

train step:10618
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from re

get alternative cmd
slice the yellow potato 
slice 

train step:10630
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
take red apple 
take 

train step:10631
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops f

take red apple 
take 

train step:10644
last reward:-1
last command:  take red apple
0
You are carrying:
  a red onion
  a raw yellow potato
  a knife



----------
['a red onion', 'a raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
get alternative cmd
get alternative cmd
take red apple 
take 

train step:10645
last reward:-1
last command:  take red apple
0
You are carrying:
  a red onion
  a raw yellow potato
  a knife



----------
['a red onion', 'a raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple'

get alternative cmd
fry the yellow potato 
lock 

train step:10657
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
take red apple 
take 

train step:10658
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow

fry the yellow potato 
take 

train step:10670
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
take red apple 
take 

train step:10671
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from r

take red apple 
take 

train step:10683
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
take red apple 
take 

train step:10684
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'r

get alternative cmd
slice the red onion 
slice 

train step:10697
last reward:1
last command:  slice the red onion
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
slice the yellow potato 
slice 

train step:10698
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow pota

take red apple 
take 

train step:10710
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
take red apple 
take 

train step:10711
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'r

fry the yellow potato 
close 

train step:10723
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
take red apple 
take 

train step:10724
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from 

get alternative cmd
take red apple 
take 

train step:10736
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
slice the yellow potato 
slice 

train step:10737
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops f

take red apple 
take 

train step:10750
last reward:-1
last command:  take red apple
0
You are carrying:
  a red onion
  a sliced raw yellow potato
  a knife



----------
['a red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
get alternative cmd
fry the yellow potato 
put 

train step:10751
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a red onion
  a sliced raw yellow potato
  a knife



----------
['a red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  [

get alternative cmd
get alternative cmd
fry the yellow potato 
prepare 

train step:10764
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
get alternative cmd
slice the yellow potato 
slice 

train step:10765
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmC

fry the yellow potato 
drink 

train step:10777
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
get alternative cmd
get alternative cmd
take red apple 
take 

train step:10778
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple',

You are carrying:
  a raw yellow potato
  a knife



----------
['a raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'take red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
take yellow potato 
take 

train step:10792
last reward:1
last command:  take yellow potato
0
You are carrying:
  a raw yellow potato
  a knife



----------
['a raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'take red onion', 'slice the yellow potato',

slice the yellow potato 
slice 

train step:10804
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
fry the yellow potato 
take 

train step:10805
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow 

get alternative cmd
fry the yellow potato 
chop 

train step:10817
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
slice the yellow potato 
slice 

train step:10818
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', '

get alternative cmd
get alternative cmd
get alternative cmd
take red apple 
take 

train step:10830
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
take red apple 
take 

train step:10831
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', '

fry the yellow potato 
lock 

train step:10844
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced raw yellow potato
  a knife



----------
['a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'take red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
get alternative cmd
get alternative cmd
slice the yellow potato 
slice 

train step:10845
last reward:1
last command:  slice the yellow potato
0
You are carrying:
  a sliced raw yellow potato
  a knife



----------
['a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', '

get alternative cmd
roast the red apple 
dice 

train step:10857
last reward:-1
last command:  roast the red apple
0
You are carrying:
  a red apple
  a sliced raw yellow potato
  a knife



----------
['a red apple', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red onion', 'slice the red apple', 'roast the red apple', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
get alternative cmd
take red onion 
take 

train step:10858
last reward:-1
last command:  take red onion
0
You are carrying:
  a red apple
  a sliced raw yellow potato
  a knife



----------
['a red apple', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onio

roast the red apple 
unlock 

train step:10870
last reward:-1
last command:  roast the red apple
0
You are carrying:
  a red apple
  a sliced raw yellow potato
  a knife



----------
['a red apple', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red onion', 'slice the red apple', 'roast the red apple', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
fry the yellow potato 
take 

train step:10871
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a red apple
  a sliced raw yellow potato
  a knife



----------
['a red apple', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops

take red onion 
take 

train step:10883
last reward:-1
last command:  take red onion
0
You are carrying:
  a red apple
  a sliced raw yellow potato
  a knife



----------
['a red apple', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red onion', 'slice the red apple', 'roast the red apple', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
fry the yellow potato 
drop 

train step:10884
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a red apple
  a sliced raw yellow potato
  a knife



----------
['a red apple', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red a

take red onion 
take 

train step:10897
last reward:1
last command:  take red onion
0
You are carrying:
  a red onion
  a sliced raw yellow potato
  a knife



----------
['a red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
slice the yellow potato 
slice 

train step:10898
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a red onion
  a sliced raw yellow potato
  a knife



----------
['a red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the r

take red apple 
take 

train step:10910
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
slice the yellow potato 
slice 

train step:10911
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice

get alternative cmd
get alternative cmd
take red apple 
take 

train step:10923
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
take red apple 
take 

train step:10924
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops 

You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
fry the yellow potato 
lock 

train step:10937
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice th

get alternative cmd
get alternative cmd
slice the yellow potato 
slice 

train step:10950
last reward:1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
slice the red onion 
slice 

train step:10951
last reward:1
last command:  slice the red onion
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red o

get alternative cmd
get alternative cmd
slice the yellow potato 
slice 

train step:10963
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
slice the red onion 
slice 

train step:10964
last reward:-1
last command:  slice the red onion
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red

get alternative cmd
fry the yellow potato 
slice 

train step:10977
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
get alternative cmd
get alternative cmd
take red apple 
take 

train step:10978
last reward:-1
last command:  take red apple
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from r

fry the yellow potato 
chop 

train step:10990
last reward:-1
last command:  fry the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from recipe: ['red apple', 'red onion', 'yellow potato']
ops from recipe:  ['slice the red apple', 'roast the red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal']
CMDS FROM CREATE_CMDS:  ['take red apple', 'slice the red onion', 'slice the yellow potato', 'fry the yellow potato', 'prepare meal', 'eat meal']
get alternative cmd
get alternative cmd
get alternative cmd
slice the yellow potato 
slice 

train step:10991
last reward:-1
last command:  slice the yellow potato
0
You are carrying:
  a sliced red onion
  a sliced raw yellow potato
  a knife



----------
['a sliced red onion', 'a sliced raw yellow potato', 'a knife']
Created AdmCmds Obj.
nouns from rec

In [10]:
# Show the value currently associated with each command token, one per line.
for idx, value in enumerate(agent.cmdvalues):
    label = agent.tok_to_text(agent.cmdtoks[idx])
    print(label + " : " + str(value))

chop  : 0
close  : -22
cook  : -7
dice  : 0
drink  : 0
drop  : -9
eat  : 88
examine  : -23
go  : 0
insert  : -8
inventory  : 0
lock  : 0
look  : -22
open  : -17
prepare  : 93
put  : -9
slice  : 0
take  : 72
unlock  : 0


In [11]:
# Sanity check for the text-similarity helper: compare one reference sentence
# against itself, a paraphrase, and an unrelated sentence.
tok1, tok2, tok3 = [
    agent._tokenize(sentence)
    for sentence in ("Cook a meal.", "Prepare the food.", "Germany is a country.")
]
sim1, sim2, sim3 = [agent.calc_similarity(tok1, other) for other in (tok1, tok2, tok3)]
# Printed in order: same text, somewhat similar meaning, very different.
print(sim1, sim2, sim3, sep="\n")

1.0
0.4282114982604981
0.13777608871459956


In [66]:
# Run the agent on a single sample game and report the averaged statistics.
agent.test()  # NOTE(review): presumably switches the agent out of training mode - confirm in the agent class
play(agent, path=gamefiles[5])

tw-cooking-recipe1+take1-11Oeig8bSVdGSp78.ulx..........  	avg. steps:   3.0; avg. score:  3.0 / 3.


### More difficult game (no navigation)
- The agent performs pretty badly on this game (avg. score 2.1 / 6).

In [12]:
# Play the same game twice in a row and report the total wall-clock time.
agent.train()  # NOTE(review): presumably puts the agent into training mode - confirm in the agent class
starttime = time()
separator = "-------------------------------------"
for i in range(2):
    # Banner marking the start of each pass (1-based for readability).
    print(separator, "GAME: " + str(i + 1), separator, sep="\n")
    play(agent, gamefiles[8])
elapsed_secs = time() - starttime
print("Trained in {:.2f} secs".format(elapsed_secs))

-------------------------------------
GAME: 1
-------------------------------------
tw-cooking-recipe2+take2+cut+open-BnYEixa9iJKmFZxO.ulx
Recipe #1
---------
Gather all following ingredients and follow the directions to prepare this tasty meal.

Ingredients:
  yellow bell pepper
  yellow potato

Directions:
  slice the yellow bell pepper
  fry the yellow bell pepper
  dice the yellow potato
  fry the yellow potato
  prepare meal

['examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine purple potato', 'examine red hot pepper', 'examine red potato', 'examine stove', 'examine table', 'examine yellow potato', 'look', 'open fridge', 'take cookbook from counter', 'take knife from table', 'take purple potato from counter', 'take red hot pepper from counter', 'take red potato from counter', 'take yellow potato from counter']

train step:1
last reward:0
0
['cook yellow potato with oven', 'cook yellow potato with stove', 'drop yellow potato', 'eat yel

take knife from table 
take 

train step:11
last reward:-1
last command:  take knife from table
last failed command:  eat yellow potato
1
['chop purple potato with knife', 'chop red potato with knife', 'cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'dice purple potato with knife', 'dice red potato with knife', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', '

take knife from table 
take 

train step:20
last reward:-1
last command:  take knife from table
last failed command:  slice yellow potato with knife
2
['chop purple potato with knife', 'chop red potato with knife', 'cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'dice purple potato with knife', 'dice red potato with knife', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato

take red hot pepper from counter 
take 

train step:29
last reward:-1
last command:  take red hot pepper from counter
last failed command:  cook yellow potato with oven
3
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table', 'take cookbook from counter', 'take knife from table']
eat red hot pepper 
eat 

train step:30
last reward:-1
last command:  eat red hot pepp

take red hot pepper from counter 
take 

train step:38
last reward:-1
last command:  take red hot pepper from counter
last failed command:  chop yellow potato with knife
4
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table', 'take cookbook from counter', 'take knife from table']
eat red hot pepper 
eat 

train step:39
last reward:-1
last command:  eat red hot pep

put yellow potato on counter 
put 

train step:47
last reward:-1
last command:  put yellow potato on counter
last failed command:  chop yellow potato with knife
4
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table']
take yello

take yellow potato from counter 
take 

train step:56
last reward:-1
last command:  take yellow potato from counter
last failed command:  chop yellow potato with knife
4
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'examine yellow potato', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'take yellow potato']
drop yellow potato 
drop 

train step:57
last reward:-1
last command:  drop yellow potato
last failed command:  chop yellow potato with knife
4
['cook purpl

take red hot pepper from counter 
take 

train step:66
last reward:-1
last command:  take red hot pepper from counter
last failed command:  cook yellow potato with stove
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table', 'take cookbook from counter', 'take knife from table']
eat red hot pepper 
eat 

train step:67
last reward:-1
last command:  eat red hot pep

put yellow potato on counter 
put 

train step:75
last reward:-1
last command:  put yellow potato on counter
last failed command:  cook yellow potato with stove
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table']
take yello

take yellow potato from counter 
take 

train step:84
last reward:-1
last command:  take yellow potato from counter
last failed command:  cook yellow potato with stove
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'examine yellow potato', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'take yellow potato']
drop yellow potato 
drop 

train step:85
last reward:-1
last command:  drop yellow potato
last failed command:  cook yellow potato with stove
5
['cook purpl

cook red potato with oven 
cook 

train step:93
last reward:-1
last command:  cook red potato with oven
last failed command:  cook yellow potato with stove
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table']
eat red potato 
eat 

train step:94
last reward:-1
last command:  eat red potato
last failed command:  cook yellow potato with stove
5
['cook purple potato with oven', 'cook pu

open fridge 
open 

train step:105
last reward:-1
last command:  open fridge
last failed command:  cook yellow potato with stove
5
['chop yellow bell pepper with knife', 'close fridge', 'cook yellow bell pepper with oven', 'cook yellow bell pepper with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'dice yellow bell pepper with knife', 'drop cookbook', 'drop knife', 'drop yellow bell pepper', 'drop yellow potato', 'eat yellow bell pepper', 'eat yellow potato', 'examine cilantro', 'examine counter', 'examine fridge', 'examine orange bell pepper', 'examine oven', 'examine stove', 'examine table', 'insert cookbook into fridge', 'insert knife into fridge', 'insert yellow bell pepper into fridge', 'insert yellow potato into fridge', 'look', 'prepare meal', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put yellow bell pepper on counter', 'put yellow bell pepper on sto

take purple potato from counter 
take 

train step:115
last reward:-1
last command:  take purple potato from counter
last failed command:  eat yellow potato
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red hot pepper with oven', 'cook red hot pepper with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop purple potato', 'drop red hot pepper', 'drop red potato', 'drop yellow potato', 'eat red hot pepper', 'eat yellow potato', 'examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red hot pepper on counter', 'put red hot pepper on stove', 'put red hot pepper on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', 'put y

cook red potato with oven 
cook 

train step:123
last reward:-1
last command:  cook red potato with oven
last failed command:  eat yellow potato
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table']
eat red potato 
eat 

train step:124
last reward:-1
last command:  eat red potato
last failed command:  eat yellow potato
5
['cook purple potato with oven', 'cook purple potato with stove

alternative
take orange bell pepper from fridge 
take 

train step:132
last reward:-1
last command:  take orange bell pepper from fridge
last failed command:  eat yellow potato
5
['chop yellow bell pepper with knife', 'close fridge', 'cook purple potato with oven', 'cook purple potato with stove', 'cook yellow bell pepper with oven', 'cook yellow bell pepper with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'dice yellow bell pepper with knife', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop yellow bell pepper', 'drop yellow potato', 'eat yellow bell pepper', 'eat yellow potato', 'examine cilantro', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'insert cookbook into fridge', 'insert knife into fridge', 'insert purple potato into fridge', 'insert yellow bell pepper into fridge', 'insert yellow potato into fridge', 'look', 'prepare meal', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on ta

alternative
dice yellow bell pepper with knife 
dice 

train step:138
last reward:-5
last command:  dice yellow bell pepper with knife
last failed command:  dice yellow bell pepper with knife
5
-------- LOST GAME ----------
 
.['examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine purple potato', 'examine red hot pepper', 'examine red potato', 'examine stove', 'examine table', 'examine yellow potato', 'look', 'open fridge', 'take cookbook from counter', 'take knife from table', 'take purple potato from counter', 'take red hot pepper from counter', 'take red potato from counter', 'take yellow potato from counter']

train step:139
last reward:0
last failed command:  dice yellow bell pepper with knife
5
['cook yellow potato with oven', 'cook yellow potato with stove', 'drop yellow potato', 'eat yellow potato', 'examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine purple potato', 'examine red hot pepp

slice yellow potato with knife 
slice 

train step:147
last reward:-5
last command:  slice yellow potato with knife
last failed command:  slice yellow potato with knife
5
-------- LOST GAME ----------
 
.['examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine purple potato', 'examine red hot pepper', 'examine red potato', 'examine stove', 'examine table', 'examine yellow potato', 'look', 'open fridge', 'take cookbook from counter', 'take knife from table', 'take purple potato from counter', 'take red hot pepper from counter', 'take red potato from counter', 'take yellow potato from counter']

train step:148
last reward:0
last failed command:  slice yellow potato with knife
5
['cook yellow potato with oven', 'cook yellow potato with stove', 'drop yellow potato', 'eat yellow potato', 'examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine purple potato', 'examine red hot pepper', 'examine red potato', 

chop yellow potato with knife 
chop 

train step:156
last reward:-5
last command:  chop yellow potato with knife
last failed command:  chop yellow potato with knife
5
-------- LOST GAME ----------
 
.  	avg. steps:  14.6; avg. score:  1.6 / 6.
-------------------------------------
GAME: 2
-------------------------------------
tw-cooking-recipe2+take2+cut+open-BnYEixa9iJKmFZxO.ulx['examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine purple potato', 'examine red hot pepper', 'examine red potato', 'examine stove', 'examine table', 'examine yellow potato', 'look', 'open fridge', 'take cookbook from counter', 'take knife from table', 'take purple potato from counter', 'take red hot pepper from counter', 'take red potato from counter', 'take yellow potato from counter']

train step:157
last reward:0
last failed command:  chop yellow potato with knife
5
['cook yellow potato with oven', 'cook yellow potato with stove', 'drop yellow potato', 'eat ye

dice yellow potato with knife 
dice 

train step:165
last reward:1
last command:  dice yellow potato with knife
last failed command:  chop yellow potato with knife
5
['chop purple potato with knife', 'cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'dice purple potato with knife', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', '

take cookbook from counter 
take 

train step:175
last reward:-1
last command:  take cookbook from counter
last failed command:  cook yellow potato with oven
5
['chop purple potato with knife', 'chop red potato with knife', 'chop yellow potato with knife', 'cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'dice purple potato with knife', 'dice red potato with knife', 'dice yellow potato with knife', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on tabl

open fridge 
open 

train step:184
last reward:-1
last command:  open fridge
last failed command:  cook yellow potato with oven
5
['chop yellow bell pepper with knife', 'close fridge', 'cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow bell pepper with oven', 'cook yellow bell pepper with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'dice yellow bell pepper with knife', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'drop yellow bell pepper', 'drop yellow potato', 'eat yellow bell pepper', 'eat yellow potato', 'examine cilantro', 'examine counter', 'examine fridge', 'examine orange bell pepper', 'examine oven', 'examine stove', 'examine table', 'insert cookbook into fridge', 'insert knife into fridge', 'insert purple potato into fridge', 'insert red potato into fridge', 'insert yellow bell pepper into fridge', 'insert yellow potato into fridge', 'lo

take red potato from counter 
take 

train step:193
last reward:-1
last command:  take red potato from counter
last failed command:  eat yellow potato
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine red hot pepper', 'examine stove', 'examine table', 'look', 'open fridge', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table', 'take cookbook from counter', 'take knife from table', 'take red hot pepper from counter']
take purple potato from counter 
take 

tr

dice purple potato with knife 
dice 

train step:201
last reward:-1
last command:  dice purple potato with knife
last failed command:  eat yellow potato
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'examine yellow potato', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'take yellow potato']
drop yellow potato 
drop 

train step:202
last reward:-1
last command:  drop yellow potato
last failed command:  eat yellow potato
5
['cook purple potato with oven', 'cook

alternative
prepare meal 
prepare 

train step:208
last reward:-1
last command:  prepare meal
last failed command:  eat yellow potato
5
['chop orange bell pepper with knife', 'chop yellow bell pepper with knife', 'close fridge', 'cook orange bell pepper with oven', 'cook orange bell pepper with stove', 'cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow bell pepper with oven', 'cook yellow bell pepper with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'dice orange bell pepper with knife', 'dice yellow bell pepper with knife', 'drop cookbook', 'drop knife', 'drop orange bell pepper', 'drop purple potato', 'drop red potato', 'drop yellow bell pepper', 'drop yellow potato', 'eat orange bell pepper', 'eat yellow bell pepper', 'eat yellow potato', 'examine cilantro', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'insert cookbook into fridge

eat cilantro 
eat 

train step:213
last reward:-1
last command:  eat cilantro
last failed command:  eat yellow potato
5
['chop yellow bell pepper with knife', 'close fridge', 'cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow bell pepper with oven', 'cook yellow bell pepper with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'dice yellow bell pepper with knife', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'drop yellow bell pepper', 'drop yellow potato', 'eat yellow bell pepper', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'insert cookbook into fridge', 'insert knife into fridge', 'insert purple potato into fridge', 'insert red potato into fridge', 'insert yellow bell pepper into fridge', 'insert yellow potato into fridge', 'look', 'prepare meal', 'put cookbook on counter', 'put cookboo

take yellow bell pepper from fridge 
take 

train step:219
last reward:-1
last command:  take yellow bell pepper from fridge
last failed command:  eat yellow potato
5
['chop yellow bell pepper with knife', 'close fridge', 'cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow bell pepper with oven', 'cook yellow bell pepper with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'dice yellow bell pepper with knife', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'drop yellow bell pepper', 'drop yellow potato', 'eat yellow bell pepper', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'insert cookbook into fridge', 'insert knife into fridge', 'insert purple potato into fridge', 'insert red potato into fridge', 'insert yellow bell pepper into fridge', 'insert yellow potato into fridge', 'look', 'prepare

eat red hot pepper 
eat 

train step:227
last reward:-1
last command:  eat red hot pepper
last failed command:  chop yellow bell pepper with knife
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop cookbook', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table', 'take knife from table']
take cookbook from counter 
take 

train step:228
last reward:-1

eat red hot pepper 
eat 

train step:236
last reward:-1
last command:  eat red hot pepper
last failed command:  slice yellow potato with knife
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop cookbook', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table', 'take knife from table']
take cookbook from counter 
take 

train step:237
last reward:-1
las

take yellow potato from counter 
take 

train step:245
last reward:-1
last command:  take yellow potato from counter
last failed command:  slice yellow potato with knife
5
['close fridge', 'cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine cilantro', 'examine counter', 'examine fridge', 'examine orange bell pepper', 'examine oven', 'examine stove', 'examine table', 'examine yellow bell pepper', 'insert cookbook into fridge', 'insert knife into fridge', 'insert purple potato into fridge', 'insert red potato into fridge', 'insert yellow potato into fridge', 'look', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on c

eat orange bell pepper 
eat 

train step:250
last reward:-1
last command:  eat orange bell pepper
last failed command:  slice yellow potato with knife
5
['chop yellow bell pepper with knife', 'close fridge', 'cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow bell pepper with oven', 'cook yellow bell pepper with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'dice yellow bell pepper with knife', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'drop yellow bell pepper', 'drop yellow potato', 'eat yellow bell pepper', 'eat yellow potato', 'examine cilantro', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'insert cookbook into fridge', 'insert knife into fridge', 'insert purple potato into fridge', 'insert red potato into fridge', 'insert yellow bell pepper into fridge', 'insert yellow potato into fridge', 'look', 'p

take yellow bell pepper from fridge 
take 

train step:256
last reward:-1
last command:  take yellow bell pepper from fridge
last failed command:  slice yellow potato with knife
5
['chop yellow bell pepper with knife', 'close fridge', 'cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow bell pepper with oven', 'cook yellow bell pepper with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'dice yellow bell pepper with knife', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'drop yellow bell pepper', 'drop yellow potato', 'eat yellow bell pepper', 'eat yellow potato', 'examine cilantro', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'insert cookbook into fridge', 'insert knife into fridge', 'insert purple potato into fridge', 'insert red potato into fridge', 'insert yellow bell pepper into fridge', 'insert yellow pota

eat red hot pepper 
eat 

train step:264
last reward:-1
last command:  eat red hot pepper
last failed command:  dice yellow bell pepper with knife
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop cookbook', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table', 'take knife from table']
take cookbook from counter 
take 

train step:265
last reward:-1

eat red hot pepper 
eat 

train step:273
last reward:-1
last command:  eat red hot pepper
last failed command:  chop yellow potato with knife
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop cookbook', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table', 'take knife from table']
take cookbook from counter 
take 

train step:274
last reward:-1
last

take yellow potato 
take 

train step:281
last reward:-1
last command:  take yellow potato
last failed command:  chop yellow potato with knife
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'examine yellow potato', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'take yellow potato from counter']
put yellow potato on counter 
put 

train step:282
last reward:-1
last command:  put yellow potato on counter
last failed command:  chop yellow potato with knife
5
['co

take yellow bell pepper from fridge 
take 

train step:290
last reward:1
last command:  take yellow bell pepper from fridge
last failed command:  chop yellow potato with knife
5
['close fridge', 'cook yellow bell pepper with oven', 'cook yellow bell pepper with stove', 'drop yellow bell pepper', 'eat yellow bell pepper', 'examine cilantro', 'examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine orange bell pepper', 'examine oven', 'examine purple potato', 'examine red potato', 'examine stove', 'examine table', 'examine yellow potato', 'insert yellow bell pepper into fridge', 'look', 'prepare meal', 'put yellow bell pepper on counter', 'put yellow bell pepper on stove', 'put yellow bell pepper on table', 'take cilantro from fridge', 'take cookbook from fridge', 'take knife from fridge', 'take orange bell pepper from fridge', 'take purple potato from fridge', 'take red potato from fridge', 'take yellow potato from fridge']
alternative
prepare meal 
prepare 

t

eat red hot pepper 
eat 

train step:301
last reward:-1
last command:  eat red hot pepper
last failed command:  eat yellow potato
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'cook yellow potato with oven', 'cook yellow potato with stove', 'drop cookbook', 'drop purple potato', 'drop red potato', 'drop yellow potato', 'eat yellow potato', 'examine counter', 'examine fridge', 'examine knife', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'put yellow potato on counter', 'put yellow potato on stove', 'put yellow potato on table', 'take knife from table']
take cookbook from counter 
take 

train step:302
last reward:-1
last command:  t

take yellow potato 
take 

train step:310
last reward:-1
last command:  take yellow potato
last failed command:  eat yellow potato
5
['cook purple potato with oven', 'cook purple potato with stove', 'cook red potato with oven', 'cook red potato with stove', 'drop cookbook', 'drop knife', 'drop purple potato', 'drop red potato', 'examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'examine yellow potato', 'look', 'open fridge', 'put cookbook on counter', 'put cookbook on stove', 'put cookbook on table', 'put knife on counter', 'put knife on stove', 'put knife on table', 'put purple potato on counter', 'put purple potato on stove', 'put purple potato on table', 'put red potato on counter', 'put red potato on stove', 'put red potato on table', 'take yellow potato from counter']
put yellow potato on counter 
put 

train step:311
last reward:-1
last command:  put yellow potato on counter
last failed command:  eat yellow potato
5
['cook purple potato with ov

insert knife into fridge 
insert 

train step:319
last reward:-1
last command:  insert knife into fridge
last failed command:  eat yellow potato
5
['examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge']
close fridge 
close 

train step:320
last reward:-1
last command:  close fridge
last failed command:  eat yellow potato
5
['examine counter', 'examine fridge', 'examine oven', 'examine stove', 'examine table', 'look', 'open fridge']
alternative
look 
look 

train step:321
last reward:-1
last command:  look
last failed command:  eat yellow potato
5
['close fridge', 'examine cilantro', 'examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine orange bell pepper', 'examine oven', 'examine purple potato', 'examine red potato', 'examine stove', 'examine table', 'examine yellow bell pepper', 'examine yellow potato', 'look', 'take cilantro from fridge', 'take cookbook from fridge', 'take knife from fridge', 'take o

take yellow bell pepper from fridge 
take 

train step:331
last reward:-1
last command:  take yellow bell pepper from fridge
last failed command:  eat yellow potato
5
['close fridge', 'cook yellow bell pepper with oven', 'cook yellow bell pepper with stove', 'drop yellow bell pepper', 'eat yellow bell pepper', 'examine cilantro', 'examine cookbook', 'examine counter', 'examine fridge', 'examine knife', 'examine orange bell pepper', 'examine oven', 'examine purple potato', 'examine red potato', 'examine stove', 'examine table', 'examine yellow potato', 'insert yellow bell pepper into fridge', 'look', 'prepare meal', 'put yellow bell pepper on counter', 'put yellow bell pepper on stove', 'put yellow bell pepper on table', 'take cilantro from fridge', 'take cookbook from fridge', 'take knife from fridge', 'take orange bell pepper from fridge', 'take purple potato from fridge', 'take red potato from fridge', 'take yellow potato from fridge']
alternative
prepare meal 
prepare 

train step:3

### Game with Navigation 
- The agent performs really badly on this game (0.2/5)

In [64]:
# Timed training run: play the second entry of `gamefiles` twice in a row.
# NOTE(review): agent.train() presumably switches the agent into training
# mode -- confirm against the agent class definition.
agent.train()
separator = "-------------------------------------"
starttime = time()
for i in range(2):
    # Banner with the 1-based number of the current pass, framed by rules.
    print(separator, "GAME: " + str(i+1), separator, sep="\n")
    # play() is called with its defaults for max_step, nb_episodes and verbose.
    play(agent, gamefiles[1])
elapsed = time() - starttime
print("Trained in {:.2f} secs".format(elapsed))

-------------------------------------
GAME: 1
-------------------------------------
tw-cooking-recipe3+cook+cut+drop+go12-B10qS58OTbe0T8Qk.ulx
Recipe #1
---------
Gather all following ingredients and follow the directions to prepare this tasty meal.

Ingredients:
  red hot pepper
  salt
  yellow potato

Directions:
  slice the red hot pepper
  slice the yellow potato
  grill the yellow potato
  prepare meal

['drop red hot pepper', 'drop red onion', 'drop red potato', 'drop salt', 'drop yellow bell pepper', 'drop yellow potato', 'eat red hot pepper', 'eat red onion', 'eat salt', 'eat yellow bell pepper', 'go south', 'look']

train step:1
last reward:0
0
['drop red onion', 'drop red potato', 'drop salt', 'drop yellow bell pepper', 'drop yellow potato', 'eat red onion', 'eat salt', 'eat yellow bell pepper', 'go south', 'look']
eat red hot pepper 
eat 

train step:2
last reward:-5
last command:  eat red hot pepper
last failed command:  eat red hot pepper
1
-------- LOST GAME ----------
 



train step:22
last reward:0
last failed command:  eat salt
4
['close barn door', 'close sliding patio door', 'cook red hot pepper with BBQ', 'cook red onion with BBQ', 'cook red potato with BBQ', 'cook yellow bell pepper with BBQ', 'cook yellow potato with BBQ', 'drop red hot pepper', 'drop red onion', 'drop red potato', 'drop salt', 'drop yellow bell pepper', 'drop yellow potato', 'eat red hot pepper', 'eat red onion', 'eat salt', 'eat yellow bell pepper', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look', 'put red hot pepper on patio chair', 'put red hot pepper on patio table', 'put red onion on patio chair', 'put red onion on patio table', 'put red potato on patio chair', 'put red potato on patio table', 'put salt on patio chair', 'put salt on patio table', 'put yellow bell pepper on patio chair', 'put yellow bell pepper on patio table', 'put yellow potato on patio chair', 'put yellow potato on patio table']
go south 
go 

train 

go north 
go 

train step:41
last reward:-1
last command:  go north
last failed command:  eat salt
5
['close front door', 'drop red hot pepper', 'drop red potato', 'drop salt', 'drop yellow potato', 'eat red hot pepper', 'eat salt', 'examine sofa', 'go east', 'go north', 'go west', 'look', 'put red hot pepper on sofa', 'put red potato on sofa', 'put salt on sofa', 'put yellow potato on sofa']
go south 
go 

train step:42
last reward:-1
last command:  go south
last failed command:  eat salt
5
['close front door', 'examine sofa', 'go east', 'go north', 'go west', 'look']
eat salt 
eat 

train step:43
last reward:-5
last command:  eat salt
last failed command:  eat salt
5
-------- LOST GAME ----------
 
.['drop red hot pepper', 'drop red onion', 'drop red potato', 'drop salt', 'drop yellow bell pepper', 'drop yellow potato', 'eat red hot pepper', 'eat red onion', 'eat salt', 'eat yellow bell pepper', 'go south', 'look']

train step:44
last reward:0
last failed command:  eat salt
5
['close

take red hot pepper 
take 

train step:63
last reward:-1
last command:  take red hot pepper
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'cook red onion with BBQ', 'cook red potato with BBQ', 'cook yellow bell pepper with BBQ', 'cook yellow potato with BBQ', 'drop red onion', 'drop red potato', 'drop salt', 'drop yellow bell pepper', 'drop yellow potato', 'eat red onion', 'eat salt', 'eat yellow bell pepper', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look', 'put red onion on patio chair', 'put red onion on patio table', 'put red potato on patio chair', 'put red potato on patio table', 'put salt on patio chair', 'put salt on patio table', 'put yellow bell pepper on patio chair', 'put yellow bell pepper on patio table', 'put yellow potato on patio chair', 'put yellow potato on patio table']
go south 
go 

train step:64
last reward:-1
last command:  go south
last failed command:  eat red h

take yellow potato 
take 

train step:75
last reward:-1
last command:  take yellow potato
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'cook red onion with BBQ', 'cook yellow bell pepper with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'examine red potato', 'examine yellow potato', 'go east', 'go north', 'go south', 'look', 'take red potato from patio table', 'take yellow potato']
take red potato from patio table 
take 

train step:76
last reward:-1
last command:  take red potato from patio table
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'cook red onion with BBQ', 'cook yellow bell pepper with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'examine red potato', 'examine yellow potato', 'go east', 'go north', 'go south', 'look', 'take red potato from patio table', 'take yellow potato']
take red potato from patio table 
take 

train step:77
last reward

cook yellow bell pepper with BBQ 
cook 

train step:95
last reward:-1
last command:  cook yellow bell pepper with BBQ
last failed command:  eat red hot pepper
5
['close sliding patio door', 'cook red onion with BBQ', 'cook yellow bell pepper with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'examine red potato', 'examine yellow potato', 'go north', 'go south', 'look', 'open barn door', 'take red potato from patio table', 'take yellow potato']
close barn door 
close 

train step:96
last reward:-1
last command:  close barn door
last failed command:  eat red hot pepper
5
['cook red onion with BBQ', 'cook yellow bell pepper with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'examine red potato', 'examine yellow potato', 'go north', 'look', 'open barn door', 'open sliding patio door', 'take red potato from patio table', 'take yellow potato']
close sliding patio door 
close 

train step:97
last reward:-1
last command:  close sliding patio door
last 

go north 
go 

train step:119
last reward:-1
last command:  go north
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'cook red onion with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
cook yellow bell pepper with BBQ 
cook 

train step:120
last reward:-1
last command:  cook yellow bell pepper with BBQ
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
cook red onion with BBQ 
cook 

train step:121
last reward:-1
last command:  cook red onion with BBQ
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
look 
look 

train step:122
last reward:-1
last command:  look
last failed command:  eat red hot pe

go north 
go 

train step:150
last reward:-1
last command:  go north
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
look 
look 

train step:151
last reward:-1
last command:  look
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
examine BBQ 
examine 

train step:152
last reward:-1
last command:  examine BBQ
last failed command:  eat red hot pepper
5
['close barn door', 'close sliding patio door', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
examine patio table 
examine 

train step:153
last reward:-1
last command:  examine patio table
last failed command:  eat red hot pepper
5
-------- LOST GAME ----------
 
.['drop red hot pepper', 'drop r

examine red hot pepper 
examine 

train step:180
last reward:-1
last command:  examine red hot pepper
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook red potato with BBQ', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
go south 
go 

train step:181
last reward:-1
last command:  go south
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook red potato with BBQ', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:182
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['cook red potato with BBQ', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'look', 'open barn door', 'open sliding pati

close barn door 
close 

train step:207
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'look', 'open barn door', 'open sliding patio door']
close sliding patio door 
close 

train step:208
last reward:-1
last command:  close sliding patio door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'look', 'open sliding patio door']
open barn door 
open 

train step:209
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open sliding patio door 
open 

train step:210
last reward:-

take red potato 
take 

train step:236
last reward:-1
last command:  take red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
examine red hot pepper 
examine 

train step:237
last reward:-1
last command:  examine red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
examine red potato 
examine 

train step:238
last reward:-1
last command:  examine red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
look 
look 

train step:239
last reward:-1
last command:  look
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take

close barn door 
close 

train step:265
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'look', 'open barn door', 'open sliding patio door']
close sliding patio door 
close 

train step:266
last reward:-1
last command:  close sliding patio door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'look', 'open sliding patio door']
open barn door 
open 

train step:267
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open sliding patio door 
open 

train step:268
last reward:-

close barn door 
close 

train step:295
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:296
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'go east', 'go north', 'go south', 'go west', 'look']
go south 
go 

train step:297
last reward:-1
last command:  go south
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
go north 
go 

train step:298
last reward:-1
last command:  go north
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook y

close barn door 
close 

train step:323
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:324
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'go east', 'go north', 'go south', 'go west', 'look']
go south 
go 

train step:325
last reward:-1
last command:  go south
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
go north 
go 

train step:326
last reward:-1
last command:  go north
last failed command:  cook yellow potato with BBQ
5
-------- LOST GAME ----------
 
.['d

open barn door 
open 

train step:353
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:354
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:355
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:356
last reward:-1
last command

take red hot pepper 
take 

train step:382
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'go south', 'look', 'take red hot pepper']
examine red hot pepper 
examine 

train step:383
last reward:-1
last command:  examine red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'go south', 'look', 'take red hot pepper']
examine red hot pepper 
examine 

train step:384
last reward:-1
last command:  examine red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'go south', 'look', 'take red hot pepper']
take red hot pepper 
take 

train step:385
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook red potato with BBQ', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', '

go south 
go 

train step:411
last reward:-1
last command:  go south
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
go north 
go 

train step:412
last reward:-1
last command:  go north
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:413
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:414
last reward:-1
last command:  open barn door


take red potato 
take 

train step:440
last reward:-1
last command:  take red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take red potato 
take 

train step:441
last reward:-1
last command:  take red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take red hot pepper 
take 

train step:442
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take red potato 
take 

train step:443
last reward:-1
last command:  take red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']


open barn door 
open 

train step:469
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:470
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:471
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
look 
look 

train step:472
last reward:-1
last command

close barn door 
close 

train step:499
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:500
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'go east', 'go north', 'go south', 'go west', 'look']
go south 
go 

train step:501
last reward:-1
last command:  go south
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
go north 
go 

train step:502
last reward:-1
last command:  go north
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook y

close barn door 
close 

train step:527
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:528
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'go east', 'go north', 'go south', 'go west', 'look']
go south 
go 

train step:529
last reward:-1
last command:  go south
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
go north 
go 

train step:530
last reward:-1
last command:  go north
last failed command:  cook yellow potato with BBQ
5
-------- LOST GAME ----------
 
.['d

open barn door 
open 

train step:557
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:558
last reward:-1
last command:  close barn door
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', 'go south', 'look']
open barn door 
open 

train step:559
last reward:-1
last command:  open barn door
last failed command:  cook yellow potato with BBQ
5
['close sliding patio door', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go north', 'go south', 'look', 'open barn door']
close barn door 
close 

train step:560
last reward:-1
last command

take red hot pepper 
take 

train step:586
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'go south', 'look', 'take red hot pepper']
examine red hot pepper 
examine 

train step:587
last reward:-1
last command:  examine red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'go south', 'look', 'take red hot pepper']
examine red hot pepper 
examine 

train step:588
last reward:-1
last command:  examine red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'go south', 'look', 'take red hot pepper']
take red hot pepper 
take 

train step:589
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['close barn door', 'close sliding patio door', 'cook red potato with BBQ', 'cook yellow potato with BBQ', 'examine BBQ', 'examine patio chair', 'examine patio table', 'go east', 'go north', '

take red hot pepper 
take 

train step:614
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take red potato 
take 

train step:615
last reward:-1
last command:  take red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take red potato 
take 

train step:616
last reward:-1
last command:  take red potato
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red potato']
take red hot pepper 
take 

train step:617
last reward:-1
last command:  take red hot pepper
last failed command:  cook yellow potato with BBQ
5
['examine red hot pepper', 'examine red potato', 'go south', 'look', 'take red hot pepper', 'take red p

In [68]:
# Dump the agent's command table: one "<command text> : <value>" line per entry,
# pairing agent.cmdtoks[k] with agent.cmdvalues[k] by position.
n_commands = len(agent.cmdvalues)
for idx in range(n_commands):
    command_text = agent.tok_to_text(agent.cmdtoks[idx])
    command_value = str(agent.cmdvalues[idx])
    print(" : ".join((command_text, command_value)))

chop  : -5
close  : -22
cook  : -22
dice  : -4
drink  : 0
drop  : -19
eat  : -18
examine  : -23
go  : 0
insert  : -8
inventory  : 0
lock  : 0
look  : -22
open  : -17
prepare  : 93
put  : -19
slice  : -6
take  : -15
unlock  : 0


In [13]:
# Experiments: sanity-check the agent's tokenizer and its word <-> id lookups.
token_ids = agent._tokenize("cook a meal")
print(token_ids)

# Forward lookup (word -> id) followed by the reverse lookup (id -> word).
# NOTE(review): 4664 is hard-coded; in the recorded run it is the id printed
# for "cook" -- confirm it still matches if the vocabulary changes.
cook_id = agent._get_word_id("cook")
print(cook_id)
print(agent._get_word_by_id(4664))

[4664, 785, 11449]
4664
cook


In [70]:
def play_render(agent, gamefile):
    """Let ``agent`` play a single game once, rendering every observation.

    Args:
        agent: Object exposing ``select_additional_infos`` (passed to
            TextWorld as the requested infos) and
            ``act(obs, score, done, infos)`` returning the next command.
        gamefile: Path of the game file to register and play.

    Returns:
        None. The game text is shown through ``env.render()``.
    """
    requested_infos = agent.select_additional_infos
    # Keyword form matches how `play` registers its games.
    env_id = textworld.gym.register_games([gamefile],
                                          request_infos=requested_infos)

    env = gym.make(env_id)
    try:
        obs, infos = env.reset()
        env.render()  # Print the initial observation.

        score = 0
        done = False
        while not done:
            command = agent.act(obs, score, done, infos)
            # Rebind `obs` (the original wrote to `ob`), otherwise the agent
            # keeps acting on the very first observation for the whole game.
            obs, score, done, infos = env.step(command)
            env.render()
    finally:
        # Release the environment even if the agent or the game raises.
        env.close()

In [71]:
# Call the agent's test() hook, then watch it play one game with rendering.
# NOTE(review): presumably test() switches the agent to evaluation mode --
# confirm against the agent class.
agent.test()
# gamefiles is filled from os.listdir, so which game sits at index 5 depends
# on the directory listing order.
rendered_gamefile = gamefiles[5]
play_render(agent, rendered_gamefile)




                    ________  ________  __    __  ________
                   |        \|        \|  \  |  \|        \
                    \$$$$$$$$| $$$$$$$$| $$  | $$ \$$$$$$$$
                      | $$   | $$__     \$$\/  $$   | $$
                      | $$   | $$  \     >$$  $$    | $$
                      | $$   | $$$$$    /  $$$$\    | $$
                      | $$   | $$_____ |  $$ \$$\   | $$
                      | $$   | $$     \| $$  | $$   | $$
                       \$$    \$$$$$$$$ \$$   \$$    \$$
              __       __   ______   _______   __        _______
             |  \  _  |  \ /      \ |       \ |  \      |       \
             | $$ / \ | $$|  $$$$$$\| $$$$$$$\| $$      | $$$$$$$\
             | $$/  $\| $$| $$  | $$| $$__| $$| $$      | $$  | $$
             | $$  $$$\ $$| $$  | $$| $$    $$| $$      | $$  | $$
             | $$ $$\$$\$$| $$  | $$| $$$$$$$\| $$      | $$  | $$
             | $$$$  \$$$$| $$__/ $$| $$  | $$| $$_____ | $$__/ $$
          