In [38]:
import os
import re
import random
from typing import List, Dict, Any, Optional
from collections import defaultdict
from time import time
from glob import glob
import numpy as np
import gym
from textworld import EnvInfos
import textworld.gym

#import torch

#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# Collect every compiled TextWorld game (*.ulx) found directly inside GAMES_PATH.
# NOTE: os.listdir() returns entries in arbitrary order, so positions in
# `gamefiles` (e.g. gamefiles[5]) are not guaranteed to be stable across machines.
GAMES_PATH = "./sample_games/"
gamefiles = [GAMES_PATH + name
             for name in os.listdir(GAMES_PATH)
             if name.endswith('.ulx')]

In [3]:
def play(agent, path, max_step=50, nb_episodes=10, verbose=True):
    """
    Let `agent` play one game (or every ``*.ulx`` game in a folder) and report statistics.

    Arguments:
        agent: Object exposing `select_additional_infos` and
            `act(obs, score, done, infos)`.
        path: Path to a single game file, or to a directory containing ``*.ulx`` games.
        max_step: Passed to the environment as `max_episode_steps`.
        nb_episodes: Number of episodes to play.
        verbose: When true, print the game name, one dot per episode and a
            summary line (average steps and average score).

    Returns:
        None. Results are only printed.
    """
    request_infos = agent.select_additional_infos
    request_infos.max_score = True  # Needed to normalize the scores.

    path_is_dir = os.path.isdir(path)
    gamefiles = glob(os.path.join(path, "*.ulx")) if path_is_dir else [path]

    env_id = textworld.gym.register_games(gamefiles,
                                          request_infos=request_infos,
                                          max_episode_steps=max_step)
    env = gym.make(env_id)  # Gym environment used to play the text game.
    if verbose:
        label = os.path.dirname(path) if path_is_dir else os.path.basename(path)
        print(label, end="")

    # Per-episode statistics: number of moves, raw score, score / max_score.
    avg_moves, avg_scores, avg_norm_scores = [], [], []
    for _ in range(nb_episodes):
        obs, infos = env.reset()  # Start a new episode.
        score, done, nb_moves = 0, False, 0

        while not done:
            command = agent.act(obs, score, done, infos)
            obs, score, done, infos = env.step(command)
            nb_moves += 1

        # One extra call so the agent sees the terminal state.
        agent.act(obs, score, done, infos)

        if verbose:
            print(".", end="")
        avg_moves.append(nb_moves)
        avg_scores.append(score)
        avg_norm_scores.append(score / infos["max_score"])

    env.close()
    if not verbose:
        return

    msg = "  \tavg. steps: {:5.1f}; avg. score: {:4.1f} / {}."
    if path_is_dir:
        # Several games: report the normalized score, which is out of 1.
        print(msg.format(np.mean(avg_moves), np.mean(avg_norm_scores), 1))
    else:
        # Single game: report the raw score against that game's maximum.
        print(msg.format(np.mean(avg_moves), np.mean(avg_scores),
                         infos["max_score"]))

In [74]:
class CustomAgent():
    """
    Bag-of-words agent that ranks admissible commands by per-word values.

    Every word has one entry in `qvalues`. A command's value is the mean of
    the values of its words; the agent always picks a command with the highest
    mean (ties are broken at random). In "train" mode the reward observed after
    a command is added to the value of each word of that command.
    """

    def __init__(self, vocab_path: str = "./vocab.txt") -> None:
        """
        Load the vocabulary and create one zero-initialised value per word.

        Arguments:
            vocab_path: Text file with one word per line.
        """
        self._initialized = False
        self._epsiode_has_started = False
        self.mode = "test"

        # Read the vocabulary and build both lookup directions (word <-> id).
        with open(vocab_path) as f:
            self.word_vocab = f.read().split("\n")
        self.word2id = {}
        self.id2word = []
        for i, w in enumerate(self.word_vocab):
            # A repeated line keeps the id of its last occurrence.
            self.word2id[w] = i
            self.id2word.append(w)

        self.qvalues = [0] * len(self.id2word)

        # Training bookkeeping; defined here so the attributes always exist,
        # and reset again by train().
        self.transitions = []
        self.last_score = 0
        self.no_train_step = 0
        self.moves = 0

    def train(self) -> None:
        """ Switch to training mode and reset the episode bookkeeping (not the learned values). """
        self.mode = "train"
        self.transitions = []
        self.last_score = 0
        self.no_train_step = 0
        self.moves = 0

    def test(self) -> None:
        """ Switch to test mode: commands are still chosen greedily but nothing is learned. """
        self.mode = "test"

    @property
    def select_additional_infos(self) -> "EnvInfos":
        """
        Returns what additional information should be made available at each game step.

        Requested information will be included within the `infos` dictionary
        passed to `CustomAgent.act()`. To request specific information, create a
        :py:class:`textworld.EnvInfos <textworld.envs.wrappers.filter.EnvInfos>`
        and set the appropriate attributes to `True`. The possible choices are:

        * `description`: text description of the current room, i.e. output of the `look` command;
        * `inventory`: text listing of the player's inventory, i.e. output of the `inventory` command;
        * `max_score`: maximum reachable score of the game;
        * `objective`: objective of the game described in text;
        * `entities`: names of all entities in the game;
        * `verbs`: verbs understood by the game;
        * `command_templates`: templates for commands understood by the game;
        * `admissible_commands`: all commands relevant to the current state;

        In addition to the standard information, game specific information
        can be requested by appending corresponding strings to the `extras`
        attribute. For this competition, the possible extras are:

        * `'recipe'`: description of the cookbook;
        * `'walkthrough'`: one possible solution to the game (not guaranteed to be optimal);

        Example:
            Here is an example of how to request information and retrieve it.

            >>> from textworld import EnvInfos
            >>> request_infos = EnvInfos(description=True, inventory=True, extras=["recipe"])
            ...
            >>> env = gym.make(env_id)
            >>> ob, infos = env.reset()
            >>> print(infos["description"])
            >>> print(infos["inventory"])
            >>> print(infos["extra.recipe"])

            Handicap is defined as follows
                max_score, has_won, has_lost,               # Handicap 0
                description, inventory, verbs, objective,   # Handicap 1
                command_templates,                          # Handicap 2
                entities,                                   # Handicap 3
                extras=["recipe"],                          # Handicap 4
                admissible_commands,                        # Handicap 5
        """
        # The return annotation is quoted so it is not evaluated when the class
        # is defined; a new EnvInfos object is created on every access.
        return EnvInfos(description=True, inventory=True, max_score=True,
                        admissible_commands=True, has_won=True, has_lost=True)

    def _init(self) -> None:
        """ Initialize the agent. """
        self._initialized = True

        # [You can insert code here.]

    def _get_word_id(self, word):
        """
        Return the id of `word`, registering the word first if it is unknown.

        A new word gets the next free index of `id2word` and a zero entry in
        `qvalues`. The index is taken from `id2word` rather than from
        `word2id`: when the vocabulary file contains repeated lines `word2id`
        is shorter than `id2word`, and its length would point at a slot that
        already belongs to another word.
        """
        if word not in self.word2id:
            self.word2id[word] = len(self.id2word)
            self.id2word.append(word)
            self.qvalues.append(0)
        return self.word2id[word]

    def _get_word_by_id(self, id):
        """ Return the word stored under `id`, or None if no such id exists. """
        # The lower bound stops negative ids from silently indexing from the end.
        if 0 <= id < len(self.id2word):
            return self.id2word[id]
        return None

    def _get_qvalue(self, id):
        """ Return the value stored for word `id`, or None if no such id exists. """
        if 0 <= id < len(self.qvalues):
            return self.qvalues[id]
        return None

    def _tokenize(self, text):
        """
        Convert `text` into a list of word ids.

        Every character other than ASCII letters, digits, '-' and space is
        replaced by a space before splitting on whitespace. Unknown words are
        added to the vocabulary as a side effect (see `_get_word_id`).
        """
        # Raw string: "\-" is not a valid escape sequence in a normal string literal.
        text = re.sub(r"[^a-zA-Z0-9\- ]", " ", text)
        return [self._get_word_id(word) for word in text.split()]

    def _start_episode(self, obs: List[str], infos: Dict[str, List[Any]]) -> None:
        """
        Prepare the agent for the upcoming episode.
        Arguments:
            obs: Initial feedback for each game.
            infos: Additional information for each game.
        """
        if not self._initialized:
            self._init()

        self._epsiode_has_started = True

        # [You can insert code here.]

    def _end_episode(self, obs: List[str], scores: List[int], infos: Dict[str, List[Any]]) -> None:
        """
        Tell the agent the episode has terminated.
        Arguments:
            obs: Previous command's feedback for each game.
            score: The score obtained so far for each game.
            infos: Additional information for each game.
        """
        self._epsiode_has_started = False

        # [You can insert code here.]

    def update_qvalues(self, command: List[int], reward):
        """ Add `reward` to the value of every word id in `command` (repeated words count repeatedly). """
        for word_id in command:
            self.qvalues[word_id] += reward

    def calc_avg_qvalue(self, command) -> float:
        """
        Return the mean value of the word ids in `command`.

        An empty command (text without any token) is rated 0, the same value
        an unseen word has, instead of raising ZeroDivisionError.
        """
        if not command:
            return 0.0
        total = 0
        for word_id in command:
            total += self._get_qvalue(word_id)
        return total / len(command)

    def choose_best_cmd(self, commands) -> int:
        """
        Return the index of a command with the highest mean value.

        Arguments:
            commands: Non-empty list of tokenized commands (lists of word ids).

        Returns:
            Index into `commands`; ties are broken uniformly at random.

        Raises:
            ValueError: If `commands` is empty.
        """
        if not commands:
            raise ValueError("choose_best_cmd() needs at least one command")
        avg_qvalues = [self.calc_avg_qvalue(cmd) for cmd in commands]
        max_value = max(avg_qvalues)
        max_commands = [i for i, value in enumerate(avg_qvalues)
                        if value == max_value]
        return random.choice(max_commands)

    def act(self, obs: str, score: int, done: bool,
            infos: Dict[str, List[Any]]) -> Optional[str]:
        """
        Choose the next command and, in train mode, learn from the last one.

        Arguments:
            obs: Previous command's feedback.
            score: The score obtained so far.
            done: Whether the game is finished.
            infos: Additional information; must contain "admissible_commands"
                and, in train mode, "has_won" and "has_lost".

        Returns:
            One of the admissible commands, or None when the list of
            admissible commands is empty.

        Notes:
            In train mode the reward for the previously chosen command is the
            score difference, +10 on a win, -10 on a loss, and -1 if that sum
            is 0. When `done` is true the episode bookkeeping is reset.
        """
        if not self._epsiode_has_started:
            self._start_episode(obs, infos)

        # Choose the best command based on the per-word values.
        admissible = infos["admissible_commands"]
        cmds = [self._tokenize(text) for text in admissible]
        if cmds:
            best_index = self.choose_best_cmd(cmds)
            action = admissible[best_index]
            chosen = cmds[best_index]
        else:
            # Nothing to choose from; do not crash.
            action = None
            chosen = None

        if self.mode == "test":
            if done:
                self._end_episode(obs, score, infos)
            return action

        # Train mode.
        self.no_train_step += 1
        self.moves += 1
        reward = 0

        # Reward the command chosen on the previous call (none on the first step).
        if self.transitions:
            reward = score - self.last_score
            self.last_score = score
            if infos["has_won"]:
                reward += 10
            if infos["has_lost"]:
                reward -= 10
            if reward == 0:
                reward -= 1  # Penalize steps that changed nothing.
            self.update_qvalues(self.transitions[-1], reward)

        # Debug output
        print("")
        print("train step:" + str(self.no_train_step))
        print("last reward:" + str(reward))
        if self.transitions:
            command = "last command: "
            for word_id in self.transitions[-1]:
                command += " " + self._get_word_by_id(word_id)
            print(command)

        # Remember the command chosen now so the next call can reward it.
        if chosen is not None:
            self.transitions.append(chosen)

        if done:
            if infos["has_won"]:
                print("-------- WON GAME ----------")
            else:
                print("-------- LOST GAME ----------")
            print(" ")
            self.last_score = 0
            self.moves = 0
            self.transitions = []
            self._end_episode(obs, score, infos)

        return action

In [75]:
# Train a fresh agent on one game: five rounds of play(), timing the whole run.
agent = CustomAgent()
agent.train()
starttime = time()
for i in range(5):
    # Banner announcing the round (1-based).
    print("-------------------------------------",
          "GAME: " + str(i+1),
          "-------------------------------------",
          sep="\n")
    play(agent, gamefiles[5])
elapsed = time() - starttime
print("Trained in {:.2f} secs".format(elapsed))


-------------------------------------
GAME: 1
-------------------------------------
tw-cooking-recipe1+take1-11Oeig8bSVdGSp78.ulx

  result = entry_point.load(False)



train step:1
last reward:0

train step:2
last reward:-1
last command:  examine yellow bell pepper

train step:3
last reward:-1
last command:  examine table

train step:4
last reward:-1
last command:  look

train step:5
last reward:-1
last command:  take red apple from counter

train step:6
last reward:-1
last command:  take knife from counter

train step:7
last reward:-1
last command:  close fridge

train step:8
last reward:-1
last command:  open fridge

train step:9
last reward:-1
last command:  put knife on stove

train step:10
last reward:-1
last command:  put red apple on stove

train step:11
last reward:-1
last command:  examine red hot pepper

train step:12
last reward:-1
last command:  examine cookbook

train step:13
last reward:-1
last command:  look

train step:14
last reward:-1
last command:  look

train step:15
last reward:-1
last command:  close fridge

train step:16
last reward:-1
last command:  open fridge

train step:17
last reward:-1
last command:  take cookbook from t

.
train step:128
last reward:0

train step:129
last reward:1
last command:  take red hot pepper from counter

train step:130
last reward:1
last command:  prepare meal

train step:131
last reward:11
last command:  eat meal
-------- WON GAME ----------
 
.
train step:132
last reward:0

train step:133
last reward:1
last command:  take red hot pepper from counter

train step:134
last reward:1
last command:  prepare meal

train step:135
last reward:11
last command:  eat meal
-------- WON GAME ----------
 
.
train step:136
last reward:0

train step:137
last reward:1
last command:  take red hot pepper from counter

train step:138
last reward:1
last command:  prepare meal

train step:139
last reward:11
last command:  eat meal
-------- WON GAME ----------
 
.
train step:140
last reward:0

train step:141
last reward:1
last command:  take red hot pepper from counter

train step:142
last reward:1
last command:  prepare meal

train step:143
last reward:11
last command:  eat meal
-------- WON GAME -

In [77]:
# Evaluate on the same game: in test mode the agent only picks commands, it does not learn.
agent.test()
play(agent=agent, path=gamefiles[5])

  result = entry_point.load(False)


tw-cooking-recipe1+take1-11Oeig8bSVdGSp78.ulx..........  	avg. steps:   3.0; avg. score:  3.0 / 3.


In [72]:
# List every word whose value has moved away from its initial 0.
for word_id, qvalue in enumerate(agent.qvalues):
    if qvalue == 0:
        continue
    print(agent._get_word_by_id(word_id) + " : " + str(qvalue))

apple : -4
bell : -6
close : -3
cook : -3
cookbook : -3
counter : 46
drop : -2
eat : 550
examine : -7
fridge : -7
from : 44
hot : 49
insert : -1
into : -1
knife : -5
look : -5
meal : 600
on : -3
open : -3
oven : -4
pepper : 43
prepare : 50
put : -3
red : 45
stove : -4
table : -3
take : 44
with : -3
yellow : -6


In [69]:
# Continue training the existing agent on another game: train() resets the
# episode bookkeeping but keeps the values learned so far.
agent.train()
starttime = time()
for i in range(5):
    # Banner announcing the round (1-based).
    print("-------------------------------------",
          "GAME: " + str(i+1),
          "-------------------------------------",
          sep="\n")
    play(agent, gamefiles[8])
elapsed = time() - starttime
print("Trained in {:.2f} secs".format(elapsed))

-------------------------------------
GAME: 1
-------------------------------------
tw-cooking-recipe2+take2+cut+open-BnYEixa9iJKmFZxO.ulx
train step:1
last reward:0

train step:2
last reward:-1
last command:  take red hot pepper from counter

train step:3
last reward:-1
last command:  eat red hot pepper

train step:4
last reward:-1
last command:  take red potato from counter

train step:5
last reward:-1
last command:  take cookbook from counter

train step:6
last reward:-1
last command:  take purple potato from counter

train step:7
last reward:1
last command:  take yellow potato from counter

train step:8
last reward:-10
last command:  eat yellow potato
-------- LOST GAME ----------
 
.
train step:9
last reward:0

train step:10
last reward:-1
last command:  take red hot pepper from counter

train step:11
last reward:-1
last command:  eat red hot pepper

train step:12
last reward:-1
last command:  take red potato from counter

train step:13
last reward:-1
last command:  take cookbook 


train step:129
last reward:-1
last command:  prepare meal

train step:130
last reward:-1
last command:  prepare meal

train step:131
last reward:-1
last command:  prepare meal

train step:132
last reward:-1
last command:  prepare meal

train step:133
last reward:-1
last command:  prepare meal

train step:134
last reward:-1
last command:  prepare meal

train step:135
last reward:-1
last command:  prepare meal

train step:136
last reward:-1
last command:  prepare meal

train step:137
last reward:-1
last command:  prepare meal

train step:138
last reward:-1
last command:  prepare meal

train step:139
last reward:-1
last command:  prepare meal

train step:140
last reward:-1
last command:  prepare meal

train step:141
last reward:-1
last command:  prepare meal

train step:142
last reward:-1
last command:  prepare meal

train step:143
last reward:-1
last command:  prepare meal

train step:144
last reward:-1
last command:  prepare meal

train step:145
last reward:-1
last command:  prepare me


train step:263
last reward:-1
last command:  take knife from table

train step:264
last reward:-1
last command:  look

train step:265
last reward:-1
last command:  put cookbook on stove

train step:266
last reward:-1
last command:  take cookbook from stove

train step:267
last reward:-1
last command:  drop knife

train step:268
last reward:-1
last command:  open fridge

train step:269
last reward:-1
last command:  take orange bell pepper from fridge

train step:270
last reward:-1
last command:  eat orange bell pepper

train step:271
last reward:-1
last command:  take cilantro from fridge

train step:272
last reward:-1
last command:  eat cilantro

train step:273
last reward:-1
last command:  insert cookbook into fridge

train step:274
last reward:1
last command:  take yellow bell pepper from fridge

train step:275
last reward:-1
last command:  prepare meal

train step:276
last reward:-1
last command:  prepare meal

train step:277
last reward:-1
last command:  prepare meal

train step:2


train step:396
last reward:-1
last command:  open fridge

train step:397
last reward:1
last command:  take yellow bell pepper from fridge

train step:398
last reward:-1
last command:  prepare meal

train step:399
last reward:-1
last command:  prepare meal

train step:400
last reward:-1
last command:  prepare meal

train step:401
last reward:-1
last command:  prepare meal

train step:402
last reward:-1
last command:  prepare meal

train step:403
last reward:-1
last command:  prepare meal

train step:404
last reward:-1
last command:  prepare meal

train step:405
last reward:-1
last command:  prepare meal

train step:406
last reward:-1
last command:  prepare meal

train step:407
last reward:-1
last command:  prepare meal
-------- LOST GAME ----------
 
.
train step:408
last reward:0

train step:409
last reward:-1
last command:  examine red hot pepper

train step:410
last reward:-1
last command:  take red hot pepper from counter

train step:411
last reward:-1
last command:  eat red hot pe


train step:524
last reward:-1
last command:  look

train step:525
last reward:-1
last command:  open fridge

train step:526
last reward:-1
last command:  examine cilantro

train step:527
last reward:-1
last command:  examine orange bell pepper

train step:528
last reward:-1
last command:  close fridge

train step:529
last reward:-1
last command:  take cookbook from counter

train step:530
last reward:-1
last command:  put cookbook on table

train step:531
last reward:-1
last command:  look

train step:532
last reward:-1
last command:  take knife

train step:533
last reward:-1
last command:  drop knife

train step:534
last reward:-1
last command:  examine oven

train step:535
last reward:-1
last command:  look

train step:536
last reward:-1
last command:  take cookbook from table

train step:537
last reward:-1
last command:  put cookbook on stove

train step:538
last reward:-1
last command:  open fridge

train step:539
last reward:-1
last command:  examine orange bell pepper

train ste


train step:659
last reward:-1
last command:  close fridge

train step:660
last reward:-1
last command:  take cookbook from table

train step:661
last reward:-1
last command:  put cookbook on table

train step:662
last reward:-1
last command:  look
-------- LOST GAME ----------
 
.
train step:663
last reward:0

train step:664
last reward:-1
last command:  take red hot pepper from counter

train step:665
last reward:-1
last command:  eat red hot pepper

train step:666
last reward:-1
last command:  take knife from table

train step:667
last reward:-1
last command:  drop knife

train step:668
last reward:-1
last command:  take purple potato from counter

train step:669
last reward:-1
last command:  cook purple potato with oven

train step:670
last reward:-1
last command:  eat purple potato

train step:671
last reward:-1
last command:  open fridge

train step:672
last reward:-1
last command:  examine orange bell pepper

train step:673
last reward:-1
last command:  examine cilantro

train s

.
train step:789
last reward:0

train step:790
last reward:-1
last command:  take red hot pepper from counter

train step:791
last reward:-1
last command:  eat red hot pepper

train step:792
last reward:-1
last command:  take knife from table

train step:793
last reward:-1
last command:  drop knife

train step:794
last reward:-1
last command:  take purple potato from counter

train step:795
last reward:-1
last command:  cook purple potato with oven

train step:796
last reward:-1
last command:  eat purple potato

train step:797
last reward:-1
last command:  look

train step:798
last reward:-1
last command:  look

train step:799
last reward:-1
last command:  open fridge

train step:800
last reward:-1
last command:  examine cilantro

train step:801
last reward:-1
last command:  examine yellow bell pepper

train step:802
last reward:-1
last command:  close fridge

train step:803
last reward:-1
last command:  look

train step:804
last reward:-1
last command:  examine oven

train step:805
la


train step:919
last reward:-1
last command:  look

train step:920
last reward:-1
last command:  take cookbook from stove

train step:921
last reward:-1
last command:  put cookbook on table

train step:922
last reward:-1
last command:  examine oven

train step:923
last reward:-1
last command:  open fridge

train step:924
last reward:-1
last command:  examine orange bell pepper

train step:925
last reward:-1
last command:  examine orange bell pepper

train step:926
last reward:-1
last command:  close fridge

train step:927
last reward:-1
last command:  look

train step:928
last reward:1
last command:  take yellow potato from counter
-------- LOST GAME ----------
 
.
train step:929
last reward:0

train step:930
last reward:-1
last command:  take red hot pepper from counter

train step:931
last reward:-1
last command:  eat red hot pepper

train step:932
last reward:-1
last command:  take knife from table

train step:933
last reward:-1
last command:  drop knife

train step:934
last reward:


train step:1051
last reward:-1
last command:  examine orange bell pepper

train step:1052
last reward:-1
last command:  examine cilantro

train step:1053
last reward:-1
last command:  close fridge

train step:1054
last reward:-1
last command:  take cookbook from stove

train step:1055
last reward:-1
last command:  put cookbook on table

train step:1056
last reward:-1
last command:  look

train step:1057
last reward:-1
last command:  take knife

train step:1058
last reward:-1
last command:  drop knife

train step:1059
last reward:-1
last command:  examine oven

train step:1060
last reward:-1
last command:  look

train step:1061
last reward:-1
last command:  open fridge

train step:1062
last reward:-1
last command:  examine orange bell pepper

train step:1063
last reward:-1
last command:  examine orange bell pepper

train step:1064
last reward:-1
last command:  close fridge

train step:1065
last reward:-1
last command:  take cookbook from table

train step:1066
last reward:-1
last comma


train step:1178
last reward:-1
last command:  put cookbook on stove

train step:1179
last reward:-1
last command:  look

train step:1180
last reward:-1
last command:  take cookbook from stove

train step:1181
last reward:-1
last command:  put cookbook on counter

train step:1182
last reward:-1
last command:  open fridge

train step:1183
last reward:-1
last command:  examine cilantro

train step:1184
last reward:-1
last command:  examine orange bell pepper

train step:1185
last reward:-1
last command:  close fridge

train step:1186
last reward:-1
last command:  look

train step:1187
last reward:-1
last command:  examine oven

train step:1188
last reward:-1
last command:  take knife

train step:1189
last reward:-1
last command:  drop knife

train step:1190
last reward:-1
last command:  open fridge

train step:1191
last reward:-1
last command:  examine orange bell pepper

train step:1192
last reward:-1
last command:  close fridge

train step:1193
last reward:-1
last command:  look

train


train step:1304
last reward:-1
last command:  close fridge

train step:1305
last reward:-1
last command:  look

train step:1306
last reward:-1
last command:  look

train step:1307
last reward:-1
last command:  take cookbook from counter

train step:1308
last reward:-1
last command:  put cookbook on stove

train step:1309
last reward:-1
last command:  open fridge

train step:1310
last reward:-1
last command:  examine cilantro

train step:1311
last reward:-1
last command:  examine orange bell pepper

train step:1312
last reward:-1
last command:  close fridge

train step:1313
last reward:-1
last command:  look

train step:1314
last reward:-1
last command:  take cookbook from stove

train step:1315
last reward:-1
last command:  put cookbook on table

train step:1316
last reward:-1
last command:  examine oven

train step:1317
last reward:-1
last command:  take red potato from counter

train step:1318
last reward:-1
last command:  cook red potato with oven

train step:1319
last reward:-1
la


train step:1433
last reward:1
last command:  take yellow potato from counter

train step:1434
last reward:-10
last command:  eat yellow potato
-------- LOST GAME ----------
 
.
train step:1435
last reward:0

train step:1436
last reward:-1
last command:  take red hot pepper from counter

train step:1437
last reward:-1
last command:  eat red hot pepper

train step:1438
last reward:-1
last command:  take knife from table

train step:1439
last reward:-1
last command:  drop knife

train step:1440
last reward:-1
last command:  take purple potato from counter

train step:1441
last reward:-1
last command:  cook purple potato with oven

train step:1442
last reward:-1
last command:  eat purple potato

train step:1443
last reward:-1
last command:  open fridge

train step:1444
last reward:-1
last command:  examine cilantro

train step:1445
last reward:-1
last command:  close fridge

train step:1446
last reward:-1
last command:  look

train step:1447
last reward:-1
last command:  examine oven

tra


train step:1559
last reward:-1
last command:  cook purple potato with oven

train step:1560
last reward:-1
last command:  eat purple potato

train step:1561
last reward:-1
last command:  open fridge

train step:1562
last reward:-1
last command:  examine cilantro

train step:1563
last reward:-1
last command:  examine orange bell pepper

train step:1564
last reward:-1
last command:  close fridge

train step:1565
last reward:-1
last command:  look

train step:1566
last reward:-1
last command:  look

train step:1567
last reward:-1
last command:  open fridge

train step:1568
last reward:-1
last command:  examine cilantro

train step:1569
last reward:-1
last command:  examine orange bell pepper

train step:1570
last reward:-1
last command:  close fridge

train step:1571
last reward:-1
last command:  take cookbook from counter

train step:1572
last reward:-1
last command:  put cookbook on stove

train step:1573
last reward:-1
last command:  take cookbook from stove

train step:1574
last rewa


train step:1688
last reward:-1
last command:  examine oven

train step:1689
last reward:-1
last command:  look

train step:1690
last reward:-1
last command:  take red potato from counter

train step:1691
last reward:-1
last command:  cook red potato with oven

train step:1692
last reward:-1
last command:  eat red potato

train step:1693
last reward:-1
last command:  open fridge

train step:1694
last reward:-1
last command:  examine orange bell pepper

train step:1695
last reward:-1
last command:  examine cilantro

train step:1696
last reward:-1
last command:  examine orange bell pepper

train step:1697
last reward:-1
last command:  close fridge

train step:1698
last reward:-1
last command:  take cookbook from stove

train step:1699
last reward:-1
last command:  put cookbook on table

train step:1700
last reward:-1
last command:  look

train step:1701
last reward:-1
last command:  take knife

train step:1702
last reward:-1
last command:  drop knife

train step:1703
last reward:-1
last 


train step:1812
last reward:-1
last command:  examine orange bell pepper

train step:1813
last reward:-1
last command:  close fridge

train step:1814
last reward:-1
last command:  look

train step:1815
last reward:-1
last command:  open fridge

train step:1816
last reward:-1
last command:  examine orange bell pepper

train step:1817
last reward:-1
last command:  close fridge

train step:1818
last reward:-1
last command:  take cookbook from counter

train step:1819
last reward:-1
last command:  put cookbook on stove

train step:1820
last reward:-1
last command:  look

train step:1821
last reward:-1
last command:  take knife

train step:1822
last reward:-1
last command:  drop knife

train step:1823
last reward:-1
last command:  look

train step:1824
last reward:-1
last command:  take cookbook from stove

train step:1825
last reward:-1
last command:  put cookbook on table

train step:1826
last reward:-1
last command:  open fridge

train step:1827
last reward:-1
last command:  examine cil


train step:1942
last reward:-1
last command:  put cookbook on stove

train step:1943
last reward:-1
last command:  look

train step:1944
last reward:-1
last command:  take cookbook from stove

train step:1945
last reward:-1
last command:  put cookbook on table

train step:1946
last reward:-1
last command:  open fridge

train step:1947
last reward:-1
last command:  examine orange bell pepper

train step:1948
last reward:-1
last command:  examine cilantro

train step:1949
last reward:-1
last command:  close fridge

train step:1950
last reward:-1
last command:  look

train step:1951
last reward:-1
last command:  take cookbook from table

train step:1952
last reward:-1
last command:  put cookbook on stove

train step:1953
last reward:-1
last command:  examine oven

train step:1954
last reward:1
last command:  take yellow potato from counter

train step:1955
last reward:-10
last command:  eat yellow potato
-------- LOST GAME ----------
 
.
train step:1956
last reward:0

train step:1957
las


train step:2070
last reward:-1
last command:  examine cilantro

train step:2071
last reward:-1
last command:  close fridge

train step:2072
last reward:-1
last command:  look

train step:2073
last reward:-1
last command:  open fridge

train step:2074
last reward:-1
last command:  examine orange bell pepper

train step:2075
last reward:-1
last command:  examine yellow bell pepper

train step:2076
last reward:-1
last command:  close fridge

train step:2077
last reward:-1
last command:  look

train step:2078
last reward:1
last command:  take yellow potato from counter

train step:2079
last reward:-10
last command:  eat yellow potato
-------- LOST GAME ----------
 
.
train step:2080
last reward:0

train step:2081
last reward:-1
last command:  take red hot pepper from counter

train step:2082
last reward:-1
last command:  eat red hot pepper

train step:2083
last reward:-1
last command:  take knife from table

train step:2084
last reward:-1
last command:  drop knife

train step:2085
last re

In [44]:
# Experiments: sanity-check the tokenizer and the word <-> id lookups.
wordids = agent._tokenize("cook a meal")
print(wordids)

# Round-trip through the id that was actually looked up instead of a
# hard-coded number, which is only valid for one particular vocab.txt.
cook_id = agent._get_word_id("cook")
print(cook_id)
print(agent._get_word_by_id(cook_id))

[4664, 785, 11449]
4664
cook


In [78]:
def play_render(agent,gamefile):
    """
    Play one episode of `gamefile` with `agent`, rendering every step.

    Arguments:
        agent: Object exposing `select_additional_infos` and
            `act(obs, score, done, infos)`.
        gamefile: Path of the game to play.

    Returns:
        None. The game is shown through `env.render()`.
    """
    requested_infos = agent.select_additional_infos
    env_id = textworld.gym.register_games([gamefile], requested_infos)

    env = gym.make(env_id)
    try:
        obs, infos = env.reset()

        env.render()  # Print the initial observation.

        score = 0
        done = False
        while not done:
            command = agent.act(obs, score, done, infos)
            # Rebind `obs` (not a separate `ob`) so the agent sees the latest
            # feedback on its next call instead of the initial observation.
            obs, score, done, infos = env.step(command)
            env.render()

        # Let the agent know the game is done, as play() does.
        agent.act(obs, score, done, infos)
    finally:
        # Release the environment even if the agent or the game raises.
        env.close()

In [79]:
# Watch the trained agent play, step by step, without learning (test mode).
agent.test()
play_render(agent=agent, gamefile=gamefiles[5])




                    ________  ________  __    __  ________
                   |        \|        \|  \  |  \|        \
                    \$$$$$$$$| $$$$$$$$| $$  | $$ \$$$$$$$$
                      | $$   | $$__     \$$\/  $$   | $$
                      | $$   | $$  \     >$$  $$    | $$
                      | $$   | $$$$$    /  $$$$\    | $$
                      | $$   | $$_____ |  $$ \$$\   | $$
                      | $$   | $$     \| $$  | $$   | $$
                       \$$    \$$$$$$$$ \$$   \$$    \$$
              __       __   ______   _______   __        _______
             |  \  _  |  \ /      \ |       \ |  \      |       \
             | $$ / \ | $$|  $$$$$$\| $$$$$$$\| $$      | $$$$$$$\
             | $$/  $\| $$| $$  | $$| $$__| $$| $$      | $$  | $$
             | $$  $$$\ $$| $$  | $$| $$    $$| $$      | $$  | $$
             | $$ $$\$$\$$| $$  | $$| $$$$$$$\| $$      | $$  | $$
             | $$$$  \$$$$| $$__/ $$| $$  | $$| $$_____ | $$__/ $$
          