In [1]:
import gym
import time
import tqdm
import textworld.gym

import numpy as np

from glob import glob
from pprint import pprint
from textworld import EnvInfos
from numpy.random import RandomState

from os.path import join as pjoin

In [2]:
GAMES_PATH = "sample_games"  # This assumes `sample_games.zip` was first unzipped.
# `glob` yields matches in arbitrary, filesystem-dependent order. Sort them so
# that positional picks such as `gamefiles[8]` select the same game everywhere.
gamefiles = sorted(glob(pjoin(GAMES_PATH, "*.ulx")))
print("Found {} games.".format(len(gamefiles)))

Found 10 games.


In [3]:
class RandomAgent:
    """Baseline agent that answers every game in a batch with a random command.

    Args:
        seed: Seed for the agent's private `RandomState`. Defaults to 42, the
            value that used to be hard-coded, so `RandomAgent()` behaves as
            before while other seeds allow independent runs.
    """

    def __init__(self, seed=42):
        # A private generator keeps the agent reproducible without touching
        # NumPy's global random state.
        self.rng = RandomState(seed)

    def act(self, obs, scores, dones, infos):
        """Choose one command per game in the batch.

        Args:
            obs: Unused by this agent.
            scores: Unused by this agent.
            dones: Unused by this agent; a command is drawn for every entry,
                whether or not the corresponding game is finished.
            infos: Mapping whose "admissible_commands" entry holds, for each
                game, the sequence of commands to choose from.

        Returns:
            A list with one command per entry of `infos["admissible_commands"]`,
            each drawn with `self.rng.choice`.

        Raises:
            KeyError: If `infos` has no "admissible_commands" entry.
            ValueError: Raised by `RandomState.choice` when a game's list of
                commands is empty.
        """
        return [self.rng.choice(admissible_commands) for admissible_commands in infos["admissible_commands"]]

    def reset(self, env):
        """Hook called with the environment before playing; nothing to do here."""
        pass

In [4]:
def flatten(nested_list):
    """Concatenate the items of every sub-iterable into one flat list.

    Only one level of nesting is removed; items are kept in their original order.
    """
    flat = []
    for inner in nested_list:
        for element in inner:
            flat.append(element)
    return flat

In [5]:
# Experiment size: number of episodes, and number of games played side by side.
NB_EPISODES = 10
BATCH_SIZE = 10

# Pick a game.
gamefile = gamefiles[8]

# Fields the rest of the notebook reads back from `infos`.
requested_infos = EnvInfos(
    admissible_commands=True,
    has_won=True,
    has_lost=True,
    max_score=True,
)

# Register the chosen game, then wrap it into a batched environment id.
env_id = textworld.gym.make_batch(
    textworld.gym.register_games([gamefile], requested_infos),
    batch_size=BATCH_SIZE,
    parallel=True,
)

env = gym.make(env_id)
agent = RandomAgent()
agent.reset(env)

In [6]:
# Play NB_EPISODES episodes with the batched environment and record, for every
# episode, the per-game final scores, step counts and win/loss flags.
stats = {}
start_time = time.time()
try:
    for no_episode in tqdm.tqdm(range(NB_EPISODES)):
        obs, infos = env.reset()

        scores = [0] * BATCH_SIZE
        dones = [False] * BATCH_SIZE
        steps = [0] * BATCH_SIZE
        while not all(dones):
            # Only games that are not done yet are charged a step.
            steps = [step + int(not done) for step, done in zip(steps, dones)]
            commands = agent.act(obs, scores, dones, infos)
            obs, scores, dones, infos = env.step(commands)

        # Collect stats
        stats[no_episode] = {
            "score": scores,
            "steps": steps,
            "has_won": infos["has_won"],
            "has_lost": infos["has_lost"],
        }

    elapsed = time.time() - start_time
finally:
    # Close the environment even when an episode raises or the cell is
    # interrupted, so whatever it holds is not leaked (presumably worker
    # processes, since it was created with `parallel=True` -- TODO confirm).
    env.close()

pprint(elapsed)

100%|██████████| 10/10 [00:13<00:00,  1.28s/it]

13.156073331832886





In [7]:
# Highest score reachable in the game, read from the first game of the batch
# in the last `infos` returned by the environment.
max_possible_score = infos["max_score"][0]
pprint(max_possible_score)

3


In [8]:
# Best final score reached by the agent in any game of any episode.
final_scores = [stats[episode]["score"] for episode in range(NB_EPISODES)]
pprint(np.max(final_scores))

3


In [9]:
# Mean final score over the NB_EPISODES * BATCH_SIZE games played.
total_score = np.sum([stats[episode]["score"] for episode in range(NB_EPISODES)])
nb_games = NB_EPISODES * BATCH_SIZE
pprint(total_score / nb_games)

0.97


In [10]:
# Mean number of steps taken over the NB_EPISODES * BATCH_SIZE games played.
total_steps = np.sum([stats[episode]["steps"] for episode in range(NB_EPISODES)])
nb_games = NB_EPISODES * BATCH_SIZE
pprint(total_steps / nb_games)

35.56


In [11]:
# Count of games the agent won, out of the NB_EPISODES * BATCH_SIZE played.
won_flags = [stats[episode]["has_won"] for episode in range(NB_EPISODES)]
pprint(np.sum(won_flags))

2
