In [1]:
import utils
import numpy as np
import copy
from rtfm import featurizer as X

In [2]:
class TrueSimulator():
    """Wrapper around a gym-style environment that reports valid actions.

    The wrapped environment is expected to return frames (mapping-like
    objects) whose ``'valid'`` entry exposes a ``.numpy()`` method yielding a
    mask over the action space; ``reset`` and ``step`` translate that mask
    into an array of valid action indices.

    Parameters
    ----------
    env : object
        Environment exposing ``action_space`` (anything supporting ``len``),
        ``reset()`` and ``step(action)``.
    featurizer : object, optional
        Object with a ``featurize(env)`` method, used only by ``render``.
    """

    def __init__(self, env, featurizer=None):
        self.env = env
        # Use the environment that was passed in. The previous version read
        # the notebook-global `gym_env`, which fails on a fresh kernel and
        # gives the wrong size when wrapping a different environment.
        self.action_space = len(env.action_space)
        self.featurizer = featurizer

    def _valid_actions(self, frame):
        """Return the indices of the actions flagged as valid in ``frame``."""
        # Boolean mask of shape (action_space)
        valid_moves = frame['valid'].numpy().astype(bool)
        actions = np.arange(self.action_space)
        return actions[valid_moves]

    def reset(self):
        """Reset the environment and return the array of valid action indices."""
        frame = self.env.reset()
        return self._valid_actions(frame)

    def step(self, action):
        """Apply ``action`` and return ``(valid_actions, reward, done)``.

        ``action`` is cast to a plain ``int`` before being forwarded, so numpy
        integer scalars are accepted. The fourth element returned by the
        environment's ``step`` is discarded.
        """
        frame, reward, done, _ = self.env.step(int(action))
        return self._valid_actions(frame), reward, done

    def render(self):
        """Run the featurizer on the wrapped environment; returns ``None``.

        Raises
        ------
        AttributeError
            If the simulator was created without a featurizer.
        """
        if self.featurizer is None:
            raise AttributeError(
                "TrueSimulator.render() requires a featurizer, but none was provided"
            )
        self.featurizer.featurize(self.env)

In [3]:
### Define parameters ###
# NOTE(review): none of the five constants below is read by any cell shown in
# this notebook. The names suggest settings for a tree-search experiment
# (UCB exploration constant, reward discount, episode/action limits, number of
# simulations) — confirm before relying on them.
ucb_C = 1.0
discount = 0.997
episode_length = 100
max_actions = 100
num_simulations = 10 

# Build the environment from default flags and wrap it in the simulator.
flags = utils.Flags()
gym_env = utils.create_env(flags)
# Alternative kept for reference: same environment with an explicit featurizer stack.
#gym_env = utils.create_env(flags, featurizer=X.Concat([X.Text(), X.ValidMoves(), X.Render()]))
# Render featurizer, passed to the simulator for use by its render() method.
featurizer = X.Render()
game_simulator = TrueSimulator(gym_env, featurizer)

In [4]:
game_simulator.render()


██████
█?   █
█ @  █
█y  n█
█ !  █
██████


## Time profiling

In [None]:
%%time
e = copy.deepcopy(gym_env)

In [None]:
%%time
m = copy.deepcopy(gym_env.world.map)

In [None]:
%%time
w = copy.deepcopy(gym_env.world)

In [None]:
%%time
_ = gym_env.step(0)

In [None]:
%%time
c = copy.deepcopy(gym_env.configs)

In [None]:
%%time
_ = gym_env.reset()

In [None]:
%%time
_ = utils.create_env(flags)

We want to store only the part of the simulator's internal state that is needed to take a step and then roll back to an arbitrary previous state.

My hypothesis is that the only thing that is time-consuming to create or to copy is the 'configs' attribute, which is something like a list with millions of possible configurations of monsters, attributes, items and teams. However, there is no need to deepcopy it: the saved state can simply leave it out and deepcopy every other attribute.

In [None]:
#%%time
# deepcopy everything except 'configs'
#d = {}
#for k in gym_env.__dict__.keys():
#    if k != 'configs':
#        d[k] = copy.deepcopy(gym_env.__dict__[k])

In [5]:
d = gym_env.save_state_dict()

In [6]:
d

{'target_monster': Monster(cold bat, hp=20/20, dmg=106, ac=-2),
 'target_group': 'order of the forest',
 'distractor_monster': Monster(fire ghost, hp=20/20, dmg=106, ac=-2),
 'distractor_item': mysterious sword(4, 3),
 'modifier_assignment': [(rtfm.dynamics.element.Cold,
   ('gleaming', 'grandmasters')),
  (rtfm.dynamics.element.Fire, ('mysterious', 'soldiers')),
  (rtfm.dynamics.element.Lightning, ('blessed', 'shimmering')),
  (rtfm.dynamics.element.Poison, ('arcane', 'fanatical'))],
 'group_assignment': [('order of the forest', ('bat', 'jaguar', 'zombie')),
  ('rebel enclave', ('ghost', 'goblin', 'shaman')),
  ('star alliance', ('imp', 'panther', 'wolf'))],
 '_cache': {},
 'world_shape': (6, 6),
 'world': <rtfm.dynamics.world.World at 0x7f9c82690b90>,
 'engine': <rtfm.dynamics.engine.Engine at 0x7f9c82690dd0>,
 'partially_observable': False,
 'history': [],
 'iter': 0,
 'max_iter': 1000,
 'max_placement': 2,
 'max_name': 2,
 'max_inv': 2,
 'max_wiki': 80,
 'max_task': 40,
 'time_pena

In [7]:
%%time
d1 = copy.deepcopy(d)

CPU times: user 6.53 ms, sys: 0 ns, total: 6.53 ms
Wall time: 6.45 ms


In [8]:
d['agent'].queue

[]

In [9]:
featurizer.featurize(gym_env)


██████
█?   █
█ @  █
█y  n█
█ !  █
██████


{}

In [10]:
_ = gym_env.step(0)

In [11]:
new_gym_env = utils.create_env(flags)

In [12]:
featurizer.featurize(new_gym_env)


██████
█y   █
█  ? █
█ @  █
█n  !█
██████


{}

In [13]:
#for k in d.keys():
#    setattr(gym_env, k, d[k])

In [14]:
new_gym_env.load_state_dict(copy.deepcopy(d))

In [15]:
featurizer.featurize(new_gym_env)


██████
█?   █
█ @  █
█y  n█
█ !  █
██████


{}

In [16]:
d['agent'].position

(2, 2)

In [17]:
# Swap the agent object registered in the world for `new_gym_env.agent`,
# keeping the same position.
# NOTE(review): presumably needed because, after load_state_dict, the world
# holds a different agent object than `new_gym_env.agent` — confirm.
old_agent = list(new_gym_env.world.agents)[0]  # first agent currently registered in the world
pos = old_agent.position  # remember where it stands before removing it
new_gym_env.world.remove_object(old_agent)
# Clear the position before placing the env's own agent.
# NOTE(review): place_object_at_pos may require an unplaced object — confirm.
new_gym_env.agent.position = None
new_gym_env.world.place_object_at_pos(new_gym_env.agent, pos)

In [18]:
_ = new_gym_env.step(2)

In [19]:
f, r, done, i = _

In [20]:
r

-1

In [22]:
done

True

In [23]:
new_gym_env.agent.position

In [24]:
d['agent'].position

(2, 2)

In [None]:
gym_env.world

In [None]:
new_gym_env.world

In [19]:
# Scratch check of exception message formatting. The `if False` branch is never
# taken, so this cell always raises Exception("2") and will halt a
# "Restart & Run All" at this point.
if False:
    pass
else:
    raise Exception("{}".format(2))

Exception: 2