In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import gym
import ray

# Start Ray for this notebook session before any actors are created.
ray.init()

**Encapsulating simulator state with actors:** Markov decision processes (MDPs) are stateful, and Ray actors can be used to encapsulate this state. The cell below wraps a gym environment in an actor. Instantiating the actor creates a new worker process that holds the state of the gym environment. Method calls on the actor are essentially tasks, or remote procedure calls, that are executed on the actor.

In [None]:
# Define an actor.
@ray.remote
class EnvActor(object):
    """Actor that owns a single gym ``CartPole-v0`` environment.

    The environment is created and reset once, when the actor is
    constructed; every later ``step`` call advances that same environment.
    """

    def __init__(self):
        # Build and reset the environment first, then keep it on the actor
        # so that it can be stepped immediately after construction.
        cartpole = gym.make("CartPole-v0")
        cartpole.reset()
        self.env = cartpole

    def step(self, action):
        """Apply ``action`` to the wrapped environment.

        Args:
            action: Action forwarded unchanged to the environment's ``step``.

        Returns:
            Whatever the environment's ``step`` call returns for this action.
        """
        outcome = self.env.step(action)
        return outcome

# Instantiate an actor. `.remote()` is used instead of calling the class
# directly because EnvActor is decorated with `@ray.remote`.
actor = EnvActor.remote()

# Run one step of the simulator on the actor (taking action 0).
# The remote call hands back an ID for the result, not the result itself.
result_id = actor.step.remote(0)

# Get the results. As the last expression in the cell, the fetched value
# is what the notebook displays.
ray.get(result_id)

**Exercise:** Create an actor with a method `do_rollout` that takes a policy, uses that policy to perform a rollout, and returns the cumulative reward.

In [None]:
@ray.remote
class RolloutActor(object):
    """Exercise actor that holds a gym ``CartPole-v0`` environment.

    ``do_rollout`` is intentionally left unimplemented; completing it is
    the exercise.
    """

    def __init__(self):
        # The environment is created here but not reset.
        self.env = gym.make("CartPole-v0")
    
    def do_rollout(self, policy):
        """Exercise stub: perform one rollout using ``policy``.

        Args:
            policy: The policy used to drive the rollout (for example
                ``sample_policy``, which maps a state to 0 or 1).

        Returns:
            Once implemented, the cumulative reward of the rollout.

        Raises:
            NotImplementedError: Always, until the exercise is completed.
        """
        # This method should perform a rollout in the environment
        # using the policy and it should return the cumulative
        # reward.
        raise NotImplementedError


def sample_policy(state):
    """Pick an action from the first component of ``state``.

    Args:
        state: Indexable observation; only ``state[0]`` is inspected.

    Returns:
        0 when ``state[0]`` is negative, otherwise 1.
    """
    if state[0] < 0:
        return 0
    return 1


# Create an actor.
rollout_actor = RolloutActor.remote()
# Do a rollout and get the result.
reward = ray.get(rollout_actor.do_rollout.remote(sample_policy))
print(reward)
# Do a second rollout on the same actor and get the result.
reward = ray.get(rollout_actor.do_rollout.remote(sample_policy))
print(reward)

**Exercise:** Create a new actor that is the same as `RolloutActor` except that it has a method `do_rollouts`, which takes a policy and a number `N`, performs `N` rollouts, and returns a list of the rewards obtained.

In [None]:
@ray.remote
class MultiRolloutActor(object):
    """Exercise actor that holds a gym ``CartPole-v0`` environment.

    ``do_rollouts`` is intentionally left unimplemented; completing it is
    the exercise.
    """

    def __init__(self):
        # Create the environment and reset it once at construction time.
        self.env = gym.make("CartPole-v0")
        self.env.reset()
    
    def do_rollouts(self, policy, n):
        """Exercise stub: perform ``n`` rollouts using ``policy``.

        Args:
            policy: The policy used to drive each rollout (for example
                ``sample_policy``, which maps a state to 0 or 1).
            n: Number of rollouts to perform.

        Returns:
            Once implemented, a list of the rewards obtained, one entry
            per rollout.

        Raises:
            NotImplementedError: Always, until the exercise is completed.
        """
        raise NotImplementedError


# Create an actor.
multi_rollout_actor = MultiRolloutActor.remote()
# Do ten rollouts and get the results.
rewards = ray.get(multi_rollout_actor.do_rollouts.remote(sample_policy, 10))
# Sanity check for the exercise: one reward per requested rollout.
assert len(rewards) == 10