# Agent anatomy

In [13]:
import random

class Environment:
  """Toy environment that pays a random reward for a limited number of steps.

  The observation and the action set are constant, and the reward does not
  depend on the action taken; the class only illustrates the interface an
  agent talks to.

  Args:
    steps: Number of actions the environment accepts before the episode is
      over. Defaults to 10.
  """

  def __init__(self, steps=10):
    # Remaining number of actions before the episode ends.
    self.steps_left = steps

  def get_observation(self):
    """Return the current observation (always the same three zeros)."""
    return [0.0, 0.0, 0.0]

  def get_actions(self):
    """Return the list of actions the agent may choose from."""
    return [0, 1]

  def is_done(self):
    """Return True once no steps are left in the episode."""
    # `<=` rather than `==` so a non-positive step budget still terminates
    # instead of looping forever.
    return self.steps_left <= 0

  def action(self, action):
    """Consume one step and return a random reward in [0.0, 1.0).

    The `action` argument is accepted but does not influence the reward.

    Raises:
      RuntimeError: If the episode has already finished.
    """
    if self.is_done():
      raise RuntimeError("Game is over")
    self.steps_left -= 1
    return random.random()

class Agent:
  """Agent with a purely random policy that keeps a running reward total."""

  def __init__(self):
    # Sum of all rewards collected across calls to step().
    self.total_reward = 0.0

  def step(self, env):
    """Take one random action in `env` and add its reward to the total.

    Args:
      env: Object providing get_observation(), get_actions() and
        action(chosen_action) -> reward.
    """
    # The observation is requested but not used to pick the action.
    observation = env.get_observation()
    chosen = random.choice(env.get_actions())
    gained = env.action(chosen)
    self.total_reward += gained

# Play a single episode: the agent keeps stepping until the environment
# reports that it is done, then the accumulated reward is printed.
env, agent = Environment(), Agent()

while not env.is_done():
  agent.step(env)

print("Total reward: %.4f" % (agent.total_reward,))

Total reward: 4.3641


# CartPole random

In [16]:
import gym

# Run one CartPole episode with randomly sampled actions, counting the steps
# taken and summing the rewards received.
env = gym.make('CartPole-v0')
obs = env.reset()
total_reward, total_steps = 0.0, 0

done = False
while not done:
  # No policy here: the action is drawn from the environment's action space.
  action = env.action_space.sample()
  obs, reward, done, _ = env.step(action)
  total_reward += reward
  total_steps += 1

print("Episode done in %d steps, total reward %.2f" % (total_steps, total_reward))

Episode done in 31 steps, total reward 31.00


# Random action wrapper

In [18]:
class RandomActionWrapper(gym.ActionWrapper):
  """Action wrapper that sometimes replaces the requested action with a sampled one.

  Args:
    env: Environment to wrap.
    epsilon: Threshold compared against random.random(); a draw below it
      triggers the replacement. Defaults to 0.1.
  """

  def __init__(self, env, epsilon=0.1):
    super(RandomActionWrapper, self).__init__(env)
    self.epsilon = epsilon

  def action(self, action):
    """Return `action`, or a sample from the wrapped env's action space.

    Prints "Random!" whenever the requested action is overridden.
    """
    explore = random.random() < self.epsilon
    if not explore:
      return action
    print("Random!")
    return self.env.action_space.sample()

# Wrap a fresh CartPole environment using the default epsilon.
env = RandomActionWrapper(gym.make("CartPole-v0"))

In [30]:
# Play one episode on the wrapped environment, always requesting action 0,
# and sum up the rewards until the episode ends.
obs = env.reset()
total_reward = 0.0

done = False
while not done:
  obs, reward, done, _ = env.step(0)
  total_reward += reward

print("Reward got: %.2f" % total_reward)

Random!
Random!
Reward got: 12.00


# CartPole random monitor

In [31]:
env = gym.make("CartPole-v0")
env = gym.wrappers.Monitor(env, "recording")