# Unit 2

## Agent Anatomy

In [2]:
import random
from typing import List

In [3]:
# Creating a class for Environment
class Environment:
    """Toy environment that pays a random reward for a limited number of steps.

    Args:
        steps: How many actions may be taken before the episode is over.
            Defaults to 10.
    """

    def __init__(self, steps: int = 10):
        # Remaining number of actions the agent is allowed to take.
        self.steps_left = steps

    def get_observation(self) -> List[float]:
        """Return the current observation, which is always three zeros."""
        return [0.0, 0.0, 0.0]

    def get_actions(self) -> List[int]:
        """Return the list of actions the agent may choose from."""
        return [0, 1]

    def is_done(self) -> bool:
        """Return True once no steps are left in the episode."""
        # `<=` rather than `==` so that a zero or negative budget ends the
        # episode instead of letting a `while not is_done()` loop spin forever.
        return self.steps_left <= 0

    def action(self, action: int) -> float:
        """Apply ``action``, consume one step and return a random reward.

        The value of ``action`` is ignored; the reward is drawn from
        ``random.random()``.

        Raises:
            RuntimeError: If the episode has already finished.
        """
        if self.is_done():
            # RuntimeError is a subclass of Exception, so existing handlers
            # that catch Exception keep working.
            raise RuntimeError("Game is over")
        self.steps_left -= 1
        return random.random()

In [4]:
# Creating a class for Agent
class Agent:
    """Agent that acts at random and keeps a running total of its rewards."""

    def __init__(self):
        # Sum of all rewards received so far.
        self.total_reward = 0.0

    def step(self, env: Environment):
        """Observe the environment, take one random action and bank the reward."""
        # The observation is fetched but this random policy does not use it.
        observation = env.get_observation()
        chosen_action = random.choice(env.get_actions())
        self.total_reward += env.action(chosen_action)

In [5]:
# One environment and one agent for a single episode.
env = Environment()
agent = Agent()

# Keep stepping until the environment reports that the episode is over.
while True:
    if env.is_done():
        break
    agent.step(env)

print("Total reward got: %.4f" % agent.total_reward)

Total reward got: 4.4392


## Cartpole Random

In [6]:
import gym

In [8]:
# Create the CartPole-v0 environment through gym
env = gym.make("CartPole-v0")

total_reward = 0.0
total_steps = 0
obs = env.reset()

# Sample a random action and step the environment until the episode ends
done = False
while not done:
    action = env.action_space.sample()
    obs, reward, done, _ = env.step(action)
    total_reward += reward
    total_steps += 1

print("Episode done in %d steps, total reward %.2f" % (
    total_steps, total_reward))

Episode done in 22 steps, total reward 22.00


# Random Action Wrapper
A wrapper adds extra logic or a set of rules to an existing gym environment — for example, preprocessing or transforming the image frames so that the agent can process them better — in order to improve performance.

In [9]:
import gym
from typing import TypeVar
import random

In [10]:
# Generic placeholder type for whatever action type the environment uses
Action = TypeVar('Action')


class RandomActionWrapper(gym.ActionWrapper):
    """Action wrapper that occasionally replaces the given action with a random one.

    ``epsilon`` is the threshold compared against ``random.random()``: when the
    drawn number is below it, the supplied action is discarded and one is
    sampled from the wrapped environment's action space instead.
    """

    def __init__(self, env, epsilon=0.1):
        super().__init__(env)
        self.epsilon = epsilon

    def action(self, action: Action) -> Action:
        """Return ``action`` unchanged, or a random action if the draw falls below ``epsilon``."""
        explore = random.random() < self.epsilon
        if not explore:
            return action
        # Announce the override so it is visible in the cell output.
        print("Random!")
        return self.env.action_space.sample()

In [11]:
# Same CartPole environment, now wrapped so that some actions get randomized.
env = RandomActionWrapper(gym.make("CartPole-v0"))

obs = env.reset()
total_reward = 0.0

# Always request action 0; the wrapper decides whether to override it.
done = False
while not done:
    obs, reward, done, _ = env.step(0)
    total_reward += reward

print("Reward got: %.2f" % total_reward)

Random!
Random!
Reward got: 10.00


As we can see, everything is the same except for how the environment is initialized. Yet on every call to the step function, the wrapper still works as instructed in the action function of RandomActionWrapper.

# Cartpole Random Monitor
Monitoring is the process of recording the agent's performance and saving it as a video.

In [12]:
import gym

In [13]:
env = gym.make("CartPole-v0")
# force=True lets the Monitor clear monitor files left in "recording" by a
# previous run; without it, re-running this cell raises because the directory
# already contains monitor output.
env = gym.wrappers.Monitor(env, "recording", force=True)

total_reward = 0.0
total_steps = 0

try:
    obs = env.reset()

    # Play one episode with random actions while the Monitor records it.
    while True:
        action = env.action_space.sample()
        obs, reward, done, _ = env.step(action)
        total_reward += reward
        total_steps += 1
        if done:
            break

    print("Episode done in %d steps, total reward %.2f" % (
        total_steps, total_reward))
finally:
    # Always release the recorder and the environment, even if the loop fails.
    env.close()
    # Kept from the original cell: also close the wrapped environment directly.
    env.env.close()

Episode done in 23 steps, total reward 23.00
