# Heuristic (rule-based) action agent for cart pole environment #

In [1]:
import gym
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Create the CartPole-v1 environment that the simulation loop below resets and steps.
env = gym.make("CartPole-v1")

In [3]:
 """
 If the pole is leaning in a direction, push the cart in that direction so the pole swings back
 the opposite way, being careful not to push too hard.
 
    State:
        Num	Observation                 Min         Max
        0	Cart Position             -4.8            4.8
        1	Cart Velocity             -Inf            Inf
        2	Pole Angle                 -24°           24°
        3	Pole Velocity At Tip      -Inf            Inf
        
    Actions:
        Type: Discrete(2)
        Num	Action
        0	Push cart to the left
        1	Push cart to the right
"""
def getSmartAction(state, tooFast=0.5):
    """Pick a push direction that counteracts the pole's current lean.

    The cart is pushed towards the side the pole is leaning to, unless the
    pole tip is already moving back the other way faster than ``tooFast``;
    in that case the push is reversed so the correction does not overshoot.

    Args:
        state: Sequence of four values, unpacked as
            ``(position, velocity, angle, tipVelocity)``. Only the angle and
            the tip velocity are used by this policy.
        tooFast: Tip-speed threshold beyond which the push direction is
            reversed. Defaults to 0.5, the value that used to be hard-coded.

    Returns:
        0 to push the cart to the left, 1 to push it to the right.
    """
    _, _, angle, tipVelocity = state
    pushLeft, pushRight = 0, 1

    if angle >= 0:
        # Leaning right (or upright): push right unless the tip is already
        # swinging left too fast.
        return pushLeft if tipVelocity < -tooFast else pushRight

    # Every other angle is treated as leaning left, so an action is always
    # returned: push left unless the tip is already swinging right too fast.
    return pushRight if tipVelocity > tooFast else pushLeft
    

## Heuristic action agent ##

In [4]:
# Run the heuristic policy for a fixed number of episodes and record the
# total reward collected in each one.
episodes = 1000
steps = 200  # maximum number of steps taken per episode

reward_list = []
for episode in range(1, episodes + 1):
    # Start every episode from the observation returned by reset(); reusing
    # the state left over from the previous episode would base the first
    # action on a pole position that no longer exists.
    # NOTE(review): assumes the classic gym API where reset() returns only the
    # observation, consistent with the 4-tuple step() unpacking below.
    state = env.reset()
    total_reward = 0
    for step in range(1, steps + 1):
        action = getSmartAction(state)
        next_state, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break
        state = next_state
    reward_list.append(total_reward)

## Performance of heuristic agent ##

best episode, average episode, worst episode:

In [5]:
# Highest, mean, and lowest per-episode total reward stored in reward_list.
np.max(reward_list), np.average(reward_list), np.min(reward_list)

(200.0, 200.0, 200.0)

In [6]:
# Same summary again: highest, mean, and lowest per-episode total reward.
np.max(reward_list), np.average(reward_list), np.min(reward_list)

(200.0, 200.0, 200.0)