In [None]:
# References
# https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py

In [1]:
import gym
import random
import time

In [2]:
# Gym environment id; passed to gym.make() in the episode-loop cell below.
env_name = "CartPole-v1"

In [15]:
# V4
class Agent:
    """Rule-based CartPole controller driven by pole angle and angular velocity.

    When the pole is close to upright (``abs(angle) < angle_threshold``) the
    action is chosen from the sign of the angular velocity; otherwise it is
    chosen from the sign of the angle itself.

    Actions are the integers 0 and 1. In Gym's CartPole these are expected to
    mean "push left" / "push right" -- confirm against the installed gym
    version.
    """

    def __init__(self, env, angle_threshold=0.02):
        """Create the controller.

        Args:
            env: Environment exposing ``action_space.n``.
            angle_threshold: Half-width of the near-upright band (same units
                as ``state[2]``) inside which angular velocity, rather than
                angle, decides the action. Defaults to the original 0.02.
        """
        self.action_size = env.action_space.n
        self.angle_threshold = angle_threshold

    def get_action(self, state):
        """Return the action (0 or 1) for the given observation.

        Args:
            state: Indexable observation; ``state[2]`` is read as the pole
                angle and ``state[3]`` as the pole angular velocity.

        Returns:
            0 when the deciding quantity is negative, 1 otherwise (a value of
            exactly zero maps to 1).
        """
        pole_angle, pole_angular_velocity = state[2:4]
        if abs(pole_angle) < self.angle_threshold:
            # Nearly upright: react to where the pole is heading, not where it is.
            return 0 if pole_angular_velocity < 0 else 1
        # Clearly tilted: push toward the side the pole is leaning to.
        return 0 if pole_angle < 0 else 1

In [27]:
# V5 PID
class Agent:
    """PID-style CartPole controller acting on the pole angle.

    Each call to :meth:`get_action` adds the current pole angle to a running
    sum (the integral term), combines angle, angular velocity and that sum
    with the configured gains, and returns 0 or 1 from the sign of the result.

    The running sum is instance state: call :meth:`reset` (or build a new
    agent) at the start of every episode so error accumulated in one episode
    does not bias the next.
    """

    def __init__(self, env, kp=0.7, kd=0.2, ki=0.1):
        """Create the controller.

        Args:
            env: Environment exposing ``action_space.n``.
            kp: Gain applied to the pole angle (proportional term).
            kd: Gain applied to the pole angular velocity (derivative term).
            ki: Gain applied to the accumulated pole angle (integral term).
        """
        self.action_size = env.action_space.n
        self.kp = kp
        self.kd = kd
        self.ki = ki
        self.angle_integral = 0

    def reset(self):
        """Clear the accumulated pole-angle sum (call between episodes)."""
        self.angle_integral = 0

    def get_action(self, state):
        """Return the action (0 or 1) for the given observation.

        Side effect: adds ``state[2]`` to ``self.angle_integral``.

        Args:
            state: Indexable observation; ``state[2]`` is read as the pole
                angle and ``state[3]`` as the pole angular velocity.

        Returns:
            0 when the weighted sum is negative, 1 otherwise (a sum of exactly
            zero maps to 1).
        """
        pole_angle, pole_angular_velocity = state[2:4]
        self.angle_integral += pole_angle
        # Term order is kept as p, d, i so floating-point results match the
        # original hard-coded expression.
        composite = (
            self.kp * pole_angle
            + self.kd * pole_angular_velocity
            + self.ki * self.angle_integral
        )
        return 0 if composite < 0 else 1

In [28]:
# Run a few rendered CartPole episodes with the currently defined Agent and
# record, per episode, how many steps it lasted and the cart's final position.
#
# NOTE(review): this uses the classic Gym API (reset() returns only the
# observation, step() returns a 4-tuple). Newer gym / gymnasium releases use
# different return shapes -- confirm against the installed version.
N_EPISODES = 3
MAX_STEPS = 500  # matches the 500-step episodes seen in the recorded output

env = gym.make(env_name)
results = list()
final_positions = list()

try:
    for i_episode in range(N_EPISODES):
        # Build a fresh agent per episode so controller state (e.g. the PID
        # agent's accumulated angle) does not leak from one episode to the next.
        agent = Agent(env)
        observation = env.reset()
        for t in range(MAX_STEPS):
            env.render()
            action = agent.get_action(observation)
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                results.append(t+1)
                final_positions.append(observation[0])
                break
        else:
            # The environment never reported done within MAX_STEPS; still
            # record the episode so both lists hold one entry per episode.
            print("Episode stopped after {} timesteps without done".format(MAX_STEPS))
            results.append(MAX_STEPS)
            final_positions.append(observation[0])
finally:
    # Always release the render window / environment, even if a step raised.
    env.close()

print(f"Final Results:{results}")
print(f"Final Positions:{final_positions}")

Episode finished after 500 timesteps
Episode finished after 500 timesteps
Episode finished after 500 timesteps
Final Results:[500, 500, 500]
Final Positions:[0.34805933, 0.098428704, -0.14230487]
