In [0]:
# Refresh the apt package index before the apt-get install in the next cell.
!apt update

In [0]:
# This notebook uses gym.wrappers.Monitor and gym.envs.registry.env_specs,
# which newer gym releases no longer provide, so pin a legacy release instead
# of installing whatever version is current.
!pip install gym==0.17.3 pyvirtualdisplay > /dev/null
# System packages: virtual X server, OpenGL bindings and ffmpeg.
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null

In [0]:
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40) #error only
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import math
import glob
import io
import os
import base64
from IPython.display import HTML
from IPython import display as ipythondisplay

In [0]:
from pyvirtualdisplay import Display
# Start a non-visible (visible=0) 1400x900 virtual display.
# NOTE(review): presumably needed so env.render() has a display to draw to on
# a headless machine (e.g. Colab) -- confirm.
display = Display(visible=0, size=(1400, 900))
display.start()

In [0]:
"""
Utility function for displaying a recorded video of a gym environment in the notebook.
To record, wrap the environment with gym.wrappers.Monitor(env, "videos", force=True), then call show_video().
"""

def show_video(video_dir='videos'):
  """Embed a recorded episode video inline in the notebook output.

  Looks for ``*.mp4`` files in ``video_dir`` and displays the first one (in
  sorted filename order) as a base64-encoded HTML ``<video>`` element. Prints
  "Could not find video" when the directory contains no ``*.mp4`` file.

  Args:
    video_dir: Directory to search for recordings. Defaults to ``'videos'``.
  """
  # Sort so the chosen file is deterministic when several recordings exist.
  mp4list = sorted(glob.glob(os.path.join(video_dir, '*.mp4')))
  if len(mp4list) > 0:
    mp4 = mp4list[0]
    # Read-only binary mode is sufficient, and the context manager guarantees
    # the file handle is closed even if reading fails.
    with io.open(mp4, 'rb') as video_file:
      video = video_file.read()
    encoded = base64.b64encode(video)
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
  else: 
    print("Could not find video")

In [0]:
import types
from gym import spaces, logger
from gym.envs.classic_control.cartpole import CartPoleEnv

def infstep(self, action):
    """Advance the cart-pole by one time step without ever ending the episode.

    Args:
        action: Falsy values (``None``, ``0``) apply no force, ``1`` applies
            ``+self.force_mag`` and any other value applies ``-self.force_mag``.

    Returns:
        A 4-tuple ``(observation, reward, done, info)`` where ``observation``
        is the new state ``(x, x_dot, theta, theta_dot)`` as an array,
        ``reward`` is always ``0``, ``done`` is always ``False`` and ``info``
        is an array ``[xacc, thetaacc]`` holding the accelerations computed
        for this step.
    """
    pos, vel, angle, ang_vel = self.state

    if not action:
        force = 0
    elif action == 1:
        force = self.force_mag
    else:
        force = -self.force_mag

    cos_a = math.cos(angle)
    sin_a = math.sin(angle)

    temp = (force + self.polemass_length * ang_vel * ang_vel * sin_a) / self.total_mass
    denom = self.length * (4.0/3.0 - self.masspole * cos_a * cos_a / self.total_mass)
    ang_acc = (self.gravity * sin_a - cos_a * temp) / denom
    lin_acc = temp - self.polemass_length * ang_acc * cos_a / self.total_mass

    # Velocities are updated first; the new velocities then advance the
    # positions. The angle is left unwrapped (not reduced modulo 2*pi).
    dt = self.tau
    vel += dt * lin_acc
    pos += dt * vel
    ang_vel += dt * ang_acc
    angle += dt * ang_vel

    self.state = (pos, vel, angle, ang_vel)

    observation = np.array(self.state)
    accelerations = np.array([lin_acc, ang_acc])
    return observation, 0, False, accelerations

def reset(self, initial_state=None):
  """Reset the environment, optionally to a caller-supplied state.

  Args:
    initial_state: Optional sequence or array ``(x, x_dot, theta, theta_dot)``.
      When ``None`` or empty, each component is drawn uniformly from
      ``[-0.05, 0.05)`` using ``self.np_random``.

  Returns:
    The new state as a NumPy array.
  """
  self.steps_beyond_done = None

  # Test explicitly for "not provided": the truth value of a multi-element
  # NumPy array is ambiguous and would raise ValueError.
  if initial_state is None or np.size(initial_state) == 0:
    self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
  else:
    self.state = initial_state
  return np.array(self.state)

# Monkey-patch the stock CartPoleEnv class: replace its step with the
# non-terminating infstep and its reset with the version that accepts an
# initial state. The methods are replaced on the class itself, so every
# CartPoleEnv instance in this kernel is affected.
CartPoleEnv.step = infstep
CartPoleEnv.reset = reset

In [0]:
envname = 'CartPole-v2'

# Remove any existing spec with this id before registering it again
# (presumably so the cell can be re-run in the same kernel -- confirm).
registered_specs = gym.envs.registry.env_specs
if envname in registered_specs:
    del registered_specs[envname]

gym.envs.register(
    id=envname,
    entry_point='gym.envs.classic_control:CartPoleEnv',
    reward_threshold=0,
    max_episode_steps=100000,
)

In [0]:
N_STEPS = 5000      # number of simulation steps to run
SAMPLE_EVERY = 5    # keep one trajectory sample every this many steps

env = gym.make(envname)
env = gym.wrappers.Monitor(env, "videos", force=True)

# Each row: the 4 observation values followed by the contents of `info`.
trajectory = []

# try/finally so the environment (and its video recording) is closed even if
# a step or render call raises.
try:
    # reset env and, optionally, set initial state (x, x_dot, theta, theta_dot)
    observation = env.reset(initial_state=[0,0,0.01,0.1])

    for step in range(N_STEPS):
        env.render()

        # No action is passed: the system evolves without an applied force.
        observation, reward, done, info = env.step(None)

        if step % SAMPLE_EVERY == 0:
            trajectory.append(np.append(observation, info))

        if done:
            break
finally:
    env.close()

In [0]:
# Embed the recorded episode (an .mp4 from the videos directory) inline.
show_video()

In [0]:
# Stack the sampled rows into a 2-D array, rounded to 4 decimal places.
a = np.array(trajectory).round(decimals=4)

In [0]:
last_step = 100

# Split the first `last_step` samples into one 1-D array per column.
# Column order: x, x_dot, theta, theta_dot, x_acc, theta_acc.
X, X_dot, Theta, Theta_dot, X_acc, Theta_acc = (
    a[:last_step, column] for column in range(6)
)

In [0]:
# Inspect the sampled pole angles (first `last_step` samples).
print(Theta)

In [0]:
# Pole angle per sample; each arrow is vertical (U = 0) with its vertical
# component given by the angular velocity at that sample.
fig, ax = plt.subplots()
Q = ax.quiver(range(len(Theta)), Theta, 0, Theta_dot)
ax.set(title="Pole angle over time", xlabel="sample index", ylabel="theta (rad)");

In [0]:
# Phase portrait: each arrow sits at (theta, theta_dot) and points along
# (theta_dot, theta_acc), i.e. the direction the state moves in phase space.
fig, ax = plt.subplots()
Q = ax.quiver(Theta, Theta_dot, Theta_dot, Theta_acc)
ax.set(title="Pole phase portrait", xlabel="theta (rad)", ylabel="theta_dot");