In [None]:
import gym
from gym import envs
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Download and build packages for visualization in jupyter
### git clone https://github.com/jakevdp/JSAnimation
### cd JSAnimation
### python3 setup.py install

In [None]:
# Imports specifically so we can render outputs in Jupyter.
from JSAnimation.IPython_display import display_animation
from matplotlib import animation
from IPython.display import display


def display_frames_as_gif(frames, interval=5):
    """Show a sequence of image frames as a looping inline animation.

    The first frame is drawn with ``plt.imshow`` on the current figure and the
    image data is swapped to each subsequent frame by a ``FuncAnimation``.

    Args:
        frames: Sequence of images accepted by ``plt.imshow``; must support
            ``len()`` and integer indexing. One animation step per entry.
        interval: Delay between animation frames, forwarded to
            ``animation.FuncAnimation`` (milliseconds). Defaults to 5, the
            value previously hard-coded here.

    Raises:
        ValueError: If ``frames`` is empty, since there is nothing to draw.
    """
    # Fail early with a clear message instead of an opaque IndexError from frames[0].
    if len(frames) == 0:
        raise ValueError("display_frames_as_gif needs at least one frame")

    patch = plt.imshow(frames[0])
    plt.axis('off')

    def animate(i):
        # Reuse the single image artist; only its pixel data changes per step.
        patch.set_data(frames[i])

    anim = animation.FuncAnimation(plt.gcf(), animate, frames=len(frames), interval=interval)
    display(display_animation(anim, default_mode='loop'))

# List of Environments

In [None]:
# Collect every registered environment id, then print (in sorted order) only
# the ones from a short list of ids of interest.
envids = [spec.id for spec in envs.registry.all()]
wanted_ids = ("CartPole-v1", "FrozenLake8x8-v0", "PongDeterministic-v4")
for registered_id in sorted(eid for eid in envids if eid in wanted_ids):
    print(registered_id)

# FrozenLake

In [None]:
# Create the 8x8 FrozenLake environment and show its initial board.
env = gym.make('FrozenLake8x8-v0')
env.reset()
# FrozenLake is a text environment: it renders by writing the grid to stdout
# ('human') or returning it as a string ('ansi'). 'rgb_array' is not one of its
# modes and, depending on the gym version, can close sys.stdout or raise.
# NOTE(review): in 'human' mode render() is expected to return None, so
# `render_flag` is kept only so the name stays defined for any later cell.
render_flag = env.render(mode='human')

### State and Action Space

In [None]:
# Summarise the observation and action spaces of the current environment:
# for each one print a heading, the space's type and its size `n`.
obs_space = env.observation_space
act_space = env.action_space

space_summaries = (
    ('Observation space', obs_space, "Total {} states"),
    ('Action space', act_space, "Total {} actions"),
)
for heading, space, size_template in space_summaries:
    print(heading)
    print(type(space))
    print(size_template.format(space.n))
    print()

In [None]:
# Play one episode with uniformly random actions, printing the step index and
# the board before every action, then the final board once the episode ends.
env.reset()
for t in range(10000):  # upper bound on steps; the loop stops at `done`
    print('t = %d'%t)
    # FrozenLake only renders as text ('human' prints to stdout, 'ansi' returns
    # a string); 'rgb_array' is unsupported and can break stdout on some gym
    # versions, so render in 'human' mode.
    env.render(mode='human')
    print('')
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    if done:
        break
# Show the terminal state reached by the last step.
env.render(mode='human')
env.close()

### Transition Probability

In [None]:
# Transition table of the unwrapped environment, i.e. P(s'|s,a).
# It is indexed as P[state][action] -> list of outcome entries.
P = env.unwrapped.P

# Top level: one key per state.
print('State:')
print(type(P))
print(P.keys())
print()

# Second level (shown for state 0): one key per action.
print('Action:')
state0_actions = P[0]
print(state0_actions.keys())
state0_action0_outcomes = state0_actions[0]
print(state0_action0_outcomes)
print()

# A single outcome entry for state 0, action 0.
print('Prob, Next state, Reward, Done:')
print(state0_action0_outcomes[0])

In [None]:
# Print every (state, action, outcome) entry of the transition table.
# Each outcome is read as (probability, next state, reward, done flag),
# matching the 'Prob, Next state, Reward, Done' layout printed earlier.
for state, action_table in P.items():
    for act, outcomes in action_table.items():
        for outcome in outcomes:
            prob, next_state, rew = outcome[0], outcome[1], outcome[2]
            print('P(s\':{}|s:{},a:{}) = {:.2}, Reward:{}'.format(next_state, state, act, prob, rew))

# CartPole

In [None]:
# Rebind `env` to the CartPole-v1 task; the cells below use this environment
# instead of the FrozenLake one created earlier.
env = gym.make('CartPole-v1')
# Start an episode and keep whatever reset() returns in `obs`.
obs = env.reset()

In [None]:
# Describe the observation space: its type, shape, dimensionality and the
# upper/lower bounds it reports.
obs_space = env.observation_space
print('Observation space')
print(type(obs_space))
obs_shape = obs_space.shape
print(obs_shape)
print("Dimension:{}".format(obs_shape[0]))
for bound_template, bound in (("High: {}", obs_space.high), ("Low: {}", obs_space.low)):
    print(bound_template.format(bound))
print()

# Describe the action space: its type and the number of actions `n`.
act_space = env.action_space
print('Action space')
print(type(act_space))
n_actions = act_space.n
print("Total {} actions".format(n_actions))
print()

In [None]:
# Play one CartPole episode with uniformly random actions, recording one
# rendered frame per step, then replay the frames as an inline animation.
env.reset()
cum_reward = 0
frames = []
try:
    for t in range(10000):  # upper bound on steps; the loop stops at `done`
        # Render into buffer.
        frames.append(env.render(mode = 'rgb_array'))
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        # Accumulate the episode return (previously initialised but never updated).
        cum_reward += reward
        if done:
            break
finally:
    # Always release the renderer, even if rendering or stepping fails.
    env.close()
print('Episode finished after {} steps, cumulative reward = {}'.format(len(frames), cum_reward))
display_frames_as_gif(frames)