In [1]:
from IPython.display import HTML

import PIL.Image

import numpy as np
import gym

# Import local script
import actors

## What is cartpole?

https://github.com/openai/gym/wiki/CartPole-v0

In [2]:
env = gym.make('CartPole-v0')

# Random actor
observation = env.reset()
cum_reward = 0
frames = []
for t in range(1000):
    # Render into buffer. 
    # You will still see the window.
    frames.append(PIL.Image.fromarray(env.render(mode = 'rgb_array'), "RGB"))
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    if done:
        break
env.close()

# Save the GIF
frames[0].save('random_actor.gif', format='GIF', append_images=frames[1:], save_all=True, duration=10, loop=0)

# Display the GIF in Jupyter
HTML('<img src="./random_actor.gif">')

In [3]:
# Create a simple first-order model
def simple_actor(w, x):
    z = w[0] + w[1]*x[0] + w[2]*x[1]
    a = 1 / (1 + np.exp(-z))
    return int(round(a))

random_weight = np.random.uniform(-1, 1, 3)
random_inputs = np.random.rand(4)

simple_actor(random_weight, random_inputs)

1

In [4]:
# Create a more complex second-order model
def complex_actor(w, x):
    # First-order part
    z = w[0] + w[1]*x[0] + w[2]*x[1]
    
    # Interaction term
    x += w[3]*x[0]*x[1]
    
    # Second-order part
    z += w[4]*x[0]**2 + w[5]*x[1]**2
    
    a = 1 / (1 + np.exp(-z))
    return int(round(a))

random_weight = np.random.uniform(-1, 1, 6)
random_inputs = np.random.rand(4)

complex_actor(random_weight, random_inputs)

1

## Trying the simple model randomly

In [5]:
env = gym.make('CartPole-v0')

best_w = None
best_score = 0
# Simple model
for m in range(25):
    actor = actors.SimpleActor()
    scores = []
    for i in range(100):
        observation = env.reset()
        score = 0
        frames = []
        for t in range(1000):
            action = actor.predict(observation)
            observation, reward, done, info = env.step(action)
            if done:
                break
            score += reward
        scores.append(score)
    print(f"[{m+1:3}] Minimum {min(scores):5.1f} Maximum {max(scores):5.1f} Average {sum(scores)/len(scores):5.1f}")
    if sum(scores)/len(scores) > best_score:
        best_score = sum(scores)/len(scores)
        best_w = actor
env.close()
print(best_score)
print(best_w)

[  1] Minimum  10.0 Maximum  20.0 Average  14.1
[  2] Minimum  20.0 Maximum  95.0 Average  54.6
[  3] Minimum   7.0 Maximum  10.0 Average   8.4
[  4] Minimum   8.0 Maximum  13.0 Average  10.2
[  5] Minimum  14.0 Maximum  47.0 Average  23.9
[  6] Minimum  19.0 Maximum  94.0 Average  37.3
[  7] Minimum   7.0 Maximum  10.0 Average   8.5
[  8] Minimum   7.0 Maximum  10.0 Average   8.4
[  9] Minimum   7.0 Maximum  10.0 Average   8.3
[ 10] Minimum   7.0 Maximum  10.0 Average   8.4
[ 11] Minimum  20.0 Maximum  60.0 Average  35.5
[ 12] Minimum   7.0 Maximum  10.0 Average   8.4
[ 13] Minimum   7.0 Maximum  10.0 Average   8.3
[ 14] Minimum   7.0 Maximum  10.0 Average   8.4
[ 15] Minimum  35.0 Maximum 121.0 Average  82.6
[ 16] Minimum   7.0 Maximum  10.0 Average   8.4
[ 17] Minimum   7.0 Maximum  10.0 Average   8.3
[ 18] Minimum   7.0 Maximum  10.0 Average   8.6
[ 19] Minimum  11.0 Maximum 199.0 Average  19.8
[ 20] Minimum  14.0 Maximum  59.0 Average  28.8
[ 21] Minimum   9.0 Maximum  15.0 Averag

In [6]:
HTML(f"<img src='./{best_w.render('random_simple.gif')}'>")

## Trying the complex model randomly

In [7]:
env = gym.make('CartPole-v0')

best_w = None
best_score = 0
# Complex model
for m in range(25):
    actor = actors.ComplexActor()
    scores = []
    for i in range(100):
        observation = env.reset()
        score = 0
        frames = []
        for t in range(1000):
            action = actor.predict(observation)
            observation, reward, done, info = env.step(action)
            if done:
                break
            score += reward
        scores.append(score)
    print(f"[{m+1:3}] Minimum {min(scores):5.1f} Maximum {max(scores):5.1f} Average {sum(scores)/len(scores):5.1f}")
    if sum(scores)/len(scores) > best_score:
        best_score = sum(scores)/len(scores)
        best_w = actor
env.close()
print(best_score)
print(best_w)

[  1] Minimum   8.0 Maximum  13.0 Average   9.9
[  2] Minimum  12.0 Maximum  91.0 Average  29.4
[  3] Minimum  13.0 Maximum  21.0 Average  17.6
[  4] Minimum   8.0 Maximum  11.0 Average   9.5
[  5] Minimum   7.0 Maximum  10.0 Average   8.9
[  6] Minimum   7.0 Maximum  10.0 Average   8.4
[  7] Minimum   7.0 Maximum  10.0 Average   8.3
[  8] Minimum   7.0 Maximum  10.0 Average   8.4
[  9] Minimum   9.0 Maximum  15.0 Average  11.3
[ 10] Minimum   7.0 Maximum  10.0 Average   8.4
[ 11] Minimum   7.0 Maximum  10.0 Average   8.3
[ 12] Minimum   7.0 Maximum  10.0 Average   8.4
[ 13] Minimum   7.0 Maximum  10.0 Average   8.3
[ 14] Minimum  11.0 Maximum  17.0 Average  13.7
[ 15] Minimum   7.0 Maximum  10.0 Average   8.3
[ 16] Minimum   7.0 Maximum   9.0 Average   8.2
[ 17] Minimum  12.0 Maximum  18.0 Average  14.9
[ 18] Minimum  10.0 Maximum  15.0 Average  12.6
[ 19] Minimum   7.0 Maximum  10.0 Average   8.3
[ 20] Minimum  15.0 Maximum  26.0 Average  20.0
[ 21] Minimum  14.0 Maximum  25.0 Averag

In [8]:
HTML(f"<img src='./{best_w.render('random_complex.gif')}'>")