In [1]:
from IPython.display import HTML

import gym

# Import local script
import actors

# For modelling the scores
import numpy as np
from sklearn.linear_model import ElasticNetCV
from scipy.optimize import minimize

In [2]:
def trial_actor(actor, trials=100, limit=1000):
    env = gym.make('CartPole-v0')

    scores = []
    for i in range(trials):
        observation = env.reset()
        score = 0
        for t in range(limit):
            action = actor.predict(observation)
            observation, reward, done, info = env.step(action)
            if done:
                break
            score += reward
        scores.append(score)
        
    data_dict = {
        "actor" : actor, 
        "weights" : actor.w, 
        "minimum" : min(scores), 
        "maximum" : max(scores), 
        "mean" : sum(scores)/len(scores)
    }
    
    env.close()
    
    return data_dict

## Simple actor

We'll see that this technique can improve the agent, but it's not consistent. I was bery nearly able to solve it once, by luck, with a score of 191.

In [3]:
results = []

for a in range(25):
    results.append(trial_actor(actors.SimpleActor(None)))

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='./{winner['actor'].render('model_simple_test.gif')}'>")

{'actor': <actors.SimpleActor object at 0x7fab4a791908>, 'weights': array([-0.02130483, -0.5729782 ,  0.37991054,  0.12018435,  0.82631067]), 'minimum': 116.0, 'maximum': 199.0, 'mean': 169.32}


In [4]:
for rounds in range(25):
    X = np.array([i["actor"].w for i in results])
    y = [-i["mean"] for i in results]
    model = ElasticNetCV()

    model.fit(X, y)

    def function(x):
        return model.predict([x])

    optim_result = minimize(
        function, 
        X[np.random.randint(X.shape[0]),:],
        bounds = ((-1,1),(-1,1),(-1,1),(-1,1),(-1,1))
        )

    new_weights = optim_result["x"]
    
    print(f"New weights: {new_weights}")

    new_actors = [i["actor"] for i in results]
    new_actors.append(actors.SimpleActor(new_weights))

    results = []

    for actor in new_actors:
        results.append(trial_actor(actor))

    print(f"Best mean score: {max([i['mean'] for i in results])}")

New weights: [-1. -1.  1. -1.  1.]
Best mean score: 173.01
New weights: [-0.75228254 -1.          1.          1.          1.        ]
Best mean score: 170.73
New weights: [ 1. -1.  1.  1.  1.]
Best mean score: 169.84
New weights: [ 1. -1.  1.  1.  1.]
Best mean score: 168.77
New weights: [ 0.05097823 -1.          1.         -0.15761214  1.        ]
Best mean score: 192.54
New weights: [-1. -1.  1. -1.  1.]
Best mean score: 192.58
New weights: [ 0.69013871 -1.          1.         -1.          1.        ]
Best mean score: 192.68
New weights: [-0.98354755 -1.          1.          1.          1.        ]
Best mean score: 192.29
New weights: [-0.30007058 -1.          1.         -0.46680355  1.        ]
Best mean score: 192.37
New weights: [-0.30007058 -1.          1.         -0.46680355  1.        ]
Best mean score: 191.97
New weights: [-0.30802376 -1.          1.          0.61807036  1.        ]
Best mean score: 193.01
New weights: [ 0.55365654 -1.          1.          1.          1.      

In [5]:
winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='./{winner['actor'].render('model_simple.gif')}'>")

{'actor': <actors.SimpleActor object at 0x7fab3f7623c8>, 'weights': array([ 0.05097823, -1.        ,  1.        , -0.15761214,  1.        ]), 'minimum': 155.0, 'maximum': 199.0, 'mean': 191.43}


## Complex Actor

In [6]:
results = []

for a in range(25):
    results.append(trial_actor(actors.ComplexActor(None)))

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='./{winner['actor'].render('model_complex_test.gif')}'>")

{'actor': <actors.ComplexActor object at 0x7fab3f762320>, 'weights': array([-0.1206004 , -0.03058163,  0.74918929, -0.82805002,  0.94920338,
       -0.65548428,  0.56143857, -0.17891737,  0.4913803 ]), 'minimum': 25.0, 'maximum': 57.0, 'mean': 40.41}


In [7]:
for rounds in range(25):
    X = np.array([i["actor"].w for i in results])
    y = [-i["mean"] for i in results]
    model = ElasticNetCV()

    model.fit(X, y)

    def function(x):
        return model.predict([x])

    optim_result = minimize(
        function, 
        X[np.random.randint(X.shape[0]),:],
        bounds = ((-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1)), 
        options={"maxiter":1000, "disp":False}
        )

    new_weights = optim_result["x"]
    
    print(f"New weights: {new_weights}")

    new_actors = [i["actor"] for i in results]
    new_actors.append(actors.ComplexActor(new_weights))

    results = []

    for actor in new_actors:
        results.append(trial_actor(actor))

    print(f"Best mean score: {max([i['mean'] for i in results])}")

New weights: [-1.          0.29731395  0.51255066 -1.          1.         -1.
  1.         -0.0142812   1.        ]
Best mean score: 39.76
New weights: [-1.         -0.55284318  0.65319236 -1.          1.         -1.
  1.          0.01674042  1.        ]
Best mean score: 40.42
New weights: [ 0.81870783 -0.5285352   0.91420731  0.06376509  1.         -1.
  1.          0.17886725  1.        ]
Best mean score: 39.36
New weights: [-1.         -0.51773146 -0.17583907 -1.          1.         -1.
  0.94807473  0.75947424  1.        ]
Best mean score: 39.17
New weights: [-1.          0.29731395  0.51255066 -1.          1.         -1.
  1.         -0.0142812   1.        ]
Best mean score: 38.17
New weights: [-1.         -0.03433781 -0.35463149 -1.          1.         -1.
  1.          0.22362722  1.        ]
Best mean score: 40.04
New weights: [-1.         -0.01319001  0.51321105 -1.          1.         -1.
  1.          0.75866448  1.        ]
Best mean score: 39.68
New weights: [-1.         -

In [8]:
winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='./{winner['actor'].render('model_complex.gif')}'>")

{'actor': <actors.ComplexActor object at 0x7fab3f762320>, 'weights': array([-0.1206004 , -0.03058163,  0.74918929, -0.82805002,  0.94920338,
       -0.65548428,  0.56143857, -0.17891737,  0.4913803 ]), 'minimum': 24.0, 'maximum': 62.0, 'mean': 40.41}
