# Modelled Agents

I don't really accomplish anything here.

I wanted to see if CartPole could be solved by a naive approach. I figured all you need to do is try a handful of random agents, model reward against agent parameters with ElasticNetCV, and then simply minimize the negated estimated reward (that is, maximize the estimated score). Seems simple.

As you'll see, this doesn't really work.

In [1]:
# For Jupyter to display GIFs
from IPython.display import HTML

# OpenAI gym
import gym

# Import local script
import agents

# For modelling the scores
import numpy as np
from sklearn.linear_model import ElasticNetCV
from scipy.optimize import minimize

I'm going to write a convenience function to run an agent's attempts. It returns the results as a dictionary.

In [2]:
def trial_agent(agent, trials=100, limit=1000):
    """Run an agent through repeated episodes and summarise its scores.

    An environment is created from ``agent.game``. Each episode runs until
    the environment reports ``done`` or ``limit`` steps have been taken, and
    the episode score is the sum of the rewards received.

    Args:
        agent: Object exposing ``game`` (passed to ``gym.make``),
            ``predict(observation)`` (returns the action to take), and the
            ``w`` and ``pedigree`` attributes copied into the summary.
        trials: Number of episodes to play. Must be at least 1.
        limit: Maximum number of steps per episode.

    Returns:
        dict with keys ``"agent"``, ``"weights"``, ``"pedigree"``,
        ``"minimum"``, ``"maximum"`` and ``"mean"``; the last three
        summarise the per-episode scores.

    Raises:
        ValueError: If ``trials`` is less than 1 (no scores to summarise).
    """
    if trials < 1:
        raise ValueError("trials must be at least 1")

    env = gym.make(agent.game)
    scores = []
    try:
        for _ in range(trials):
            observation = env.reset()
            score = 0
            for _step in range(limit):
                action = agent.predict(observation)
                observation, reward, done, _info = env.step(action)
                # Count the reward before checking for termination so the
                # reward of the final step is not dropped.
                score += reward
                if done:
                    break
            scores.append(score)
    finally:
        # Release the environment even if the agent or environment raises.
        env.close()

    return {
        "agent": agent,
        "weights": agent.w,
        "pedigree": agent.pedigree,
        "minimum": min(scores),
        "maximum": max(scores),
        "mean": sum(scores) / len(scores),
    }

## First-order agent

We'll see that this technique can improve the agent's score, but it's not consistent. Often it seems to make the whole thing worse.

In [3]:
# Seed the population: evaluate 25 first-order agents created with
# weights=None (presumably randomly initialised -- see agents.LinearAgent).
results = [
    trial_agent(agents.LinearAgent(weights=None))
    for _ in range(25)
]

# Pick the first agent with the highest mean score.
winner = max(results, key=lambda entry: entry["mean"])
print(winner)

# Whatever render() returns is used as the image source for the notebook.
gif_source = winner["agent"].render("model_simple_test.gif")
HTML(f"<img src='{gif_source}'>")

{'agent': <agents.LinearAgent object at 0x7fd49675af28>, 'weights': array([ 0.06445774,  0.26379023, -0.77444378, -0.00948223,  0.72288678]), 'pedigree': ['-1', '-1', '-1', '-1', '-1'], 'minimum': 31.0, 'maximum': 120.0, 'mean': 54.73}


In [4]:
for rounds in range(25):
    # Fit a surrogate model of the negated mean score against agent weights;
    # negating turns "maximise the score" into a minimisation problem.
    X = np.array([entry["agent"].w for entry in results])
    y = [-entry["mean"] for entry in results]
    model = ElasticNetCV()
    model.fit(X, y)

    def function(x):
        # minimize() expects a scalar objective, so unwrap the single
        # prediction from the length-1 array returned by predict().
        return model.predict([x])[0]

    # Start from a randomly chosen existing agent and keep every weight in
    # [-1, 1]. One bound per weight, sized from the data rather than
    # hard-coded, so the loop works for any number of weights.
    optim_result = minimize(
        function,
        X[np.random.randint(X.shape[0]), :],
        bounds=[(-1, 1)] * X.shape[1],
    )
    new_weights = optim_result["x"]

    print(f"New weights: {new_weights}")

    # Re-evaluate every existing agent alongside the newly proposed one.
    new_agents = [entry["agent"] for entry in results]
    new_agents.append(agents.LinearAgent(weights=new_weights))
    results = [trial_agent(agent) for agent in new_agents]

    print(f"Best mean score: {max(entry['mean'] for entry in results)}")

New weights: [ 1.          0.55519479 -1.          1.          1.        ]
Best mean score: 50.07
New weights: [ 1. -1. -1.  1.  1.]
Best mean score: 49.54
New weights: [ 1.          0.47419455 -1.          1.          1.        ]
Best mean score: 53.34
New weights: [ 0.53803873 -0.33275824 -1.          1.          1.        ]
Best mean score: 53.07
New weights: [-0.50941169  0.08646901 -1.          1.          1.        ]
Best mean score: 51.28
New weights: [-1. -1. -1.  1.  1.]
Best mean score: 52.16
New weights: [ 0.35302354  0.55519479 -1.          1.          1.        ]
Best mean score: 54.68
New weights: [ 1.  1. -1.  1.  1.]
Best mean score: 52.14
New weights: [-0.083834   -0.62977961 -1.          1.          1.        ]
Best mean score: 93.94
New weights: [-1. -1. -1.  1.  1.]
Best mean score: 107.66
New weights: [-1. -1. -1.  1.  1.]
Best mean score: 92.65
New weights: [ 1. -1. -1.  1.  1.]
Best mean score: 124.01
New weights: [-1. -1. -1.  1.  1.]
Best mean score: 109.86
New

In [5]:
# Pick the first agent with the highest mean score after the modelling rounds.
winner = max(results, key=lambda entry: entry["mean"])
print(winner)

# Whatever render() returns is used as the image source for the notebook.
gif_source = winner["agent"].render("model_simple.gif")
HTML(f"<img src='{gif_source}'>")

{'agent': <agents.LinearAgent object at 0x7fd48b7c4f28>, 'weights': array([-0.083834  , -0.62977961, -1.        ,  1.        ,  1.        ]), 'pedigree': ['-1', '-1', '-1', '-1', '-1'], 'minimum': 35.0, 'maximum': 499.0, 'mean': 94.75}


## Second-order agent

What about a second-order linear agent? Well, not much more luck there.

In [6]:
# Seed the population: evaluate 25 second-order agents created without
# explicit weights (presumably randomly initialised -- see agents.LinearAgent).
results = [
    trial_agent(agents.LinearAgent(None, order=2))
    for _ in range(25)
]

# Pick the first agent with the highest mean score.
winner = max(results, key=lambda entry: entry["mean"])
print(winner)

# Whatever render() returns is used as the image source for the notebook.
gif_source = winner["agent"].render("model_complex_test.gif")
HTML(f"<img src='{gif_source}'>")

{'agent': <agents.LinearAgent object at 0x7fd48b7c49b0>, 'weights': array([-0.07696393, -0.41381943, -0.39084882,  0.01602592,  0.9757666 ,
       -0.72577967,  0.28824408, -0.2641107 , -0.99834271]), 'pedigree': ['-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1'], 'minimum': 34.0, 'maximum': 114.0, 'mean': 57.95}


In [7]:
for rounds in range(25):
    # Fit a surrogate model of the negated mean score against agent weights;
    # negating turns "maximise the score" into a minimisation problem.
    X = np.array([entry["agent"].w for entry in results])
    y = [-entry["mean"] for entry in results]
    model = ElasticNetCV()
    model.fit(X, y)

    def function(x):
        # minimize() expects a scalar objective, so unwrap the single
        # prediction from the length-1 array returned by predict().
        return model.predict([x])[0]

    # Start from a randomly chosen existing agent and keep every weight in
    # [-1, 1]. One bound per weight, sized from the data rather than
    # hard-coded, so the loop works for any number of weights.
    optim_result = minimize(
        function,
        X[np.random.randint(X.shape[0]), :],
        bounds=[(-1, 1)] * X.shape[1],
    )
    new_weights = optim_result["x"]

    print(f"New weights: {new_weights}")

    # Re-evaluate every existing agent alongside the newly proposed one.
    new_agents = [entry["agent"] for entry in results]
    new_agents.append(agents.LinearAgent(weights=new_weights, order=2))
    results = [trial_agent(agent) for agent in new_agents]

    print(f"Best mean score: {max(entry['mean'] for entry in results)}")

New weights: [-0.56505166 -0.4292349  -0.05185146 -0.27065133  0.75084665  0.25098994
 -0.96915118 -0.4028917  -0.38118921]
Best mean score: 63.16
New weights: [ 0.43788251  0.45405379 -0.96720325  0.28977011 -0.3975548   0.93323486
 -0.4434338   0.79899917 -0.50224365]
Best mean score: 63.62
New weights: [-0.84235024  0.78402661 -0.38574107 -0.09165103  1.         -0.53242327
 -0.8334698   0.36825708 -0.7734683 ]
Best mean score: 60.0
New weights: [ 0.50576435 -0.9849318   0.6877891   0.29483987  0.46107278  0.35342359
  0.91627621 -0.83574404  0.36608969]
Best mean score: 65.27
New weights: [-0.30019676  0.07596906  0.84034081  0.81476214 -0.0422685  -0.96825594
 -0.90818088 -0.84462504 -0.88879345]
Best mean score: 63.69
New weights: [-0.84235024  0.78402661 -0.38574107 -0.09165103  0.9329098  -0.53242327
 -0.8334698   0.36825708 -0.7734683 ]
Best mean score: 60.71
New weights: [ 0.70706614 -0.08600052  0.83260981 -0.39733541 -0.80570633 -0.6913631
  0.82276298  0.44627394  0.955987

In [8]:
# Pick the first agent with the highest mean score after the modelling rounds.
winner = max(results, key=lambda entry: entry["mean"])
print(winner)

# Whatever render() returns is used as the image source for the notebook.
gif_source = winner["agent"].render("model_complex.gif")
HTML(f"<img src='{gif_source}'>")

{'agent': <agents.LinearAgent object at 0x7fd48b7c49b0>, 'weights': array([-0.07696393, -0.41381943, -0.39084882,  0.01602592,  0.9757666 ,
       -0.72577967,  0.28824408, -0.2641107 , -0.99834271]), 'pedigree': ['-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1'], 'minimum': 34.0, 'maximum': 131.0, 'mean': 59.59}


## A simple interpolation approach

Maybe I can just pick the two best agents, average them, and win? This pseudo-genetic strategy doesn't really work either.

In [9]:
# Seed the population: evaluate 25 first-order agents created with
# weights=None (presumably randomly initialised -- see agents.LinearAgent).
results = [
    trial_agent(agents.LinearAgent(weights=None))
    for _ in range(25)
]

# Pick the first agent with the highest mean score.
winner = max(results, key=lambda entry: entry["mean"])
print(winner)

# Whatever render() returns is used as the image source for the notebook.
gif_source = winner["agent"].render("average_simple_test.gif")
HTML(f"<img src='{gif_source}'>")

{'agent': <agents.LinearAgent object at 0x7fd48b7c42b0>, 'weights': array([ 0.35443014, -0.78348071, -0.66300644,  0.64119488,  0.75963163]), 'pedigree': ['-1', '-1', '-1', '-1', '-1'], 'minimum': 22.0, 'maximum': 77.0, 'mean': 34.9}


In [10]:
for rounds in range(25):
    # Rank by mean score, best first, and keep the two leaders.
    top_two = sorted(
        results, key=lambda entry: entry["mean"], reverse=True
    )[:2]

    # The new agent's weights are the element-wise average of the leaders'.
    parent_weights = np.array([entry["weights"] for entry in top_two])
    new_weights = np.mean(parent_weights, axis=0)

    print(f"New weights: {new_weights}")

    # Re-evaluate every existing agent alongside the averaged newcomer.
    new_agents = [entry["agent"] for entry in results]
    new_agents.append(agents.LinearAgent(weights=new_weights, order=1))
    results = [trial_agent(agent) for agent in new_agents]

    print(f"Best mean score: {max(entry['mean'] for entry in results)}")

New weights: [ 0.17377927 -0.11580529 -0.73357662  0.72638408  0.1810018 ]
Best mean score: 39.44
New weights: [ 0.2641047  -0.449643   -0.69829153  0.68378948  0.47031672]
Best mean score: 41.47
New weights: [ 0.21894198 -0.28272414 -0.71593407  0.70508678  0.32565926]
Best mean score: 40.82
New weights: [ 0.21894198 -0.28272414 -0.71593407  0.70508678  0.32565926]
Best mean score: 39.36
New weights: [ 0.19636063 -0.19926471 -0.72475534  0.71573543  0.25333053]
Best mean score: 41.77
New weights: [ 0.18506995 -0.157535   -0.72916598  0.72105975  0.21716617]
Best mean score: 42.86
New weights: [ 0.17942461 -0.13667014 -0.7313713   0.72372192  0.19908399]
Best mean score: 40.48
New weights: [ 0.17660194 -0.12623771 -0.73247396  0.725053    0.19004289]
Best mean score: 43.19
New weights: [ 0.19071529 -0.17839986 -0.72696066  0.71839759  0.23524835]
Best mean score: 45.19
New weights: [ 0.17942461 -0.13667014 -0.7313713   0.72372192  0.19908399]
Best mean score: 44.11
New weights: [ 0.179

In [11]:
# Pick the first agent with the highest mean score after the averaging rounds.
winner = max(results, key=lambda entry: entry["mean"])
print(winner)

# Whatever render() returns is used as the image source for the notebook.
gif_source = winner["agent"].render("average_simple.gif")
HTML(f"<img src='{gif_source}'>")

{'agent': <agents.LinearAgent object at 0x7fd496d81400>, 'weights': array([ 0.18506995, -0.157535  , -0.72916598,  0.72105975,  0.21716617]), 'pedigree': ['-1', '-1', '-1', '-1', '-1'], 'minimum': 22.0, 'maximum': 175.0, 'mean': 45.2}
