# Modelled Agents

I don't really accomplish anything here.

I wanted to see if cartpole could be solved by a naive approach. I figured all you need to do is try a handful of random agents, model reward against agent parameters with ElasticCV, and then simply minimize (well, maximize) the estimated reward/score. Seems simple.

As you'll see, this doesn't really work.

In [1]:
# For Jupyter to display GIFs
from IPython.display import HTML

# OpenAI gym
import gym

# Import local script
import agents

# For modelling the scores
import numpy as np
from sklearn.linear_model import ElasticNetCV
from scipy.optimize import minimize

I'm going to create myself a convenience function to manage the agents' attempts. The results are saved in a dictionary.

In [2]:
def trial_agent(agent, trials=100, limit=1000):
    env = gym.make(agent.game)

    scores = []
    for i in range(trials):
        observation = env.reset()
        score = 0
        for t in range(limit):
            action = agent.predict(observation)
            observation, reward, done, info = env.step(action)
            if done:
                break
            score += reward
        scores.append(score)
        
    data_dict = {
        "agent" : agent, 
        "weights" : agent.w, 
        "pedigree" : agent.pedigree, 
        "minimum" : min(scores), 
        "maximum" : max(scores), 
        "mean" : sum(scores)/len(scores)
    }
    
    env.close()
    
    return data_dict

## First-order agent

We'll see that this technique can improve the agent's score, but it's not consistent. Often it seems to make the whole thing worse.

In [3]:
results = []

for a in range(25):
    results.append(trial_agent(agents.LinearAgent(weights=None)))

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='{winner['agent'].render('model_simple_test.gif')}'>")

{'agent': <agents.LinearAgent object at 0x7f2146b9dac8>, 'weights': array([[-0.04767383],
       [-0.41343628],
       [-0.8815537 ],
       [ 0.5984474 ],
       [ 0.94331561]]), 'pedigree': ['-1', '-1', '-1', '-1', '-1'], 'minimum': 38.0, 'maximum': 499.0, 'mean': 140.49}


In [4]:
for rounds in range(25):
    X = np.array([i["weights"].flatten() for i in results])
    y = [-i["mean"] for i in results]
    model = ElasticNetCV()

    model.fit(X, y)

    def function(x):
        return model.predict([x])

    optim_result = minimize(
        function, 
        X[np.random.randint(X.shape[0]),:],
        bounds = ((-1,1),(-1,1),(-1,1),(-1,1),(-1,1))
        )

    new_weights = optim_result["x"]
    
    print(f"New weights: {new_weights}")

    new_agents = [i["agent"] for i in results]
    new_agents.append(agents.LinearAgent(weights=new_weights))

    results = []

    for agent in new_agents:
        results.append(trial_agent(agent))

    print(f"Best mean score: {max([i['mean'] for i in results])}")

New weights: [ 1. -1. -1.  1.  1.]
Best mean score: 140.65
New weights: [-0.5772359 -1.        -1.         1.         1.       ]
Best mean score: 169.96
New weights: [ 0.01468095 -1.         -1.          1.          1.        ]
Best mean score: 145.03
New weights: [ 1. -1. -1.  1.  1.]
Best mean score: 149.6
New weights: [ 1. -1. -1.  1.  1.]
Best mean score: 142.57
New weights: [-1. -1. -1.  1.  1.]
Best mean score: 144.56
New weights: [ 0.6471874 -1.        -1.         1.         1.       ]
Best mean score: 151.56
New weights: [ 0.3738965 -1.        -1.         1.         1.       ]
Best mean score: 128.38
New weights: [ 0.40622772 -1.         -1.          1.          1.        ]
Best mean score: 157.13
New weights: [ 1. -1. -1.  1.  1.]
Best mean score: 158.38
New weights: [ 0.04067076 -1.         -1.          1.          1.        ]
Best mean score: 132.34
New weights: [-1. -1. -1.  1.  1.]
Best mean score: 144.26
New weights: [ 0.04067076 -1.         -1.          1.          1.   

In [5]:
winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='{winner['agent'].render('model_simple.gif')}'>")

{'agent': <agents.LinearAgent object at 0x7f2146b9dac8>, 'weights': array([[-0.04767383],
       [-0.41343628],
       [-0.8815537 ],
       [ 0.5984474 ],
       [ 0.94331561]]), 'pedigree': ['-1', '-1', '-1', '-1', '-1'], 'minimum': 40.0, 'maximum': 499.0, 'mean': 133.46}


## Second-order agent

What about a second-order linear agent? Well, not much more luck there.

In [6]:
results = []

for a in range(25):
    results.append(trial_agent(agents.LinearAgent(None, order=2)))

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='{winner['agent'].render('model_complex_test.gif')}'>")

{'agent': <agents.LinearAgent object at 0x7f2139c8e0b8>, 'weights': array([[ 0.02993938],
       [-0.07200166],
       [-0.02676448],
       [-0.44498421],
       [ 0.70079584],
       [-0.37041175],
       [-0.90085725],
       [ 0.47640766],
       [-0.18951579],
       [-0.49825458],
       [ 0.5753615 ],
       [-0.79484706],
       [ 0.40982099],
       [ 0.43879367],
       [-0.24865189]]), 'pedigree': ['-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1'], 'minimum': 51.0, 'maximum': 171.0, 'mean': 76.95}


In [7]:
for rounds in range(25):
    X = np.array([list(i["agent"].w.flatten()) for i in results])
    y = [-i["mean"] for i in results]
    model = ElasticNetCV()

    model.fit(X, y)

    def function(x):
        return model.predict([x])

    optim_result = minimize(
        function, 
        X[np.random.randint(X.shape[0]),:],
        bounds = ((-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1),(-1,1))
        )

    new_weights = optim_result["x"]
    
    print(f"New weights: {new_weights}")

    new_agents = [i["agent"] for i in results]
    new_agents.append(agents.LinearAgent(weights=new_weights, order=2))

    results = []

    for agent in new_agents:
        results.append(trial_agent(agent))

    print(f"Best mean score: {max([i['mean'] for i in results])}")

New weights: [ 1.         -0.98057026 -1.          0.76308348  1.          0.14146411
 -1.          1.          1.          0.89912533  1.         -0.25412356
  1.          0.52808718  1.        ]
Best mean score: 74.4
New weights: [-0.57745244 -0.81514094  0.19659757  0.27986783  0.71523956  0.29032466
  0.94202693 -0.29160399  0.61232511 -0.70212115 -0.57001023 -0.06248123
  0.45745882 -0.02075776  0.27565627]
Best mean score: 76.42
New weights: [-0.26042569  0.0638245  -0.43251775  0.94336785  0.72831443  0.38260426
 -0.95668549 -0.33951856 -0.2318254   0.51604825 -0.64357834 -0.60017052
 -0.79884325 -0.63089202 -0.05974894]
Best mean score: 75.3
New weights: [-0.757083   -0.72731039 -1.          0.9049504   1.          0.09401692
 -1.         -0.1936582   0.42634316  0.69140615  0.36088997 -0.85579091
 -0.44700762  0.50628162  0.93339813]
Best mean score: 77.89
New weights: [-0.75302379 -0.923601   -1.         -0.42399257  1.         -0.38219842
 -1.          0.2765027   0.19400621

In [8]:
winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='{winner['agent'].render('model_complex.gif')}'>")

{'agent': <agents.LinearAgent object at 0x7f2139c8e0b8>, 'weights': array([[ 0.02993938],
       [-0.07200166],
       [-0.02676448],
       [-0.44498421],
       [ 0.70079584],
       [-0.37041175],
       [-0.90085725],
       [ 0.47640766],
       [-0.18951579],
       [-0.49825458],
       [ 0.5753615 ],
       [-0.79484706],
       [ 0.40982099],
       [ 0.43879367],
       [-0.24865189]]), 'pedigree': ['-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1', '-1'], 'minimum': 51.0, 'maximum': 143.0, 'mean': 75.6}


## A simple interpolation approach

Maybe I can just pick the two best agents, average them, and win? This pseudo-genetic strategy doesn't really work either.

In [9]:
results = []

for a in range(25):
    results.append(trial_agent(agents.LinearAgent(weights=None)))

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='{winner['agent'].render('average_simple_test.gif')}'>")

{'agent': <agents.LinearAgent object at 0x7f214720c7b8>, 'weights': array([[0.02795687],
       [0.54463446],
       [0.63246507],
       [0.22635165],
       [0.71787658]]), 'pedigree': ['-1', '-1', '-1', '-1', '-1'], 'minimum': 50.0, 'maximum': 160.0, 'mean': 87.0}


In [10]:
for rounds in range(25):
    top_scores = sorted(results, key=lambda x: x["mean"], reverse=True)
    
    top_two = top_scores[:2]

    new_weights = np.mean(np.array([i["weights"] for i in top_two]), axis=0)
    
    print(f"New weights: {new_weights}")

    new_agents = [i["agent"] for i in results]
    new_agents.append(agents.LinearAgent(weights=new_weights, order=1))

    results = []

    for agent in new_agents:
        results.append(trial_agent(agent))

    print(f"Best mean score: {max([i['mean'] for i in results])}")

New weights: [[0.00164897]
 [0.62903522]
 [0.42023632]
 [0.11298344]
 [0.6704636 ]]
Best mean score: 89.4
New weights: [[0.01480292]
 [0.58683484]
 [0.52635069]
 [0.16966755]
 [0.69417009]]
Best mean score: 89.08
New weights: [[0.00822594]
 [0.60793503]
 [0.47329351]
 [0.14132549]
 [0.68231684]]
Best mean score: 88.39
New weights: [[0.00493745]
 [0.61848512]
 [0.44676492]
 [0.12715447]
 [0.67639022]]
Best mean score: 87.95
New weights: [[0.0065817 ]
 [0.61321008]
 [0.46002921]
 [0.13423998]
 [0.67935353]]
Best mean score: 90.73
New weights: [[0.00575958]
 [0.6158476 ]
 [0.45339706]
 [0.13069722]
 [0.67787188]]
Best mean score: 88.82
New weights: [[0.00534851]
 [0.61716636]
 [0.45008099]
 [0.12892585]
 [0.67713105]]
Best mean score: 89.82
New weights: [[0.00329321]
 [0.62376017]
 [0.43350062]
 [0.12006895]
 [0.67342691]]
Best mean score: 88.81
New weights: [[0.00432086]
 [0.62046327]
 [0.4417908 ]
 [0.1244974 ]
 [0.67527898]]
Best mean score: 90.95
New weights: [[0.00462916]
 [0.6194741

In [11]:
winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='{winner['agent'].render('average_simple.gif')}'>")

{'agent': <agents.LinearAgent object at 0x7f2139c8ef98>, 'weights': array([[0.00478331],
       [0.61897966],
       [0.44552139],
       [0.1264902 ],
       [0.67611241]]), 'pedigree': ['-1', '-1', '-1', '-1', '-1'], 'minimum': 54.0, 'maximum': 140.0, 'mean': 89.55}
