# Modifying Rewards

I'm a bit disappointed that the agents produced by the genetic algorithm tend to drift too far from the center. If I add a light penalty for distance, will they avoid this?

Reading [the wiki](https://github.com/openai/gym/wiki/CartPole-v0) helps a lot in figuring this out! That page has the parameters for cart-pole v0, which is just a shorter version of the cart-pole v1 we use.

In [1]:
# Display GIFs in Jupyter
from IPython.display import HTML

# OpenAI gym
import gym

# Import local script
import agents

# numpy
import numpy as np

# Process pool used to trial agents in parallel and speed up the algorithm
from multiprocessing import Pool
# Number of worker processes passed to Pool(); set this to your number of CPU cores
n_jobs = 4

Here I add a penalty of `abs(observation[0]/10)` to every step's reward. Observation 0 is the cart's distance from the center, within `[-2.6, 2.6]`.

In [2]:
def trial_agent(agent, trials=100, limit=1000):
    """Play an agent for several episodes and summarise its penalised scores.

    Each step that does not end the episode contributes
    ``reward - abs(observation[0] / 10)`` to the episode score, i.e. the
    environment reward minus a light penalty on the first observation
    component. The step on which the environment reports ``done`` is not
    counted at all.

    Args:
        agent: Object exposing ``game`` (environment id passed to
            ``gym.make``), ``predict(observation)``, ``w`` and ``pedigree``.
        trials: Number of episodes to play; must be at least 1.
        limit: Maximum number of steps per episode.

    Returns:
        A dict with the keys ``"agent"``, ``"weights"``, ``"pedigree"``,
        ``"minimum"``, ``"maximum"`` and ``"mean"`` (the last three computed
        over the per-episode scores).

    Raises:
        ValueError: If ``trials`` is smaller than 1, since no score
            statistics can be computed from zero episodes.
    """
    if trials < 1:
        raise ValueError("trials must be at least 1")

    env = gym.make(agent.game)

    scores = []
    try:
        for _ in range(trials):
            # NOTE: relies on the classic gym API (reset() returns the
            # observation, step() returns a 4-tuple).
            observation = env.reset()
            score = 0
            for _ in range(limit):
                action = agent.predict(observation)
                observation, reward, done, _info = env.step(action)
                if done:
                    break
                # Add a light penalty for distance
                score += reward - abs(observation[0]/10)
            scores.append(score)
    finally:
        # Release the environment even when the agent or the env raises
        env.close()

    data_dict = {
        "agent" : agent,
        "weights" : agent.w,
        "pedigree" : agent.pedigree,
        "minimum" : min(scores),
        "maximum" : max(scores),
        "mean" : sum(scores)/len(scores)
    }

    return data_dict

In [3]:
def genetic_algorithm(results, old=5, new=95, n_parents=2, generations=25,
                      mutation_rate=0.01, mutation_amount=0.5, order=1, max_score=499.0,
                      game="CartPole-v1"):
    """Evolve a population of agents through elitism, crossover and mutation.

    Every generation keeps the ``old`` best agents, breeds ``new`` children
    from fitness-weighted parents, and trials the whole new population
    (survivors included) with ``trial_agent`` in a process pool.

    Args:
        results: List of dicts as produced by ``trial_agent``; the keys
            ``"agent"``, ``"weights"``, ``"pedigree"`` and ``"mean"`` are read.
        old: Number of top agents carried over unchanged (elitism).
        new: Number of children bred per generation.
        n_parents: Number of distinct parents sampled for each child.
        generations: Maximum number of generations to run.
        mutation_rate: Per-gene probability of a mutation.
        mutation_amount: Standard deviation of the normal noise added to a
            mutated gene.
        order: Forwarded to ``agents.LinearAgent`` for every child.
        max_score: Stop early once the best mean score reaches this value.
        game: Environment id forwarded to ``agents.LinearAgent``.

    Returns:
        The results of the last generation trialled, sorted by mean score
        (best first). With ``generations=0`` the input is returned unchanged.
    """
    for generation in range(generations):
        # Sort agents by score (fitness)
        top_scores = sorted(results, key=lambda x: x["mean"], reverse=True)

        # The survival of the fittest. Wikipedia calls this "elitism".
        # The top agents of a generation are carried over to the next
        survivors = top_scores[:old]

        # To start breeding new agents, I'll mix weights (genes)
        weight_shape = top_scores[0]["weights"].shape
        gene_pool = [list(i["weights"].flatten()) for i in top_scores]
        pedigree_list = [i["pedigree"] for i in top_scores]
        genome_size = top_scores[0]["weights"].size

        # Scores can be negative, so they are shifted to be non-negative
        # and normalised to sum to 1 for random sampling
        probs = _selection_probabilities([i["mean"] for i in top_scores], n_parents)

        # For each new agent, randomly select parents
        # Higher-fitness agents are likelier to sire new agents
        children = []
        for _ in range(new):
            parents = np.random.choice(np.arange(len(gene_pool)),
                                       size=n_parents,
                                       replace=False,
                                       p=probs)

            # The offspring get a mix of each parent's weights
            # The weights (genes) are simply copied over
            mix = np.random.randint(0, high=n_parents, size=genome_size)

            weights = []
            pedigree = []
            for i in range(genome_size):
                donor = parents[mix[i]]
                gene = gene_pool[donor][i]
                lineage = pedigree_list[donor][i]
                # A mutation happens rarely and adds a bit of noise to a gene
                if np.random.random() < mutation_rate:
                    # Draw a scalar directly: converting a 1-element array
                    # with float() is deprecated in recent NumPy releases
                    gene += float(np.random.normal(0, mutation_amount))
                    lineage += "M"
                weights.append(gene)
                pedigree.append(lineage)

            children.append({"weights" : weights, "pedigree" : pedigree})

        # Elitism: the top agents survive to fight another day
        new_agents = [i["agent"] for i in survivors]

        # The offspring are added in
        # With the pedigree variable their ancestors are tracked
        for child in children:
            new_agents.append(
                agents.LinearAgent(
                    np.array(child["weights"]).reshape(weight_shape),
                    pedigree=child["pedigree"],
                    order=order,
                    game=game))

        # Trial the agents using multiple CPU cores; the context manager
        # tears the pool down even if a trial raises
        with Pool(n_jobs) as p:
            results = p.map(trial_agent, new_agents)

        results = sorted(results, key=lambda x: x["mean"], reverse=True)

        population_mean = sum([i['mean'] for i in results])/len(results)
        print(f"[{generation+1:3}] Population average: {population_mean:5.1f}")
        print(f"[{generation+1:3}] Best mean score:    {results[0]['mean']:5.1f}, Pedigree: {'-'.join(results[0]['pedigree'])}")
        print()

        # End early if maximum is reached
        if results[0]['mean'] >= max_score:
            print(f"[{generation+1:3}] Best score reached, ending early")
            break
    return results


def _selection_probabilities(means, n_parents):
    """Turn mean scores into non-negative sampling probabilities summing to 1.

    The magnitude of the lowest score is added to every score. For a negative
    minimum this lifts all scores to zero or above; for a non-negative minimum
    it is the same as adding the minimum itself. If the shifted weights cannot
    support drawing ``n_parents`` distinct parents (zero total, or too few
    positive weights), a uniform distribution is returned instead.
    """
    offset = abs(min(means))
    shifted = [m + offset for m in means]
    total = sum(shifted)
    positive = sum(1 for w in shifted if w > 0)
    if total <= 0 or positive < n_parents:
        return [1.0 / len(means)] * len(means)
    return [w / total for w in shifted]

## Extreme third-order agent

I'm going to try a third-order agent. And this time I'll take a peek at the results every few generations.

In [4]:
# Starting population: 25 agents built with order=3, each trialled once
results = [
    trial_agent(agents.LinearAgent(None, order=3, id=seed_id))
    for seed_id in range(25)
]

# Rank by mean score (best first) and keep the leader
ranking = sorted(results, key=lambda entry: entry["mean"], reverse=True)
winner = ranking[0]

print(winner)

# Render the best agent (episodes=1) and embed the result as an image
gif_src = winner['agent'].render('mod_cart_0.gif', episodes=1)
HTML(f"<img src='{gif_src}'>")

{'agent': <agents.LinearAgent object at 0x7f4c212d5a58>, 'weights': array([[-0.03436326],
       [ 0.62651788],
       [-0.8671359 ],
       [-0.77003561],
       [ 0.88472795],
       [ 0.9753544 ],
       [-0.13446598],
       [-0.70975749],
       [ 0.92727059],
       [-0.16145533],
       [ 0.58148144],
       [ 0.67178113],
       [ 0.51396379],
       [ 0.91591603],
       [-0.21173226],
       [-0.7605436 ],
       [-0.17849587],
       [ 0.49374757],
       [-0.96997173],
       [ 0.8402381 ],
       [ 0.55645057],
       [ 0.16820958],
       [-0.56130607],
       [-0.04514864],
       [-0.91161306],
       [ 0.63469725],
       [ 0.3426981 ],
       [ 0.96146971],
       [ 0.68770587],
       [-0.76626071],
       [ 0.05597288],
       [-0.80606728],
       [ 0.64280773],
       [-0.85669744],
       [ 0.00206797]]), 'pedigree': ['6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6

In [5]:
# Evolve the current population for five more generations (third-order agents)
results = genetic_algorithm(results, generations=5, order=3)

# Rank by mean score (best first) and keep the leader
ranking = sorted(results, key=lambda entry: entry["mean"], reverse=True)
winner = ranking[0]

print(winner)

# Render the best agent (episodes=1) and embed the result as an image
gif_src = winner['agent'].render('mod_cart_5.gif', episodes=1)
HTML(f"<img src='{gif_src}'>")

[  1] Population average:  20.3
[  1] Best mean score:     97.9, Pedigree: 6-6-20-20-6-6-20-20-20-6-6-20-6-6-20-20-6-20-20-6-6-6-20-6-6-20-20-6-20-6-20-20-20-6-6

[  2] Population average:  42.3
[  2] Best mean score:    178.5, Pedigree: 6-10-10-10-6-6-20-20-20-4-10-10-6-4-4-10-6-20-20-6-10-6-20-4-10-20-20-6-10-10-20-10-20-6-6

[  3] Population average:  72.1
[  3] Best mean score:    223.2, Pedigree: 6-16-10-10-6-16-16-20-20-4-6-16-6-4-6-10-16-6-6-6-16-6-6-4-16-16-20-6-10-10-6-10-16-6-6

[  4] Population average:  98.8
[  4] Best mean score:    415.4, Pedigree: 6-17-15M-15-6-6-6-5-16-15-4-5-6-2-6-5-6-8-6-5-15-11-6-6-6-17-6-6-7-6-17-6-15-6-8

[  5] Population average: 139.3
[  5] Best mean score:    448.3, Pedigree: 6-16-15M-15-6-6-16-5-16-4-4-3-6-4-6-10-16-6-6-6-15-11-20-4-5-17-6-6-7-6-20-10-20-6-8

{'agent': <agents.LinearAgent object at 0x7f4c1edc2748>, 'weights': array([[-0.03436326],
       [-0.28789561],
       [ 0.3425465 ],
       [ 0.51404998],
       [ 0.88472795],
       [ 0

In [6]:
# Evolve the current population for five more generations (third-order agents)
results = genetic_algorithm(results, generations=5, order=3)

# Rank by mean score (best first) and keep the leader
ranking = sorted(results, key=lambda entry: entry["mean"], reverse=True)
winner = ranking[0]

print(winner)

# Render the best agent (episodes=1) and embed the result as an image
gif_src = winner['agent'].render('mod_cart_10.gif', episodes=1)
HTML(f"<img src='{gif_src}'>")

[  1] Population average: 180.6
[  1] Best mean score:    464.2, Pedigree: 6-10-15M-10-6-6-20-5-20-4-6-3-6-4M-6-10-16-6-6-6-7-6-20-6-6-17-6-6M-10-6-6-10-16-6-8

[  2] Population average: 225.2
[  2] Best mean score:    495.0, Pedigree: 6-10-15M-10-6-6-6-22-20-4-6-3-6-2-6-10-6-8-17-6-10-11-20-6-6-17-6-6M-7-6-5-10-22-13-8

[  3] Population average: 248.0
[  3] Best mean score:    494.8, Pedigree: 6-10-15M-10-6-6-6-22-20-4-6-3-6-2-6-10-6-8-17-6-10-11-20-6-6-17-6-6M-7-6-5-10-22-13-8

[  4] Population average: 297.9
[  4] Best mean score:    494.9, Pedigree: 6-10-15M-10-6-6-6-22-20-4-6-3-6-2-6-10-6-8-17-6-10-11-20-6-6-17-6-6M-7-6-5-10-22-13-8

[  5] Population average: 306.9
[  5] Best mean score:    494.8, Pedigree: 6-10-15M-10-6-6-20-5-20-4-6-3-6-13-6-17-16-6-6-6-7-11-2-6-6-17-6-6M-10-6-6-10-16-6-8

{'agent': <agents.LinearAgent object at 0x7f4c12b7ef98>, 'weights': array([[-0.03436326],
       [ 0.33235873],
       [ 0.3425465 ],
       [ 0.86844564],
       [ 0.88472795],
       [ 0.975

In [7]:
# Evolve the current population for five more generations (third-order agents)
results = genetic_algorithm(results, generations=5, order=3)

# Rank by mean score (best first) and keep the leader
ranking = sorted(results, key=lambda entry: entry["mean"], reverse=True)
winner = ranking[0]

print(winner)

# Render the best agent (episodes=1) and embed the result as an image
gif_src = winner['agent'].render('mod_cart_15.gif', episodes=1)
HTML(f"<img src='{gif_src}'>")

[  1] Population average: 321.5
[  1] Best mean score:    495.1, Pedigree: 6-10-15M-10-6-6-6-22-20-4-6-3-6-2-6-10-6-8-17-6-10-11-20-6-6-17-6-6M-7-6-5-10-22-13-8

[  2] Population average: 353.5
[  2] Best mean score:    495.5, Pedigree: 6-10-15M-10-22M-6-6-5-7-15-6-3-6-2-6-10-16-8-5-6-10-11M-20-5-16-17-6-6M-7-6-20-10-22-6-8

[  3] Population average: 383.3
[  3] Best mean score:    495.6, Pedigree: 6-10-15M-10-22M-6-6-5-7-15-6-3-6-2-6-10-16-8-5-6-10-11M-20-5-16-17-6-6M-7-6-20-10-22-6-8

[  4] Population average: 393.5
[  4] Best mean score:    495.5, Pedigree: 6-10-15M-10-22M-6-6-5-7-15-6-3-6-2-6-10-16-8-5-6-10-11M-20-5-16-17-6-6M-7-6-20-10-22-6-8

[  5] Population average: 382.1
[  5] Best mean score:    495.6, Pedigree: 6-10-15MM-10-6-6-6-5-7-4-6-3-6-2-6-10-6-8-5-5M-6-11-6M-5-16-17-22-6M-7-6-20-10-22-6-8

{'agent': <agents.LinearAgent object at 0x7f4c12b11240>, 'weights': array([[-0.03436326],
       [ 0.33235873],
       [ 0.47124037],
       [ 0.86844564],
       [ 0.88472795],
   

In [8]:
# Evolve the current population for five more generations (third-order agents)
results = genetic_algorithm(results, generations=5, order=3)

# Rank by mean score (best first) and keep the leader
ranking = sorted(results, key=lambda entry: entry["mean"], reverse=True)
winner = ranking[0]

print(winner)

# Render the best agent (episodes=1) and embed the result as an image
gif_src = winner['agent'].render('mod_cart_20.gif', episodes=1)
HTML(f"<img src='{gif_src}'>")

[  1] Population average: 379.1
[  1] Best mean score:    496.0, Pedigree: 6-10-15MM-10-6-6-20-6-7-4-6-3-6-2-6-17-5-8-17-2-15-11-20-6-6MM-6-6-6M-10-6-3-10-22-6-8

[  2] Population average: 392.4
[  2] Best mean score:    496.8, Pedigree: 6-10-15MM-10-22M-6-6-5-7-4-6-3M-6-13-6-17-16-6-5-6-7-11M-20-6M-6M-17-6-6M-7-6-20-10-16-6-8

[  3] Population average: 396.7
[  3] Best mean score:    497.0, Pedigree: 6-10-15MM-10-22M-6-6-5-7-4-6-3M-6-13-6-17-16-6-5-6-7-11M-20-6M-6M-17-6-6M-7-6-20-10-16-6-8

[  4] Population average: 436.3
[  4] Best mean score:    496.1, Pedigree: 6-10-15MM-10-22M-6-6-5-7-4-6-3M-16M-13-6-17-6-20-5-6-6-11M-2-6M-6M-17-6-6M-7-6-20M-10-16-6-8

[  5] Population average: 449.9
[  5] Best mean score:    496.1, Pedigree: 6-10-15MM-10-6-6M-20-5M-7-4-6-3-4-6-6-10-6-8-5-5M-7-11-20-6M-16-17-22-6MM-7-6M-20-10-22-6-8

{'agent': <agents.LinearAgent object at 0x7f4c12af7320>, 'weights': array([[-0.03436326],
       [ 0.33235873],
       [ 0.61481722],
       [ 0.86844564],
       [ 0

In [9]:
# Evolve the current population for five more generations (third-order agents)
results = genetic_algorithm(results, generations=5, order=3)

# Rank by mean score (best first) and keep the leader
ranking = sorted(results, key=lambda entry: entry["mean"], reverse=True)
winner = ranking[0]

print(winner)

# Render the best agent (episodes=1) and embed the result as an image
gif_src = winner['agent'].render('mod_cart_25.gif', episodes=1)
HTML(f"<img src='{gif_src}'>")

[  1] Population average: 437.5
[  1] Best mean score:    496.2, Pedigree: 6-10-15MM-10-22M-6-6-5-7-4-6-3M-16M-13-6-17-6-20-5-6-6-11M-2-6M-6M-17-6-6M-7-6-20M-10-16-6-8

[  2] Population average: 445.5
[  2] Best mean score:    496.1, Pedigree: 6-10-15MM-10-22M-6-6-5-7-4-6-3M-16M-13-6-17-6-20-5-6-6-11M-2-6M-6M-17-6-6M-7-6-20M-10-16-6-8

[  3] Population average: 451.6
[  3] Best mean score:    496.1, Pedigree: 6-10-15MMM-10-6-6-16-5-20-4-4-3-6-13-6-10-6-6-5-5-10-11-6-6-6-20-6-6-2-6-20-8-16-6-8

[  4] Population average: 445.0
[  4] Best mean score:    496.4, Pedigree: 6-10-15MM-10-22M-6-6-5-7-4-6-3M-16M-13-6-17-6-6-5-6-7-11-6-6M-6M-17-6-6-7-6-20M-10-16-6-8

[  5] Population average: 441.8
[  5] Best mean score:    496.3, Pedigree: 6-10-15MM-10-22M-6-6-5-7-4-6-3M-16M-13-6-17-6-6-5-6-7-11-6-6M-6M-17-6-6-7-6-20M-10-16-6-8

{'agent': <agents.LinearAgent object at 0x7f4c1eda9cc0>, 'weights': array([[-0.03436326],
       [ 0.33235873],
       [ 0.47124037],
       [ 0.86844564],
       [ 0.51