# The Pendulum: Do a Cool Backflip

This game requires you to balance a pendulum upside-down. This seems to be a difficult task because the agent is tempted to just twirl around forever. Read the game rules [here](https://github.com/openai/gym/wiki/Pendulum-v0).

In [1]:
# Display GIFs in Jupyter
from IPython.display import HTML

# OpenAI gym
import gym

# Import local script
import agents

# numpy
import numpy as np

# To speed up the algorithm
from multiprocessing import Pool
n_jobs = 4 # Set your number of cores here

In [2]:
def trial_agent(agent, trials=50, limit=250):
    env = gym.make(agent.game)

    scores = []
    for i in range(trials):
        observation = env.reset()
        score = 0
        for t in range(limit):
            action = agent.predict(observation)
            observation, reward, done, info = env.step(action)
            if done:
                break
            # Oddly the reward is given as a numpy array here
            score += float(reward) 
        scores.append(score)
        
    data_dict = {
        "agent" : agent, 
        "weights" : agent.w, 
        "pedigree" : agent.pedigree, 
        "minimum" : min(scores), 
        "maximum" : max(scores), 
        "mean" : sum(scores)/len(scores)
    }
    
    env.close()
    
    return data_dict

In [3]:
def genetic_algorithm(results, old=5, new=95, n_parents=2, generations=25, 
                      mutation_rate=0.01, mutation_amount=0.5, order=1, max_score=499.0, 
                      game="CartPole-v1"):
    for round in range(generations):
        # Sort agents by score (fitness)
        top_scores = sorted(results, key=lambda x: x["mean"], reverse=True)

        # The survival of the fittest. Wikipedia calls this "elitism".
        # The top agents of a generation are carried over to the next
        survivors = top_scores[:old]

        # To start breeding new agents, I'll mix weights (genes)
        weight_shape = top_scores[0]["weights"].shape
        gene_pool = [list(i["weights"].flatten()) for i in top_scores]
        pedigree_list = [i["pedigree"] for i in top_scores]
        genome_size = top_scores[0]["weights"].size

        # Scores can be negative, so here I make them all positive
        # They also need to sum to 1 for random sampling
        min_score = min([i["mean"] for i in top_scores])
        sum_score = sum([i["mean"]+min_score for i in top_scores])
        probs = [(i["mean"]+min_score)/sum_score for i in top_scores]

        # For each new agent, randomly select parents
        # Higher-fitness agents are likelier to sire new agents
        children = []
        for birth in range(new):
            parents = np.random.choice(np.arange(len(gene_pool)), 
                             size=n_parents, 
                             replace=False, 
                             p=probs)

            # The offspring get a mix of each parent's weights
            # The weights (genes) are simply copied over
            mix = np.random.randint(0, high=n_parents, size=genome_size)

            weights = []
            pedigree = []
            for i in range(genome_size):
                weights.append(gene_pool[parents[mix[i]]][i])
                pedigree.append(pedigree_list[parents[mix[i]]][i])
                # A mutation happens rarely and adds a bit of noise to a gene
                if np.random.random(1) < mutation_rate:
                    weights[i] += float(np.random.normal(0, mutation_amount, 1))
                    pedigree[i] += "M"

            children.append({"weights" : weights, "pedigree" : pedigree})

        # Elitism: the top agents survive to fight another day
        new_agents = [i["agent"] for i in survivors]

        # The offspring are added it
        # With the pedigree variable their ancestors are tracked
        for child in children:
            new_agents.append(
                agents.LinearAgent(
                    np.array(child["weights"]).reshape(weight_shape), 
                    pedigree=child["pedigree"],
                    order=order,
                    game=game))

        # Trial the agents using multiple CPU cores
        p = Pool(n_jobs)
        results = p.map(trial_agent, new_agents)
        p.close()
        
        results = sorted(results, key=lambda x: x["mean"], reverse=True)

        print(f"[{round+1:3}] Population average: {sum([i['mean'] for i in results])/len(results):5.1f}")
        print(f"[{round+1:3}] Best mean score:    {results[0]['mean']:5.1f}, Pedigree: {'-'.join(results[0]['pedigree'])}")
        print()
        
        # End early if maximum is reached
        if results[0]['mean'] >= max_score:
            print(f"[{round+1:3}] Best score reached, ending early")
            break
    return results

## Simple agent

This ends up working, even with a first-order agent. You just need to be patient.

I have a hard time with continuous outputs so I set this up as a softmax regression over -2, -1, 0, +1, +2. The agent probably can't precisely balance the pendulum because of this.

In [4]:
results = []

for a in range(100):
    results.append(trial_agent(agents.LinearAgent(None, id=a, order=1, game="Pendulum-v0")))

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='{winner['agent'].render('pendulum_test.gif', limit=250)}'>")

{'agent': <agents.LinearAgent object at 0x7f5948db4630>, 'weights': array([[-0.49523128, -0.69699123, -0.94127978,  0.18397558, -0.13567396],
       [ 0.41566394,  0.71414234,  0.95001799, -0.54315465,  0.74036428],
       [ 0.05895708,  0.34342843, -0.95065304, -0.53485288, -0.18889497],
       [ 0.68329703,  0.23353464, -0.80322433,  0.64596063, -0.35379445]]), 'pedigree': ['24', '24', '24', '24', '24', '24', '24', '24', '24', '24', '24', '24', '24', '24', '24', '24', '24', '24', '24', '24'], 'minimum': -1724.7191218268897, 'maximum': -520.8231227530816, 'mean': -925.454547189137}


In [5]:
results = genetic_algorithm(results, generations=50, order=1, game="Pendulum-v0")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='{winner['agent'].render('pendulum_50.gif', limit=250)}'>")

[  1] Population average: -1457.0
[  1] Best mean score:    -795.8, Pedigree: 24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24

[  2] Population average: -1462.5
[  2] Best mean score:    -894.9, Pedigree: 24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24

[  3] Population average: -1455.1
[  3] Best mean score:    -863.1, Pedigree: 24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24

[  4] Population average: -1430.6
[  4] Best mean score:    -931.7, Pedigree: 24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24

[  5] Population average: -1443.4
[  5] Best mean score:    -902.9, Pedigree: 24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24

[  6] Population average: -1466.8
[  6] Best mean score:    -837.9, Pedigree: 24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24

[  7] Population average: -1474.4
[  7] Best mean score:    -826.3, Pedigree: 24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24-24

[  8] Population average: -

In [6]:
results = genetic_algorithm(results, generations=50, order=1, game="Pendulum-v0")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='{winner['agent'].render('pendulum_100.gif', limit=250)}'>")

[  1] Population average: -1454.1
[  1] Best mean score:    -339.4, Pedigree: 24-24-62M-56-21M-37-84-2-57M-7-10-75-32M-7-75-10-87-9-50MM-11

[  2] Population average: -1433.3
[  2] Best mean score:    -369.1, Pedigree: 24-24-62M-56-21M-37-84-2-57M-7-10-75-32M-7-75-10-87-9-50MM-11

[  3] Population average: -1437.3
[  3] Best mean score:    -321.3, Pedigree: 24-24-62M-56-21M-37-84-2-57M-7-10-75-32M-7-75-10-87-9-50MM-11

[  4] Population average: -1440.3
[  4] Best mean score:    -350.2, Pedigree: 24-24-62M-56-21M-37-84-2-57M-7-10-75-32M-7-75-10-87-9-50MM-11

[  5] Population average: -1405.9
[  5] Best mean score:    -297.8, Pedigree: 24-24-78-24-21M-37-22MM-29-57M-7-10-32-32M-7-75-10-87-31-50MM-11

[  6] Population average: -1433.2
[  6] Best mean score:    -304.9, Pedigree: 24-24-78-24-21M-37-22MM-29-57M-7-10-32-32M-7-75-10-87-31-50MM-11

[  7] Population average: -1409.7
[  7] Best mean score:    -355.7, Pedigree: 24-24-78-24-21M-37-22MM-29-57M-7-10-32-32M-7-75-10-87-31-50MM-11

[  8

In [7]:
results = genetic_algorithm(results, generations=50, order=1, game="Pendulum-v0")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='{winner['agent'].render('pendulum_150.gif', limit=250)}'>")

[  1] Population average: -1343.0
[  1] Best mean score:    -248.3, Pedigree: 24-24-62MM-24-21M-37-22MM-29MM-97-7-10M-75-32M-28M-75M-10-87-9-58M-11

[  2] Population average: -1372.2
[  2] Best mean score:    -254.1, Pedigree: 24-24-62M-24-21M-37-84-29-40-7-38-75-53-28M-75-10-34-9-58-11

[  3] Population average: -1348.3
[  3] Best mean score:    -257.7, Pedigree: 24-24-62M-24M-21M-37-84-29-40-7-38-75-53-28M-75-10-34-9-50MM-11

[  4] Population average: -1318.6
[  4] Best mean score:    -281.8, Pedigree: 24-24-62MM-24-21M-37-22MM-29MM-97-7-10M-75-32M-28M-75M-10-87-9-58M-11

[  5] Population average: -1342.3
[  5] Best mean score:    -230.7, Pedigree: 24-24-62M-24-21M-37-84-29-40-7-38-75-53-28M-75-10-34-9-58-11

[  6] Population average: -1383.5
[  6] Best mean score:    -243.7, Pedigree: 24-24-62M-24-21M-37-84-29-40-7-38-32-53-28M-75M-10-34-9-58M-11

[  7] Population average: -1378.0
[  7] Best mean score:    -253.1, Pedigree: 24-24-62M-24-21M-37-84-29-40-7-38-32-53-28M-75M-10-34-9-58M

In [8]:
results = genetic_algorithm(results, generations=50, order=1, game="Pendulum-v0")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='{winner['agent'].render('pendulum_200.gif', limit=250)}'>")

[  1] Population average: -1459.8
[  1] Best mean score:    -254.4, Pedigree: 24MM-24MM-62M-24MM-21M-37-84-29-40-7-38-75M-32MM-28MM-75-10-87-9-58M-11

[  2] Population average: -1423.4
[  2] Best mean score:    -254.0, Pedigree: 24-24MM-62MM-24MM-21M-37-50MMMM-29M-40-7-38-75-53-28M-75-10-34M-9-58M-11

[  3] Population average: -1389.9
[  3] Best mean score:    -234.4, Pedigree: 24MM-24MM-62M-24MM-21M-37-84-29-97M-7-38-75M-32MM-28MM-75M-10-87-9-58M-11

[  4] Population average: -1375.9
[  4] Best mean score:    -245.0, Pedigree: 24-24MM-62MM-24MM-21M-37-50MMMM-29M-40-7-38-75-53-28M-75-10-34M-9-58M-11

[  5] Population average: -1429.6
[  5] Best mean score:    -257.3, Pedigree: 24-24MM-62M-24M-21M-37-84-29M-40-7-10M-75MM-53-28MM-75M-10-34-9-58M-11

[  6] Population average: -1420.5
[  6] Best mean score:    -229.9, Pedigree: 24M-24M-62M-24M-21M-37-50MMMMM-29-40-7-38-75-32MM-28M-75MM-10-87-9M-58M-11

[  7] Population average: -1403.2
[  7] Best mean score:    -245.3, Pedigree: 24M-24M-62

In [9]:
results = genetic_algorithm(results, generations=50, order=1, game="Pendulum-v0")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='{winner['agent'].render('pendulum_250.gif', limit=250)}'>")

[  1] Population average: -1483.6
[  1] Best mean score:    -246.1, Pedigree: 24M-24MMM-62M-24M-21M-37-50MMMMM-29M-40M-7-38-75-32MM-28M-75MM-10-87M-9-58M-11

[  2] Population average: -1479.0
[  2] Best mean score:    -207.9, Pedigree: 24-24MMM-62M-24MM-21M-37-50MMMMM-29M-40M-7-38-75M-32MM-28MM-75M-10-87M-9-58M-11

[  3] Population average: -1465.1
[  3] Best mean score:    -221.6, Pedigree: 24M-24MM-62MM-24MM-21M-37-50MMMM-29M-40M-7-38-75M-32MM-28M-75M-10-87M-9-58M-11

[  4] Population average: -1503.7
[  4] Best mean score:    -232.8, Pedigree: 24-24MMM-62MM-24MM-21M-37-50MMMMMMM-29-40M-7-38-75M-32MM-28M-75MM-10-87-9-58MM-11

[  5] Population average: -1474.8
[  5] Best mean score:    -219.5, Pedigree: 24M-24MM-62M-24MMM-21M-37-50MMMMM-29M-40M-7-38-75M-53-28M-75M-10-87M-9-58M-11

[  6] Population average: -1466.7
[  6] Best mean score:    -215.6, Pedigree: 24M-24MM-62M-24MMM-21M-37-50MMMMMM-29M-40M-7-38-75M-53-28M-75MM-10-34-9M-58MM-11

[  7] Population average: -1476.5
[  7] Best me