# The Lunar Lander: How Moon Craters Are Made

A cool little game you may have played before. You control the craft's angle and can apply the retrorocket. See the rules [here](https://github.com/openai/gym/wiki/Leaderboard#lunarlander-v2).

In [1]:
# Display GIFs in Jupyter
from IPython.display import HTML

# OpenAI gym
import gym

# Import local script
import agents

# numpy
import numpy as np

# To speed up the algorithm
from multiprocessing import Pool
n_jobs = 4 # Set your number of cores here

In [2]:
def trial_agent(agent, trials=50, limit=500):
    env = gym.make(agent.game)

    scores = []
    for i in range(trials):
        observation = env.reset()
        rewards = []
        for t in range(limit):
            action = agent.predict(observation)
            observation, reward, done, info = env.step(action)
            if done:
                break
            rewards.append(reward)
        scores.append(sum(rewards))
        
    data_dict = {
        "agent" : agent, 
        "weights" : agent.w, 
        "pedigree" : agent.pedigree, 
        "minimum" : min(scores), 
        "maximum" : max(scores), 
        "mean" : sum(scores)/len(scores)
    }
    
    env.close()
    
    return data_dict

In [3]:
def genetic_algorithm(results, old=5, new=95, n_parents=2, generations=25, 
                      mutation_rate=0.01, mutation_amount=0.5, order=1, max_score=499.0, 
                      game="CartPole-v1"):
    for round in range(generations):
        # Sort agents by score (fitness)
        top_scores = sorted(results, key=lambda x: x["mean"], reverse=True)

        # The survival of the fittest. Wikipedia calls this "elitism".
        # The top agents of a generation are carried over to the next
        survivors = top_scores[:old]

        # To start breeding new agents, I'll mix weights (genes)
        weight_shape = top_scores[0]["weights"].shape
        gene_pool = [list(i["weights"].flatten()) for i in top_scores]
        pedigree_list = [i["pedigree"] for i in top_scores]
        genome_size = top_scores[0]["weights"].size

        # Scores can be negative, so here I make them all positive
        # They also need to sum to 1 for random sampling
        min_score = min([i["mean"] for i in top_scores])
        sum_score = sum([i["mean"]+min_score for i in top_scores])
        probs = [(i["mean"]+min_score)/sum_score for i in top_scores]

        # For each new agent, randomly select parents
        # Higher-fitness agents are likelier to sire new agents
        children = []
        for birth in range(new):
            parents = np.random.choice(np.arange(len(gene_pool)), 
                             size=n_parents, 
                             replace=False, 
                             p=probs)

            # The offspring get a mix of each parent's weights
            # The weights (genes) are simply copied over
            mix = np.random.randint(0, high=n_parents, size=genome_size)

            weights = []
            pedigree = []
            for i in range(genome_size):
                weights.append(gene_pool[parents[mix[i]]][i])
                pedigree.append(pedigree_list[parents[mix[i]]][i])
                # A mutation happens rarely and adds a bit of noise to a gene
                if np.random.random(1) < mutation_rate:
                    weights[i] += float(np.random.normal(0, mutation_amount, 1))
                    pedigree[i] += "M"

            children.append({"weights" : weights, "pedigree" : pedigree})

        # Elitism: the top agents survive to fight another day
        new_agents = [i["agent"] for i in survivors]

        # The offspring are added it
        # With the pedigree variable their ancestors are tracked
        for child in children:
            new_agents.append(
                agents.LinearAgent(
                    np.array(child["weights"]).reshape(weight_shape), 
                    pedigree=child["pedigree"],
                    order=order,
                    game=game))

        # Trial the agents using multiple CPU cores
        p = Pool(n_jobs)
        results = p.map(trial_agent, new_agents)
        p.close()
        
        results = sorted(results, key=lambda x: x["mean"], reverse=True)

        print(f"[{round+1:3}] Population average: {sum([i['mean'] for i in results])/len(results):5.1f}")
        print(f"[{round+1:3}] Best mean score:    {results[0]['mean']:5.1f}, Pedigree: {'-'.join(results[0]['pedigree'])}")
        print()
        
        # End early if maximum is reached
        if results[0]['mean'] >= max_score:
            print(f"[{round+1:3}] Best score reached, ending early")
            break
    return results

## Simple agent

I got this to work with a simple first-oder softmax regression agent. Since there are more parameters to work with, it seems important to use a larger population. This must expand the algorithm's search space.

In [4]:
results = []

for a in range(200):
    results.append(trial_agent(agents.LinearAgent(None, id=a, order=1, game="LunarLander-v2")))

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_0.gif', limit=500)}'>")

In [5]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=1, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_20.gif', limit=500)}'>")

[  1] Population average: -352.0
[  1] Best mean score:      2.8, Pedigree: 185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185-185

[  2] Population average: -372.0
[  2] Best mean score:     12.3, Pedigree: 183-177-183-183-13-13-177-183-187M-177-183-187-13-13-177-13-177-183-183-177-177-183-177-13-13-177-177-187-177-13-177-187-177-13-13-13

[  3] Population average: -361.8
[  3] Best mean score:     15.8, Pedigree: 183-177-183-183-13-13-177-183-187M-177-183-187-13-13-177-13-177-183-183-177-177-183-177-13-13-177-177-187-177-13-177-187-177-13-13-13

[  4] Population average: -342.8
[  4] Best mean score:     20.5, Pedigree: 183-177-183-183-13-13-177-183-187M-177-183-187-13-13-177-13-177-183-183-177-177-183-177-13-13-177-177-187-177-13-177-187-177-13-13-13

[  5] Population average: -360.2
[  5] Best mean score:     42.4, Pedigree: 70-71-20-29-58-17-17-114-86-185-20M-17-61-61-56-185-25-81M-70-29-70-90

In [6]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=1, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_40.gif', limit=500)}'>")

[  1] Population average: -359.2
[  1] Best mean score:     81.9, Pedigree: 156M-52-43M-92-178-119-46M-75-97-20-183-49M-25-130MM-64-128-185-82-185-40-8-3-139-185-173-82-105-58-119-105-17-174-71M-139-185-128

[  2] Population average: -368.6
[  2] Best mean score:     85.4, Pedigree: 156M-52-43M-92-178-119-46M-75-97-20-183-49M-25-130MM-64-128-185-82-185-40-8-3-139-185-173-82-105-58-119-105-17-174-71M-139-185-128

[  3] Population average: -353.5
[  3] Best mean score:     96.8, Pedigree: 156M-52-43M-92-178-119-46M-75-97-20-183-49M-25-130MM-64-128-185-82-185-40-8-3-139-185-173-82-105-58-119-105-17-174-71M-139-185-128

[  4] Population average: -355.2
[  4] Best mean score:     92.2, Pedigree: 156M-52-43M-92-178-119-46M-75-97-20-183-49M-25-130MM-64-128-185-82-185-40-8-3-139-185-173-82-105-58-119-105-17-174-71M-139-185-128

[  5] Population average: -371.7
[  5] Best mean score:     87.7, Pedigree: 156M-52-43M-92-178-119-46M-75-97-20-183-49M-25-130MM-64-128-185-82-185-40-8-3-139-185-173-82

In [7]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=1, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_60.gif', limit=500)}'>")

[  1] Population average: -403.4
[  1] Best mean score:     98.1, Pedigree: 67M-119-64-92-48M-72-86M-69-131-185-43-5-170M-81-185-128M-25M-186-185-40-48-160-185-81-49-70M-81-67-126-5-198-49-117M-185-81-185

[  2] Population average: -433.6
[  2] Best mean score:    105.2, Pedigree: 67M-119-64-92-48M-72-86M-69-131-185-43-5-170M-81-185-128M-25M-186-185-40-48-160-185-81-49-70M-81-67-126-5-198-49-117M-185-81-185

[  3] Population average: -451.6
[  3] Best mean score:    102.9, Pedigree: 67M-119-64-92-48M-72-86M-69-131-185-43-5-170M-81-185-128M-25M-186-185-40-48-160-185-81-49-70M-81-67-126-5-198-49-117M-185-81-185

[  4] Population average: -446.1
[  4] Best mean score:    117.0, Pedigree: 5-119-43-197-118M-119-180-183-187M-133-105-86-117-166-177-33-56-185-185-113M-58-160-179-81-49-70M-8-67-43-119-41-136M-185-185-117-185

[  5] Population average: -454.0
[  5] Best mean score:    119.2, Pedigree: 5-119-43-197-118M-119-180-183-187M-133-105-86-117-166-177-33-56-185-185-113M-58-160-179-81-49-7

In [8]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=1, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_80.gif', limit=500)}'>")

[  1] Population average: -444.1
[  1] Best mean score:    115.6, Pedigree: 5-119-43-197-118M-119-180-183-187M-133-105-86-117-166-177-33-56-185-185-113M-58-160-179-81-49-70M-8-67-43-119-41-136M-185-185-117-185

[  2] Population average: -444.6
[  2] Best mean score:    123.5, Pedigree: 68M-71-75-92-185-98MM-86M-119-49-185-183-5-25-185-56M-81-56-183-185-40-48-185-179-75-49-82M-121M-58-119-119-132-49-97-185-13-185

[  3] Population average: -417.8
[  3] Best mean score:    124.2, Pedigree: 5-119-43-197-118M-119-180-183-187M-133-105-86-117-166-177-33-56-185-185-113M-58-160-179-81-49-70M-8-67-43-119-41-136M-185-185-117-185

[  4] Population average: -442.0
[  4] Best mean score:    129.5, Pedigree: 5-119-43-197-118M-119-180-183-187M-133-105-86-117-166-177-33-56-185-185-113M-58-160-179-81-49-70M-8-67-43-119-41-136M-185-185-117-185

[  5] Population average: -469.9
[  5] Best mean score:    123.5, Pedigree: 5-119-43-197-118M-119-180-183-187M-133-105-86-117-166-177-33-56-185-185-113M-58-160-1

In [9]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=1, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_100.gif', limit=500)}'>")

[  1] Population average: -425.1
[  1] Best mean score:    138.6, Pedigree: 71-71-43-119-13-119-17-75-86-133-43-17-117M-61-151-33-56-81M-185-90-58-160-71-81-173-82-121-67-185-119-41-174-117M-185-81M-179M

[  2] Population average: -435.2
[  2] Best mean score:    129.8, Pedigree: 71-71-43-119-13-119-198-114-131-133-43-5-117-61-132-81M-56-81M-185-29-169-82-185-81-49-70M-81-67M-43-119-41M-189M-97-20-81-185

[  3] Population average: -416.3
[  3] Best mean score:    133.7, Pedigree: 71-154-64-119-13-64-198-114-37M-133-43-187-25-61-132-33-25M-186-185-29-169-82-185-81-49-70M-81-67M-43-5-17-189M-43-185-81-185

[  4] Population average: -438.3
[  4] Best mean score:    135.5, Pedigree: 71-71-43-119-13-119-17-75-86-133-43-17-117M-61-151-33-56-81M-185-90-58-160-71-81-173-82-121-67-185-119-41-174-117M-185-81M-179M

[  5] Population average: -405.7
[  5] Best mean score:    139.5, Pedigree: 37M-71-43-119-185-72-177-69-86-133-43-5-117-166-177M-0M-56-56-185-90M-169-82-71-75-49-70M-181M-67-43-54-41-

The lander should make reasonable landings.