# The Lunar Lander: How Moon Craters Are Made

A cool little game you may have played before. You control the craft's angle and can apply the retrorocket. See the rules [here](https://github.com/openai/gym/wiki/Leaderboard#lunarlander-v2).

In [1]:
# Display GIFs in Jupyter
from IPython.display import HTML

# OpenAI gym
import gym

# Import local script
import agents

# numpy
import numpy as np

# To speed up the algorithm
from multiprocessing import Pool
n_jobs = 8 # Set your number of cores here

In [2]:
def trial_agent(agent, trials=50, limit=500):
    env = gym.make(agent.game)

    scores = []
    for i in range(trials):
        observation = env.reset()
        rewards = []
        for t in range(limit):
            action = agent.predict(observation)
            observation, reward, done, info = env.step(action)
            if done:
                break
            rewards.append(reward)
        scores.append(sum(rewards))
        
    data_dict = {
        "agent" : agent, 
        "weights" : agent.w, 
        "pedigree" : agent.pedigree, 
        "minimum" : min(scores), 
        "maximum" : max(scores), 
        "mean" : sum(scores)/len(scores)
    }
    
    env.close()
    
    return data_dict

In [3]:
def genetic_algorithm(results, old=5, new=95, n_parents=2, generations=25, 
                      mutation_rate=0.01, mutation_amount=0.5, order=1, max_score=499.0, 
                      game="CartPole-v1"):
    for round in range(generations):
        # Sort agents by score (fitness)
        top_scores = sorted(results, key=lambda x: x["mean"], reverse=True)

        # The survival of the fittest. Wikipedia calls this "elitism".
        # The top agents of a generation are carried over to the next
        survivors = top_scores[:old]

        # To start breeding new agents, I'll mix weights (genes)
        weight_shape = top_scores[0]["weights"].shape
        gene_pool = [list(i["weights"].flatten()) for i in top_scores]
        pedigree_list = [i["pedigree"] for i in top_scores]
        genome_size = top_scores[0]["weights"].size

        # Scores can be negative, so here I make them all positive
        # They also need to sum to 1 for random sampling
        min_score = min([i["mean"] for i in top_scores])
        sum_score = sum([i["mean"]+min_score for i in top_scores])
        probs = [(i["mean"]+min_score)/sum_score for i in top_scores]

        # For each new agent, randomly select parents
        # Higher-fitness agents are likelier to sire new agents
        children = []
        for birth in range(new):
            parents = np.random.choice(np.arange(len(gene_pool)), 
                             size=n_parents, 
                             replace=False, 
                             p=probs)

            # The offspring get a mix of each parent's weights
            # The weights (genes) are simply copied over
            mix = np.random.randint(0, high=n_parents, size=genome_size)

            weights = []
            pedigree = []
            for i in range(genome_size):
                weights.append(gene_pool[parents[mix[i]]][i])
                pedigree.append(pedigree_list[parents[mix[i]]][i])
                # A mutation happens rarely and adds a bit of noise to a gene
                if np.random.random(1) < mutation_rate:
                    weights[i] += float(np.random.normal(0, mutation_amount, 1))
                    pedigree[i] += "M"

            children.append({"weights" : weights, "pedigree" : pedigree})

        # Elitism: the top agents survive to fight another day
        new_agents = [i["agent"] for i in survivors]

        # The offspring are added it
        # With the pedigree variable their ancestors are tracked
        for child in children:
            new_agents.append(
                agents.LinearAgent(
                    np.array(child["weights"]).reshape(weight_shape), 
                    pedigree=child["pedigree"],
                    order=order,
                    game=game))

        # Trial the agents using multiple CPU cores
        p = Pool(n_jobs)
        results = p.map(trial_agent, new_agents)
        p.close()
        
        results = sorted(results, key=lambda x: x["mean"], reverse=True)

        print(f"[{round+1:3}] Population average: {sum([i['mean'] for i in results])/len(results):5.1f}")
        print(f"[{round+1:3}] Best mean score:    {results[0]['mean']:5.1f}, Pedigree: {'-'.join(results[0]['pedigree'])}")
        print()
        
        # End early if maximum is reached
        if results[0]['mean'] >= max_score:
            print(f"[{round+1:3}] Best score reached, ending early")
            break
    return results

## Second-order agent

I got this to work with a simple second-oder softmax regression agent. Since there are more parameters to work with, it seems important to use a larger population. This must expand the algorithm's search space.

In [4]:
results = []

new_agents = [agents.LinearAgent(None, id=a, order=2, game="LunarLander-v2") for a in range(200)]

p = Pool(n_jobs)
results = p.map(trial_agent, new_agents)
p.close()

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_0.gif', limit=500)}'>")

In [5]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=2, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_20.gif', limit=500)}'>")

[  1] Population average: -352.9
[  1] Best mean score:      4.2, Pedigree: 137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137-137

[  2] Population average: -319.4
[  2] Best mean score:     25.9, Pedigree: 178-178-46-46-178-46-178-178-178-46-46-178-46-46-46-46-178-178-46-46-178-178-178-46-178-178-178-46-178-46-178-46-46-46-46-46-46

In [6]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=2, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_40.gif', limit=500)}'>")

[  1] Population average: -355.3
[  1] Best mean score:     37.1, Pedigree: 161-79-187-144-146M-120-53-178-187-1-164-137-20-137-46-49-108-137-137-137-137-137-137-187-196-137-187-170-99-62-13-194-43-107-18-137-167-27-137-187-199-59-30-194-81M-137-137-46-187-187-178-71-36-92-77M-78-137-114M-101-9-153-137-137-187-147-144-1-8-84M-46-23M-21M-158M-137-101-159-137-51-8-162-194M-96-194-137-129M-56-17-187M-170-194-153-190M-137-164-151-144-137-80-144MM-120-55-144-76-162-164-144-74-194-151-178-120-137-87-8-181-34MMM-59-187-137-137-31-12-112-178-168-120-16-66-185-137-137-194-137-137-151-32-194-81-46-144M-164-178-180-178-185-71-178-185-137M-148-137-137-1-137-40-148-137-187M-198-88-137-194-144-137-194-153-104M-46-117-137-189M-151-51-57-13-56-137-69-12-137

[  2] Population average: -379.0
[  2] Best mean score:     42.9, Pedigree: 104-40-91-14-14-12-137-180-84M-1-196-104-77-120-198-51-12-14-84-1M-185-137-91-185-144-92-126-1-96-14-12-185-96-1-1M-12-185-96-84-12-196-30-14-117-96-137-14-121-96-51-178-1

In [7]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=2, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_60.gif', limit=500)}'>")

[  1] Population average: -383.9
[  1] Best mean score:     88.0, Pedigree: 150-49-187-196-137-77-137-29-133-84-120-14-137-144-71-144-101-13-51-181-144-137M-77-169M-194-71-133-141M-148M-51MM-10-194M-92-153-1M-196-126M-112-47-39-175-150M-74-159M-81MM-137M-69M-8-187-187M-120-167-133-121-104-161-101-53M-178-46-137-19-46M-120-147-51M-84-92-84-162-52-137-137-137-40-45M-137M-137-137-9-77MM-187-137-46-129M-146-137M-14-96-194-153-153-84-196-151-154-46-91-144M-88-40-71M-76-182-53-99-8M-137M-59-178M-57-87-170-84-71-137-137-182-163-199-91M-8-137-14-77-120-137-141-175-137-92-92-137MM-185-84-120M-156-43-84-20-46-51-14-199-117-12-32-101-163-185-116MM-52-39-137-137-187-153-198MM-104-137-196-153MMM-163-148-82M-127-17-46M-170-137-190MM-187M-165M-104-144-29-121M-137-46M-59M

[  2] Population average: -366.6
[  2] Best mean score:     85.1, Pedigree: 150-49-187-196-137-77-137-29-133-84-120-14-137-144-71-144-101-13-51-181-144-137M-77-169M-194-71-133-141M-148M-51MM-10-194M-92-153-1M-196-126M-112-47-39-175-

In [8]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=2, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_80.gif', limit=500)}'>")

[  1] Population average: -401.0
[  1] Best mean score:    100.8, Pedigree: 150-144-137-110MM-137-77-137-137-133-84M-153-13-153-137-133-51-20-137-137M-121-185-8-51-169MM-84-149-194-46-148-146MM-10-198MM-92-153M-92M-63-126M-153M-8-187M-17M-59-74-199-81M-137M-137-79M-91-29-178-46-81-121-104-194-144-151-185M-137-137M-76MM-83-84M-161-14-81-63M-84-51-181-21M-137M-181-194M-71MM-17-137-99M-154-14-187-112-137-182M-146M-17-187M-181-137-34M-120-8-79-161M-154-137-80M-46-187M-14-43MMM-151M-12M-194-14-8-46M-81-137M-137-104-8-12-181M-137-178-194MM-91-92-31-8-137-178-196-120-137-66-185-193-137-83M-137-84-59-120M-190-185-51M-14-133-137-180-96M-1-84-178M-94M-137-84-116-194-1M-133-196-12-137-51-153MM-137-199-91-185-185M-194-153-14M-46M-84-178M-189M-59-17-104-117MM-137-96MM-137-46M-153

[  2] Population average: -396.2
[  2] Best mean score:    100.3, Pedigree: 150-144-137-110MM-137-77-137-137-133-84M-153-13-153-137-133-51-20-137-137M-121-185-8-51-169MM-84-149-194-46-148-146MM-10-198MM-92-153M-92M-63-126

In [9]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=2, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_100.gif', limit=500)}'>")

[  1] Population average: -414.3
[  1] Best mean score:    109.2, Pedigree: 104M-194-137-110MM-115-153M-137-137-133-137-194-14-137M-104M-71-49-39M-137-137M-137M-195-8-107-169M-194-39MM-29M-94M-20-121-154-185-92-52-1MM-63-126MMM-153M-159M-12-199-59-79-117M-96-12-137M-46-187-161-178-167-180MM-104-46-137-144-91-185MMMM-14MM-144-137-51M-51MM-187-14-133-92-69M-175MM-52-117M-194-137M-101-71-1-137-91-194M-14-187-162-137M-129M-133M-29-26M-181-104-77-194-14-46MM-151-84-137M-39M-130-187M-175-71M-76-115-53-144-177-194-81-14-57-104MM-14-69-71-181-126-19-52M-137M-121-8-146M-178M-181MM-169M-165-115-112-137-137M-194M-8M-137-14MM-32M-14M-42-121MM-20-92M-92-71M-84M-67M-12-144MM-101M-96-185MM-94-12-1M-164MM-137M-59M-153-198-104-137-134-91M-185-163-14-57-17-46-170M-116-59M-59-46-104-9M-29-96MM-164M-46-59MM

[  2] Population average: -373.8
[  2] Best mean score:    109.8, Pedigree: 104M-40M-187M-110MM-115-77-137-137-133-137M-46-88-137-144-71-8-101-137-176M-181-144-8-137M-169MM-51-137-126-94M-99-51MM-154-

In [10]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=2, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_120.gif', limit=500)}'>")

[  1] Population average: -516.5
[  1] Best mean score:    139.4, Pedigree: 104MM-144-137M-146M-137-120-178-29-133M-137M-120M-88-137M-149MM-133-194MM-39MM-14-84-91M-137-8-81-169MM-84-39MM-91MM-94M-148-62MM-13-194M-146-153M-1MM-63-187M-199-5MM-137-12-59-79M-14-81MM-12-187-147-96-65-65M-167-1MM-104-104-137-47M-40M-178M-137-144-26MM-46M-199M-147-137-133-182-69M-162M-52-117MM-137M-81M-45-17M-1-59M-8-9-77MM-187M-112M-137-129M-14-43M-26M-170M-104-115-194-14-46M-161-84MM-91-39-196-137M-161-71M-76-46M-53-144-12-194MM-127-14-120-96-14M-57M-29MM-181-178-137M-52M-137M-31-181-17M-14M-196-120-137-141-112-137-137M-69-43-137-14MM-21M-14M-42-194-14-46-137-180-84-67M-84-81MM-8M-194-185-94-65M-46-178-1-170-137-51M-120MM-88-134-91M-185-185-82M-57MM-126-46-170M-116-59M-59-46-104-13-29-121-137-50MM-153MM

[  2] Population average: -560.5
[  2] Best mean score:    132.9, Pedigree: 71-49MM-137-146M-137-77M-137-137M-133M-137M-196-120-77M-149MM-133-194MM-20-137-137M-181M-137-108-187M-169M-196-71M-133-94-148-12

In [11]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=2, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_140.gif', limit=500)}'>")

[  1] Population average: -519.3
[  1] Best mean score:    141.5, Pedigree: 104M-144-137-146-146M-153MM-137-137-133-84MM-46M-13-137M-149MM-71-194MM-39M-187-137M-181M-137-108-40M-169MMMM-51-71M-29M-46-20M-46M-10-94M-194-52-59-63-189M-153M-5MM-12-199-59M-84M-199-137M-137MM-84M-194-115-161-12-91-194-51-34-161-96M-91MM-178-9-13-26M-46MM-88M-147-14-81-185-132MM-160-150M-137-59-81-101M-71-1M-137-137-84-77MM-187-112-137-129M-137-43M-14M-81MM-104-168-43-14-46MM-161M-84-91MM-80M-196-187M-14-137M-76-162M-53-144-8MM-194-42-199-57-96-8-12-137-137-137-112-187M-199-31-8-146M-178-164-167-165-59MM-185-26-137M-194M-43-137-84-21MM-14MMMM-81M-107MM-20-46-17-180-84-196MM-71MM-154-101-137-185-116-52-1M-164-39-115MM-26M-51M-198-137-196M-91-163-163-144M-57-126-46MM-57M-194-59M-91-17MM-104MM-13M-29-121-137-46-153

[  2] Population average: -492.1
[  2] Best mean score:    139.4, Pedigree: 104M-40M-137-146-146M-153MM-137-137-133M-102M-46M-13-137MM-149MM-71-194MM-20M-187-137M-181M-137M-108-40M-169MMM-51-39M-29M

In [12]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=2, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_160.gif', limit=500)}'>")

[  1] Population average: -517.3
[  1] Best mean score:    155.0, Pedigree: 104M-144-137MM-146-146MM-77MM-178-17-133-102-46M-137M-137M-149MM-133-49M-20M-187-137M-137M-137-8-40-169M-196M-71M-91-46-148-146M-154-185-146MM-120M-92M-63-126MM-153M-159M-137-46-59-50-117-81MMM-12M-167-194-187MM-69M-12-91-20-121-104-161-47MM-91MM-185M-46M-175-26M-46M-84MM-147MM-137-133MM-92M-132-51-107-117MM-59-81M-45-66M-196-59M-137-77-127MM-187M-12M-180MM-96M-133M-29-154-181-46M-168-43-84M-79M-161M-63-91M-46MM-196-187M-161M-137M-76-137-194-14MMM-8MM-46M-178-14-120M-87-14M-8-65MMM-137-126-159-91-154M-91M-8-146MM-178M-164-169M-165-141-185-40MMM-107-90MMM-43-84M-14MM-32MMM-14MMM-42M-121MMM-20-46M-137M-180-84M-196MM-12-154-71M-137-185-151MM-52-1M-137-1-115MM-153-198-153MM-137-134M-153MMM-163-148M-144M-57MM-17M-46MMM-170-63M-111-91M-14-104-13-137-121M-164M-46-153MMM

[  2] Population average: -524.6
[  2] Best mean score:    154.8, Pedigree: 104M-144-137MM-146-146MM-77MM-178-17-133-102-46M-137M-137M-149MM-133-49M-

In [13]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=2, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_180.gif', limit=500)}'>")

[  1] Population average: -529.1
[  1] Best mean score:    173.4, Pedigree: 104M-40M-137MM-181-137M-77MMM-178-17-133M-102-46M-137M-137M-149MM-107-49M-39M-187-137M-91M-137-8-40-169M-196M-149M-91-46-148-51MM-154-185-146MMM-107-92M-12M-126MM-199M-8M-12-46-59M-74-117-81MMM-12M-69MMM-79MM-187MM-69-178-194-20-51MM-104-161-47MM-91MM-185M-137M-13M-26M-46M-84MMMMM-147M-137-133MM-182-132MM-51-107-117MM-59-81M-144MM-17M-196-74-137-77-127MM-187M-112-180MM-96M-137-29-187M-181-194M-168-43-14M-79-161M-63-91MM-46MM-196-187M-40-71MM-76-137-194-99M-8MM-46M-104-84-120-87-40-194M-65MMM-137-178MMM-159-91M-137M-91MM-8-17MM-14M-164-120-165-141-185-40MMM-107-90MMM-144M-137-14MM-32MMM-88M-81M-84MM-20-46M-51M-180-84M-196MM-71M-178MMM-8M-96M-185-116M-52-1M-84M-1-115MM-153-51-198M-96M-199-153MMM-163-148-144M-57MM-17M-46-57M-116-111-91-46-120-144-81-121-164M-46-153MMM

[  2] Population average: -499.3
[  2] Best mean score:    170.8, Pedigree: 104M-181-137-146-137-77-59-17-133M-137M-46M-120-137MM-149MM-133-49M-20M

In [14]:
results = genetic_algorithm(results, old=25, new=175, generations=20, order=2, max_score=200.0, game="LunarLander-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('lander_200.gif', limit=500)}'>")

[  1] Population average: -538.1
[  1] Best mean score:    169.0, Pedigree: 104M-40M-137MM-181-137M-77MMM-178-17-133M-102-46M-137M-137M-149MM-107-49M-39M-187-137M-91M-137-8-40-169M-196M-149M-91-46-148-51MM-154-185-146MMM-107-92M-12M-126MM-199M-8M-12-46-59M-74-117-81MMM-12M-69MMM-79MM-187MM-69-178-194-20-51MM-104-161-47MM-91MM-185M-137M-13M-26M-46M-84MMMMM-147M-137-133MM-182-132MM-51-107-117MM-59-81M-144MM-17M-196-74-137-77-127MM-187M-112-180MM-96M-137-29-187M-181-194M-168-43-14M-79-161M-63-91MM-46MM-196-187M-40-71MM-76-137-194-99M-8MM-46M-104-84-120-87-40-194M-65MMM-137-178MMM-159-91M-137M-91MM-8-17MM-14M-164-120-165-141-185-40MMM-107-90MMM-144M-137-14MM-32MMM-88M-81M-84MM-20-46M-51M-180-84M-196MM-71M-178MMM-8M-96M-185-116M-52-1M-84M-1-115MM-153-51-198M-96M-199-153MMM-163-148-144M-57MM-17M-46-57M-116-111-91-46-120-144-81-121-164M-46-153MMM

[  2] Population average: -543.8
[  2] Best mean score:    168.7, Pedigree: 104M-40M-137MM-181-137M-77MMM-178-17-133M-102-46M-137M-137M-149MM-107-4

The lander should make reasonable landings.