# The Bipedal Walker

This environment has a huge amount of parameters, so it's difficult to run. Without a better processor, it might not be worth doing. You can read the rules [here](https://github.com/openai/gym/wiki/BipedalWalker-v2).

In [1]:
# Display GIFs in Jupyter
from IPython.display import HTML

# OpenAI gym
import gym

# Import local script
import agents

# numpy
import numpy as np

# To speed up the algorithm
from multiprocessing import Pool
n_jobs = 4 # Set your number of cores here

In [2]:
def trial_agent(agent, trials=50, limit=500):
    env = gym.make(agent.game)

    scores = []
    for i in range(trials):
        observation = env.reset()
        rewards = []
        for t in range(limit):
            action = agent.predict(observation)
            observation, reward, done, info = env.step(action)
            if done:
                break
            rewards.append(reward)
        scores.append(sum(rewards))
        
    data_dict = {
        "agent" : agent, 
        "weights" : agent.w, 
        "pedigree" : agent.pedigree, 
        "minimum" : min(scores), 
        "maximum" : max(scores), 
        "mean" : sum(scores)/len(scores)
    }
    
    env.close()
    
    return data_dict

In [3]:
def genetic_algorithm(results, old=5, new=95, n_parents=2, generations=25, 
                      mutation_rate=0.01, mutation_amount=0.5, order=1, max_score=499.0, 
                      game="CartPole-v1"):
    for round in range(generations):
        # Sort agents by score (fitness)
        top_scores = sorted(results, key=lambda x: x["mean"], reverse=True)

        # The survival of the fittest. Wikipedia calls this "elitism".
        # The top agents of a generation are carried over to the next
        survivors = top_scores[:old]

        # To start breeding new agents, I'll mix weights (genes)
        weight_shape = top_scores[0]["weights"].shape
        gene_pool = [list(i["weights"].flatten()) for i in top_scores]
        pedigree_list = [i["pedigree"] for i in top_scores]
        genome_size = top_scores[0]["weights"].size

        # Scores can be negative, so here I make them all positive
        # They also need to sum to 1 for random sampling
        min_score = min([i["mean"] for i in top_scores])
        sum_score = sum([i["mean"]+min_score for i in top_scores])
        probs = [(i["mean"]+min_score)/sum_score for i in top_scores]

        # For each new agent, randomly select parents
        # Higher-fitness agents are likelier to sire new agents
        children = []
        for birth in range(new):
            parents = np.random.choice(np.arange(len(gene_pool)), 
                             size=n_parents, 
                             replace=False, 
                             p=probs)

            # The offspring get a mix of each parent's weights
            # The weights (genes) are simply copied over
            mix = np.random.randint(0, high=n_parents, size=genome_size)

            weights = []
            pedigree = []
            for i in range(genome_size):
                weights.append(gene_pool[parents[mix[i]]][i])
                pedigree.append(pedigree_list[parents[mix[i]]][i])
                # A mutation happens rarely and adds a bit of noise to a gene
                if np.random.random(1) < mutation_rate:
                    weights[i] += float(np.random.normal(0, mutation_amount, 1))
                    pedigree[i] += "M"

            children.append({"weights" : weights, "pedigree" : pedigree})

        # Elitism: the top agents survive to fight another day
        new_agents = [i["agent"] for i in survivors]

        # The offspring are added it
        # With the pedigree variable their ancestors are tracked
        for child in children:
            new_agents.append(
                agents.LinearAgent(
                    np.array(child["weights"]).reshape(weight_shape), 
                    pedigree=child["pedigree"],
                    order=order,
                    game=game))

        # Trial the agents using multiple CPU cores
        p = Pool(n_jobs)
        results = p.map(trial_agent, new_agents)
        p.close()
        
        results = sorted(results, key=lambda x: x["mean"], reverse=True)

        print(f"[{round+1:3}] Population average: {sum([i['mean'] for i in results])/len(results):5.1f}")
        print(f"[{round+1:3}] Best mean score:    {results[0]['mean']:5.1f}, Pedigree: {'-'.join(results[0]['pedigree'])}")
        print()
        
        # End early if maximum is reached
        if results[0]['mean'] >= max_score:
            print(f"[{round+1:3}] Best score reached, ending early")
            break
    return results

## Simple agent

Since the state has so many observations, I only try the simplest agent possible. The robot at least doesn't immediately fall over.

In [4]:
results = []

for a in range(200):
    results.append(trial_agent(agents.LinearAgent(None, id=a, order=1, game="BipedalWalker-v2")))

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('bipedal_0.gif', limit=500)}'>")

In [5]:
results = genetic_algorithm(results, old=25, new=175, generations=10, order=1, max_score=300.0, game="BipedalWalker-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('bipedal_10.gif', limit=500)}'>")

[  1] Population average: -10.0
[  1] Best mean score:      7.2, Pedigree: 99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-99-9

In [6]:
results = genetic_algorithm(results, old=25, new=175, generations=10, order=1, max_score=300.0, game="BipedalWalker-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('bipedal_20.gif', limit=500)}'>")

[  1] Population average: -10.1
[  1] Best mean score:      9.5, Pedigree: 54-79-79-179-54-62-54-136-163-54-99-83-179-62-99-80-80-112-97-17-179-179-37M-180-34-62-159-64-62-67-70-143-180-68-191-54-34-191-4-79-99-67-10-179-34-62-8-34-107M-149-99-179-136-4-161-97-112-99-62-62-191-99-23-34-136-4-99-21-80-68-191-179-99-70-83-62-79-62-112-99-63-14-136-123-180-68-67-125-159-179-99-99-88-54-136-68-97-149-180-62-47-87-125-8-159-8-112-10-143-179-83-83M-88-70-48M-68-99-99-161-4-54-34-67-54-67-149-179-34-99-182-180-99M-119-37-83-1-86-87-191-63-10-46-4M-62-191M-54-179-179-4-136-179-10-180-67-47-40-68-35-10-62M-55M-191-80-179-182-99-5-68-69-68-0-136-80-191-99-100-99-119-62-34-100M-99-149-4-32-83-99-68-182-125-4-89-64-68-161-44-1-34-149-179-45-4-179-62-99-99-191-6-93-87M-182-79M-47-50-107-64-119-143-34-100-30-99-125-99-40-45-112-179-99-191-70-21-70-159-99-179-149-93-99-179-99-143-119-46-179-191-99-191-99-62-47-149-80-83-62-136-10M-159-34-99-182-6-12-86-70-163-99-119-119-126-125-68-119-110-182-50-47-2

In [7]:
results = genetic_algorithm(results, old=25, new=175, generations=10, order=1, max_score=300.0, game="BipedalWalker-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('bipedal_30.gif', limit=500)}'>")

[  1] Population average:  -8.0
[  1] Best mean score:      9.6, Pedigree: 54-79-79-179-54-62-54-136-163-54-99-83-179-62-99-80-80-112-97-17-179-179-37M-180-34-62-159-64-62-67-70-143-180-68-191-54-34-191-4-79-99-67-10-179-34-62-8-34-107M-149-99-179-136-4-161-97-112-99-62-62-191-99-23-34-136-4-99-21-80-68-191-179-99-70-83-62-79-62-112-99-63-14-136-123-180-68-67-125-159-179-99-99-88-54-136-68-97-149-180-62-47-87-125-8-159-8-112-10-143-179-83-83M-88-70-48M-68-99-99-161-4-54-34-67-54-67-149-179-34-99-182-180-99M-119-37-83-1-86-87-191-63-10-46-4M-62-191M-54-179-179-4-136-179-10-180-67-47-40-68-35-10-62M-55M-191-80-179-182-99-5-68-69-68-0-136-80-191-99-100-99-119-62-34-100M-99-149-4-32-83-99-68-182-125-4-89-64-68-161-44-1-34-149-179-45-4-179-62-99-99-191-6-93-87M-182-79M-47-50-107-64-119-143-34-100-30-99-125-99-40-45-112-179-99-191-70-21-70-159-99-179-149-93-99-179-99-143-119-46-179-191-99-191-99-62-47-149-80-83-62-136-10M-159-34-99-182-6-12-86-70-163-99-119-119-126-125-68-119-110-182-50-47-2

In [8]:
results = genetic_algorithm(results, old=25, new=175, generations=10, order=1, max_score=300.0, game="BipedalWalker-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('bipedal_40.gif', limit=500)}'>")

[  1] Population average:  -8.3
[  1] Best mean score:     14.0, Pedigree: 99-4-47-50-139M-1-101-148M-60-99M-57-93-179-62-62-19-27-66-196-138-99-63-64-143-34-142M-62M-75-176-68-157-42M-149-0-4-42MM-190M-179-99-83-1-124-10-112-55M-160-186-32-93-62-112-112M-136-182M-94M-86-136-148-34M-29-94-19-99-94-99-47-94-176-157-101-70-195-99-99-99-10M-19-76-93-124M-68-112-110-123-119-119-195-114-45M-99-43-195-99-10-179-45M-120-174-10-58-99M-23-99-157-112-42M-34-68-99M-8-21-100-83-46-104M-149-6M-93-68-195-54-197MM-34-195-99-33-67-99M-157-99M-164M-67-174M-70-110-0-63-45-21-182M-99-47-148-112-182-45-99-179-179M-99-99-79M-84-17-99-101-47-84-104-62M-141-111-99-47-42-58-175-100-18-75M-138-34-120-45-99-93-0-119M-99-179M-21MM-110M-10-69-104-157-99-149-30-144-42-180-93M-112M-173M-182-57-47-112-120-27-100M-63-99-145-43-73-6-75-48-112-176-40-63-68-64-77-99M-99-110-47-70-10-83M-100M-45-4-150-141-99-111-82-160MM-67-160-58-94-93-63M-57-93-40-160-191-173M-112-141-76-45-47-187-111-27-45-124-76-77M-5-93-132-67-104-4

In [9]:
results = genetic_algorithm(results, old=25, new=175, generations=10, order=1, max_score=300.0, game="BipedalWalker-v2")

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

HTML(f"<img src='{winner['agent'].render('bipedal_50.gif', limit=500)}'>")

[  1] Population average:  -5.8
[  1] Best mean score:     15.5, Pedigree: 99-4-47-50-139M-1-101-148M-60-99M-57-93-179-62-62-19-27-66-196-138-99-63-64-143-34-142M-62M-75-176-68-157-42M-149-0-4-42MM-190M-179-99-83-1-124-10-112-55M-160-186-32-93-62-112-112M-136-182M-94M-86-136-148-34M-29-94-19-99-94-99-47-94-176-157-101-70-195-99-99-99-10M-19-76-93-124M-68-112-110-123-119-119-195-114-45M-99-43-195-99-10-179-45M-120-174-10-58-99M-23-99-157-112-42M-34-68-99M-8-21-100-83-46-104M-149-6M-93-68-195-54-197MM-34-195-99-33-67-99M-157-99M-164M-67-174M-70-110-0-63-45-21-182M-99-47-148-112-182-45-99-179-179M-99-99-79M-84-17-99-101-47-84-104-62M-141-111-99-47-42-58-175-100-18-75M-138-34-120-45-99-93-0-119M-99-179M-21MM-110M-10-69-104-157-99-149-30-144-42-180-93M-112M-173M-182-57-47-112-120-27-100M-63-99-145-43-73-6-75-48-112-176-40-63-68-64-77-99M-99-110-47-70-10-83M-100M-45-4-150-141-99-111-82-160MM-67-160-58-94-93-63M-57-93-40-160-191-173M-112-141-76-45-47-187-111-27-45-124-76-77M-5-93-132-67-104-4