In [1]:
from IPython.display import HTML

import gym

# Import local script
import actors

import numpy as np

In [2]:
def trial_actor(actor, trials=100, limit=1000):
    env = gym.make('CartPole-v0')

    scores = []
    for i in range(trials):
        observation = env.reset()
        score = 0
        for t in range(limit):
            action = actor.predict(observation)
            observation, reward, done, info = env.step(action)
            if done:
                break
            score += reward
        scores.append(score)
        
    data_dict = {
        "actor" : actor, 
        "weights" : actor.w, 
        "minimum" : min(scores), 
        "maximum" : max(scores), 
        "mean" : sum(scores)/len(scores)
    }
    
    env.close()
    
    return data_dict

## Simple actor

In [3]:
results = []

for a in range(25):
    results.append(trial_actor(actors.SimpleActor(None)))

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='./{winner['actor'].render('genetic_simple_test.gif')}'>")

{'actor': <actors.SimpleActor object at 0x7fbf0fa1c4a8>, 'weights': array([ 0.02033437, -0.80453892, -0.36425672, -0.66378632,  0.90555347]), 'minimum': 33.0, 'maximum': 156.0, 'mean': 61.35}


In [4]:
for rounds in range(25):
    top_scores = sorted(results, key=lambda x: x["mean"], reverse=True)
    
    survivors = top_scores[-5:]
    
    gene_pool = [i["weights"] for i in top_scores]
    genome_size = len(gene_pool[0])
    
    total_scores = sum([i["mean"] for i in top_scores])
    
    children = []
    for birth in range(20):
        parents = np.random.choice(np.arange(len(gene_pool)), 
                         size=2, 
                         replace=False, 
                         p=[i["mean"]/total_scores for i in top_scores])

        mix = np.random.randint(0, high=2, size=genome_size)

        child = []
        for i in range(genome_size):
            child.append(gene_pool[parents[mix[i]]][i])
        
        if np.random.random(1) > 0.95:
            mutated_gene = int(np.random.randint(0, genome_size, 1))
            child[mutated_gene] += float(np.random.uniform(-0.1, 0.1, 1))

        children.append(child)    
    
    new_actors = [i["actor"] for i in survivors]
    
    for child in children:
        new_actors.append(actors.SimpleActor(child))

    results = []

    for actor in new_actors:
        results.append(trial_actor(actor))

    print(f"Best mean score: {max([i['mean'] for i in results])}")

Best mean score: 37.29
Best mean score: 32.65
Best mean score: 34.76
Best mean score: 31.17
Best mean score: 58.88
Best mean score: 53.88
Best mean score: 57.84
Best mean score: 67.46
Best mean score: 65.33
Best mean score: 67.33
Best mean score: 65.31
Best mean score: 64.91
Best mean score: 72.23
Best mean score: 71.19
Best mean score: 70.21
Best mean score: 67.43
Best mean score: 72.69
Best mean score: 70.54
Best mean score: 69.46
Best mean score: 70.65
Best mean score: 69.93
Best mean score: 67.15
Best mean score: 69.47
Best mean score: 69.79
Best mean score: 67.89


In [5]:
winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='./{winner['actor'].render('model_simple.gif')}'>")

{'actor': <actors.SimpleActor object at 0x7fbf0fa1ce80>, 'weights': [0.020334370048818995, -0.7315010196867933, -0.364256722047809, 0.22168651195691513, 0.18321013920684126], 'minimum': 30.0, 'maximum': 177.0, 'mean': 67.89}


## Complex Actor

In [6]:
results = []

for a in range(25):
    results.append(trial_actor(actors.ComplexActor(None)))

winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='./{winner['actor'].render('genetic_complex_test.gif')}'>")

{'actor': <actors.ComplexActor object at 0x7fbf0492b860>, 'weights': array([ 0.05624499,  0.91669354, -0.60696225,  0.20122468,  0.54291869,
        0.45154117,  0.81096258,  0.24194493,  0.03103505]), 'minimum': 31.0, 'maximum': 92.0, 'mean': 51.39}


In [7]:
for rounds in range(25):
    top_scores = sorted(results, key=lambda x: x["mean"], reverse=True)
    
    survivors = top_scores[-5:]
    
    gene_pool = [i["weights"] for i in top_scores]
    genome_size = len(gene_pool[0])
    
    total_scores = sum([i["mean"] for i in top_scores])
    
    children = []
    for birth in range(20):
        parents = np.random.choice(np.arange(len(gene_pool)), 
                         size=2, 
                         replace=False, 
                         p=[i["mean"]/total_scores for i in top_scores])

        mix = np.random.randint(0, high=2, size=genome_size)

        child = []
        for i in range(genome_size):
            child.append(gene_pool[parents[mix[i]]][i])
        
        if np.random.random(1) > 0.95:
            mutated_gene = int(np.random.randint(0, genome_size, 1))
            child[mutated_gene] += float(np.random.uniform(-0.1, 0.1, 1))

        children.append(child)    
    
    new_actors = [i["actor"] for i in survivors]
    
    for child in children:
        new_actors.append(actors.ComplexActor(child))

    results = []

    for actor in new_actors:
        results.append(trial_actor(actor))

    print(f"Best mean score: {max([i['mean'] for i in results])}")

Best mean score: 63.87
Best mean score: 199.0
Best mean score: 131.76
Best mean score: 185.89
Best mean score: 178.53
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0
Best mean score: 199.0


In [8]:
winner = sorted(results, key=lambda x: x["mean"], reverse=True)[0]

print(winner)

HTML(f"<img src='./{winner['actor'].render('genetic_complex.gif')}'>")

{'actor': <actors.ComplexActor object at 0x7fbf2c089e10>, 'weights': [-0.030026029279145883, 0.3444529737511939, 0.2936193761352276, 0.9645224173868459, 0.8054054216719782, -0.3988432844848584, 0.31046046288422535, 0.24194492602911422, 0.10390274082494], 'minimum': 199.0, 'maximum': 199.0, 'mean': 199.0}
