In [1]:
# Python packages: DEAP (evolutionary algorithms) and gym (RL environments).
!pip install deap
!pip install gym
# System packages for headless rendering: OpenGL bindings and the Xvfb virtual framebuffer.
!apt-get install python-opengl -y
!apt install xvfb -y

# pyvirtualdisplay is imported in the next cell to start the virtual display.
!pip install pyvirtualdisplay
# NOTE(review): "piglet" is never imported in this notebook; possibly a typo for "pyglet" — confirm.
!pip install piglet

Reading package lists... Done
Building dependency tree       
Reading state information... Done
python-opengl is already the newest version (3.1.0+dfsg-1).
0 upgraded, 0 newly installed, 0 to remove and 14 not upgraded.
Reading package lists... Done
Building dependency tree       
Reading state information... Done
xvfb is already the newest version (2:1.19.6-1ubuntu4.8).
0 upgraded, 0 newly installed, 0 to remove and 14 not upgraded.


In [2]:
from pyvirtualdisplay import Display
# Start an invisible 1400x900 virtual display; presumably needed so that the
# env.render() calls made during fitness evaluation have a display to draw on.
display = Display(visible=0, size=(1400, 900))
display.start()

<Display cmd_param=['Xvfb', '-br', '-nolisten', 'tcp', '-screen', '0', '1400x900x24', ':1001'] cmd=['Xvfb', '-br', '-nolisten', 'tcp', '-screen', '0', '1400x900x24', ':1001'] oserror=None return_code=None stdout="None" stderr="None" timeout_happened=False>

In [3]:
import os
if type(os.environ.get("DISPLAY")) is not str or len(os.environ.get("DISPLAY"))==0:
    !bash ../xvfb start
    %env DISPLAY=:1

In [4]:
from deap import tools
import random


def varOr(population, toolbox, lambda_, cxpb, mutpb):
    """Produce ``lambda_`` offspring, each by crossover OR mutation OR reproduction.

    For every offspring one uniform random number decides the operator:

    * below ``cxpb``: two distinct parents are sampled and cloned, mated with
      ``toolbox.mate``, and the first child is kept;
    * below ``cxpb + mutpb``: one parent is cloned and passed to
      ``toolbox.mutate``;
    * otherwise: one parent is cloned unchanged (its fitness is kept).

    Children produced by crossover or mutation have their fitness values
    deleted so that they are re-evaluated by the caller.

    :param population: individuals to draw parents from (not modified).
    :param toolbox: object providing ``clone``, ``mate`` and ``mutate``.
    :param lambda_: number of offspring to generate.
    :param cxpb: probability of producing an offspring by crossover.
    :param mutpb: probability of producing an offspring by mutation.
    :returns: list of ``lambda_`` new individuals.
    """
    def _breed():
        draw = random.random()
        if draw < cxpb:
            # Crossover: both children are invalidated, only the first is kept.
            parents = random.sample(population, 2)
            first, second = [toolbox.clone(parent) for parent in parents]
            first, second = toolbox.mate(first, second)
            del first.fitness.values
            del second.fitness.values
            return first
        if draw < cxpb + mutpb:
            # Mutation of a single cloned parent.
            mutant = toolbox.clone(random.choice(population))
            mutant, = toolbox.mutate(mutant)
            del mutant.fitness.values
            return mutant
        # Plain reproduction: the clone keeps its parent's fitness.
        return toolbox.clone(random.choice(population))

    return [_breed() for _ in range(lambda_)]


def eaMuPlusLambda(population, toolbox, mu, lambda_, cxpb, mutpb, ngen,
                   stats=None, halloffame=None, verbose=__debug__):
    """Run a (mu + lambda) evolutionary loop with hall-of-fame re-injection.

    The initial population is evaluated first (generation 0). Then, for each
    of ``ngen`` generations:

    1. clones of every hall-of-fame member are appended to the population;
    2. ``lambda_`` offspring are generated with :func:`varOr`;
    3. offspring with an invalid fitness are evaluated;
    4. the hall of fame is updated with the offspring;
    5. ``toolbox.select`` picks ``mu`` survivors from population + offspring,
       which replace the contents of ``population`` in place.

    :param population: list of individuals; modified in place.
    :param toolbox: object providing ``map``, ``evaluate``, ``clone``,
        ``select`` and the operators required by :func:`varOr`.
    :param mu: number of individuals selected for the next generation.
    :param lambda_: number of offspring produced per generation.
    :param cxpb: crossover probability forwarded to :func:`varOr`.
    :param mutpb: mutation probability forwarded to :func:`varOr`.
    :param ngen: number of generations to run.
    :param stats: optional statistics object; its ``compile`` result is
        recorded each generation.
    :param halloffame: optional hall of fame, updated each generation.
    :param verbose: when true, print the logbook stream after each record.
    :returns: tuple ``(population, logbook)``.
    """
    def _evaluate_pending(candidates):
        # Only individuals whose fitness is not valid are (re-)evaluated.
        pending = [cand for cand in candidates if not cand.fitness.valid]
        scores = toolbox.map(toolbox.evaluate, pending)
        for cand, score in zip(pending, scores):
            cand.fitness.values = score
        return len(pending)

    def _report(generation, evaluated):
        # Statistics are always compiled on the current population.
        summary = {} if stats is None else stats.compile(population)
        logbook.record(gen=generation, nevals=evaluated, **summary)
        if verbose:
            print(logbook.stream)

    logbook = tools.Logbook()
    extra_fields = stats.fields if stats else []
    logbook.header = ['gen', 'nevals'] + extra_fields

    # Generation 0: score the initial population.
    evaluated = _evaluate_pending(population)
    if halloffame is not None:
        halloffame.update(population)
    _report(0, evaluated)

    for generation in range(1, ngen + 1):
        # Re-inject clones of the best-so-far individuals before variation.
        if halloffame is not None:
            population.extend(toolbox.clone(elite) for elite in halloffame)
        offspring = varOr(population, toolbox, lambda_, cxpb, mutpb)

        evaluated = _evaluate_pending(offspring)

        if halloffame is not None:
            halloffame.update(offspring)

        # Survivors come from parents and offspring together.
        population[:] = toolbox.select(population + offspring, mu)

        _report(generation, evaluated)

    return population, logbook

In [16]:
from deap import base, creator, tools
import numpy as np
from copy import deepcopy
from typing import Callable, Optional, List, Tuple
import gym
from gym import wrappers
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40) # error only
from IPython.display import HTML
from IPython import display as ipythondisplay
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, InputLayer
import glob
import io
import base64

In [6]:
def show_video():
  """Embed the first ``video/*.mp4`` recording in the notebook output.

  The file is read in binary mode, base64-encoded and displayed inline as an
  HTML ``<video>`` element. If no recording exists, a message is printed
  instead.
  """
  mp4list = glob.glob('video/*.mp4')
  if not mp4list:
    print("Could not find video")
    return

  # Read-only mode is enough (no write permission needed) and the context
  # manager guarantees the file handle is closed.
  with io.open(mp4list[0], 'rb') as video_file:
    encoded = base64.b64encode(video_file.read())

  ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    

def wrap_env(env):
  """Return ``env`` wrapped in a ``Monitor`` writing to ``./video`` (``force=True``)."""
  return Monitor(env, './video', force=True)

In [7]:
# Single-objective fitness with a positive weight (the score is maximised).
creator.create("BaseFitness", base.Fitness, weights=(1.0, ))
# An individual is a plain list that carries a BaseFitness as its `fitness` attribute.
creator.create("Individual", list, fitness=creator.BaseFitness)

In [8]:
class Experiment:
    """Evolve the weights of a Keras model with DEAP on a gym environment.

    An individual is a list of numpy arrays with the same layout as
    ``model.get_weights()``. Even positions are evolved; odd positions are
    created as zeros by :meth:`factory` and are never mutated (for the Dense
    layers built in this notebook these are the bias slots).

    Signature annotations are quoted so that defining the class does not
    require ``creator.Individual`` (created dynamically in another cell) to
    exist yet.
    """

    # Probability that a single evolved weight is perturbed by `mutation`.
    MUTATION_RATE: float = 0.15
    # Standard deviation of the Gaussian noise added by `mutation`.
    MUTATION_SIGMA: float = 0.2
    # Position in the weight list that `crossover` exchanges between parents.
    CROSSOVER_INDEX: int = 2
    # Number of episodes averaged per fitness evaluation.
    EPISODES_PER_EVAL: int = 1
    # Upper bound on the number of steps played per episode.
    MAX_STEPS: int = 200

    def __init__(self, 
            population: int, 
            iterations: int,
            model: "Model",
            cross_prob: float = 0.4, 
            mut_prob: float = 0.4,
            tournamet_size: int = 3,
            env = None, 
            engine: "Optional[base.Toolbox]" = None):
        """Configure the experiment and register the DEAP operators.

        :param population: number of individuals (also used as ``mu``).
        :param iterations: number of generations to run.
        :param model: Keras model whose weights are evolved.
        :param cross_prob: probability of creating an offspring by crossover.
        :param mut_prob: probability of creating an offspring by mutation.
        :param tournamet_size: tournament size for selection; also used as
            the hall-of-fame size in :meth:`run`.
        :param env: gym environment; a new ``CartPole-v1`` is created per
            instance when omitted (previously a single environment built at
            definition time was shared by every instance).
        :param engine: optional toolbox to register the operators on; a new
            ``base.Toolbox`` is created when omitted.
        """
        self.population: int = population
        self.iterations: int = iterations
        self.mut_prob = mut_prob
        self.cross_prob = cross_prob 
        self.tournamet_size = tournamet_size

        self.model: Model = model
        self.params = self.model.get_weights()

        self.env = env if env is not None else gym.make("CartPole-v1")

        self.engine: base.Toolbox = engine if engine is not None else base.Toolbox()
        self.engine.register('map', map)
        self.engine.register("individual", 
            tools.initIterate, 
            creator.Individual, 
            self.factory
        )
        self.engine.register('population', 
            tools.initRepeat, 
            list, 
            self.engine.individual, 
            self.population
        )
        self.engine.register('mutate', self.mutation)
        self.engine.register("mate", self.crossover)
        self.engine.register('select', 
            tools.selTournament, 
            tournsize=tournamet_size
        )
        self.engine.register('evaluate', self.fitness)

    def factory(self) -> "creator.Individual":
        """Create a random individual shaped like the model's weight list.

        Even positions are drawn from a normal distribution (mean 0.1,
        standard deviation 0.3); odd positions are filled with zeros.
        """
        individual: List[np.array] = list()
        for i, template in enumerate(self.params):
            if i % 2 == 0:
                individual.append(np.random.normal(0.1, 0.3, size=template.shape))
            else:
                individual.append(np.zeros(shape=template.shape))
        return creator.Individual(individual)

    def mutation(self, individual: "creator.Individual") -> "Tuple[creator.Individual]":
        """Perturb evolved weights in place with independent Gaussian noise.

        Each element of every even-positioned array is changed with
        probability ``MUTATION_RATE`` by adding noise drawn from
        ``N(0, MUTATION_SIGMA)``. Odd-positioned arrays are left untouched.

        The earlier version added the noise to a whole row for every element
        visited, so entire rows were shifted together.

        :returns: one-element tuple containing the mutated individual.
        """
        for i in range(0, len(individual), 2):
            layer = np.asarray(individual[i])
            mask = np.random.random(layer.shape) < self.MUTATION_RATE
            layer[mask] += np.random.normal(
                0.0, self.MUTATION_SIGMA, size=int(mask.sum()))
            individual[i] = layer
        return individual,

    def compare(self, ind1, ind2):
        """Return True when all even-positioned arrays of both individuals are equal.

        Used as the ``similar`` predicate of the hall of fame in :meth:`run`.
        """
        return all(
            np.array_equal(ind1[i], ind2[i])
            for i in range(0, len(ind1), 2)
        )

    def crossover(self, p1: "creator.Individual", p2: "creator.Individual") \
            -> "Tuple[creator.Individual, creator.Individual]":
        """Create two children by exchanging one weight array between parents.

        Each child is a deep copy of one parent, except that the array at
        ``CROSSOVER_INDEX`` is taken from the other parent. Works for any
        number of weight arrays (the earlier version assumed exactly six);
        both parents are expected to have the same length.
        """
        def _child(primary, donor) -> List[np.array]:
            return [
                deepcopy(donor[i] if i == self.CROSSOVER_INDEX else primary[i])
                for i in range(len(primary))
            ]

        return creator.Individual(_child(p1, p2)), creator.Individual(_child(p2, p1))

    def fitness(self, individual: "creator.Individual"):
        """Load the individual into the model and return its mean episode score.

        Plays ``EPISODES_PER_EVAL`` episodes of at most ``MAX_STEPS`` steps,
        rendering every step, and sampling each action from the model's output
        distribution.

        :returns: one-element tuple with the mean total reward.
        """
        self.model.set_weights(individual)
        scores: List[float] = []
        for _ in range(self.EPISODES_PER_EVAL):
            state = self.env.reset()
            score = 0.0
            for _step in range(self.MAX_STEPS):
                self.env.render()
                # Works for any observation size and any number of actions.
                act_prob = np.asarray(
                    self.model.predict(state.reshape(1, -1)),
                    dtype=np.float64,
                ).reshape(-1)
                # Renormalise so that rounding in the model output cannot make
                # np.random.choice reject the probabilities.
                act_prob = act_prob / act_prob.sum()
                action = np.random.choice(np.arange(act_prob.size), 1, p=act_prob)[0]
                next_state, reward, done, _ = self.env.step(action)
                score += reward
                state = next_state
                if done:
                    break
            scores.append(score)
        return np.mean(scores),
    
    def run(self):
        """Run the evolution and return ``(logbook, best_individual)``.

        Uses ``mu = population`` and ``lambda_ = int(0.8 * population)``, and
        prints per-generation statistics and the best fitness found.
        """
        pop: List[creator.Individual] = self.engine.population()
        hof: tools.HallOfFame = tools.HallOfFame(self.tournamet_size, similar=self.compare)
        stats: tools.Statistics = tools.Statistics(lambda ind: ind.fitness.values[0])
        stats.register('min', np.min)
        stats.register('max', np.max)
        stats.register('avg', np.mean)
        stats.register('std', np.std)

        pop, log = eaMuPlusLambda(
            pop,
            self.engine,
            mu = self.population,
            lambda_ = int(0.8 * self.population), 
            cxpb = self.cross_prob, 
            mutpb = self.mut_prob,
            ngen = self.iterations, 
            halloffame = hof, 
            stats = stats,
            verbose = True
        )

        best = hof[0]
        print("Best fitness = {}".format(best.fitness.values[0]))
        return log, best

In [9]:
def build_model(dim: Tuple[int, ...]):
    """Build a fully connected softmax policy network.

    :param dim: layer sizes ``(inputs, hidden_1, ..., hidden_k, outputs)``.
        The first entry is the input size, the last the number of softmax
        outputs and every entry in between a ``tanh`` hidden layer. The
        four-element form used in this notebook yields two hidden layers.
    :returns: the compiled ``Sequential`` model.
    :raises ValueError: if ``dim`` has fewer than two entries.
    """
    if len(dim) < 2:
        raise ValueError("dim must contain at least an input and an output size")

    model = Sequential()
    # Pass the input shape explicitly as a tuple rather than a bare int.
    model.add(InputLayer(input_shape=(dim[0],)))
    for units in dim[1:-1]:
        model.add(Dense(units, activation='tanh'))
    model.add(Dense(dim[-1], activation='softmax'))
    model.compile(optimizer='adam', loss='mse')
    return model

In [25]:
# Run 1: new 4-20-12-2 network, 10 individuals evolved for 50 generations on CartPole-v1.
model_1: Model = build_model((4, 20, 12, 2))

experiment_1 = Experiment(
    population=10,
    iterations=50,
    model=model_1,
    env=gym.make("CartPole-v1")
)

# result_1 is the (logbook, best individual) pair returned by Experiment.run().
result_1 = experiment_1.run()

gen	nevals	min	max	avg 	std    
0  	10    	9  	33 	18.4	7.60526
1  	5     	11 	36 	29.1	7.687  
2  	8     	12 	52 	36.2	10.5148
3  	5     	36 	54 	46  	8.19756
4  	6     	36 	54 	51.4	5.2192 
5  	7     	54 	195	115.9	63.667 
6  	5     	54 	195	157.2	58.8979
7  	5     	28 	195	174  	50.3269
8  	6     	152	195	186.4	17.2   
9  	7     	195	195	195  	0      
10 	8     	152	195	190.7	12.9   
11 	6     	156	195	191.1	11.7   
12 	8     	156	195	191.1	11.7   
13 	7     	195	195	195  	0      
14 	8     	195	195	195  	0      
15 	7     	195	200	196  	2      
16 	7     	53 	200	183.3	43.4972
17 	7     	195	200	197.5	2.5    
18 	7     	195	200	199.5	1.5    
19 	7     	156	200	195.6	13.2   
20 	7     	94 	200	180.6	33.6814
21 	8     	132	200	192.7	20.2882
22 	5     	200	200	200  	0      
23 	6     	200	200	200  	0      
24 	7     	195	200	199.5	1.5    
25 	8     	195	200	199.5	1.5    
26 	7     	46 	200	184.6	46.2   
27 	7     	161	200	196.1	11.7   
28 	8     	200	200	200  	0      
29 	7     	200	2

In [24]:
# Run 2: same setup as run 1 but with 100 generations instead of 50.
model_2: Model = build_model((4, 20, 12, 2))

experiment_2 = Experiment(
    population=10,
    iterations=100,
    model=model_2,
    env=gym.make("CartPole-v1")
)

# result_2 is the (logbook, best individual) pair returned by Experiment.run().
result_2 = experiment_2.run()

gen	nevals	min	max	avg 	std    
0  	10    	12 	142	30.4	37.4625
1  	5     	23 	142	79.2	55.3024
2  	7     	22 	142	102.5	52.2231
3  	6     	27 	142	102  	45.4048
4  	6     	97 	142	137.5	13.5   
5  	8     	56 	142	114.7	33.562 
6  	8     	80 	142	133  	18.5095
7  	6     	128	142	140.6	4.2    
8  	5     	128	142	140.6	4.2    
9  	5     	137	142	141.5	1.5    
10 	6     	142	142	142  	0      
11 	8     	74 	142	135.2	20.4   
12 	7     	119	142	139.7	6.9    
13 	7     	142	142	142  	0      
14 	5     	142	142	142  	0      
15 	7     	140	142	141.4	0.916515
16 	7     	101	142	137.7	12.2479 
17 	6     	142	148	144.4	2.93939 
18 	7     	142	155	147  	4.81664 
19 	5     	142	200	169.6	25.0687 
20 	4     	155	200	195.5	13.5    
21 	7     	92 	200	180.5	39.2817 
22 	6     	196	200	199.6	1.2     
23 	6     	196	200	199.6	1.2     
24 	5     	104	200	190.4	28.8    
25 	8     	166	200	196.6	10.2    
26 	6     	146	200	194.2	16.1109 
27 	7     	167	200	196.7	9.9     
28 	8     	167	200	196.7	9.9     

In [22]:
# Run 3: repeat of the 50-generation configuration with a new network and environment.
model_3: Model = build_model((4, 20, 12, 2))

experiment_3 = Experiment(
    population=10,
    iterations=50,
    model=model_3,
    env=gym.make("CartPole-v1")
)

# result_3 is the (logbook, best individual) pair returned by Experiment.run().
result_3 = experiment_3.run()

gen	nevals	min	max	avg 	std    
0  	10    	9  	85 	29.6	22.4062
1  	8     	24 	85 	53.2	22.0581
2  	8     	47 	85 	69.8	18.6161
3  	6     	37 	85 	68.8	20.029 
4  	7     	70 	116	91.3	17.1351
5  	7     	85 	116	100.5	15.5   
6  	8     	56 	116	100.7	20.3521
7  	6     	56 	117	100.3	21.0431
8  	5     	85 	117	113.5	9.51052
9  	5     	105	117	115.6	3.55528
10 	7     	85 	160	118  	16.9234
11 	5     	116	160	133.1	20.1119
12 	7     	116	160	144.9	18.8491
13 	7     	150	200	175  	20.6155
14 	8     	160	200	179  	17.8494
15 	7     	200	200	200  	0      
16 	6     	200	200	200  	0      
17 	7     	200	200	200  	0      
18 	8     	200	200	200  	0      
19 	8     	200	200	200  	0      
20 	6     	200	200	200  	0      
21 	8     	167	200	196.7	9.9    
22 	7     	200	200	200  	0      
23 	7     	104	200	190.4	28.8   
24 	7     	200	200	200  	0      
25 	6     	200	200	200  	0      
26 	6     	200	200	200  	0      
27 	4     	200	200	200  	0      
28 	5     	200	200	200  	0      
29 	5     	200	2

In [23]:
# Run 4: repeat of the 50-generation configuration with a new network and environment.
model_4: Model = build_model((4, 20, 12, 2))

experiment_4 = Experiment(
    population=10,
    iterations=50,
    model=model_4,
    env=gym.make("CartPole-v1")
)

# result_4 is the (logbook, best individual) pair returned by Experiment.run().
result_4 = experiment_4.run()

gen	nevals	min	max	avg 	std    
0  	10    	11 	64 	27.6	18.6129
1  	7     	14 	64 	53.2	14.169 
2  	5     	26 	64 	56.7	11.4896
3  	4     	64 	73 	66.7	4.12432
4  	5     	64 	73 	71.2	3.6    
5  	6     	64 	84 	75.4	6.21611
6  	6     	73 	84 	81.7	4.36005
7  	8     	73 	84 	82.7	3.2573 
8  	5     	83 	85 	84  	0.447214
9  	6     	83 	112	87.1	8.32406 
10 	7     	83 	112	90.5	10.8097 
11 	7     	85 	112	101.2	13.2272 
12 	3     	85 	112	104.9	9.67936 
13 	7     	56 	136	104.4	24.2124 
14 	5     	70 	166	121.6	28.5489 
15 	8     	82 	166	122.2	25.0352 
16 	8     	112	170	136.4	22.9704 
17 	4     	112	170	156.8	18.9515 
18 	7     	139	170	166.1	9.17006 
19 	6     	104	170	162.6	19.5969 
20 	6     	170	200	176  	12      
21 	4     	170	200	182  	14.6969 
22 	6     	170	200	188  	14.6969 
23 	8     	36 	200	174.6	48.1086 
24 	6     	170	200	194  	12      
25 	7     	185	200	198.5	4.5     
26 	5     	170	200	194  	12      
27 	5     	166	200	193.6	12.8312 
28 	6     	170	200	197  	9       
2

In [26]:
# Run 5: repeat of the 50-generation configuration with a new network and environment.
model_5: Model = build_model((4, 20, 12, 2))

experiment_5 = Experiment(
    population=10,
    iterations=50,
    model=model_5,
    env=gym.make("CartPole-v1")
)

# result_5 is the (logbook, best individual) pair returned by Experiment.run().
result_5 = experiment_5.run()

gen	nevals	min	max	avg	std    
0  	10    	10 	33 	19 	8.68332
1  	6     	15 	37 	31.1	8.0181 
2  	4     	32 	37 	35.4	2.00998
3  	7     	34 	67 	42.8	12.2049
4  	6     	37 	137	59.4	29.3094
5  	4     	67 	150	104 	35.6987
6  	8     	123	200	164.7	29.8431
7  	4     	137	200	184.9	22.1606
8  	7     	176	200	193.6	9.84073
9  	7     	200	200	200  	0      
10 	6     	200	200	200  	0      
11 	6     	200	200	200  	0      
12 	7     	200	200	200  	0      
13 	6     	200	200	200  	0      
14 	7     	200	200	200  	0      
15 	5     	200	200	200  	0      
16 	6     	200	200	200  	0      
17 	6     	135	200	193.5	19.5   
18 	6     	200	200	200  	0      
19 	4     	200	200	200  	0      
20 	7     	200	200	200  	0      
21 	5     	200	200	200  	0      
22 	7     	200	200	200  	0      
23 	6     	200	200	200  	0      
24 	5     	200	200	200  	0      
25 	7     	200	200	200  	0      
26 	5     	200	200	200  	0      
27 	8     	200	200	200  	0      
28 	7     	200	200	200  	0      
29 	7     	200	200	

In [27]:
# Run 6: repeat of the 50-generation configuration with a new network and environment.
model_6: Model = build_model((4, 20, 12, 2))

experiment_6 = Experiment(
    population=10,
    iterations=50,
    model=model_6,
    env=gym.make("CartPole-v1")
)

# result_6 is the (logbook, best individual) pair returned by Experiment.run().
result_6 = experiment_6.run()

gen	nevals	min	max	avg 	std    
0  	10    	12 	59 	27.3	16.9177
1  	7     	15 	78 	48.8	21.7016
2  	5     	30 	78 	59.8	12.4804
3  	7     	27 	101	64.1	26.5648
4  	5     	59 	101	84.1	14.8624
5  	7     	53 	101	85.8	16.6301
6  	7     	76 	101	93.9	10.8577
7  	8     	50 	101	92.7	15.3951
8  	8     	65 	115	96.6	12.1507
9  	5     	92 	126	105.4	9.50999
10 	7     	92 	126	104  	9.0111 
11 	8     	89 	127	109.1	12.8953
12 	8     	91 	127	119.4	10.9654
13 	7     	78 	127	115.8	19.2811
14 	7     	127	179	142.6	23.8294
15 	8     	115	179	146.6	26.6803
16 	6     	127	179	163.4	23.8294
17 	8     	34 	179	154.1	44.981 
18 	5     	62 	179	160.9	38.0906
19 	6     	179	179	179  	0      
20 	6     	179	179	179  	0      
21 	5     	179	179	179  	0      
22 	6     	141	179	175.2	11.4   
23 	5     	179	179	179  	0      
24 	6     	141	200	177.3	13.6239
25 	4     	127	200	174.2	21.908 
26 	5     	141	200	179.4	15.246 
27 	7     	179	200	183.2	8.4    
28 	6     	141	200	183.6	17.3101
29 	5     	127	200	1

**Conclusion**

After failing to solve the Lunar Lander environment, I switched to CartPole. Choosing different activation functions was the main factor in stabilizing the solution around the top score.