the code in this notebook is from:
https://gist.github.com/DollarAkshay/e6fe84fdf721db2731948e8461092e5a

In [32]:
import time, math, random, bisect, copy
import gym
import csv
import numpy as np

In [33]:
class NeuralNet : 
    def __init__(self, nodeCount):     
        self.fitness = 0.0
        self.nodeCount = nodeCount
        self.weights = []
        self.biases = []
        for i in range(len(nodeCount) - 1):
            self.weights.append( np.random.uniform(low=-1, high=1, size=(nodeCount[i], nodeCount[i+1])).tolist() )
            self.biases.append( np.random.uniform(low=-1, high=1, size=(nodeCount[i+1])).tolist())


    def printWeightsandBiases(self):
        
        print("--------------------------------")
        print("Weights :\n[", end="")
        for i in range(len(self.weights)):
            print("\n [ ", end="")
            for j in range(len(self.weights[i])):
                if j!=0:
                    print("\n   ", end="")
                print("[", end="")
                for k in range(len(self.weights[i][j])):
                    print(" %5.2f," % (self.weights[i][j][k]), end="")
                print("\b],", end="")
            print("\b ],")
        print("\n]")

        print("\nBiases :\n[", end="")
        for i in range(len(self.biases)):
            print("\n [ ", end="")
            for j in range(len(self.biases[i])):
                    print(" %5.2f," % (self.biases[i][j]), end="")
            print("\b],", end="")
        print("\b \n]\n--------------------------------\n")
  
    def getOutput(self, input):
        output = input
        for i in range(len(self.nodeCount)-1):
            output = np.reshape( np.matmul(output, self.weights[i]) + self.biases[i], (self.nodeCount[i+1]))
        return np.argmax(output)

In [34]:
class Population :
    def __init__(self, populationCount, mutationRate, nodeCount):
        self.nodeCount = nodeCount
        self.popCount = populationCount
        self.m_rate = mutationRate
        self.population = [ NeuralNet(nodeCount) for i in range(populationCount)]


    def createChild(self, nn1, nn2):
        
        child = NeuralNet(self.nodeCount)
        for i in range(len(child.weights)):
            for j in range(len(child.weights[i])):
                for k in range(len(child.weights[i][j])):
                    if random.random() > self.m_rate:
                        if random.random() < nn1.fitness / (nn1.fitness+nn2.fitness):
                            child.weights[i][j][k] = nn1.weights[i][j][k]
                        else :
                            child.weights[i][j][k] = nn2.weights[i][j][k]
                        

        for i in range(len(child.biases)):
            for j in range(len(child.biases[i])):
                if random.random() > self.m_rate:
                    if random.random() < nn1.fitness / (nn1.fitness+nn2.fitness):
                        child.biases[i][j] = nn1.biases[i][j]
                    else:
                        child.biases[i][j] = nn2.biases[i][j]

        return child


    def createNewGeneration(self, bestNN):    
        nextGen = []
        self.population.sort(key=lambda x: x.fitness, reverse=True)
        for i in range(self.popCount):
            if random.random() < float(self.popCount-i)/self.popCount:
                nextGen.append(copy.deepcopy(self.population[i]));

        fitnessSum = [0]
        minFit = min([i.fitness for i in nextGen])
        for i in range(len(nextGen)):
            fitnessSum.append(fitnessSum[i]+(nextGen[i].fitness-minFit)**4)
        

        while(len(nextGen) < self.popCount):
            r1 = random.uniform(0, fitnessSum[len(fitnessSum)-1] )
            r2 = random.uniform(0, fitnessSum[len(fitnessSum)-1] )
            nn1 = nextGen[bisect.bisect_right(fitnessSum, r1)-1]
            nn2 = nextGen[bisect.bisect_right(fitnessSum, r2)-1]
            nextGen.append( self.createChild(nn1, nn2) )
        self.population.clear()
        self.population = nextGen

In [36]:
def replayBestBot(bestNeuralNets):  
    fitness_bb = -1000
    generation = 0
    for i in range(len(bestNeuralNets)):
        if bestNeuralNets[i].fitness > fitness_bb:
            fitness_bb = bestNeuralNets[i].fitness
            generation = i
            best_bot = copy.deepcopy(bestNeuralNets[i])
    
    print("Generation %3d had a best fitness: %4d" % (generation, fitness_bb))
    with open("LUNAR_GA_play_scores.csv", "w") as csvfile:
        header = ["attempt", "score"]
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(header)
        
        for attempt in range(PLAY):
            reward_total = 0
            observation = env.reset()
            for step in range(MAX_STEPS):
                action = best_bot.getOutput(observation)
                observation, reward, done, info = env.step(action)
                reward_total += reward
                if done:
                    observation = env.reset()
                    break
            writer.writerow([attempt+1, reward_total])
            print("Played to get a reward of:", reward_total)
            
GAME = 'LunarLander-v2'
MAX_STEPS = 200
MAX_GENERATIONS = 1000
PLAY = 100
POPULATION_COUNT = 100
MUTATION_RATE = 0.001

env = gym.make(GAME)
observation = env.reset()
in_dimen = env.observation_space.shape[0]
out_dimen = env.action_space.n
obsMin = env.observation_space.low
obsMax = env.observation_space.high
actionMin = 0
actionMax = env.action_space.n
pop = Population(POPULATION_COUNT, MUTATION_RATE, [in_dimen, 13, 8, 13, out_dimen])
bestNeuralNets = []

with open("LUNAR_GA_train_scores.csv", "w") as csvfile:
    header = ["generation", "average fitness", "maximum fitness"]
    writer = csv.writer(csvfile, delimiter=',')
    writer.writerow(header)

    for gen in range(MAX_GENERATIONS):
        genAvgFit = 0.0
        maxFit = -100000000
        maxNeuralNet = None
        for nn in pop.population:
            totalReward = 0
            for step in range(MAX_STEPS):
                action = nn.getOutput(observation)
                observation, reward, done, info = env.step(action)
                totalReward += reward
                if done:
                    observation = env.reset()
                    break

            nn.fitness = totalReward
            genAvgFit += nn.fitness
            if nn.fitness > maxFit :
                maxFit = nn.fitness
                maxNeuralNet = copy.deepcopy(nn);

        bestNeuralNets.append(maxNeuralNet)
        genAvgFit/=pop.popCount
        print("Generation : %3d |  Avg Fitness : %5.0f  |  Max Fitness : %5.0f  " % (gen+1, genAvgFit, maxFit) )
        writer.writerow([gen+1, genAvgFit, maxFit])
        pop.createNewGeneration(maxNeuralNet)

replayBestBot(bestNeuralNets)

Generation :   1 |  Avg Fitness :  -488  |  Max Fitness :   -63  
Generation :   2 |  Avg Fitness :  -429  |  Max Fitness :    13  
Generation :   3 |  Avg Fitness :  -397  |  Max Fitness :   -24  
Generation :   4 |  Avg Fitness :  -363  |  Max Fitness :   -34  
Generation :   5 |  Avg Fitness :  -382  |  Max Fitness :    10  
Generation :   6 |  Avg Fitness :  -346  |  Max Fitness :    45  
Generation :   7 |  Avg Fitness :  -293  |  Max Fitness :    39  
Generation :   8 |  Avg Fitness :  -254  |  Max Fitness :    24  
Generation :   9 |  Avg Fitness :  -277  |  Max Fitness :    20  
Generation :  10 |  Avg Fitness :  -269  |  Max Fitness :   243  
Generation :  11 |  Avg Fitness :  -278  |  Max Fitness :    46  
Generation :  12 |  Avg Fitness :  -221  |  Max Fitness :    18  
Generation :  13 |  Avg Fitness :  -257  |  Max Fitness :    -2  
Generation :  14 |  Avg Fitness :  -258  |  Max Fitness :     3  
Generation :  15 |  Avg Fitness :  -225  |  Max Fitness :    19  
Generation

Generation : 126 |  Avg Fitness :    44  |  Max Fitness :   190  
Generation : 127 |  Avg Fitness :    42  |  Max Fitness :   186  
Generation : 128 |  Avg Fitness :    47  |  Max Fitness :   184  
Generation : 129 |  Avg Fitness :    72  |  Max Fitness :   191  
Generation : 130 |  Avg Fitness :    62  |  Max Fitness :   190  
Generation : 131 |  Avg Fitness :    55  |  Max Fitness :   199  
Generation : 132 |  Avg Fitness :    45  |  Max Fitness :   188  
Generation : 133 |  Avg Fitness :    36  |  Max Fitness :   182  
Generation : 134 |  Avg Fitness :    40  |  Max Fitness :   191  
Generation : 135 |  Avg Fitness :    64  |  Max Fitness :   251  
Generation : 136 |  Avg Fitness :    52  |  Max Fitness :   182  
Generation : 137 |  Avg Fitness :    62  |  Max Fitness :   268  
Generation : 138 |  Avg Fitness :    61  |  Max Fitness :   190  
Generation : 139 |  Avg Fitness :    50  |  Max Fitness :   235  
Generation : 140 |  Avg Fitness :    50  |  Max Fitness :   165  
Generation

Generation : 251 |  Avg Fitness :    57  |  Max Fitness :   191  
Generation : 252 |  Avg Fitness :    46  |  Max Fitness :   171  
Generation : 253 |  Avg Fitness :    46  |  Max Fitness :   212  
Generation : 254 |  Avg Fitness :    31  |  Max Fitness :   186  
Generation : 255 |  Avg Fitness :    37  |  Max Fitness :   195  
Generation : 256 |  Avg Fitness :    42  |  Max Fitness :   195  
Generation : 257 |  Avg Fitness :    33  |  Max Fitness :   179  
Generation : 258 |  Avg Fitness :    54  |  Max Fitness :   191  
Generation : 259 |  Avg Fitness :    54  |  Max Fitness :   197  
Generation : 260 |  Avg Fitness :    50  |  Max Fitness :   191  
Generation : 261 |  Avg Fitness :    50  |  Max Fitness :   191  
Generation : 262 |  Avg Fitness :    57  |  Max Fitness :   276  
Generation : 263 |  Avg Fitness :    50  |  Max Fitness :   199  
Generation : 264 |  Avg Fitness :    45  |  Max Fitness :   190  
Generation : 265 |  Avg Fitness :    52  |  Max Fitness :   191  
Generation

Generation : 376 |  Avg Fitness :   105  |  Max Fitness :   292  
Generation : 377 |  Avg Fitness :   119  |  Max Fitness :   305  
Generation : 378 |  Avg Fitness :   118  |  Max Fitness :   312  
Generation : 379 |  Avg Fitness :    97  |  Max Fitness :   306  
Generation : 380 |  Avg Fitness :   102  |  Max Fitness :   299  
Generation : 381 |  Avg Fitness :   118  |  Max Fitness :   317  
Generation : 382 |  Avg Fitness :    83  |  Max Fitness :   321  
Generation : 383 |  Avg Fitness :    85  |  Max Fitness :   307  
Generation : 384 |  Avg Fitness :   102  |  Max Fitness :   316  
Generation : 385 |  Avg Fitness :   101  |  Max Fitness :   315  
Generation : 386 |  Avg Fitness :   116  |  Max Fitness :   313  
Generation : 387 |  Avg Fitness :   127  |  Max Fitness :   312  
Generation : 388 |  Avg Fitness :   112  |  Max Fitness :   318  
Generation : 389 |  Avg Fitness :   118  |  Max Fitness :   309  
Generation : 390 |  Avg Fitness :   106  |  Max Fitness :   294  
Generation

Generation : 501 |  Avg Fitness :   134  |  Max Fitness :   303  
Generation : 502 |  Avg Fitness :   129  |  Max Fitness :   300  
Generation : 503 |  Avg Fitness :   141  |  Max Fitness :   319  
Generation : 504 |  Avg Fitness :   130  |  Max Fitness :   293  
Generation : 505 |  Avg Fitness :   126  |  Max Fitness :   316  
Generation : 506 |  Avg Fitness :   116  |  Max Fitness :   301  
Generation : 507 |  Avg Fitness :   126  |  Max Fitness :   297  
Generation : 508 |  Avg Fitness :   133  |  Max Fitness :   304  
Generation : 509 |  Avg Fitness :   128  |  Max Fitness :   311  
Generation : 510 |  Avg Fitness :   143  |  Max Fitness :   316  
Generation : 511 |  Avg Fitness :   151  |  Max Fitness :   295  
Generation : 512 |  Avg Fitness :   153  |  Max Fitness :   300  
Generation : 513 |  Avg Fitness :   128  |  Max Fitness :   304  
Generation : 514 |  Avg Fitness :   132  |  Max Fitness :   318  
Generation : 515 |  Avg Fitness :   141  |  Max Fitness :   311  
Generation

Generation : 626 |  Avg Fitness :   131  |  Max Fitness :   312  
Generation : 627 |  Avg Fitness :   126  |  Max Fitness :   310  
Generation : 628 |  Avg Fitness :   130  |  Max Fitness :   305  
Generation : 629 |  Avg Fitness :   131  |  Max Fitness :   310  
Generation : 630 |  Avg Fitness :   112  |  Max Fitness :   308  
Generation : 631 |  Avg Fitness :   120  |  Max Fitness :   306  
Generation : 632 |  Avg Fitness :   137  |  Max Fitness :   298  
Generation : 633 |  Avg Fitness :   119  |  Max Fitness :   296  
Generation : 634 |  Avg Fitness :   121  |  Max Fitness :   307  
Generation : 635 |  Avg Fitness :   131  |  Max Fitness :   308  
Generation : 636 |  Avg Fitness :   124  |  Max Fitness :   315  
Generation : 637 |  Avg Fitness :   132  |  Max Fitness :   312  
Generation : 638 |  Avg Fitness :   120  |  Max Fitness :   313  
Generation : 639 |  Avg Fitness :   118  |  Max Fitness :   297  
Generation : 640 |  Avg Fitness :   114  |  Max Fitness :   294  
Generation

Generation : 751 |  Avg Fitness :   140  |  Max Fitness :   310  
Generation : 752 |  Avg Fitness :   141  |  Max Fitness :   292  
Generation : 753 |  Avg Fitness :   125  |  Max Fitness :   305  
Generation : 754 |  Avg Fitness :   149  |  Max Fitness :   311  
Generation : 755 |  Avg Fitness :   140  |  Max Fitness :   314  
Generation : 756 |  Avg Fitness :   134  |  Max Fitness :   321  
Generation : 757 |  Avg Fitness :   144  |  Max Fitness :   298  
Generation : 758 |  Avg Fitness :   140  |  Max Fitness :   307  
Generation : 759 |  Avg Fitness :   140  |  Max Fitness :   319  
Generation : 760 |  Avg Fitness :   142  |  Max Fitness :   309  
Generation : 761 |  Avg Fitness :   172  |  Max Fitness :   310  
Generation : 762 |  Avg Fitness :   139  |  Max Fitness :   317  
Generation : 763 |  Avg Fitness :   128  |  Max Fitness :   290  
Generation : 764 |  Avg Fitness :   135  |  Max Fitness :   300  
Generation : 765 |  Avg Fitness :   154  |  Max Fitness :   323  
Generation

Generation : 876 |  Avg Fitness :   169  |  Max Fitness :   309  
Generation : 877 |  Avg Fitness :   153  |  Max Fitness :   301  
Generation : 878 |  Avg Fitness :   144  |  Max Fitness :   302  
Generation : 879 |  Avg Fitness :   150  |  Max Fitness :   291  
Generation : 880 |  Avg Fitness :   129  |  Max Fitness :   308  
Generation : 881 |  Avg Fitness :   139  |  Max Fitness :   303  
Generation : 882 |  Avg Fitness :   147  |  Max Fitness :   307  
Generation : 883 |  Avg Fitness :   141  |  Max Fitness :   299  
Generation : 884 |  Avg Fitness :   152  |  Max Fitness :   314  
Generation : 885 |  Avg Fitness :   149  |  Max Fitness :   303  
Generation : 886 |  Avg Fitness :   148  |  Max Fitness :   305  
Generation : 887 |  Avg Fitness :   135  |  Max Fitness :   311  
Generation : 888 |  Avg Fitness :   130  |  Max Fitness :   290  
Generation : 889 |  Avg Fitness :   131  |  Max Fitness :   301  
Generation : 890 |  Avg Fitness :   142  |  Max Fitness :   306  
Generation

Played to get a reward of: 155.89025363565128
Played to get a reward of: 159.73754971424023
Played to get a reward of: 174.74280134438658
Played to get a reward of: 157.52656169364877
Played to get a reward of: 178.85305678909953
Played to get a reward of: 302.53386704478544
Played to get a reward of: 153.19926694447895
Played to get a reward of: 174.0380005287301
Played to get a reward of: 194.98647067296753
Played to get a reward of: 48.988845888097075
Played to get a reward of: 170.28678876199453
Played to get a reward of: 151.93248076903174
Played to get a reward of: 178.8036903177156
Played to get a reward of: 208.21600256418913
Played to get a reward of: 148.69969531794098
Played to get a reward of: 274.21653239317334
Played to get a reward of: 185.61406651075637
Played to get a reward of: 256.7337879038804
Played to get a reward of: 152.13647078648384
Played to get a reward of: 254.5049346330469
Played to get a reward of: 156.99262525845
Played to get a reward of: 151.7171841185