The code in this notebook is from:
https://gist.github.com/DollarAkshay/e6fe84fdf721db2731948e8461092e5a

In [32]:
import time, math, random, bisect, copy
import gym
import csv
import numpy as np

In [33]:
class NeuralNet:
    """Fully connected feed-forward network with randomly initialised parameters.

    ``nodeCount`` lists the layer widths from input to output. ``weights`` and
    ``biases`` hold one entry per pair of adjacent layers, stored as nested
    Python lists, and ``fitness`` starts at 0.0.
    """

    def __init__(self, nodeCount):
        """Create the network, drawing every weight and bias uniformly between -1 and 1."""
        self.fitness = 0.0
        self.nodeCount = nodeCount
        self.weights = []
        self.biases = []
        # Weights and biases are drawn layer by layer (weights first) so the
        # sequence of random draws is the same for a given NumPy seed.
        for layer in range(1, len(nodeCount)):
            fan_in = nodeCount[layer - 1]
            fan_out = nodeCount[layer]
            layer_weights = np.random.uniform(-1, 1, (fan_in, fan_out))
            self.weights.append(layer_weights.tolist())
            layer_biases = np.random.uniform(-1, 1, fan_out)
            self.biases.append(layer_biases.tolist())

    def printWeightsandBiases(self):
        """Print all weights and biases to stdout as bracketed, 2-decimal tables.

        The "\\b" (backspace) characters are emitted after each trailing comma
        so that a terminal overwrites it with the closing bracket.
        """
        print("--------------------------------")
        print("Weights :\n[", end="")
        for layer in self.weights:
            print("\n [ ", end="")
            for position, row in enumerate(layer):
                if position > 0:
                    # Rows after the first start on their own indented line.
                    print("\n   ", end="")
                print("[", end="")
                print("".join(" %5.2f," % value for value in row), end="")
                print("\b],", end="")
            print("\b ],")
        print("\n]")

        print("\nBiases :\n[", end="")
        for layer in self.biases:
            print("\n [ ", end="")
            print("".join(" %5.2f," % value for value in layer), end="")
            print("\b],", end="")
        print("\b \n]\n--------------------------------\n")

    def getOutput(self, input):
        """Return the index of the largest output unit for ``input``.

        Each layer applies ``activations @ weights + biases``; no activation
        function is applied between layers.
        """
        activations = input
        for layer in range(1, len(self.nodeCount)):
            pre_activation = np.matmul(activations, self.weights[layer - 1])
            pre_activation = pre_activation + self.biases[layer - 1]
            activations = np.reshape(pre_activation, (self.nodeCount[layer]))
        return np.argmax(activations)

In [34]:
class Population:
    """Fixed-size pool of NeuralNet individuals evolved with a genetic algorithm.

    Attributes (all set from the constructor arguments):
        nodeCount:  layer sizes handed to every NeuralNet that is created.
        popCount:   number of individuals each generation is grown to.
        m_rate:     per-gene probability that a child keeps its own freshly
                    initialised random value instead of a parent's gene.
        population: the current list of individuals.
    """

    # Chance of taking a gene from the fitter parent when the raw fitness
    # values cannot be used as proportions (see _inheritChance).
    _FITTER_PARENT_CHANCE = 2.0 / 3.0

    def __init__(self, populationCount, mutationRate, nodeCount):
        """Create ``populationCount`` randomly initialised networks."""
        self.nodeCount = nodeCount
        self.popCount = populationCount
        self.m_rate = mutationRate
        self.population = [NeuralNet(nodeCount) for _ in range(populationCount)]

    @staticmethod
    def _inheritChance(fitness1, fitness2):
        """Return the probability that a gene is inherited from the first parent.

        When both fitness values are non-negative and not both zero the
        chance is fitness-proportional.  Otherwise (a negative fitness or a
        zero sum) the plain ratio is undefined or falls outside [0, 1] and
        can even favour the worse parent, so the fitter parent is given a
        fixed 2:1 advantage and equal parents are treated evenly.
        """
        if fitness1 >= 0 and fitness2 >= 0 and fitness1 + fitness2 > 0:
            return fitness1 / (fitness1 + fitness2)
        if fitness1 == fitness2:
            return 0.5
        if fitness1 > fitness2:
            return Population._FITTER_PARENT_CHANCE
        return 1.0 - Population._FITTER_PARENT_CHANCE

    @staticmethod
    def _rouletteIndex(cumulative):
        """Pick an index in ``range(len(cumulative) - 1)`` by roulette wheel.

        ``cumulative`` is a running sum of selection weights that starts
        with 0, so entry ``i + 1`` minus entry ``i`` is the weight of
        individual ``i``.  If every weight is zero the pick is uniform.
        """
        count = len(cumulative) - 1
        total = cumulative[-1]
        if total <= 0:
            return random.randrange(count)
        pick = random.uniform(0, total)
        index = bisect.bisect_right(cumulative, pick) - 1
        # random.uniform may return ``total`` itself, which would index past
        # the last weighted individual; clamp to that individual instead.
        last_weighted = bisect.bisect_left(cumulative, total) - 1
        return min(index, last_weighted)

    def createChild(self, nn1, nn2):
        """Breed a new NeuralNet from two parents.

        The child starts with random parameters.  Each weight and bias is,
        with probability ``1 - m_rate``, overwritten by the matching gene of
        one parent (chosen according to _inheritChance); otherwise the random
        value is kept, which acts as the mutation.
        """
        child = NeuralNet(self.nodeCount)
        take_first = self._inheritChance(nn1.fitness, nn2.fitness)

        for i, layer in enumerate(child.weights):
            for j, row in enumerate(layer):
                for k in range(len(row)):
                    if random.random() > self.m_rate:
                        donor = nn1 if random.random() < take_first else nn2
                        row[k] = donor.weights[i][j][k]

        for i, layer in enumerate(child.biases):
            for j in range(len(layer)):
                if random.random() > self.m_rate:
                    donor = nn1 if random.random() < take_first else nn2
                    layer[j] = donor.biases[i][j]

        return child

    def createNewGeneration(self, bestNN):
        """Replace the population with survivors plus newly bred children.

        The population is sorted by fitness (best first).  The individual at
        rank ``i`` survives with probability ``(popCount - i) / popCount``,
        so the top-ranked one always survives.  Children are then bred from
        survivors picked with weight ``(fitness - min survivor fitness) ** 4``
        until the population is back to ``popCount``.

        ``bestNN`` is accepted for interface compatibility but is not used.
        """
        self.population.sort(key=lambda nn: nn.fitness, reverse=True)

        nextGen = []
        for rank in range(self.popCount):
            if random.random() < float(self.popCount - rank) / self.popCount:
                nextGen.append(copy.deepcopy(self.population[rank]))

        if nextGen:
            minFit = min(nn.fitness for nn in nextGen)
            fitnessSum = [0]
            for nn in nextGen:
                fitnessSum.append(fitnessSum[-1] + (nn.fitness - minFit) ** 4)

            # fitnessSum only covers the survivors, so parents are never
            # drawn from the children appended during this loop.
            while len(nextGen) < self.popCount:
                nn1 = nextGen[self._rouletteIndex(fitnessSum)]
                nn2 = nextGen[self._rouletteIndex(fitnessSum)]
                nextGen.append(self.createChild(nn1, nn2))

        self.population.clear()
        self.population = nextGen

In [None]:
def replayBestBot(bestNeuralNets):
    """Replay the fittest recorded network and log its scores to a CSV file.

    Args:
        bestNeuralNets: non-empty sequence of networks, one per generation,
            each exposing ``fitness`` and ``getOutput(observation)``.

    Raises:
        ValueError: if ``bestNeuralNets`` is empty.

    The network with the highest fitness (the earliest one on ties) is
    played for ``PLAY`` episodes of at most ``MAX_STEPS`` steps in the
    module-level ``env``.  Each episode's total reward is printed and written
    to "LUNAR_GA_play_scores.csv".
    """
    if not bestNeuralNets:
        raise ValueError("replayBestBot needs at least one neural net to replay")

    # max() returns the first maximal element, so ties go to the earliest
    # generation.  Selecting this way also works when every fitness is very
    # low, and the winner is copied only once.
    generation, champion = max(enumerate(bestNeuralNets),
                               key=lambda pair: pair[1].fitness)
    fitness_bb = champion.fitness
    best_bot = copy.deepcopy(champion)

    print("Generation %3d had a best fitness: %4d" % (generation, fitness_bb))
    # newline="" lets the csv module control line endings itself.
    with open("LUNAR_GA_play_scores.csv", "w", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(["attempt", "score"])

        for attempt in range(PLAY):
            reward_total = 0
            # Every attempt starts from a fresh episode.
            observation = env.reset()
            for step in range(MAX_STEPS):
                action = best_bot.getOutput(observation)
                observation, reward, done, info = env.step(action)
                reward_total += reward
                if done:
                    break
            writer.writerow([attempt + 1, reward_total])
            print("Played to get a reward of:", reward_total)
            
# --- Experiment configuration -------------------------------------------
GAME = 'LunarLander-v2'
MAX_STEPS = 200          # step cap for a single episode
MAX_GENERATIONS = 1000   # number of generations to evolve
PLAY = 100               # episodes replayed by replayBestBot
POPULATION_COUNT = 100   # individuals per generation
MUTATION_RATE = 0.001    # per-gene mutation probability given to Population

# --- Environment and population setup -----------------------------------
env = gym.make(GAME)
observation = env.reset()
in_dimen = env.observation_space.shape[0]
out_dimen = env.action_space.n
# Observation bounds and action range; not referenced by the code below.
obsMin = env.observation_space.low
obsMax = env.observation_space.high
actionMin = 0
actionMax = env.action_space.n
pop = Population(POPULATION_COUNT, MUTATION_RATE, [in_dimen, 13, 8, 13, out_dimen])
bestNeuralNets = []  # deep copy of the fittest network of every generation

# --- Training loop -------------------------------------------------------
# newline="" lets the csv module control line endings itself.
with open("LUNAR_GA_train_scores.csv", "w", newline="") as csvfile:
    header = ["generation", "average fitness", "maximum fitness"]
    writer = csv.writer(csvfile, delimiter=',')
    writer.writerow(header)

    for gen in range(MAX_GENERATIONS):
        genAvgFit = 0.0
        maxFit = -100000000
        maxNeuralNet = None
        for nn in pop.population:
            # Start every individual from a fresh episode.  Resetting only
            # when an episode reports `done` would hand an unfinished episode
            # (one that reached MAX_STEPS) to the next individual and skew
            # its fitness.
            observation = env.reset()
            totalReward = 0
            for step in range(MAX_STEPS):
                action = nn.getOutput(observation)
                observation, reward, done, info = env.step(action)
                totalReward += reward
                if done:
                    break

            nn.fitness = totalReward
            genAvgFit += nn.fitness
            if nn.fitness > maxFit:
                maxFit = nn.fitness
                # Snapshot the network; the population is replaced below.
                maxNeuralNet = copy.deepcopy(nn)

        bestNeuralNets.append(maxNeuralNet)
        genAvgFit /= pop.popCount
        print("Generation : %3d |  Avg Fitness : %5.0f  |  Max Fitness : %5.0f  " % (gen+1, genAvgFit, maxFit) )
        writer.writerow([gen+1, genAvgFit, maxFit])
        pop.createNewGeneration(maxNeuralNet)

replayBestBot(bestNeuralNets)

Generation :   1 |  Avg Fitness :  -488  |  Max Fitness :   -63  
Generation :   2 |  Avg Fitness :  -429  |  Max Fitness :    13  
Generation :   3 |  Avg Fitness :  -397  |  Max Fitness :   -24  
Generation :   4 |  Avg Fitness :  -363  |  Max Fitness :   -34  
Generation :   5 |  Avg Fitness :  -382  |  Max Fitness :    10  
Generation :   6 |  Avg Fitness :  -346  |  Max Fitness :    45  
Generation :   7 |  Avg Fitness :  -293  |  Max Fitness :    39  
Generation :   8 |  Avg Fitness :  -254  |  Max Fitness :    24  
Generation :   9 |  Avg Fitness :  -277  |  Max Fitness :    20  
Generation :  10 |  Avg Fitness :  -269  |  Max Fitness :   243  
Generation :  11 |  Avg Fitness :  -278  |  Max Fitness :    46  
Generation :  12 |  Avg Fitness :  -221  |  Max Fitness :    18  
Generation :  13 |  Avg Fitness :  -257  |  Max Fitness :    -2  
Generation :  14 |  Avg Fitness :  -258  |  Max Fitness :     3  
Generation :  15 |  Avg Fitness :  -225  |  Max Fitness :    19  
Generation