In [7]:
import time, math, random, bisect
import gym
import numpy as np
import csv

In [45]:
# Number of generations the training loop evaluates and breeds.
MAX_GENERATIONS = 100
# Upper bound on environment steps per episode, for both training and play.
MAX_STEPS = 500 
# Number of networks in every generation.
POPULATION_COUNT = 200
# Per-gene probability that a child's weight or bias is re-drawn at random
# instead of being averaged from its two parents.
MUTATION_RATE = 0.001
# Number of episodes played with the selected best network after training.
PLAY = 100

In [46]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x) of ``x``.

    Works on scalars and, element-wise, on numpy arrays.
    """
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

class NeuralNet:
    """Fully connected feed-forward network whose parameters are nested lists.

    Attributes:
        fitness: score of this network; starts at 0 and is set by the caller.
        nodeCount: sequence of layer sizes, input layer first.
        weights: ``weights[i]`` is a ``nodeCount[i]`` x ``nodeCount[i+1]``
            nested list of floats.
        biases: ``biases[i]`` is a list of ``nodeCount[i+1]`` floats.
    """

    def __init__(self, nodeCount):
        """Build a network with weights and biases drawn uniformly from [-1, 1).

        Args:
            nodeCount: sequence of layer sizes, input layer first.
        """
        self.fitness = 0
        self.nodeCount = nodeCount
        self.weights = []
        self.biases = []
        # For each pair of adjacent layers, draw the weight matrix first and
        # the bias vector second.
        for fanIn, fanOut in zip(nodeCount[:-1], nodeCount[1:]):
            layerWeights = np.random.uniform(low=-1, high=1, size=(fanIn, fanOut))
            layerBiases = np.random.uniform(low=-1, high=1, size=(fanOut))
            self.weights.append(layerWeights.tolist())
            self.biases.append(layerBiases.tolist())

    def printWeightsandBiases(self):
        """Print all weights and biases to stdout with two decimals each."""
        # The "\b" (backspace) characters are emitted right after a trailing
        # comma, before the closing bracket is printed.
        print("--------------------------------")
        print("Weights :\n[", end="")
        for layer in self.weights:
            print("\n [ ", end="")
            for rowIndex, row in enumerate(layer):
                if rowIndex:
                    # Every row after the first starts on its own line.
                    print("\n   ", end="")
                print("[", end="")
                for value in row:
                    print(" %5.2f," % (value), end="")
                print("\b],", end="")
            print("\b ],")
        print("\n]")

        print("\nBiases :\n[", end="")
        for layer in self.biases:
            print("\n [ ", end="")
            for value in layer:
                print(" %5.2f," % (value), end="")
            print("\b],", end="")
        print("\b \n]\n--------------------------------\n")

    def getOutput(self, input):
        """Feed ``input`` through the network and return the winning output index.

        Each layer computes ``activation @ weights + biases``. No
        non-linearity is applied between layers; only the final layer's
        result is passed through ``sigmoid`` before taking the argmax.

        Args:
            input: values for the input layer.

        Returns:
            Index of the largest output value.
        """
        activation = input
        for layer, width in enumerate(self.nodeCount[1:]):
            preActivation = np.matmul(activation, self.weights[layer]) + self.biases[layer]
            activation = np.reshape(preActivation, (width))
        squashed = sigmoid(activation)
        return np.argmax(squashed)


class Population:
    """One generation of ``NeuralNet`` individuals evolved by a genetic algorithm.

    Attributes:
        nodeCount: layer sizes handed to every ``NeuralNet`` that is created.
        popCount: number of individuals each generation holds.
        m_rate: per-gene probability that a child's gene is re-drawn at
            random instead of being averaged from its parents.
        population: list of the current generation's networks.
    """

    def __init__(self, populationCount, mutationRate, nodeCount):
        """Create ``populationCount`` randomly initialised networks.

        Args:
            populationCount: size of every generation.
            mutationRate: per-gene mutation probability.
            nodeCount: sequence of layer sizes, input layer first.
        """
        self.nodeCount = nodeCount
        self.popCount = populationCount
        self.m_rate = mutationRate
        self.population = [NeuralNet(nodeCount) for _ in range(populationCount)]

    def _inheritGene(self, gene1, gene2):
        """Return a mutated gene with probability ``m_rate``, else the parents' mean."""
        if random.random() < self.m_rate:
            return random.uniform(-1, 1)
        return (gene1 + gene2) / 2.0

    def _selectParent(self, cumulative):
        """Pick one individual with probability proportional to its fitness.

        ``cumulative`` is the running fitness total with a leading 0, so
        individual ``i`` owns the interval ``[cumulative[i], cumulative[i+1])``.
        """
        total = cumulative[-1]
        if total <= 0:
            # With no fitness mass there is nothing to be proportional to;
            # bisecting an all-zero list would index one past the end, so
            # fall back to a uniform pick.
            return random.choice(self.population)
        draw = random.uniform(0, total)
        index = bisect.bisect_right(cumulative, draw) - 1
        # Clamp defensively in case the draw lands exactly on the total.
        return self.population[min(index, len(self.population) - 1)]

    def createChild(self, nn1, nn2):
        """Breed a new network from two parents.

        Every weight and bias of the child is the mean of the parents'
        corresponding values, except that with probability ``m_rate`` it is
        replaced by a fresh value drawn uniformly from [-1, 1].

        Args:
            nn1: first parent network.
            nn2: second parent network.

        Returns:
            A new ``NeuralNet`` built with this population's ``nodeCount``.
        """
        child = NeuralNet(self.nodeCount)

        for i, layer in enumerate(child.weights):
            for j, row in enumerate(layer):
                for k in range(len(row)):
                    row[k] = self._inheritGene(nn1.weights[i][j][k], nn2.weights[i][j][k])

        for i, layer in enumerate(child.biases):
            for j in range(len(layer)):
                layer[j] = self._inheritGene(nn1.biases[i][j], nn2.biases[i][j])

        return child

    def createNewGeneration(self):
        """Replace ``population`` with ``popCount`` children of fitness-selected parents.

        Parents are chosen by roulette-wheel selection over the current
        individuals' ``fitness`` values; when the total fitness is zero the
        parents are chosen uniformly at random instead.
        """
        cumulative = [0]
        for individual in self.population:
            cumulative.append(cumulative[-1] + individual.fitness)

        nextGen = []
        while len(nextGen) < self.popCount:
            nn1 = self._selectParent(cumulative)
            nn2 = self._selectParent(cumulative)
            nextGen.append(self.createChild(nn1, nn2))
        # Rebind instead of clearing the old list in place, so any reference
        # to the previous generation's list held elsewhere stays intact.
        self.population = nextGen


# Create the CartPole environment and take the first observation.
# NOTE(review): the code below treats env.reset() as returning the observation
# alone and env.step() as returning a 4-tuple — confirm against the installed
# gym version.
env = gym.make('CartPole-v1')
observation = env.reset()

# Network input and output sizes come from the environment's spaces.
in_dimen = env.observation_space.shape[0]
out_dimen = env.action_space.n
# Every network has two hidden layers of 8 nodes between input and output.
pop = Population(POPULATION_COUNT, MUTATION_RATE, [in_dimen, 8, 8, out_dimen])

# Collects the highest-fitness network of each generation during training.
bestNeuralNets = []

# Training: every network of a generation plays one episode, the generation's
# statistics are printed and logged to CSV, then the next generation is bred.
# newline="" is what the csv module asks for on files handed to csv.writer;
# without it extra blank rows can appear on platforms that translate "\n".
with open("Cartpole_GA_train_scores.csv", "w", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    # The second column carries the generation's average fitness (it was
    # previously mislabelled "Max. Fitness"); the actual maximum is logged in
    # a third column.
    header = ["Generation", "Avg. Fitness", "Max. Fitness"]
    writer.writerow(header)
    for gen in range(MAX_GENERATIONS):
        genAvgFit = 0.0
        maxFit = 0.0
        maxNeuralNet = None
        for nn in pop.population:
            totalReward = 0
            # Start every evaluation from a fresh episode so a network is
            # never scored on the tail of a predecessor's episode that hit
            # MAX_STEPS before the environment reported done.
            observation = env.reset()

            for step in range(MAX_STEPS):
                action = nn.getOutput(observation)
                observation, reward, done, info = env.step(action)
                totalReward += reward
                if done:
                    break
            nn.fitness = totalReward
            genAvgFit += nn.fitness
            # Always record the first network, so a generation whose
            # fitnesses are all <= 0 still yields a best network, not None.
            if maxNeuralNet is None or nn.fitness > maxFit:
                maxFit = nn.fitness
                maxNeuralNet = nn

        bestNeuralNets.append(maxNeuralNet)
        genAvgFit /= pop.popCount
        print("Generation : %3d |  Avg Fitness : %4.0f  |  Max Fitness : %4.0f  " % (gen+1, genAvgFit, maxFit) )

        writer.writerow([gen+1, genAvgFit, maxFit])

        pop.createNewGeneration()

    # Leave the environment at the start of a fresh episode for whatever
    # code runs next.
    observation = env.reset()


# Choose the network to play with: the best-of-generation network with the
# highest fitness across ALL generations (the last one used to be skipped).
fitnesses = [candidate.fitness for candidate in bestNeuralNets]

if not bestNeuralNets:
    raise RuntimeError("no generation was evaluated, so there is no best network to select")

# max() returns the first maximal element it meets, so walking the list
# newest-first resolves ties in favour of the most recent generation. The
# original loop used a negative step, which suggests newest-first was the
# intent; it never ran because its start and stop were swapped, and it
# compared the network object itself to a number.
best_nn = max(reversed(bestNeuralNets), key=lambda candidate: candidate.fitness)
found = True

            
# Play: run the selected network for PLAY episodes and log each score.
# newline="" is what the csv module asks for on files handed to csv.writer.
with open("Cartpole_GA_play_scores.csv", "w", newline="") as csvfile2:
    writer = csv.writer(csvfile2, delimiter=',')
    # NOTE: the "Generation" label here and in the printed line actually
    # holds the attempt number; the text is kept so existing readers of the
    # file and of the log are unaffected.
    header = ["Generation", "Score"]
    writer.writerow(header)
    print("-------------------------------------------------------------")
    for attempt in range(PLAY):
        rewards = 0
        # Begin every attempt from a fresh episode, so an attempt never
        # continues one that hit MAX_STEPS before the environment
        # reported done.
        observation = env.reset()
        for step in range(MAX_STEPS):
            action = best_nn.getOutput(observation)
            observation, reward, done, info = env.step(action)
            rewards += reward
            if done:
                break

        print("Generation : %3d |  Score : %4.0f  " % (attempt+1, rewards) )

        writer.writerow([attempt+1, rewards])
        
        

Generation :   1 |  Avg Fitness :   14  |  Max Fitness :  144  
Generation :   2 |  Avg Fitness :   18  |  Max Fitness :  247  
Generation :   3 |  Avg Fitness :   26  |  Max Fitness :  423  
Generation :   4 |  Avg Fitness :   44  |  Max Fitness :  500  
Generation :   5 |  Avg Fitness :   79  |  Max Fitness :  500  
Generation :   6 |  Avg Fitness :  150  |  Max Fitness :  500  
Generation :   7 |  Avg Fitness :  219  |  Max Fitness :  500  
Generation :   8 |  Avg Fitness :  294  |  Max Fitness :  500  
Generation :   9 |  Avg Fitness :  333  |  Max Fitness :  500  
Generation :  10 |  Avg Fitness :  379  |  Max Fitness :  500  
Generation :  11 |  Avg Fitness :  376  |  Max Fitness :  500  
Generation :  12 |  Avg Fitness :  388  |  Max Fitness :  500  
Generation :  13 |  Avg Fitness :  396  |  Max Fitness :  500  
Generation :  14 |  Avg Fitness :  405  |  Max Fitness :  500  
Generation :  15 |  Avg Fitness :  413  |  Max Fitness :  500  
Generation :  16 |  Avg Fitness :  428  

Generation :  59 |  Score :  500  
Generation :  60 |  Score :  500  
Generation :  61 |  Score :  500  
Generation :  62 |  Score :  500  
Generation :  63 |  Score :  500  
Generation :  64 |  Score :  500  
Generation :  65 |  Score :  500  
Generation :  66 |  Score :  500  
Generation :  67 |  Score :  500  
Generation :  68 |  Score :  500  
Generation :  69 |  Score :  500  
Generation :  70 |  Score :  500  
Generation :  71 |  Score :  500  
Generation :  72 |  Score :  500  
Generation :  73 |  Score :  500  
Generation :  74 |  Score :  500  
Generation :  75 |  Score :  500  
Generation :  76 |  Score :  500  
Generation :  77 |  Score :  500  
Generation :  78 |  Score :  500  
Generation :  79 |  Score :  500  
Generation :  80 |  Score :  500  
Generation :  81 |  Score :  500  
Generation :  82 |  Score :  500  
Generation :  83 |  Score :  500  
Generation :  84 |  Score :  500  
Generation :  85 |  Score :  500  
Generation :  86 |  Score :  500  
Generation :  87 |  