In [None]:
import genome
from genome import Genom, Node, crossover, distance
import numpy as np
import operator
import copy
from time import sleep
import gymnasium as gym
import torch
from operator import itemgetter
import random

In [None]:
# Create the CartPole-v1 Gymnasium environment. The Agent class further down
# reads this module-level `env` both for its default arguments and in play().
env = gym.make("CartPole-v1")

## Erstellung eines Genoms mit zwei Input-Knoten und einem Output-Knoten

In [None]:
# Build a fresh genome. Presumably the arguments are (n_inputs, n_outputs),
# i.e. 2 inputs and 1 output — confirm against genome.py.
genom = Genom(2,1)

## Ausgabe der Knoten und Verbindungen

In [None]:
# Print the genome's nodes, then its connections.
genom.p_nodes()
genom.p_connections()

## Visualisierung

In [None]:
# Draw the genome in green.
genom.visualize(color="green")

## Forward Pass

In [None]:
# Forward pass with both inputs set to 1; as the cell's last expression the
# result is displayed as the cell output.
genom.forward([1,1])

## Mutation

In [None]:
# Call mutate() ten times on the same genome, then redraw it with labels.
for _ in range(10):
    genom.mutate()
genom.visualize(labels=True, color="green")

In [None]:
# Print nodes and connections again, now for the mutated genome.
genom.p_nodes()
genom.p_connections()

In [None]:
# Repeat the forward pass with the same input [1, 1] on the mutated genome.
genom.forward([1,1])

## Erstellung von zwei neuen Genomen

In [None]:
# Two independent genomes built with the same arguments; they serve as the
# parents for the crossover cells that follow.
genom1 = Genom(2,1)
genom2 = Genom(2,1)

In [None]:
# Draw both parents: genom1 in red, genom2 in green.
genom1.visualize(color="red")
genom2.visualize(color="green")

## Crossover

In [None]:
# Cross the two unmutated parents (no fitness values are passed here) and
# draw the child in yellow.
cros_genom = crossover(genom1,genom2)
cros_genom.visualize(color="yellow")

In [None]:
# Call mutate() five times on genom2 only, so the two parents differ before
# the next crossover, then redraw genom2.
for _ in range(5):
    genom2.mutate()

genom2.visualize(color="green")

In [None]:
# Cross genom1 with the mutated genom2 and draw the new child in yellow.
cros_genom = crossover(genom1,genom2)
cros_genom.visualize(color="yellow")

In [None]:
# Print the child's nodes, then its connections.
cros_genom.p_nodes()
cros_genom.p_connections()

In [None]:
# Forward pass through the child genome with both inputs set to 1.
cros_genom.forward([1,1])

## Distanz

In [None]:
# Distance between genom1 and the mutated genom2 as computed by
# genome.distance; displayed as the cell output.
distance(genom1,genom2)

In [None]:
class Agent():
    """Evolves a population of genomes to play the module-level Gymnasium ``env``.

    ``population`` is a list of ``(Genom, fitness)`` tuples. Each call to
    ``train`` repeatedly pairs up individuals, creates one child per pair via
    ``crossover``, mutates and evaluates the children, and keeps the
    ``popSize`` best individuals (elitist selection).
    """

    def __init__(self, obs_dim = env.observation_space.shape, n_actions = env.action_space.n,
                 eps = 0.05, eps_dec = 1e-5, eps_min = 0.05, popSize=20):
        """Create the initial population.

        Args:
            obs_dim: Shape of an observation (defaults to the env's).
            n_actions: Number of discrete actions (defaults to the env's).
            eps: Initial probability of taking a random action.
            eps_dec: Amount subtracted from ``eps`` after every action.
            eps_min: ``eps`` stops decaying once it is no longer above this.
            popSize: Number of individuals kept after each selection.
        """
        self.obs_dim = obs_dim
        self.n_actions = n_actions
        self.action_space = list(range(n_actions))
        self.eps = eps
        self.eps_dec = eps_dec
        self.eps_min = eps_min
        self.popSize = popSize
        # NOTE(review): the genome size is hard-coded to (4, 2) — matching
        # CartPole — instead of being derived from obs_dim / n_actions.
        self.population = [(Genom(4,2),0) for _ in range(self.popSize)]


    def pick_action(self, state, nn):
        """Choose the next action for ``state`` (epsilon-greedy).

        With probability ``eps`` a uniformly random action is returned;
        otherwise the action whose network output is largest.

        Assumes ``nn.forward(state)`` returns a sequence of
        ``(action_id, value)`` pairs — TODO confirm against genome.py.
        """
        if np.random.random() > self.eps:
            # Take the pair with the highest value and return its id. The
            # previous code wrapped the forward result in a one-element list,
            # which made max() a no-op and always returned the id of the
            # first output regardless of the values.
            outputs = nn.forward(state)
            action = max(outputs, key=itemgetter(1))[0]
        else:
            action = np.random.choice(self.action_space)
        self.eps -= self.eps_dec if self.eps > self.eps_min else 0
        return action

    def train(self, epochs = 100, reps = 3, target_score = 450):
        """Run the evolutionary loop and return the best genome found.

        Args:
            epochs: Maximum number of generations.
            reps: Games played per individual when measuring fitness.
            target_score: Training stops early once the best individual's
                fitness reaches this value.

        Returns:
            The ``Genom`` at the top of the sorted population.
        """
        self.population = self.fitness(reps, self.population)
        # Sort once up front so population[0] really is the best individual
        # for the first progress print / early-stop check.
        self.selection()

        for epoch in range(epochs):

            # progress output every 25 epochs
            if epoch % 25 == 0:
                print("Epoch: " + str(epoch), end=" ")
                print("Score: " +str(self.population[0][1]))

            # early stop
            if self.population[0][1] >= target_score:
                return self.population[0][0]

            # crossover: one child per pair of shuffled parents. Stopping at
            # len - 1 leaves the last individual unpaired when the population
            # size is odd instead of raising IndexError.
            random.shuffle(self.population)
            child_pop = []
            for j in range(0, len(self.population) - 1, 2):
                parent_a, fit_a = self.population[j]
                parent_b, fit_b = self.population[j + 1]
                child_pop.append((crossover(parent_a, parent_b, fit_a, fit_b), 0))

            # mutation
            self.mutation(child_pop)

            # evaluate the children and add them to the pool
            self.population += self.fitness(reps, child_pop)

            # selection
            self.selection()

        return self.population[0][0]


    def fitness(self, reps, pop):
        """Return a new list of ``(genome, average score over reps games)``.

        The fitness values stored in ``pop`` are ignored and recomputed.
        """
        new_pop = []
        for genome_, _ in pop:
            total = 0
            for _ in range(reps):
                total += self.play(genome_)
            new_pop.append((genome_, total/reps))
        return new_pop


    def selection(self):
        """Elitist selection: keep the ``popSize`` fittest, best first."""
        sortedPop = sorted(self.population, key=operator.itemgetter(1), reverse=True)
        self.population = sortedPop[:self.popSize]

    def mutation(self, children):
        """Call ``mutate()`` on the genome of every ``(genome, fitness)`` child."""
        for c in children:
            c[0].mutate()

    def play(self, nn: Genom):
        """Play one episode with ``nn`` and return the number of steps survived."""
        obs, _ = env.reset()
        obs = torch.tensor(obs, dtype=torch.float)
        score = 0
        terminated = False
        truncated = False
        while not terminated and not truncated:
            action = self.pick_action(obs, nn)
            obs_, _reward, terminated, truncated, _ = env.step(action)
            obs = torch.tensor(obs_, dtype=torch.float)
            # the score counts steps; the env's reward value is not used
            score += 1
        return score
            

In [None]:
# 20 genomes; eps=0.6 is the initial probability of taking a random action
# in Agent.pick_action (it decays by eps_dec per action while above eps_min).
agent = Agent(popSize=20, eps=0.6)

In [None]:
# Evolve for up to 300 epochs (train returns early once the leading score
# reaches 450), then print and draw the returned genome.
best = agent.train(epochs=300)
best.p_connections()
best.p_nodes()
best.visualize()

In [None]:
class NN():
    """Evolves a population of genomes to reproduce the XOR truth table.

    ``population`` is a list of ``(Genom, accuracy)`` tuples. The fitness
    function feeds two-element inputs and reads the first output, so it is
    only meaningful for genomes with 2 inputs and 1 output.
    """

    def __init__(self, popSize = 2, n_In = 2, n_Out = 1) -> None:
        """Create ``popSize`` genomes built as ``Genom(n_In, n_Out)``, each with accuracy 0."""
        self.population = [(Genom(n_In,n_Out),0) for _ in range(popSize)]
        self.popSize = popSize

    def forwardpass(self, index,x):
        """Run input ``x`` through the genome at position ``index``."""
        return self.population[index][0].forward(x)

    def mutate(self,pop):
        """Call ``mutate()`` on the genome of every ``(genome, fitness)`` entry."""
        for p in pop:
            p[0].mutate()

    def selection(self):
        """Elitist selection: keep the ``popSize`` fittest, best first."""
        sortedPop = sorted(self.population, key=operator.itemgetter(1), reverse=True)
        self.population = sortedPop[:self.popSize]

    def fitness(self, pop):
        """Return a new list of ``(genome, accuracy)`` for the XOR task.

        Each of the four XOR cases whose rounded output matches the target
        contributes 0.25, so a perfect genome scores exactly 1.0. The output
        is read as ``forward(x)[0][1]``. The fitness values stored in
        ``pop`` are ignored and recomputed.
        """
        xor_cases = (((0, 0), 0), ((0, 1), 1), ((1, 0), 1), ((1, 1), 0))
        new_pop = []
        for genome_, _ in pop:
            acc = 0
            for inputs, target in xor_cases:
                # pass a fresh list each time, as the hand-written calls did
                if np.round(genome_.forward(list(inputs))[0][1]) == target:
                    acc += 0.25
            new_pop.append((genome_, acc))
        return new_pop

    def train(self, epochs = 100):
        """Run the evolutionary loop; return the best genome found.

        Stops early as soon as the leading genome classifies all four XOR
        cases correctly (accuracy == 1).
        """
        # Evaluate and sort the initial population first. Without this every
        # parent enters crossover/selection with a placeholder fitness of 0
        # and an already-perfect genome is not recognised.
        self.population = self.fitness(self.population)
        self.selection()

        for epoch in range(epochs):

            print("Epoch: " + str(epoch), end=" ")
            print("Acc: " +str(self.population[0][1]))

            if self.population[0][1] == 1:
                return self.population[0][0]

            # crossover: one child per pair of shuffled parents. Stopping at
            # len - 1 leaves the last individual unpaired when the population
            # size is odd instead of raising IndexError.
            random.shuffle(self.population)
            child_pop = []
            for j in range(0, len(self.population) - 1, 2):
                parent_a, fit_a = self.population[j]
                parent_b, fit_b = self.population[j + 1]
                child_pop.append((crossover(parent_a, parent_b, fit_a, fit_b), 0))

            self.mutate(child_pop)

            self.population += self.fitness(child_pop)

            self.selection()

        return self.population[0][0]
            
            

In [None]:
# Evolve 100 genomes on the XOR task for the default of at most 100 epochs,
# then draw the returned genome.
nn = NN(popSize=100)
best = nn.train()
best.visualize()
