In [1]:
import numpy as np
import gymnasium as gym
import random

# Hyperparameters of the genetic algorithm
population_size = 100  # number of chromosomes built by initialize_population()
num_generations = 200  # iterations of the main evolution loop
mutation_rate = 0.1  # probability, per parameter array, that mutate() alters it
crossover_rate = 0.5  # probability, per parameter array, that crossover() recombines it
elitism = True  # switch for carrying the best chromosomes over unchanged (TODO confirm the main loop consults it)
elite_size = 5  # number of top chromosomes treated as elites

# Environment
env = gym.make('MountainCar-v0')
state_size = env.observation_space.shape[0]  # length of the observation vector = network input width
hidden_size = 10  # Size of the hidden layer
action_size = env.action_space.n  # number of discrete actions = network output width

# Initialize population
def initialize_population():
    """Build the starting population of randomly initialised networks.

    Returns:
        A list of ``population_size`` chromosomes. Each chromosome is a dict
        with the keys 'w1', 'b1', 'w2' and 'b2' holding standard-normal
        arrays sized from ``state_size``, ``hidden_size`` and ``action_size``.
    """
    # Listed in the order the arrays are drawn, so a seeded NumPy RNG yields
    # the same values for the same configuration.
    layer_shapes = (
        ('w1', (state_size, hidden_size)),
        ('b1', (hidden_size,)),
        ('w2', (hidden_size, action_size)),
        ('b2', (action_size,)),
    )
    return [
        {name: np.random.randn(*shape) for name, shape in layer_shapes}
        for _ in range(population_size)
    ]

# Forward pass of the neural network
def forward_pass(state, chromosome):
    """Evaluate the two-layer network stored in ``chromosome`` on ``state``.

    Args:
        state: Input vector fed to the first layer.
        chromosome: Dict with the weight/bias arrays 'w1', 'b1', 'w2', 'b2'.

    Returns:
        The raw (un-normalised) output-layer values.
    """
    w1, b1 = chromosome['w1'], chromosome['b1']
    w2, b2 = chromosome['w2'], chromosome['b2']
    # tanh hidden layer followed by a purely linear output layer.
    hidden = np.tanh(np.dot(state, w1) + b1)
    return np.dot(hidden, w2) + b2

# Fitness function
def fitness(chromosome, max_steps=200):
    """Run one episode with the network in ``chromosome`` and score it.

    The action with the highest network output is taken greedily at every
    step in the module-level ``env``.

    Args:
        chromosome: Dict of network parameters accepted by ``forward_pass``.
        max_steps: Upper bound on the number of environment steps.

    Returns:
        The sum of the rewards collected during the episode.
    """
    state, _ = env.reset()
    total_reward = 0
    for _ in range(max_steps):
        action_values = forward_pass(state, chromosome)
        action = np.argmax(action_values)
        # Gymnasium reports the end of an episode through two flags; honour
        # both so a time-limit truncation stops the rollout too.
        state, reward, terminated, truncated, _ = env.step(action)
        total_reward += reward
        if terminated or truncated:
            break
    return total_reward  # Note: total_reward is negative

# Selection
def selection(population, fitnesses):
    """Pick parents by fitness-proportionate (roulette-wheel) sampling.

    Fitness values are shifted so that every weight is positive when the
    minimum is negative. Sampling is with replacement, so the same
    chromosome object can appear several times in the result.

    Args:
        population: Sequence of chromosomes.
        fitnesses: Fitness values, one per chromosome, in the same order.

    Returns:
        A list of ``len(population) - elite_size`` chromosomes drawn from
        ``population`` (references, not copies).
    """
    min_fitness = min(fitnesses)
    offset = abs(min_fitness) + 1 if min_fitness < 0 else 0
    adjusted_fitnesses = [f + offset for f in fitnesses]
    num_selected = len(population) - elite_size
    if sum(adjusted_fitnesses) <= 0:
        # All weights are zero (e.g. every fitness is 0): random.choices
        # would raise ValueError, so fall back to uniform sampling.
        return random.choices(population, k=num_selected)
    return random.choices(population, weights=adjusted_fitnesses, k=num_selected)

# Crossover
def crossover(parent1, parent2):
    """Recombine two parents into two children, one parameter array at a time.

    For every key, with probability ``crossover_rate`` the arrays are cut at a
    random position along their first axis and the tails are swapped;
    otherwise each child receives a copy of the corresponding parent's array.
    Arrays with fewer than two entries along the first axis have no valid cut
    position and are always copied.

    Args:
        parent1: Chromosome dict of NumPy arrays.
        parent2: Chromosome dict with the same keys and array shapes.

    Returns:
        A ``(child1, child2)`` tuple of new chromosome dicts whose arrays do
        not share memory with the parents.
    """
    child1 = {}
    child2 = {}
    for key, gene1 in parent1.items():
        gene2 = parent2[key]
        length = gene1.shape[0]
        # random.random() is drawn first so the RNG sequence stays the same
        # for every input the previous implementation could handle.
        if random.random() < crossover_rate and length > 1:
            point = random.randint(1, length - 1)
            # Joining along axis 0 covers both bias vectors and weight
            # matrices (row-wise cut) without a separate branch.
            child1[key] = np.concatenate((gene1[:point], gene2[point:]))
            child2[key] = np.concatenate((gene2[:point], gene1[point:]))
        else:
            child1[key] = gene1.copy()
            child2[key] = gene2.copy()
    return child1, child2

# Mutation
def mutate(chromosome):
    """Randomly perturb a chromosome in place and return it.

    Each parameter array is selected with probability ``mutation_rate``. For
    a selected array one position along the first axis is chosen: a 1-D array
    gets that single element redrawn, any other array gets the whole row
    redrawn, in both cases from a standard normal distribution.

    Args:
        chromosome: Dict of NumPy arrays; modified in place.

    Returns:
        The same ``chromosome`` object that was passed in.
    """
    for gene in chromosome.values():
        if random.random() >= mutation_rate:
            continue  # this array is left untouched
        row = random.randint(0, gene.shape[0] - 1)
        if gene.ndim == 1:
            gene[row] = np.random.randn()
        else:
            gene[row, :] = np.random.randn(gene.shape[1])
    return chromosome

# Main algorithm
population = initialize_population()
best_fitness = -float('inf')
best_chromosome = None
# Honour the `elitism` switch: with elitism disabled no chromosome is carried
# over as an elite.
num_elites = elite_size if elitism else 0

for generation in range(num_generations):
    fitnesses = [fitness(chromosome) for chromosome in population]
    best_index = int(np.argmax(fitnesses))
    max_fitness = fitnesses[best_index]
    if max_fitness > best_fitness:
        best_fitness = max_fitness
        # Copy every array, not just the dict, so the stored champion cannot
        # be affected by later in-place changes to population members.
        best_chromosome = {
            key: value.copy() for key, value in population[best_index].items()
        }
    print(f'Generation {generation} | Best fitness: {max_fitness}')

    # Apply elitism: the highest-scoring chromosomes survive unchanged.
    # (A plain [-0:] slice would select everything, hence the guard.)
    elite_indices = np.argsort(fitnesses)[-num_elites:] if num_elites > 0 else []
    elites = [population[i] for i in elite_indices]

    selected_population = selection(population, fitnesses)
    next_population = []
    # Pair neighbouring parents (0,1), (2,3), ...; an unpaired last parent is
    # handled below.
    for parent1, parent2 in zip(selected_population[::2], selected_population[1::2]):
        child1, child2 = crossover(parent1, parent2)
        next_population.extend([mutate(child1), mutate(child2)])

    # Add elites to the next population
    next_population.extend(elites)

    # Handle the case when the selected population size is odd
    if len(selected_population) % 2 != 0:
        next_population.append(selected_population[-1])

    # Keep the population size constant. A shortfall only occurs when elites
    # are not carried over; it is filled with additional selected survivors.
    shortfall = len(population) - len(next_population)
    if shortfall > 0 and selected_population:
        next_population.extend(random.choices(selected_population, k=shortfall))

    population = next_population

# Training is finished: release the environment used for fitness evaluation.
env.close()

# Save the best chromosome (the dict is stored as a pickled object array)
np.save('best_chromosome_nn.npy', best_chromosome)
print('Best solution found:', best_chromosome)

# Example of using the saved model
# Rebinds the module-level `env` to a MountainCar instance created with
# render_mode='human'; functions that read `env` use this instance from here on.
env = gym.make('MountainCar-v0', render_mode='human')
def load_model():
    """Read the saved chromosome back from 'best_chromosome_nn.npy'.

    Returns:
        The Python object stored in the file (the chromosome dict when the
        file was written from one).
    """
    # NOTE: allow_pickle=True unpickles the file contents; only load files
    # that come from a trusted source.
    stored = np.load('best_chromosome_nn.npy', allow_pickle=True)
    # The object is wrapped in a 0-d array; .item() unwraps it.
    return stored.item()

def run_model(chromosome, max_steps=200):
    """Play one episode with ``chromosome`` in the module-level ``env``.

    The action with the highest network output is taken at every step. The
    environment is closed before returning, even if an error occurs, so a new
    environment must be created before calling this function again.

    Args:
        chromosome: Dict of network parameters accepted by ``forward_pass``.
        max_steps: Upper bound on the number of environment steps.

    Returns:
        The sum of the rewards collected during the episode.
    """
    total_reward = 0
    try:
        state, _ = env.reset()
        for _ in range(max_steps):
            # NOTE(review): with render_mode='human' Gymnasium presumably
            # renders during step() already; the explicit call is kept as is.
            env.render()
            action_values = forward_pass(state, chromosome)
            action = np.argmax(action_values)
            # Stop on either end-of-episode flag (goal reached or time limit).
            state, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
            if terminated or truncated:
                break
    finally:
        env.close()
    return total_reward  # Total reward (negative value) reflects the number of steps

# Load the model and run it
# Replaces the in-memory best chromosome with the one read back from disk.
best_chromosome = load_model()
# Plays a single episode in the current `env` and keeps its summed reward.
total_reward = run_model(best_chromosome)
print('Total reward using the best model:', total_reward)

Generation 0 | Best fitness: -200.0
Generation 1 | Best fitness: -200.0
Generation 2 | Best fitness: -200.0
Generation 3 | Best fitness: -200.0
Generation 4 | Best fitness: -200.0
Generation 5 | Best fitness: -200.0
Generation 6 | Best fitness: -200.0
Generation 7 | Best fitness: -200.0
Generation 8 | Best fitness: -200.0
Generation 9 | Best fitness: -200.0
Generation 10 | Best fitness: -200.0
Generation 11 | Best fitness: -200.0
Generation 12 | Best fitness: -200.0
Generation 13 | Best fitness: -200.0
Generation 14 | Best fitness: -200.0
Generation 15 | Best fitness: -200.0
Generation 16 | Best fitness: -200.0
Generation 17 | Best fitness: -200.0
Generation 18 | Best fitness: -200.0
Generation 19 | Best fitness: -200.0
Generation 20 | Best fitness: -200.0
Generation 21 | Best fitness: -200.0
Generation 22 | Best fitness: -200.0
Generation 23 | Best fitness: -200.0
Generation 24 | Best fitness: -200.0
Generation 25 | Best fitness: -200.0
Generation 26 | Best fitness: -200.0
Generation 



Total reward using the best model: -103.0
