In [3]:
import numpy as np
import gymnasium as gym
import random
import pickle

# Hyperparameters of the genetic algorithm
population_size = 100  # number of chromosomes created by initialize_population
num_generations = 20  # number of generations evolved by run_genetic_algorithm
mutation_rate = 0.1  # threshold compared against random.random() in mutate
crossover_rate = 0.5  # threshold compared against random.random() in crossover
elitism = True  # NOTE(review): not read anywhere in the visible code - confirm intent
elite_size = 5  # number of top-fitness chromosomes carried into the next generation
hidden_size = 10  # size of the hidden layer

# Environment (shared by fitness evaluation and by run_model)
env = gym.make('MountainCar-v0')
state_size = env.observation_space.shape[0]  # length of the observation vector
action_size = env.action_space.n  # number of discrete actions

def initialize_population(model_type='linear'):
    """Create the initial random population.

    For ``model_type == 'linear'`` every chromosome is a
    ``(state_size, action_size)`` matrix of standard-normal samples. For any
    other value every chromosome is a dict with the parameters ``w1``, ``b1``,
    ``w2`` and ``b2`` of a network with one hidden layer of ``hidden_size``
    units.

    Returns:
        A list of ``population_size`` chromosomes.
    """
    def random_network():
        # The draw order w1, b1, w2, b2 is kept so seeded runs stay reproducible.
        return {
            'w1': np.random.randn(state_size, hidden_size),
            'b1': np.random.randn(hidden_size),
            'w2': np.random.randn(hidden_size, action_size),
            'b2': np.random.randn(action_size),
        }

    if model_type != 'linear':
        return [random_network() for _ in range(population_size)]

    matrices = []
    for _ in range(population_size):
        matrices.append(np.random.randn(state_size, action_size))
    return matrices

def forward_pass(state, chromosome):
    """Evaluate the one-hidden-layer network stored in *chromosome*.

    The hidden layer uses a tanh activation; the output layer is linear.

    Args:
        state: Observation fed into the network.
        chromosome: Dict with the entries ``w1``, ``b1``, ``w2`` and ``b2``.

    Returns:
        The output-layer values (one score per action).
    """
    w1, b1, w2, b2 = (chromosome[name] for name in ('w1', 'b1', 'w2', 'b2'))
    hidden = np.tanh(np.dot(state, w1) + b1)
    return np.dot(hidden, w2) + b2

def fitness(chromosome, model_type='linear'):
    """Play one episode with *chromosome* as the policy and return its reward.

    The policy is greedy: the action with the highest score is taken at every
    step. Scores come from ``state @ chromosome`` for the linear model and
    from ``forward_pass`` otherwise. The episode is capped at 200 steps.

    Args:
        chromosome: Weight matrix (linear) or parameter dict (network).
        model_type: ``'linear'`` or any other value for the network model.

    Returns:
        The summed reward of the episode (negative in the logged runs).
    """
    state, _ = env.reset()
    total_reward = 0
    for _ in range(200):
        if model_type == 'linear':
            action_values = np.dot(state, chromosome)
        else:
            action_values = forward_pass(state, chromosome)
        action = np.argmax(action_values)
        # Gymnasium's step() returns (obs, reward, terminated, truncated, info).
        # The episode is over when either flag is set; stepping further would
        # act on a finished episode.
        state, reward, terminated, truncated, _ = env.step(action)
        total_reward += reward
        if terminated or truncated:
            break
    return total_reward

def selection(population, fitnesses):
    """Fitness-proportionate (roulette-wheel) selection with replacement.

    Negative fitness values are shifted so that the worst individual gets a
    weight of 1, which keeps all weights positive. ``len(population) -
    elite_size`` individuals are drawn; the returned list holds references to
    the population members, not copies.

    Args:
        population: Sequence of chromosomes.
        fitnesses: One fitness value per chromosome, in the same order.

    Returns:
        The list of selected chromosomes.
    """
    min_fitness = min(fitnesses)
    offset = abs(min_fitness) + 1 if min_fitness < 0 else 0
    weights = [f + offset for f in fitnesses]
    if not any(weights):
        # All weights are zero (every fitness is 0): random.choices would raise
        # ValueError, so fall back to uniform sampling instead.
        weights = None
    return random.choices(population, weights=weights, k=len(population) - elite_size)

def _single_point_crossover(first, second):
    """Swap the tails of two equally shaped arrays at a random cut on axis 0."""
    length = first.shape[0]
    if length < 2:
        # No interior cut point exists (randint(1, 0) would raise), so the
        # children are plain copies of the parents.
        return first.copy(), second.copy()
    point = random.randint(1, length - 1)
    child_a = np.concatenate((first[:point], second[point:]))
    child_b = np.concatenate((second[:point], first[point:]))
    return child_a, child_b


def crossover(parent1, parent2, model_type='linear'):
    """Recombine two parents into two children by single-point crossover.

    For the linear model the whole weight matrix is crossed with probability
    ``crossover_rate``. For the network model each parameter array is crossed
    independently with that probability.

    The children never share memory with the parents, so mutating a child in
    place cannot alter a parent (or an elite that is the same object).

    Args:
        parent1: First parent (ndarray, or dict of ndarrays for the network).
        parent2: Second parent with the same structure as *parent1*.
        model_type: ``'linear'`` or any other value for the network model.

    Returns:
        A ``(child1, child2)`` tuple.
    """
    if model_type == 'linear':
        if random.random() < crossover_rate:
            return _single_point_crossover(parent1, parent2)
        # Copy instead of returning the parents themselves: mutate() works in
        # place and would otherwise corrupt the parent objects.
        return parent1.copy(), parent2.copy()

    child1, child2 = {}, {}
    for key in parent1:
        if random.random() < crossover_rate:
            child1[key], child2[key] = _single_point_crossover(parent1[key], parent2[key])
        else:
            child1[key] = parent1[key].copy()
            child2[key] = parent2[key].copy()
    return child1, child2

def mutate(chromosome, model_type='linear'):
    """Randomly perturb *chromosome* in place and return it.

    Linear model: with probability ``mutation_rate`` one row of the weight
    matrix is replaced by fresh standard-normal samples.

    Network model: each parameter array is considered independently; with
    probability ``mutation_rate`` one element (1-D arrays) or one row (2-D
    arrays) is resampled.

    Args:
        chromosome: Weight matrix (linear) or parameter dict (network).
        model_type: ``'linear'`` or any other value for the network model.

    Returns:
        The same *chromosome* object that was passed in.
    """
    if model_type == 'linear':
        if random.random() < mutation_rate:
            row = random.randint(0, state_size - 1)
            chromosome[row] = np.random.randn(action_size)
        return chromosome

    for name in chromosome.keys():
        if not random.random() < mutation_rate:
            continue
        params = chromosome[name]
        position = random.randint(0, params.shape[0] - 1)
        if params.ndim == 1:
            params[position] = np.random.randn()
        else:
            params[position, :] = np.random.randn(params.shape[1])
    return chromosome

def run_genetic_algorithm(model_type='linear'):
    """Evolve a population and persist the best chromosome found.

    Each generation evaluates every chromosome with ``fitness``, keeps the
    ``elite_size`` fittest ones, fills the rest of the next generation with
    mutated crossover children of individuals drawn by ``selection``, and
    prints the generation's best fitness. After ``num_generations`` the best
    chromosome seen in any generation is pickled to
    ``best_chromosome_<model_type>.pkl`` and printed.

    NOTE(review): the module-level ``elitism`` flag is not consulted here;
    elites are carried over whenever ``elite_size`` is positive.

    Args:
        model_type: ``'linear'`` or any other value for the network model.

    Returns:
        The best chromosome found (``None`` if no generation was run).
    """
    # Local import keeps this block self-contained. deepcopy is needed because
    # network chromosomes are dicts of arrays and dict.copy() is shallow.
    import copy

    population = initialize_population(model_type)
    best_fitness = -float('inf')
    best_chromosome = None

    for generation in range(num_generations):
        fitnesses = [fitness(chromosome, model_type) for chromosome in population]
        best_index = int(np.argmax(fitnesses))
        max_fitness = fitnesses[best_index]
        if max_fitness > best_fitness:
            best_fitness = max_fitness
            best_chromosome = copy.deepcopy(population[best_index])
        print(f'Generation {generation} | Best fitness: {max_fitness}')

        # Snapshot the elites before breeding so that in-place mutation of an
        # aliased individual cannot change them. A slice of [-0:] would select
        # the whole population, hence the explicit guard.
        if elite_size > 0:
            elite_indices = np.argsort(fitnesses)[-elite_size:]
        else:
            elite_indices = []
        elites = [copy.deepcopy(population[i]) for i in elite_indices]

        # Selection samples with replacement and returns references, so the
        # same object can appear several times; copy to decouple them.
        selected_population = [
            copy.deepcopy(individual)
            for individual in selection(population, fitnesses)
        ]
        next_population = []
        for i in range(0, len(selected_population) - 1, 2):
            parent1 = selected_population[i]
            parent2 = selected_population[i + 1]
            child1, child2 = crossover(parent1, parent2, model_type)
            next_population.extend([mutate(child1, model_type), mutate(child2, model_type)])

        next_population.extend(elites)

        # An odd number of selected individuals leaves one without a partner;
        # it is carried over unchanged to keep the population size constant.
        if len(selected_population) % 2 != 0:
            next_population.append(selected_population[-1])

        population = next_population

    best_filename = f'best_chromosome_{model_type}.pkl'
    with open(best_filename, 'wb') as f:
        pickle.dump(best_chromosome, f)
    print('Best solution found:', best_chromosome)
    return best_chromosome

def load_model(filename):
    """Return the object pickled in the file *filename*.

    Only use this on files the program wrote itself: unpickling untrusted
    data can execute arbitrary code.
    """
    with open(filename, mode='rb') as handle:
        model = pickle.load(handle)
    return model

def run_model(chromosome, model_type='linear'):
    """Play one rendered episode with *chromosome* and return its reward.

    The policy is greedy over ``state @ chromosome`` (linear model) or over
    ``forward_pass`` (network model). The episode is capped at 200 steps and
    the shared environment is closed afterwards, even if a step fails.

    Args:
        chromosome: Weight matrix (linear) or parameter dict (network).
        model_type: ``'linear'`` or any other value for the network model.

    Returns:
        The summed reward of the episode (negative in the logged runs).
    """
    total_reward = 0
    try:
        state, _ = env.reset()
        for _ in range(200):
            # NOTE(review): presumably render() only draws when the environment
            # was created with a render_mode - confirm against the gym.make call.
            env.render()
            if model_type == 'linear':
                action_values = np.dot(state, chromosome)
            else:
                action_values = forward_pass(state, chromosome)
            action = np.argmax(action_values)
            # step() returns (obs, reward, terminated, truncated, info); either
            # flag ends the episode.
            state, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
            if terminated or truncated:
                break
    finally:
        # Release rendering resources even when the episode raised.
        env.close()
    return total_reward

# Train both model types. Each run pickles its best chromosome to
# 'best_chromosome_<model_type>.pkl'.
print("Running genetic algorithm for linear model...")
run_genetic_algorithm(model_type='linear')
print("Running genetic algorithm for neural network model...")
run_genetic_algorithm(model_type='nn')

# Reload the best linear chromosome from disk and play one rendered episode.
print("Running the best linear model...")
best_chromosome_linear = load_model('best_chromosome_linear.pkl')
total_reward_linear = run_model(best_chromosome_linear, model_type='linear')
print('Total reward using the best linear model:', total_reward_linear)

# Same for the neural network chromosome. run_model() closes the shared `env`
# at the end of each call, so this second call reuses an env that was closed.
print("Running the best neural network model...")
best_chromosome_nn = load_model('best_chromosome_nn.pkl')
total_reward_nn = run_model(best_chromosome_nn, model_type='nn')
print('Total reward using the best neural network model:', total_reward_nn)

Running genetic algorithm for linear model...
Generation 0 | Best fitness: -200.0
Generation 1 | Best fitness: -200.0
Generation 2 | Best fitness: -200.0
Generation 3 | Best fitness: -167.0
Generation 4 | Best fitness: -160.0
Generation 5 | Best fitness: -159.0
Generation 6 | Best fitness: -159.0
Generation 7 | Best fitness: -159.0
Generation 8 | Best fitness: -159.0
Generation 9 | Best fitness: -159.0
Generation 10 | Best fitness: -159.0
Generation 11 | Best fitness: -159.0
Generation 12 | Best fitness: -159.0
Generation 13 | Best fitness: -159.0
Generation 14 | Best fitness: -159.0
Generation 15 | Best fitness: -159.0
Generation 16 | Best fitness: -106.0
Generation 17 | Best fitness: -159.0
Generation 18 | Best fitness: -159.0
Generation 19 | Best fitness: -159.0
Best solution found: [[-0.29455541  0.67892938 -0.27584599]
 [-1.08599056  0.2301801   2.796704  ]]
Running genetic algorithm for neural network model...
Generation 0 | Best fitness: -200.0
Generation 1 | Best fitness: -200.

  gym.logger.warn(
