In [1]:
!apt-get install -y swig cmake
!pip install box2d-py==2.3.8
!pip install --no-cache-dir box2d-py
!pip install --upgrade gym

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
cmake is already the newest version (3.22.1-1ubuntu1.22.04.2).
Suggested packages:
  swig-doc swig-examples swig4.0-examples swig4.0-doc
The following NEW packages will be installed:
  swig swig4.0
0 upgraded, 2 newly installed, 0 to remove and 45 not upgraded.
Need to get 1,116 kB of archives.
After this operation, 5,542 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig4.0 amd64 4.0.2-1ubuntu1 [1,110 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig all 4.0.2-1ubuntu1 [5,632 B]
Fetched 1,116 kB in 1s (1,367 kB/s)
Selecting previously unselected package swig4.0.
(Reading database ... 121918 files and directories currently installed.)
Preparing to unpack .../swig4.0_4.0.2-1ubuntu1_amd64.deb ...
Unpacking swig4.0 (4.0.2-1ubuntu1) ...
Selecting previously unselected package swig.
Preparing to unpack .../swig_4.0.2-1ubu

In [2]:
# Import necessary libraries
import gym
import numpy as np
from concurrent.futures import ProcessPoolExecutor

# Define the genetic algorithm parameters
POPULATION_SIZE = 100
NUM_GENERATIONS = 50
MUTATION_RATE = 0.1

# Define the Q-learning parameters
LEARNING_RATE = 0.1
DISCOUNT_FACTOR = 0.99
EPSILON = 0.1

# Define the environment and algorithm settings
ENV_NAME = 'LunarLander-v2'
LEARNING_ALGORITHM = 'DQN'  # Choose from 'Q-learning', 'DQN', 'REINFORCE', 'ES'


In [3]:
# Function to create the environment
def create_environment(env_name):
    return gym.make(env_name)


In [4]:
# Define the Agent class
class Agent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.weights = np.random.rand(state_size, action_size)
        self.fitness = [0, 0]  # Initialize with two objectives: reward and steps

    def act(self, state):
        # Epsilon-greedy policy for exploration
        if np.random.rand() < EPSILON:
            return np.random.randint(self.action_size)
        else:
            q_values = np.dot(state, self.weights)
            return np.argmax(q_values)

    def learn(self, state, action, reward, next_state, done):
        # Update fitness based on reward and steps taken
        self.fitness[0] += reward
        self.fitness[1] += 1
        if done:
            self.fitness[1] += 1000  # Penalty for early termination


In [5]:
# Function to evaluate an agent
def evaluate_agent(agent, env):
    total_reward = 0
    total_steps = 0
    state = env.reset()
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.learn(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward
        total_steps += 1
    agent.fitness = [-total_reward, total_steps]  # Negative reward for minimization
    return agent.fitness


In [6]:
# Function to evolve agents
def evolve_agents(env, state_size, action_size):
    # Initialize population
    population = [Agent(state_size, action_size) for _ in range(POPULATION_SIZE)]

    for generation in range(NUM_GENERATIONS):
        # Evaluate each agent in parallel
        with ProcessPoolExecutor() as executor:
            fitness_results = list(executor.map(evaluate_agent, population, [env] * POPULATION_SIZE))

        # Perform adaptive fitness evaluation using surrogate models
        fitness_results = adapt_fitness_evaluation(fitness_results)

        # Select parents for reproduction based on fitness
        parents = select_parents(population, fitness_results)

        # Check if we have enough parents
        if len(parents) < 2:
            raise ValueError(f"Not enough parents selected: {len(parents)}. At least 2 needed.")

        # Generate offspring through crossover and mutation
        offspring = []
        while len(offspring) < POPULATION_SIZE:
            parent1, parent2 = np.random.choice(parents, size=2, replace=False)
            child = crossover(parent1, parent2)
            child = mutate(child)
            offspring.append(child)

        # Replace old population with offspring
        population = offspring

        # Perform dynamic hyperparameter tuning
        global MUTATION_RATE
        MUTATION_RATE = adjust_hyperparameters(generation)

        # Print average fitness for monitoring
        avg_reward = np.mean([fit[0] for fit in fitness_results])
        avg_steps = np.mean([fit[1] for fit in fitness_results])
        print(f"Generation {generation + 1}, Average Reward: {avg_reward}, Average Steps: {avg_steps}, Mutation Rate: {MUTATION_RATE}")

    # Return the best agent after evolution
    return max(population, key=lambda x: x.fitness[0])


In [7]:
# Function for adaptive fitness evaluation
def adapt_fitness_evaluation(fitness_results):
    # Example of adaptive fitness evaluation using surrogate models
    # Here, we simply scale the fitness values for demonstration purposes
    scaled_fitness = []
    max_reward = max(fit[0] for fit in fitness_results)
    max_steps = max(fit[1] for fit in fitness_results)
    for fit in fitness_results:
        scaled_reward = fit[0] / max_reward
        scaled_steps = fit[1] / max_steps
        scaled_fitness.append([scaled_reward, scaled_steps])
    return scaled_fitness


In [8]:
# Function to select parents based on fitness
def select_parents(population, fitness_results):
    # Select parents based on Pareto dominance
    parents = []
    for agent, fitness in zip(population, fitness_results):
        dominated = False
        for other_agent, other_fitness in zip(population, fitness_results):
            if agent is not other_agent and dominates(fitness, other_fitness):
                dominated = True
                break
        if not dominated:
            parents.append(agent)
    return parents


In [9]:
# Function to check Pareto dominance
def dominates(fitness1, fitness2):
    # Check if fitness1 dominates fitness2 in a Pareto sense
    return all(fit1 <= fit2 for fit1, fit2 in zip(fitness1, fitness2)) and any(fit1 < fit2 for fit1, fit2 in zip(fitness1, fitness2))


In [10]:
# Function to perform crossover between two parents
def crossover(parent1, parent2):
    # Perform single-point crossover
    crossover_point = np.random.randint(0, parent1.state_size)
    child_weights = np.concatenate([parent1.weights[:crossover_point], parent2.weights[crossover_point:]])
    child = Agent(parent1.state_size, parent1.action_size)
    child.weights = child_weights
    return child


In [11]:
# Function to mutate an agent
def mutate(agent):
    # Perform random mutation
    for i in range(agent.state_size):
        for j in range(agent.action_size):
            if np.random.rand() < MUTATION_RATE:
                agent.weights[i, j] += np.random.normal(scale=0.1)
    return agent


In [12]:
# Function to adjust hyperparameters
def adjust_hyperparameters(generation):
    # Example of dynamic hyperparameter tuning
    # Here, we define a custom rule to adjust mutation rate based on generation
    if generation < NUM_GENERATIONS / 2:
        return 0.1
    else:
        return 0.05


In [13]:
# Main function to execute the algorithm
def main():
    env = create_environment(ENV_NAME)
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    best_agent = evolve_agents(env, state_size, action_size)
    print("Best Agent Found!")
    evaluate_agent(best_agent, env)
    env.close()

# Run the main function if this is the main module
if __name__ == "__main__":
    main()


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.