From b74ca434bfa074c5de115e7f54d99bf89b6797a6 Mon Sep 17 00:00:00 2001
From: Nihat Engin Toklu
Date: Tue, 5 Mar 2024 02:57:53 +0100
Subject: [PATCH] Add a notebook demonstrating how to evolve objects

This commit adds an example notebook which demonstrates how one can declare
and solve an optimization problem that has its `dtype` set as `object`, and
therefore has a custom solution structure.

While working on the example notebook, one bug and one genetic-algorithm-related
shortcoming were identified and fixed.

The bug was preventing one from instantiating a new `ObjectArray` with the
help of `evotorch.tools.as_tensor(..., dtype=object)`. This commit fixes this
bug.

The shortcoming was as follows: when the user defined a custom mutation
operator using a regular function, `GeneticAlgorithm` assumed that the regular
function would return a new batch of solutions whose length is equal to the
number of solutions it received. This was an unnecessary restriction, and it
has been removed with this commit.
---
 examples/notebooks/Evolving_Objects.ipynb | 787 ++++++++++++++++++++++
 examples/notebooks/README.md              |   1 +
 src/evotorch/algorithms/ga.py             |  20 +-
 src/evotorch/tools/misc.py                |   6 +-
 4 files changed, 808 insertions(+), 6 deletions(-)
 create mode 100644 examples/notebooks/Evolving_Objects.ipynb

diff --git a/examples/notebooks/Evolving_Objects.ipynb b/examples/notebooks/Evolving_Objects.ipynb
new file mode 100644
index 00000000..ca2e88c5
--- /dev/null
+++ b/examples/notebooks/Evolving_Objects.ipynb
@@ -0,0 +1,787 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "471df805-4945-4039-bf8a-b1fcfc50e990",
+   "metadata": {},
+   "source": [
+    "# Evolving Objects\n",
+    "\n",
+    "This notebook shows how one can express solutions that are not entirely numeric, and that differ in structure and/or length. To express such solutions, one can set the `dtype` of the problem at hand as `object`, which is demonstrated in this notebook.\n",
+    "\n",
+    "## Task\n",
+    "\n",
+    "In this example notebook, we aim to evolve a neural network policy for the MuJoCo Ant environment provided by the `gymnasium` library.\n",
+    "\n",
+    "## Implementation details\n",
+    "\n",
+    "**Genetic Algorithm.**\n",
+    "We use a simple genetic algorithm, inspired by the neuroevolution studies `[1]` and `[2]`. This simple genetic algorithm has neither cross-over nor tournament selection; we rely only on a mutation operator. The default population size in this notebook is 4. At each generation, 16 mutated candidate solutions are generated, giving us an extended population of size 20. The best 4 solutions within this extended population are then selected for the next generation.\n",
+    "\n",
+    "**How a solution is encoded and decoded.**\n",
+    "In `[1]`, a solution is expressed via a list of random seeds. These random seeds are decoded into a parameter vector that fills an entire neural network, by generating pseudo-random Gaussian noise from each seed and then summing the generated noise vectors.\n",
+    "In `[2]`, parameters of three modules are evolved: vision, memory, and policy. A solution includes a separate list of seeds for each module (where each list of seeds is decoded into the associated module's parameters, as in `[1]`).\n",
+    "\n",
+    "In this notebook, like in `[1]`, we evolve a single module, which is a policy based on a feed-forward neural network. Additionally, as was done in `[2]` for multiple modules, we keep a separate list of seeds for each parameter tensor of our policy neural network.\n",
+    "\n",
+    "The structure of a solution can be summarized like this:\n",
+    "\n",
+    "```\n",
+    "[\n",
+    "    [ ... random seeds for parameter tensor0 ... ],\n",
+    "    [ ... random seeds for parameter tensor1 ... ],\n",
+    "    ...\n",
+    "]\n",
+    "```\n",
+    "\n",
+    "**Action binning.**\n",
+    "In this example, by default, we apply binning on the actions generated by the feed-forward network, the two bins being -0.2 and +0.2.\n",
+    "\n",
+    "**How a solution is mutated.**\n",
+    "To mutate a solution, we pick one of the contained sublists and add another random seed into the picked sublist. By this, we end up adding further noise onto the picked sublist's associated parameter tensor.\n",
+    "\n",
+    "## Please note\n",
+    "\n",
+    "Although this example is inspired by the studies `[1]` and `[2]`, the implementation here does not strictly follow what is proposed in those studies. Also, we do not claim that the particular implementation we show in this notebook is a competitive and efficient way of doing neuro-evolution. Still, you might find it interesting that this approach can evolve primitive gaits for the `Ant-v4` environment, using a small population (`popsize=4` and `num_mutated_solutions=16`, 10 episodes when evaluating each solution), over a small number of generations (30).\n",
+    "\n",
+    "Most importantly, we hope this notebook will serve as a tutorial showing how one can define problems whose solutions are not necessarily numeric, and/or whose lengths are not fixed.\n",
+    "\n",
+    "---\n",
+    "\n",
+    "`[1]` Felipe Petroski Such, Vashisht Madhavan, Edoardo Conti, Joel Lehman, Kenneth O. Stanley, Jeff Clune (2018). [Deep Neuroevolution: Genetic Algorithms Are a Competitive Alternative for Training Deep Neural Networks for Reinforcement Learning](https://arxiv.org/abs/1712.06567).\n",
+    "\n",
+    "`[2]` Sebastian Risi, Kenneth O. Stanley (2019). [Deep Neuroevolution of Recurrent and Discrete World Models](https://arxiv.org/abs/1906.08857).\n",
+    "\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b3195d2e-bdaa-41b8-aafd-45b1b0d9e6f1",
+   "metadata": {},
+   "source": [
+    "We begin with importing the necessary libraries:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e296cf9d-86c2-4952-9df3-318e04866766",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from evotorch import Problem, Solution, SolutionBatch\n",
+    "from evotorch.algorithms import GeneticAlgorithm\n",
+    "from evotorch.tools import ObjectArray, as_tensor\n",
+    "from evotorch.logging import StdOutLogger, PicklingLogger\n",
+    "\n",
+    "import numpy as np\n",
+    "import gymnasium as gym\n",
+    "import pickle\n",
+    "import os\n",
+    "from typing import Iterable, Optional, Union\n",
+    "from datetime import datetime\n",
+    "import torch\n",
+    "from torch import nn\n",
+    "from copy import deepcopy"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a8d4cdc7-555e-48f6-be8a-cca47ac41173",
+   "metadata": {},
+   "source": [
+    "Below are utility functions for generating and loading observation normalization data.\n",
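+    "\n",
+    "The `(mean, stdev)` pair produced by these utilities is later used to standardize each observation before it is fed to the policy network (see the `normalized` helper inside `run_solution` further below). Conceptually, the standardization amounts to the following (a simplified sketch of what `normalized` does):\n",
+    "\n",
+    "```\n",
+    "normalized_obs = (obs - obs_mean) / obs_stdev\n",
+    "```"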
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70c6cebe-435a-40ba-a6ef-6e217a701cc1", + "metadata": {}, + "outputs": [], + "source": [ + "def env_name_to_file_name(env_name: str) -> str:\n", + " \"\"\"\n", + " Convert the gymnasium environment ID to a more file-name-friendly counterpart.\n", + "\n", + " The character ':' in the input string will be replaced with '__colon__'.\n", + " Similarly, the character '/' in the input string will be replaced with '__slash__'.\n", + "\n", + " Args:\n", + " env_name: gymnasium environment ID\n", + " Returns:\n", + " File-name-friendly counterpart of the input string.\n", + " \"\"\"\n", + " result = env_name\n", + " result = result.replace(\":\", \"__colon__\")\n", + " result = result.replace(\"/\", \"__slash__\")\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6afff2a-e216-4141-ab99-1d90cc4d6043", + "metadata": {}, + "outputs": [], + "source": [ + "def create_obs_data(\n", + " *,\n", + " env_name: str,\n", + " num_timesteps: int,\n", + " report_interval: Union[int, float] = 5,\n", + " seed: int = 0,\n", + ") -> tuple:\n", + " \"\"\"\n", + " Create observation normalization data with the help of random actions.\n", + "\n", + " This function creates a gymnasium environment from the given `env_name`.\n", + " Then, it keeps sending random actions to this environment, and collects stats from the observations.\n", + "\n", + " Args:\n", + " env_name: ID of the gymnasium environment\n", + " num_timesteps: For how many timesteps will the function operate on the environment\n", + " report_interval: Time interval, in seconds, for reporting the status\n", + " seed: A seed that will be used for regulating the randomness of both the environment\n", + " and of the random actions.\n", + " Returns:\n", + " A tuple of the form `(mean, stdev)`, where `mean` is the elementwise mean of the observation vectors,\n", + " and `stdev` is the elementwise standard deviation of the observation vectors.\n", + " \"\"\"\n", + " print(\"Creating observation data for\", env_name)\n", + "\n", + " class accumulated:\n", + " sum: Optional[np.ndarray] = None\n", + " sum_of_squares: Optional[np.ndarray] = None\n", + " count: int = 0\n", + "\n", + " def accumulate(obs: np.ndarray):\n", + " if accumulated.sum is None:\n", + " accumulated.sum = obs.copy()\n", + " else:\n", + " accumulated.sum += obs\n", + "\n", + " squared = obs ** 2\n", + " if accumulated.sum_of_squares is None:\n", + " accumulated.sum_of_squares = squared\n", + " else:\n", + " accumulated.sum_of_squares += squared\n", + "\n", + " accumulated.count += 1\n", + "\n", + " rndgen = np.random.RandomState(seed)\n", + "\n", + " env = gym.make(env_name)\n", + " assert isinstance(env.action_space, gym.spaces.Box), \"Can only work with Box action spaces\"\n", + "\n", + " def reset_env() -> tuple:\n", + " return env.reset(seed=rndgen.randint(2 ** 32))\n", + "\n", + " action_gap = env.action_space.high - env.action_space.low\n", + " def sample_action() -> np.ndarray:\n", + " return (rndgen.rand(*(env.action_space.shape)) * action_gap) + env.action_space.low\n", + "\n", + " observation, _ = reset_env()\n", + " accumulate(observation)\n", + "\n", + " last_report_time = datetime.now()\n", + "\n", + " for t in range(num_timesteps):\n", + " action = sample_action()\n", + " observation, _, terminated, truncated, _ = env.step(action)\n", + " accumulate(observation)\n", + "\n", + " done = terminated | truncated\n", + " if done:\n", + " observation, info = reset_env()\n", + 
" accumulate(observation)\n", + "\n", + " tnow = datetime.now()\n", + " if (tnow - last_report_time).total_seconds() > report_interval:\n", + " print(\"Number of timesteps:\", t, \"/\", num_timesteps)\n", + " last_report_time = tnow\n", + "\n", + " E_x = accumulated.sum / accumulated.count\n", + " E_x2 = accumulated.sum_of_squares / accumulated.count\n", + "\n", + " mean = E_x\n", + " variance = np.maximum(E_x2 - ((E_x) ** 2), 1e-2)\n", + " stdev = np.sqrt(variance)\n", + "\n", + " print(\"Done.\")\n", + " \n", + " return mean, stdev" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebaae4a9-4013-404a-a5af-530dbf5d53ed", + "metadata": {}, + "outputs": [], + "source": [ + "def get_obs_data(env_name: str, num_timesteps: int = 50000, seed: int = 0) -> tuple:\n", + " \"\"\"\n", + " Generate observation normalization data for the gymnasium environment whose name is given.\n", + "\n", + " If such normalization data was already generated and saved into a pickle file, that pickle file will be loaded.\n", + " Otherwise, new normalization data will be generated and saved into a new pickle file.\n", + "\n", + " Args:\n", + " env_name: ID of the gymnasium environment\n", + " num_timesteps: For how many timesteps will the observation collector operate on the environment\n", + " seed: A seed that will be used for regulating the randomness of both the environment\n", + " and of the random actions.\n", + " Returns:\n", + " A tuple of the form `(mean, stdev)`, where `mean` is the elementwise mean of the observation vectors,\n", + " and `stdev` is the elementwise standard deviation of the observation vectors.\n", + " \"\"\"\n", + " num_timesteps = int(num_timesteps)\n", + " envfname = env_name_to_file_name(env_name)\n", + " fname = f\"obs_seed{seed}_t{num_timesteps}_{envfname}.pickle\"\n", + " if os.path.isfile(fname):\n", + " with open(fname, \"rb\") as f:\n", + " return pickle.load(f)\n", + " else:\n", + " obsdata = create_obs_data(env_name=env_name, num_timesteps=num_timesteps, seed=seed)\n", + " with open(fname, \"wb\") as f:\n", + " pickle.dump(obsdata, f)\n", + " return obsdata" + ] + }, + { + "cell_type": "markdown", + "id": "0140ac43-4c60-42ee-80b6-eb69516b5625", + "metadata": {}, + "source": [ + "Below is the environment ID of the MuJoCo Ant environment that we are targeting.\n", + "To try this notebook on a different environment, one can change the value of `ENV_NAME` below.\n", + "Please note, however, that different environments will most probably require different hyperparameter settings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d29932a-c56d-4147-b1b9-db046f9f726f", + "metadata": {}, + "outputs": [], + "source": [ + "ENV_NAME = \"Ant-v4\"" + ] + }, + { + "cell_type": "markdown", + "id": "a8fd4bdf-202d-4796-a45b-24b30fc15f95", + "metadata": {}, + "source": [ + "Below, we generate (or load) observation normalization data for the environment:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "626d76d1-05ce-45ab-8b1a-cb6490aece1d", + "metadata": {}, + "outputs": [], + "source": [ + "obs_mean, obs_stdev = get_obs_data(ENV_NAME)\n", + "obs_mean, obs_stdev" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "215d251c-62c7-4bff-957e-68447c44ce97", + "metadata": {}, + "outputs": [], + "source": [ + "class PolicySearchProblem(Problem):\n", + " \"\"\"\n", + " A policy search problem where solutions are expressed as lists of sublists of seeds.\n", + "\n", + " This problem class can be seen as a simplified re-implementation of\n", + " `evotorch.neuroevolution.GymNE`, the main difference being that a policy (a solution)\n", + " is represented via a list of sublists of seeds, instead of vectors of real numbers.\n", + "\n", + " Among the simplifications compared to `evotorch.neuroevolution.GymNE` are:\n", + "\n", + " - There is no online observation normalization. Instead, a static normalization data is\n", + " given at the moment of initialization.\n", + " - Discrete-actioned environments are not considered here. Both observations and actions\n", + " are assumed to be in the form of 1-dimensional numeric arrays.\n", + " \"\"\"\n", + " def __init__(\n", + " self,\n", + " *,\n", + " env_name: str,\n", + " obs_mean: Optional[np.ndarray] = None,\n", + " obs_stdev: Optional[np.ndarray] = None,\n", + " mutation_power: float = 0.5,\n", + " mutation_decay: float = 0.9,\n", + " min_mutation_power: float = 0.05,\n", + " hidden_sizes: tuple = (64,),\n", + " bin: Optional[float] = 0.2,\n", + " num_episodes: int = 5,\n", + " decrease_rewards_by: Optional[float] = 1.0,\n", + " num_actors: Optional[Union[int, str]] = 4, # \"max\"\n", + " ):\n", + " \"\"\"\n", + " `__init__(...)`: Initialize the `PolicySearchProblem`.\n", + "\n", + " Args:\n", + " env_name: ID of the gymnasium environment, as a string.\n", + " obs_mean: Mean vector for the observations of the environment.\n", + " obs_stdev: Standard deviation vector for the observations of the environment.\n", + " mutation_power: Multiplier that affects the magnitude of the pseudo-random mutation noise.\n", + " mutation_decay: As more seeds are added to a sublist of a solution, the magnitude of the pseudo-random\n", + " mutation can be decayed with the help of this argument. If `mutation_decay` is 1.0, there will be no\n", + " decaying. If `mutation_decay` is 0.9, the magnitude of the pseudo-random noise is decayed via\n", + " multiplication by 0.9 with each new seed.\n", + " min_mutation_power: Lower bound for the mutation power, to prevent it from decaying too much.\n", + " hidden_sizes: A tuple containing the sizes for the hidden layers of the network.\n", + " The integers within this tuple determines the layer sizes, and the number of elements within this\n", + " tuple determines the number of hidden layers of the feedforward policy network.\n", + " bin: When given as a real number `r`, the action vector generated by the policy network will be\n", + " subject to binning, the two action bins being `-r` and `+r`. 
Can be left as None if action binning\n", + " is not desired.\n", + " num_episodes: For how many episodes will a solution be tested.\n", + " decrease_rewards_by: When given as a real number, each reward will be decreased by this amount for each\n", + " timestep. Can be used to nullify the alive bonuses of the environment.\n", + " This feature can only be used when the actual rewards of the environment is used. Therefore,\n", + " if `eval_by_x` or `eval_by_distance` is True, this argument must be left as None.\n", + " num_actors: Number of actors to be created for parallelized evaluation.\n", + " If given as \"max\", all available CPUs will be used.\n", + " \"\"\"\n", + " self._env_name = str(env_name)\n", + "\n", + " self._env: Optional[gym.Env] = None\n", + " self._policy: Optional[nn.Module] = None\n", + " self._policy_dtype: Optional[torch.dtype] = None\n", + "\n", + " self._mutation_power: float = float(mutation_power)\n", + " self._mutation_decay: float = float(mutation_decay)\n", + " self._min_mutation_power: float = float(min_mutation_power)\n", + "\n", + " self._hidden_sizes: tuple = tuple(int(item) for item in hidden_sizes)\n", + " self._num_episodes: int = int(num_episodes)\n", + "\n", + " if bin is None:\n", + " self._bin_ub = None\n", + " self._bin_lb = None\n", + " else:\n", + " self._bin_ub = abs(float(bin))\n", + " self._bin_lb = -(self._bin_ub)\n", + "\n", + " self._num_layers: int = sum(1 for _ in self._make_neural_network(1, 1).parameters())\n", + " self._obs_mean: Optional[np.ndarray] = obs_mean\n", + " self._obs_stdev: Optional[np.ndarray] = obs_stdev\n", + "\n", + " self._decrease_rewards_by = None if decrease_rewards_by is None else float(decrease_rewards_by)\n", + "\n", + " # Please notice how we set the `dtype` of the problem as `object`.\n", + " # Having `dtype=object` causes this Problem instance to work with `ObjectArray` instances, rather than\n", + " # PyTorch tensors.\n", + " # An ObjectArray can store `None`s, lists, dictionaries, strings, floats, ints, numpy arrays, and PyTorch\n", + " # tensors within itself.\n", + " # In our example, each solution will be a list of lists, where the sublists contain integers representing\n", + " # random seeds.\n", + "\n", + " super().__init__(\n", + " objective_sense=\"max\",\n", + " dtype=object,\n", + " num_actors=num_actors,\n", + " )\n", + "\n", + " def _fill(self, values: ObjectArray):\n", + " # Fill the given `values` with newly created solutions.\n", + " # Each item within `values` represents the decision values for another solution.\n", + " n = len(values)\n", + " all_seeds = np.random.randint(2 ** 32, size=(n, self._num_layers))\n", + " values[:] = [\n", + " [[seed] for seed in seeds_for_layers]\n", + " for seeds_for_layers in all_seeds\n", + " ]\n", + "\n", + " def _get_env(self) -> gym.Env:\n", + " # Get the gymnasium environment instance.\n", + " # If the environment is not created yet, create and return it.\n", + " if self._env is None:\n", + " self._env = gym.make(self._env_name)\n", + " assert isinstance(self._env.observation_space, gym.spaces.Box)\n", + " assert isinstance(self._env.action_space, gym.spaces.Box)\n", + " return self._env\n", + "\n", + " def _make_neural_network(self, input_size: int, output_size: int) -> nn.Module:\n", + " from evotorch.neuroevolution.net.layers import Bin\n", + " # Make the neural network policy in the form of an `nn.Module` instance.\n", + " sizes = (input_size,) + self._hidden_sizes + (output_size,)\n", + " submodules = []\n", + " last_i = len(sizes) - 2\n", + " for i in 
range(1 + last_i):\n", + " if i > 0:\n", + " submodules.append(nn.Tanh())\n", + " submodules.append(nn.Linear(sizes[i], sizes[i + 1]))\n", + " if (self._bin_lb is not None) and (self._bin_ub is not None):\n", + " submodules.append(Bin(self._bin_lb, self._bin_ub))\n", + " return nn.Sequential(*submodules)\n", + " \n", + " def _get_policy(self, solution: Optional[Iterable] = None) -> nn.Module:\n", + " # Get the policy neural network.\n", + " # If the policy neural network is not created yet, create it with the help of the internal method\n", + " # `make_neural_network(...)`\n", + " # If `solution` is given, decode the `solution` into parameter tensors, and parameterize the neural network\n", + " # policy with these tensors.\n", + "\n", + " if self._policy is None:\n", + " env = self._get_env()\n", + " [obs_length] = env.observation_space.shape\n", + " [act_length] = env.action_space.shape\n", + " self._policy = self._make_neural_network(obs_length, act_length)\n", + "\n", + " for p in self._policy.parameters():\n", + " self._policy_dtype = p.dtype\n", + " break\n", + "\n", + " if solution is not None:\n", + " if isinstance(solution, Solution):\n", + " # Get the decision values stored by the `solution`.\n", + " # In the case of this example, given how we initially `_fill` our solutions,\n", + " # we should get a list of lists of random seeds.\n", + " # Note that the `.values` property gives us the decision values in a read-only form.\n", + " # Therefore, the \"list\"s we get here are actually `ImmutableList` instances.\n", + " # Within this method, having these lists in read-only form should not break anything, because\n", + " # we just want to read and decode them.\n", + " solution = solution.values\n", + " elif isinstance(solution, SolutionBatch):\n", + " raise TypeError(\n", + " \"Cannot fill the policy network from a `SolutionBatch` (which could contain multiple solutions).\"\n", + " \" Please try again with a single `Solution` object.\"\n", + " )\n", + "\n", + " seeds_for_all_layers = solution\n", + " parameters_for_all_layers = list(self._policy.parameters())\n", + " assert len(seeds_for_all_layers) == len(parameters_for_all_layers)\n", + "\n", + " with torch.no_grad():\n", + " for seeds, parameter_tensor in zip(seeds_for_all_layers, parameters_for_all_layers):\n", + " parameter_tensor[:] = 0\n", + " mutation_power = self._mutation_power\n", + " for seed in seeds:\n", + " parameter_tensor += torch.as_tensor(\n", + " (np.random.RandomState(seed).randn(*(parameter_tensor.shape)) * mutation_power),\n", + " dtype=self._policy_dtype,\n", + " )\n", + " mutation_power = max(mutation_power * self._mutation_decay, self._min_mutation_power)\n", + " \n", + " return self._policy\n", + "\n", + " @torch.no_grad()\n", + " def to_torch_module(self, solution: Iterable) -> nn.Module:\n", + " \"\"\"\n", + " Get an independent (cloned) policy neural network, parameterized via decoding the given `solution`.\n", + "\n", + " Args:\n", + " solution: The solution which expresses the parameters of the policy by storing random seeds.\n", + " Can be a `Solution` instance or a list-like object.\n", + " Returns:\n", + " The policy, as a `nn.Module` instance. 
Note that, this `to_torch_module(...)` method implementation\n", + " does not contain observation normalization and action clipping layers in this returned module.\n", + " \"\"\"\n", + " if solution is None:\n", + " raise TypeError(\"The argument `solution` cannot be None\")\n", + " return deepcopy(self._get_policy(solution))\n", + "\n", + " def run_solution(\n", + " self,\n", + " solution: Iterable,\n", + " *,\n", + " visualize: bool = False,\n", + " print_scores: bool = False,\n", + " ) -> float:\n", + " \"\"\"\n", + " Run the policy expressed by the given `solution`. Return the fitness.\n", + "\n", + " Args:\n", + " solution: The solution which expresses the parameters of the policy by storing random seeds.\n", + " Can be a `Solution` instance or a list-like object.\n", + " visualize: Whether or not to render the environment onto the screen while running the solution.\n", + " print_scores: Whether or not to print the scores while running the solution.\n", + " Returns:\n", + " The fitness.\n", + " \"\"\"\n", + " if visualize:\n", + " env = gym.make(self._env_name, render_mode=\"human\")\n", + " else:\n", + " env = self._get_env()\n", + " policy = self._get_policy(solution)\n", + "\n", + " def normalized(obs: np.ndarray) -> np.ndarray:\n", + " if (self._obs_mean is not None) and (self._obs_stdev is not None):\n", + " obs = obs - self._obs_mean\n", + " obs = obs / self._obs_stdev\n", + " return obs\n", + "\n", + " def use_policy(obs: np.ndarray) -> np.ndarray:\n", + " obs = torch.as_tensor(normalized(obs), dtype=self._policy_dtype)\n", + " with torch.no_grad():\n", + " action = policy(obs)\n", + " action = action.numpy()\n", + " return action\n", + "\n", + " scores = []\n", + "\n", + " for _ in range(self._num_episodes):\n", + " cumulative_reward = 0.0\n", + " unmodified_cumulative_reward = 0.0\n", + " observation, _ = env.reset()\n", + " if visualize:\n", + " env.render()\n", + " while True:\n", + " action = use_policy(observation)\n", + " observation, reward, terminated, truncated, info = env.step(action)\n", + " if visualize:\n", + " env.render()\n", + " unmodified_cumulative_reward += reward\n", + " if self._decrease_rewards_by is not None:\n", + " reward -= self._decrease_rewards_by\n", + " cumulative_reward += reward\n", + " if terminated or truncated:\n", + " break\n", + " episode_score = cumulative_reward\n", + " if print_scores:\n", + " print(\"Episode score:\", episode_score, \" Cumulative reward:\", unmodified_cumulative_reward)\n", + " scores.append(episode_score)\n", + "\n", + " return float(np.mean(scores))\n", + "\n", + " def visualize(\n", + " self,\n", + " solution: Iterable,\n", + " *,\n", + " print_scores: bool = True,\n", + " ) -> float:\n", + " \"\"\"\n", + " Visualize the policy expressed by the given `solution`. 
Return the fitness.\n", + "\n", + " Args:\n", + " solution: The solution which expresses the parameters of the policy by storing random seeds.\n", + " Can be a `Solution` instance or a list-like object.\n", + " print_scores: Whether or not to print the scores while running the solution.\n", + " Returns:\n", + " The fitness.\n", + " \"\"\"\n", + " return self.run_solution(\n", + " solution,\n", + " visualize=True,\n", + " print_scores=print_scores,\n", + " )\n", + "\n", + " def _evaluate(self, solution: Solution):\n", + " solution.set_evaluation(self.run_solution(solution))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42e14515-0f8c-4cb8-9c37-eed22631cc34", + "metadata": {}, + "outputs": [], + "source": [ + "# The `mutate` function we implement below uses this constant, which determines how many mutated decision values\n", + "# will be generated from the parent decision values.\n", + "num_mutated_solutions = 16\n", + "\n", + "def mutate(all_values: ObjectArray) -> ObjectArray:\n", + " \"\"\"\n", + " Generate mutated decision values, taking the given decision values as a base.\n", + "\n", + " Args:\n", + " all_values: An `ObjectArray` containing the source decision values. New mutated decision values will be\n", + " generated based on these values. Each item within this `ObjectArray` represents the decision values of a\n", + " different solution.\n", + " Returns:\n", + " Mutated decision values.\n", + " \"\"\"\n", + " # Generate an empty list that will contain the mutated decision values\n", + " mutated_values = [None for _ in range(num_mutated_solutions)]\n", + " num_original_solutions = len(all_values)\n", + "\n", + " for i in range(num_mutated_solutions):\n", + " original_solution_index = i % num_original_solutions\n", + "\n", + " # When we take an element from the `ObjectArray`, containers such as lists are stored in a read-only manner\n", + " # (i.e. they are actually `ImmutableList` instances).\n", + " # By using the clone method of such an immutable container, we get the native Python counterpart of the\n", + " # container, as an independent clone. 
Now, we can modify this clone.\n", + " solution_values = all_values[original_solution_index].clone()\n", + " num_layers = len(solution_values)\n", + " chosen_layer_index = int(np.random.randint(num_layers, size=tuple()))\n", + " solution_values[chosen_layer_index].append(int(np.random.randint(2 ** 32, size=tuple())))\n", + " mutated_values[i] = solution_values\n", + "\n", + " # Note that the `as_tensor(...)` function we use below is from the namespace `evotorch.tools`.\n", + " # This `as_tensor(...)` function, upon receiving the keyword argument `dtype=object`, will convert the list it\n", + " # receives to `ObjectArray`, which is the expected return type for mutation functions such as this one\n", + " # when the problem's dtype is `object`.\n", + " return as_tensor(mutated_values, dtype=object)" + ] + }, + { + "cell_type": "markdown", + "id": "2fe8f8e0-ca6c-467b-be17-e1428cb7e455", + "metadata": {}, + "source": [ + "Now that we have the implementation of our `Problem` subclass, we instantiate it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc1893a3-0bf9-4bd0-abac-bc136d8d1db2", + "metadata": {}, + "outputs": [], + "source": [ + "problem = PolicySearchProblem(\n", + " env_name=ENV_NAME,\n", + " obs_mean=obs_mean,\n", + " obs_stdev=obs_stdev,\n", + " num_actors=\"max\", # <- \"max\" means 'use all CPUs'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "26241767-d6cc-49ae-bede-e895dd944536", + "metadata": {}, + "source": [ + "Instantiate the `GeneticAlgorithm`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ad83512-8019-4e10-9d91-8314b22037d5", + "metadata": {}, + "outputs": [], + "source": [ + "searcher = GeneticAlgorithm(\n", + " # This is the `Problem` instance that we will work on:\n", + " problem,\n", + "\n", + " # We only have one mutation operator, which is implemented within the `mutate` function:\n", + " operators=[mutate],\n", + "\n", + " # Population size:\n", + " popsize=4,\n", + "\n", + " # If `elitist` is given as True:\n", + " # - Mutated solutions generated with the help of `mutate(...)` will be added to the parent population, creating\n", + " # an extended population.\n", + " # - The top-`popsize` of the solutions from within this extended population will be picked as the next\n", + " # generation's solutions.\n", + " #\n", + " # If `elitist` is given as False:\n", + " # - If the number of mutated solutions is less than `popsize`, the worst solutions of the main population\n", + " # will be replaced by the mutated solutions\n", + " # - If the number of mutated solutions is equal or greater than `popsize`, the main population will be replaced\n", + " # entirely by the top-`popsize` mutated solutions.\n", + " # - With `num_mutated_solutions>=popsize` (which is the default case within this notebook, given that `popsize=4`\n", + " # and `num_mutated_solutions=16`), it is NOT recommended to set `elitist=False`, because the mutation\n", + " # operator we implement here does not have any guarantee of improving the parent solutions. 
So, randomly made\n", + " # mutated solutions would entirely replace the parents, turning this genetic algorithm to a blind search.\n", + " elitist=True,\n", + ")\n", + "\n", + "searcher" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2610bcab-8e2f-4422-bb0a-66f01a37ea35", + "metadata": {}, + "outputs": [], + "source": [ + "_ = StdOutLogger(searcher)\n", + "pickling_logger = PicklingLogger(searcher, items_to_save=[\"best\", \"pop_best\"], interval=5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "337daf81-10f4-4d00-8532-c026d6453129", + "metadata": {}, + "outputs": [], + "source": [ + "t0 = datetime.now()\n", + "searcher.run(30)\n", + "t1 = datetime.now()\n", + "\n", + "print(\"Elapsed:\", (t1 - t0).total_seconds(), \"seconds.\")" + ] + }, + { + "cell_type": "markdown", + "id": "f3005822-a3a7-4bab-88e4-41f526fb78cb", + "metadata": {}, + "source": [ + "---\n", + "\n", + "# Visualization of the solution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d043c619-7e7d-4ab5-a105-2f8b51c45a77", + "metadata": {}, + "outputs": [], + "source": [ + "# \"pop_best\": solution which got the best score in the last population\n", + "# \"best\": solution which got the best score during the entire evolutionary run\n", + "\n", + "# Please note that, in the case of Ant-v4, the simulator (and therefore the problem\n", + "# we are solving in this example) is stochastic. Therefore, even when the \"best\" solution\n", + "# has a better recorded score, it does not mean that it is definitely better than the\n", + "# \"pop_best\" score.\n", + "# If our optimization problem were purely numeric and if we were using a distribution-based algorithm\n", + "# such as PGPE, CEM, etc., the \"center\" solution would also be available (and would usually be a\n", + "# reliable candidate for being picked as the final solution for a stochastic problem such as this).\n", + "\n", + "problem.visualize(searcher.status[\"pop_best\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1835b151-0f6f-4028-a5e4-51838d64f659", + "metadata": {}, + "outputs": [], + "source": [ + "searcher.status[\"pop_best\"]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/notebooks/README.md b/examples/notebooks/README.md index ad11028c..e2eeeb9c 100644 --- a/examples/notebooks/README.md +++ b/examples/notebooks/README.md @@ -24,3 +24,4 @@ The notebook examples are listed below: - **[Variational Quantum Eigensolvers with SNES](Variational_Quantum_Eigensolvers_with_SNES.ipynb):** re-implements (with some minor changes in experimental setup), [experiments in a recent paper](https://iopscience.iop.org/article/10.1088/2632-2153/abf3ac) demonstrating that `SNES` is a scalable alternative to [analytic gradients on a quantum computer](https://journals.aps.org/pra/abstract/10.1103/PhysRevA.99.032331), and can practically optimize [Quantum Eigensolvers](https://www.nature.com/articles/ncomms5213). - **[Genetic Programming](Genetic_Programming.ipynb):** demonstrates genetic programming with GPU support. 
- **[Feature Space Illumination with MAPElites](Feature_Space_Illumination_with_MAPElites.ipynb):** demonstrates how one can use the MAPElites algorithm to obtain a population organized according to the features of the solutions. +- **[Evolving_Objects](Evolving_Objects.ipynb):** demonstrates how to declare and solve optimization problems with custom structured solutions (storing not-necessarily numeric data and/or having varying lengths). In more details, this example evolves simple gaits for the `Ant-v4` reinforcement learning environment using a custom solution encoding such that each solution contains multiple sublists of integers. diff --git a/src/evotorch/algorithms/ga.py b/src/evotorch/algorithms/ga.py index 5b915598..701a143a 100644 --- a/src/evotorch/algorithms/ga.py +++ b/src/evotorch/algorithms/ga.py @@ -25,6 +25,7 @@ def _use_operator(batch: SolutionBatch, operator: Callable) -> SolutionBatch: from ..operators import CopyingOperator, Operator + from ..tools import is_dtype_object if isinstance(operator, CopyingOperator): result = operator(batch) @@ -32,9 +33,22 @@ def _use_operator(batch: SolutionBatch, operator: Callable) -> SolutionBatch: result = batch.clone() operator(result) else: - result = batch.clone() - old_values = result.access_values() - result.set_values(operator(old_values)) + cloned_batch = batch.clone() + original_values = cloned_batch.access_values(keep_evals=True) + new_values = operator(original_values) + if not is_dtype_object(new_values.dtype): + if new_values.ndim != 2: + raise ValueError( + "The tensor returned by the given operator was expected to have 2 dimensions." + f" However, it has {new_values.ndim} dimensions, with shape {new_values.shape}." + ) + n = len(new_values) + if n == len(cloned_batch): + cloned_batch.set_values(new_values) + result = cloned_batch + else: + result = SolutionBatch(popsize=n, like=batch, empty=True) + result.set_values(new_values) return result diff --git a/src/evotorch/tools/misc.py b/src/evotorch/tools/misc.py index 748b5e68..edfbb1ba 100644 --- a/src/evotorch/tools/misc.py +++ b/src/evotorch/tools/misc.py @@ -1743,12 +1743,12 @@ def as_tensor(x: Any, *, dtype: Optional[DType] = None, device: Optional[Device] return x else: raise ValueError( - f"An ObjectArray cannot be moved into a device other than 'cpu'." f" The received device is: {device}." + f"An ObjectArray cannot be moved into a device other than 'cpu'. The received device is: {device}." ) elif is_dtype_object(dtype): - if (device is None) or (str(device) == "cpu"): + if (device is not None) and (str(device) != "cpu"): raise ValueError( - f"An ObjectArray cannot be created on a device other than 'cpu'." f" The received device is: {device}." + f"An ObjectArray cannot be created on a device other than 'cpu'. The received device is: {device}." ) if isinstance(x, ObjectArray): return x
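
For reference, below is a minimal usage sketch of the two fixes described in the commit message. It is illustrative only, and it assumes a version of EvoTorch that includes this patch.

```python
from evotorch.tools import ObjectArray, as_tensor

# The `misc.py` fix allows a brand-new ObjectArray to be built from a plain
# Python sequence via `as_tensor(..., dtype=object)` (previously, this call
# raised an error even when the target device was left as None or was "cpu"):
values = as_tensor([[1, 2], [3, 4, 5]], dtype=object)
assert isinstance(values, ObjectArray)

# The `ga.py` change allows a function-based mutation operator registered with
# `GeneticAlgorithm` to return a batch whose length differs from the number of
# solutions it received. For example, the notebook's `mutate` function always
# returns `num_mutated_solutions` mutated solutions, regardless of how many
# parent solutions were passed to it.
```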