In [None]:
import numpy as np
from typing import Callable

In [None]:
def reward(params):
    """Score a parameter vector with a negated, weighted sum of squared offsets.

    Only the first three entries of ``params`` are read. Each squared term is
    non-negative, so the result is at most 0, and it equals 0 when
    ``params[:3]`` is ``(1.0, 2.0, -1.0)``.

    Args:
        params: Indexable sequence (list, tuple, array) with at least three
            numeric entries.

    Returns:
        ``-((p0 - 1)^2 + 0.5 * (p1 - 2)^2 + 0.25 * (p2 + 1)^2)``.
    """
    first_term = np.power(params[0] - 1.0, 2)
    second_term = 0.5 * np.power(params[1] - 2.0, 2)
    third_term = 0.25 * np.power(params[2] + 1.0, 2)
    return -(first_term + second_term + third_term)

In [None]:
def es_fit(reward: Callable, num_params: int, generations: int, num_children: int, learning_rate: float, std_dev: float, seed: int = None):
    """Maximise ``reward`` with a population-based evolution strategy.

    Starting from the all-zero vector, every generation draws ``num_children``
    Gaussian perturbations of the current parameters, scores them, standardises
    the scores into advantages, and builds a candidate by moving along the
    advantage-weighted sum of the perturbations. The candidate replaces the
    current parameters only when it scores strictly higher, so the returned
    vector never scores below ``reward(np.zeros(num_params))``.

    Args:
        reward: Callable taking a 1-D array of length ``num_params`` and
            returning a scalar score (higher is better). It is treated as
            deterministic: the score of the current parameters is cached
            instead of being re-evaluated every generation.
        num_params: Length of the parameter vector.
        generations: Number of generations to run.
        num_children: Number of perturbed samples drawn per generation.
        learning_rate: Step-size multiplier applied to the update direction.
        std_dev: Standard deviation of the Gaussian perturbations.
        seed: Optional seed. When given, samples come from a private
            ``np.random.RandomState(seed)`` so the global NumPy random state
            is left untouched; when ``None``, samples are drawn from the
            global ``np.random`` state.

    Returns:
        1-D ``np.ndarray`` of length ``num_params`` holding the best accepted
        parameters.
    """
    # A private legacy RandomState yields the same stream that
    # np.random.seed(seed) would, without reseeding the global generator.
    rng = np.random.RandomState(seed) if seed is not None else np.random

    params = np.zeros(num_params)
    best_reward = reward(params)
    step_scale = learning_rate / (num_children * std_dev**2)

    for _ in range(generations):
        children = rng.normal(params, std_dev, (num_children, num_params))
        rewards = np.array([reward(child) for child in children])

        spread = np.std(rewards)
        if not spread > 0:
            # Every child scored the same (or the scores are NaN): there is no
            # ranking signal, and dividing by the spread would only yield NaNs.
            continue

        advantages = (rewards - np.mean(rewards)) / spread
        # Weight the perturbations themselves rather than the raw children, so
        # the update does not rely on the advantages summing exactly to zero.
        perturbations = children - params
        candidate = params + step_scale * np.dot(perturbations.T, advantages)

        candidate_reward = reward(candidate)
        if candidate_reward > best_reward:
            params, best_reward = candidate, candidate_reward
    return params


In [None]:
# Run the population-based strategy on the 3-parameter reward and report the
# fitted vector together with its score.
params_es = es_fit(
    reward,
    num_params=3,
    generations=200,
    num_children=64,
    learning_rate=0.01,
    std_dev=0.1,
    seed=42,
)
reward_es = reward(params_es)
print(f"Estimated params: {params_es}, reward: {reward_es}")

In [None]:
def es_sel_fit(reward: Callable, num_params: int, generations: int, prob_success: float, seed: int = None):
    """Maximise ``reward`` with a one-parent, one-child evolution strategy.

    Starting from the all-zero vector with a step size of 1.0, each generation
    samples a single Gaussian child around the parent. A child that scores
    strictly higher replaces the parent and the step size is multiplied by
    ``exp(1/3)``; otherwise the parent is kept and the step size is multiplied
    by ``exp(-prob_success / (3 * (1 - prob_success)))``. The two factors
    cancel out on average when the fraction of successful children equals
    ``prob_success``.

    Args:
        reward: Callable taking a 1-D array of length ``num_params`` and
            returning a scalar score (higher is better).
        num_params: Length of the parameter vector.
        generations: Number of children to sample (one per generation).
        prob_success: Target success fraction used in the shrink factor.
            A plain Python float of exactly 1.0 raises ``ZeroDivisionError``.
        seed: Optional seed. When given, samples come from a private
            ``np.random.RandomState(seed)`` so the global NumPy random state
            is left untouched; when ``None``, samples are drawn from the
            global ``np.random`` state.

    Returns:
        1-D ``np.ndarray`` of length ``num_params`` holding the best parent
        found.
    """
    # A private legacy RandomState yields the same stream that
    # np.random.seed(seed) would, without reseeding the global generator.
    rng = np.random.RandomState(seed) if seed is not None else np.random

    # Both step-size multipliers are loop-invariant, so compute them once.
    grow_factor = np.exp(1.0/3.0)
    shrink_factor = np.exp(-prob_success/(3.0*(1.0 - prob_success)))

    params = np.zeros(num_params)
    std_dev = 1.0
    reward_parent = reward(params)
    for _ in range(generations):
        params_child = rng.normal(params, std_dev, num_params)
        reward_child = reward(params_child)
        if reward_child > reward_parent:
            # Success: adopt the child and widen the search.
            params, reward_parent = params_child, reward_child
            std_dev *= grow_factor
        else:
            # Failure: keep the parent and narrow the search.
            std_dev *= shrink_factor
    return params


In [None]:
# Run the single-child strategy on the same reward for 4 * 200 = 800
# generations and report the fitted vector together with its score.
params_es_sel = es_sel_fit(
    reward,
    num_params=3,
    generations=4 * 200,
    prob_success=0.2,
    seed=42,
)
reward_es_sel = reward(params_es_sel)
print(f"Estimated params: {params_es_sel}, reward: {reward_es_sel}")