In [None]:
import numpy as np
from typing import Callable

In [None]:
def reward(params):
    """Negated weighted sum of squared distances from the point (1, 2, -1).

    Only the first three entries of ``params`` are read. The squared offsets
    are weighted 1, 0.5 and 0.25 respectively, so the value is at most zero and
    reaches zero when ``params[:3]`` equals ``(1.0, 2.0, -1.0)``.
    """
    first_term = np.power(params[0] - 1.0, 2)
    second_term = 0.5 * np.power(params[1] - 2.0, 2)
    third_term = 0.25 * np.power(params[2] + 1.0, 2)
    return -(first_term + second_term + third_term)

In [None]:
# Sanity check: (1, 2, -1) zeroes every squared term of `reward`, so the value
# displayed by this cell should be zero.
params = np.array((1.0, 2.0, -1.0))
reward(params)

In [None]:
def es_fit(reward: Callable, num_params: int, generations: int, num_populations: int, learning_rate: float, std_dev: float, seed: int = None, verbose: bool = True):
    """Maximise ``reward`` with a greedy evolution-strategies style search.

    The search starts from the all-zero vector. Each generation draws
    ``num_populations`` candidates from a normal distribution centred on the
    current parameters, standardises their rewards, and builds a proposal by
    moving along the reward-weighted sum of the candidates. The proposal
    replaces the current parameters only when its reward is strictly higher.

    Args:
        reward: Callable taking a 1-D parameter vector and returning a scalar
            score to maximise.
        num_params: Length of the parameter vector.
        generations: Number of generations to run.
        num_populations: Number of candidates sampled per generation.
        learning_rate: Step-size factor applied to the reward-weighted update.
        std_dev: Standard deviation of the sampling noise.
        seed: If not ``None``, passed to ``np.random.seed`` before sampling.
            Note that this reseeds NumPy's global generator, not a private one.
        verbose: When true (the default), print the current parameters and
            their reward after every generation.

    Returns:
        The parameter vector retained after the last generation.
    """
    if seed is not None:
        np.random.seed(seed)
    params = np.zeros(num_params)
    for generation in range(1, generations + 1):
        candidates = np.random.normal(params, std_dev, (num_populations, num_params))
        rewards = np.array([reward(candidate) for candidate in candidates])
        if rewards.size and rewards.max() > rewards.min():
            actions = (rewards - np.mean(rewards)) / np.std(rewards)
        else:
            # Every candidate scored the same, so the rewards carry no
            # direction. Standardising would divide by a zero standard
            # deviation and turn the proposal into NaN; take no step instead.
            actions = np.zeros(len(rewards))
        pot_params = params + learning_rate/(num_populations * std_dev**2) * np.dot(candidates.T, actions)
        pot_reward = reward(pot_params)
        current_reward = reward(params)
        if pot_reward > current_reward:
            # Greedy acceptance: keep the proposal only on strict improvement.
            params = pot_params
            current_reward = pot_reward
        if verbose:
            print(f"Generation {generation} - Params: {params}, reward: {current_reward}")
    return params


In [None]:
# Run the search on the 3-parameter reward with a fixed seed, then report the
# parameters it settled on together with their reward.
params_es = es_fit(
    reward,
    num_params=3,
    generations=200,
    num_populations=64,
    learning_rate=0.01,
    std_dev=0.1,
    seed=42,
)
reward_es = reward(params_es)
print(f"Estimated params: {params_es}, reward: {reward_es}")