In [None]:
from typing import Callable, Optional

import numpy as np

In [None]:
def reward(params):
    """Score a 3-parameter point with a negated, weighted quadratic bowl.

    The value is ``-((p0 - 1)^2 + 0.5 * (p1 - 2)^2 + 0.25 * (p2 + 1)^2)``, so
    it is never positive and equals zero exactly at ``(1, 2, -1)``.

    Args:
        params: Indexable sequence of numbers; only indices 0, 1 and 2 are
            read, any further entries are ignored.

    Returns:
        The negated weighted sum of squared offsets (higher is better).
    """
    first_term = np.power(params[0] - 1.0, 2)
    second_term = 0.5 * np.power(params[1] - 2.0, 2)
    third_term = 0.25 * np.power(params[2] + 1.0, 2)
    # Negate so that the minimum of the bowl becomes the maximum reward.
    return -(first_term + second_term + third_term)

In [None]:
# Sanity check: evaluate the reward at (1, 2, -1), the point where every
# squared term inside reward() is zero.
params = np.array([1.0, 2.0, -1.0])
reward(params)

In [None]:
def es_fit(reward: Callable, num_params: int, generations: int, num_populations: int, learning_rate: float, std_dev: float, seed: Optional[int] = None):
    """Maximise ``reward`` with a basic evolution-strategies loop.

    Starting from an all-zero parameter vector, every generation draws
    ``num_populations`` Gaussian perturbations, evaluates ``reward`` at each
    perturbed point, and moves the parameters along the perturbations weighted
    by their mean-centred rewards. The step is divided by the standard
    deviation of the rewards, which makes it insensitive to the reward scale.

    Args:
        reward: Callable that takes a 1-D array of length ``num_params`` and
            returns a scalar score; larger is better.
        num_params: Length of the parameter vector being optimised.
        generations: Number of update iterations to run.
        num_populations: Number of perturbations sampled per generation.
        learning_rate: Multiplier applied to each update step.
        std_dev: Standard deviation of the Gaussian perturbations.
        seed: If given, passed to ``np.random.seed`` before sampling so the
            run is reproducible. Note this reseeds NumPy's global generator.

    Returns:
        A float array of shape ``(num_params,)`` holding the final parameters.
    """
    if seed is not None:
        np.random.seed(seed)
    params = np.zeros(num_params)
    for _ in range(generations):
        # One row per population member, one column per parameter.
        etas = np.random.normal(0, std_dev, (num_populations, num_params))
        rewards = np.array([reward(params + eta) for eta in etas])
        std_dev_rewards = np.std(rewards)
        # When the spread is zero (or NaN) the centred rewards carry no
        # direction, and dividing by it would turn params into NaN for the
        # rest of the run. Skip the update for this generation instead.
        if not std_dev_rewards > 0:
            continue
        centered_rewards = rewards - np.mean(rewards)
        step_scale = learning_rate / (num_populations * std_dev * std_dev_rewards)
        params += step_scale * np.dot(etas.T, centered_rewards)
    return params


In [None]:
# Fit the three parameters of reward() with evolution strategies, then report
# the estimate together with the reward it achieves.
params_es = es_fit(
    reward,
    num_params=3,
    generations=200,
    num_populations=64,
    learning_rate=0.1,
    std_dev=0.1,
    seed=42,
)
reward_es = reward(params_es)
print(f"Estimated params: {params_es}, reward: {reward_es}")