Merge pull request #10 from luksfarris/issue/#8
Implement hyperparameter search
luksfarris committed May 19, 2021
2 parents 963ef75 + d1cc57b commit 14376b8
Showing 7 changed files with 98 additions and 32 deletions.
21 changes: 14 additions & 7 deletions pydeeprecsys/rl/agents/actor_critic.py
@@ -1,5 +1,5 @@
from pydeeprecsys.rl.agents.agent import ReinforcementLearning
from typing import Any, List
from typing import Any, List, Optional
from pydeeprecsys.rl.experience_replay.experience_buffer import ExperienceReplayBuffer
from pydeeprecsys.rl.experience_replay.buffer_parameters import (
ExperienceReplayBufferParameters,
@@ -12,27 +12,34 @@ class ActorCriticAgent(ReinforcementLearning):
"""Policy estimator using a value estimator as a baseline.
It's on-policy, for discrete action spaces, and episodic environments.
This implementation uses stochastic policies.
TODO: could be a sub class of reinforces"""
TODO: could be a sub class of reinforce"""

def __init__(
self,
n_actions: int,
state_size: int,
discount_factor: int = 0.99,
learning_rate=1e-3,
actor_hidden_layers: Optional[List[int]] = None,
critic_hidden_layers: Optional[List[int]] = None,
actor_learning_rate=1e-3,
critic_learning_rate=1e-3,
):
if not actor_hidden_layers:
actor_hidden_layers = [state_size * 2, state_size * 2]
if not critic_hidden_layers:
critic_hidden_layers = [state_size * 2, int(state_size / 2)]
self.episode_count = 0
self.value_estimator = ValueEstimator(
state_size,
[state_size * 2, int(state_size / 2)],
critic_hidden_layers,
1,
learning_rate=learning_rate,
learning_rate=critic_learning_rate,
)
self.policy_estimator = PolicyEstimator(
state_size,
[state_size * 2, state_size * 2],
actor_hidden_layers,
n_actions,
learning_rate=learning_rate,
learning_rate=actor_learning_rate,
)
self.discount_factor = discount_factor
# starts the buffer
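For reference, a minimal usage sketch of the new constructor (values are illustrative, not taken from this commit); both hidden-layer arguments are optional and, when omitted, are derived from state_size exactly as in the __init__ above:

from pydeeprecsys.rl.agents.actor_critic import ActorCriticAgent

agent = ActorCriticAgent(
    n_actions=2,
    state_size=4,
    discount_factor=0.95,
    actor_hidden_layers=[8, 8],    # default: [state_size * 2, state_size * 2]
    critic_hidden_layers=[8, 2],   # default: [state_size * 2, int(state_size / 2)]
    actor_learning_rate=1e-3,
    critic_learning_rate=1e-3,
)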
16 changes: 9 additions & 7 deletions pydeeprecsys/rl/agents/rainbow.py
@@ -1,5 +1,5 @@
from numpy.random import RandomState
from typing import Any, Optional
from typing import Any, Optional, List
from numpy import arange
from copy import deepcopy
from pydeeprecsys.rl.neural_networks.dueling import DuelingDDQN
@@ -34,18 +34,20 @@ def __init__(
batch_size: int = 32,
noise_sigma: float = 0.017,
discount_factor: float = 0.99,
learning_rate: float = 0.99,
learning_rate: float = 0.0001,
hidden_layers: List[int] = None,
random_state: RandomState = RandomState(),
statistics: Optional[LearningStatistics] = None,
):

self.network = DuelingDDQN(
input_size,
output_size,
learning_rate,
noise_sigma,
discount_factor,
n_input=input_size,
n_output=output_size,
learning_rate=learning_rate,
noise_sigma=noise_sigma,
discount_factor=discount_factor,
statistics=statistics,
hidden_layers=hidden_layers,
)
self.target_network = deepcopy(self.network)

8 changes: 5 additions & 3 deletions pydeeprecsys/rl/agents/reinforce.py
@@ -1,6 +1,6 @@
import numpy as np
from pydeeprecsys.rl.agents.agent import ReinforcementLearning
from typing import Any, List
from typing import Any, List, Optional
from pydeeprecsys.rl.experience_replay.experience_buffer import ExperienceReplayBuffer
from pydeeprecsys.rl.experience_replay.buffer_parameters import (
ExperienceReplayBufferParameters,
@@ -17,14 +17,16 @@ def __init__(
self,
n_actions: int,
state_size: int,
hidden_layers: Optional[List[int]] = None,
discount_factor: int = 0.99, # a.k.a gamma
learning_rate=1e-3,
):
self.episode_count = 0

if not hidden_layers:
hidden_layers = [state_size * 2, state_size * 2]
self.policy_estimator = PolicyEstimator(
state_size,
[state_size * 2, state_size * 2],
hidden_layers,
n_actions,
learning_rate=learning_rate,
)
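A similar sketch for the REINFORCE agent (illustrative values); hidden_layers is optional and falls back to [state_size * 2, state_size * 2] as implemented above:

from pydeeprecsys.rl.agents.reinforce import ReinforceAgent

agent = ReinforceAgent(
    n_actions=2,
    state_size=4,
    hidden_layers=[64, 64],  # omit to use the state_size-derived default
    discount_factor=0.95,
    learning_rate=1e-3,
)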
24 changes: 18 additions & 6 deletions pydeeprecsys/rl/manager.py
@@ -142,9 +142,9 @@ def hyperparameter_search(
agent: type,
params: dict,
default_params: dict,
learning_statistics: LearningStatistics,
episodes: int = 100,
runs_per_combination: int = 3,
verbose: bool = True,
) -> dict:
"""Given an agent class, and a dictionary of hyperparameter names and values,
will try all combinations, and return the mean reward of each combination
@@ -154,15 +154,27 @@ def hyperparameter_search(
if len(p_value) < 2:
continue
for value in p_value:
rl = agent(**{p_name: value, **default_params})
rl = agent(**{**default_params, p_name: value})
learning_statistics = LearningStatistics()
combination_key = f"{p_name}={value}"
for run in range(runs_per_combination):
print(f"Testing combination {p_name}={value} round {run}")
self.train(rl=rl, max_episodes=episodes, should_print=False)
self.train(
rl=rl,
max_episodes=episodes,
should_print=False,
statistics=learning_statistics,
)
combination_results[combination_key].append(
learning_statistics.moving_rewards[-1]
learning_statistics.moving_rewards.iloc[-1]
)
print(f"result was {learning_statistics.moving_rewards[-1]}")
if verbose:
print(
f"\rTested combination {p_name}={value} round {run} "
f"result was {learning_statistics.moving_rewards.iloc[-1]}"
"\t\t",
end="",
)

return combination_results


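A hedged sketch of consuming the result: hyperparameter_search returns a mapping from each "name=value" combination to the list of final moving rewards, one entry per run, so averaging each list gives the mean reward per combination that the docstring refers to. The manager and agent below mirror the test added at the end of this commit; the parameter grid is illustrative.

from statistics import mean

from pydeeprecsys.rl.agents.reinforce import ReinforceAgent
from pydeeprecsys.rl.manager import MovieLensFairnessManager

manager = MovieLensFairnessManager()
results = manager.hyperparameter_search(
    agent=ReinforceAgent,
    params={"discount_factor": [0.9, 0.95, 0.99]},
    default_params={
        "n_actions": manager.env.action_space.n,
        "state_size": manager.env.observation_space.shape[0],
    },
    episodes=10,
    runs_per_combination=2,
)
# collapse the per-run rewards into one mean per combination
mean_rewards = {combination: mean(runs) for combination, runs in results.items()}
best_combination = max(mean_rewards, key=mean_rewards.get)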
23 changes: 15 additions & 8 deletions pydeeprecsys/rl/neural_networks/dueling.py
@@ -19,30 +19,37 @@ def __init__(
n_input: int,
n_output: int,
learning_rate: float,
hidden_layers: List[int] = None,
noise_sigma: float = 0.17,
discount_factor: float = 0.99,
statistics: Optional[LearningStatistics] = None,
):
super().__init__()
if not hidden_layers:
hidden_layers = [256, 256, 64, 64]
self.discount_factor = discount_factor
self._build_network(n_input, n_output, noise_sigma)
self._build_network(n_input, n_output, noise_sigma, hidden_layers=hidden_layers)
self.optimizer = Adam(self.parameters(), lr=learning_rate)
self.statistics = statistics

def _build_network(self, n_input: int, n_output: int, noise_sigma: float):
def _build_network(
self, n_input: int, n_output: int, noise_sigma: float, hidden_layers: List[int]
):
"""Builds the dueling network with noisy layers, the value
subnet and the advantage subnet. TODO: add `.to_device()` to Modules"""
self.fully_connected_1 = Linear(n_input, 256, bias=True)
self.fully_connected_2 = NoisyLayer(256, 256, bias=True, sigma=noise_sigma)
assert len(hidden_layers) == 4
fc_1, fc_2, value_size, advantage_size = hidden_layers
self.fully_connected_1 = Linear(n_input, fc_1, bias=True)
self.fully_connected_2 = NoisyLayer(fc_1, fc_2, bias=True, sigma=noise_sigma)
self.value_subnet = Sequential(
NoisyLayer(256, 64, bias=True, sigma=noise_sigma),
NoisyLayer(fc_2, value_size, bias=True, sigma=noise_sigma),
ReLU(),
Linear(64, 1, bias=True),
Linear(value_size, 1, bias=True),
)
self.advantage_subnet = Sequential(
NoisyLayer(256, 64, bias=True, sigma=noise_sigma),
NoisyLayer(fc_2, advantage_size, bias=True, sigma=noise_sigma),
ReLU(),
Linear(64, n_output, bias=True),
Linear(advantage_size, n_output, bias=True),
)

def forward(self, state):
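A hedged construction sketch (values illustrative, not from this commit): hidden_layers must contain exactly four sizes, read as the two shared fully connected layers followed by the widths of the value and advantage subnets; passing None keeps the previous [256, 256, 64, 64] architecture.

from pydeeprecsys.rl.neural_networks.dueling import DuelingDDQN

network = DuelingDDQN(
    n_input=4,
    n_output=2,
    learning_rate=1e-4,
    hidden_layers=[128, 128, 32, 32],  # [fc_1, fc_2, value_size, advantage_size]
    noise_sigma=0.17,
    discount_factor=0.99,
)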
6 changes: 5 additions & 1 deletion pydeeprecsys/tests/unit/test_actor_critic.py
@@ -11,7 +11,11 @@ def test_reinforce_init():
def test_reinforce_interaction():
manager = CartpoleManager()
agent = ActorCriticAgent(
n_actions=2, state_size=4, discount_factor=0.95, learning_rate=0.001
n_actions=2,
state_size=4,
discount_factor=0.95,
actor_learning_rate=0.001,
critic_learning_rate=0.001,
)
learning_statistics = LearningStatistics()
manager.train(
32 changes: 32 additions & 0 deletions pydeeprecsys/tests/unit/test_manager.py
@@ -1,6 +1,38 @@
from pydeeprecsys.rl.manager import MovieLensFairnessManager
from pydeeprecsys.rl.agents.reinforce import ReinforceAgent


def test_movie_lens_manager():
manager = MovieLensFairnessManager()
assert manager.env is not None


def test_hyperparameter_search():
manager = MovieLensFairnessManager()
agent = ReinforceAgent

default_params = {
"n_actions": manager.env.action_space.n,
"state_size": manager.env.observation_space.shape[0],
"hidden_layers": [64, 64],
"discount_factor": 0.95,
"learning_rate": 0.0001,
}

optimize_params = {
"hidden_layers": [[64, 64], [128, 128], [256, 256]],
"discount_factor": [0.9, 0.95, 0.99],
"learning_rate": [0.00001, 0.0001, 0.001],
}

results = manager.hyperparameter_search(
agent=agent,
runs_per_combination=2,
episodes=10,
params=optimize_params,
default_params=default_params,
)

assert results is not None
assert len(results.items()) == 9
assert len(results["discount_factor=0.9"]) == 2
