Implement hyperparameter search #10

Merged · 1 commit · May 19, 2021
21 changes: 14 additions & 7 deletions pydeeprecsys/rl/agents/actor_critic.py
@@ -1,5 +1,5 @@
from pydeeprecsys.rl.agents.agent import ReinforcementLearning
from typing import Any, List
from typing import Any, List, Optional
from pydeeprecsys.rl.experience_replay.experience_buffer import ExperienceReplayBuffer
from pydeeprecsys.rl.experience_replay.buffer_parameters import (
ExperienceReplayBufferParameters,
@@ -12,27 +12,34 @@ class ActorCriticAgent(ReinforcementLearning):
"""Policy estimator using a value estimator as a baseline.
It's on-policy, for discrete action spaces, and episodic environments.
This implementation uses stochastic policies.
TODO: could be a sub class of reinforces"""
TODO: could be a sub class of reinforce"""

def __init__(
self,
n_actions: int,
state_size: int,
discount_factor: int = 0.99,
learning_rate=1e-3,
actor_hidden_layers: Optional[List[int]] = None,
critic_hidden_layers: Optional[List[int]] = None,
actor_learning_rate=1e-3,
critic_learning_rate=1e-3,
):
if not actor_hidden_layers:
actor_hidden_layers = [state_size * 2, state_size * 2]
if not critic_hidden_layers:
critic_hidden_layers = [state_size * 2, int(state_size / 2)]
self.episode_count = 0
self.value_estimator = ValueEstimator(
state_size,
[state_size * 2, int(state_size / 2)],
critic_hidden_layers,
1,
learning_rate=learning_rate,
learning_rate=critic_learning_rate,
)
self.policy_estimator = PolicyEstimator(
state_size,
[state_size * 2, state_size * 2],
actor_hidden_layers,
n_actions,
learning_rate=learning_rate,
learning_rate=actor_learning_rate,
)
self.discount_factor = discount_factor
# starts the buffer
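With this change the actor and critic networks can be sized and tuned independently instead of sharing one learning rate and hard-coded layer sizes. A minimal usage sketch (not part of the diff; the dimensions and layer sizes below are illustrative):

    from pydeeprecsys.rl.agents.actor_critic import ActorCriticAgent

    # Hypothetical CartPole-style dimensions: 2 actions, 4-dimensional state.
    agent = ActorCriticAgent(
        n_actions=2,
        state_size=4,
        discount_factor=0.99,
        actor_hidden_layers=[32, 32],   # overrides the [state_size * 2, state_size * 2] default
        critic_hidden_layers=[32, 8],   # overrides the [state_size * 2, state_size // 2] default
        actor_learning_rate=1e-3,
        critic_learning_rate=1e-2,      # the critic can now learn at its own rate
    )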
16 changes: 9 additions & 7 deletions pydeeprecsys/rl/agents/rainbow.py
@@ -1,5 +1,5 @@
from numpy.random import RandomState
from typing import Any, Optional
from typing import Any, Optional, List
from numpy import arange
from copy import deepcopy
from pydeeprecsys.rl.neural_networks.dueling import DuelingDDQN
@@ -34,18 +34,20 @@ def __init__(
batch_size: int = 32,
noise_sigma: float = 0.017,
discount_factor: float = 0.99,
learning_rate: float = 0.99,
learning_rate: float = 0.0001,
hidden_layers: List[int] = None,
random_state: RandomState = RandomState(),
statistics: Optional[LearningStatistics] = None,
):

self.network = DuelingDDQN(
input_size,
output_size,
learning_rate,
noise_sigma,
discount_factor,
n_input=input_size,
n_output=output_size,
learning_rate=learning_rate,
noise_sigma=noise_sigma,
discount_factor=discount_factor,
statistics=statistics,
hidden_layers=hidden_layers,
)
self.target_network = deepcopy(self.network)

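The switch to keyword arguments is what makes the new hidden_layers parameter safe to add: DuelingDDQN now takes hidden_layers between learning_rate and noise_sigma, so the old positional call would have silently bound noise_sigma to hidden_layers. A generic sketch of that failure mode (the function names here are illustrative, not from the diff):

    # Illustrative only: how a positional call breaks when a parameter is inserted.
    def old_network(n_input, n_output, learning_rate, noise_sigma, discount_factor):
        ...

    def new_network(n_input, n_output, learning_rate, hidden_layers=None,
                    noise_sigma=0.17, discount_factor=0.99):
        ...

    # A positional call written for the old signature, new_network(4, 2, 1e-4, 0.017, 0.99),
    # would now bind 0.017 to hidden_layers and 0.99 to noise_sigma.
    # Keyword arguments keep every value attached to the intended parameter:
    new_network(4, 2, learning_rate=1e-4, noise_sigma=0.017, discount_factor=0.99)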
8 changes: 5 additions & 3 deletions pydeeprecsys/rl/agents/reinforce.py
@@ -1,6 +1,6 @@
import numpy as np
from pydeeprecsys.rl.agents.agent import ReinforcementLearning
from typing import Any, List
from typing import Any, List, Optional
from pydeeprecsys.rl.experience_replay.experience_buffer import ExperienceReplayBuffer
from pydeeprecsys.rl.experience_replay.buffer_parameters import (
ExperienceReplayBufferParameters,
@@ -17,14 +17,16 @@ def __init__(
self,
n_actions: int,
state_size: int,
hidden_layers: Optional[List[int]] = None,
discount_factor: int = 0.99, # a.k.a gamma
learning_rate=1e-3,
):
self.episode_count = 0

if not hidden_layers:
hidden_layers = [state_size * 2, state_size * 2]
self.policy_estimator = PolicyEstimator(
state_size,
[state_size * 2, state_size * 2],
hidden_layers,
n_actions,
learning_rate=learning_rate,
)
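ReinforceAgent gains the same kind of override: hidden_layers replaces the hard-coded [state_size * 2, state_size * 2] policy network, and passing nothing keeps that default. A brief sketch with illustrative dimensions (not part of the diff):

    from pydeeprecsys.rl.agents.reinforce import ReinforceAgent

    agent = ReinforceAgent(
        n_actions=2,
        state_size=4,
        hidden_layers=[128, 128],  # omit or pass None to keep the default architecture
        discount_factor=0.95,
        learning_rate=1e-3,
    )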
24 changes: 18 additions & 6 deletions pydeeprecsys/rl/manager.py
@@ -142,9 +142,9 @@ def hyperparameter_search(
agent: type,
params: dict,
default_params: dict,
learning_statistics: LearningStatistics,
episodes: int = 100,
runs_per_combination: int = 3,
verbose: bool = True,
) -> dict:
"""Given an agent class, and a dictionary of hyperparameter names and values,
will try all combinations, and return the mean reward of each combination
@@ -154,15 +154,27 @@ def hyperparameter_search(
if len(p_value) < 2:
continue
for value in p_value:
rl = agent(**{p_name: value, **default_params})
rl = agent(**{**default_params, p_name: value})
learning_statistics = LearningStatistics()
combination_key = f"{p_name}={value}"
for run in range(runs_per_combination):
print(f"Testing combination {p_name}={value} round {run}")
self.train(rl=rl, max_episodes=episodes, should_print=False)
self.train(
rl=rl,
max_episodes=episodes,
should_print=False,
statistics=learning_statistics,
)
combination_results[combination_key].append(
learning_statistics.moving_rewards[-1]
learning_statistics.moving_rewards.iloc[-1]
)
print(f"result was {learning_statistics.moving_rewards[-1]}")
if verbose:
print(
f"\rTested combination {p_name}={value} round {run} "
f"result was {learning_statistics.moving_rewards.iloc[-1]}"
"\t\t",
end="",
)

return combination_results


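The returned dict maps a "param=value" key to the list of final moving-average rewards, one entry per run. A small post-processing sketch for picking a winner, assuming a results dict returned by hyperparameter_search as shown above (the aggregation itself is not part of the diff):

    from statistics import mean

    # results: {"learning_rate=0.001": [r1, r2, r3], "learning_rate=0.0001": [...], ...}
    mean_rewards = {combo: mean(rewards) for combo, rewards in results.items()}
    best_combination = max(mean_rewards, key=mean_rewards.get)
    print(best_combination, mean_rewards[best_combination])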
23 changes: 15 additions & 8 deletions pydeeprecsys/rl/neural_networks/dueling.py
@@ -19,30 +19,37 @@ def __init__(
n_input: int,
n_output: int,
learning_rate: float,
hidden_layers: List[int] = None,
noise_sigma: float = 0.17,
discount_factor: float = 0.99,
statistics: Optional[LearningStatistics] = None,
):
super().__init__()
if not hidden_layers:
hidden_layers = [256, 256, 64, 64]
self.discount_factor = discount_factor
self._build_network(n_input, n_output, noise_sigma)
self._build_network(n_input, n_output, noise_sigma, hidden_layers=hidden_layers)
self.optimizer = Adam(self.parameters(), lr=learning_rate)
self.statistics = statistics

def _build_network(self, n_input: int, n_output: int, noise_sigma: float):
def _build_network(
self, n_input: int, n_output: int, noise_sigma: float, hidden_layers: List[int]
):
"""Builds the dueling network with noisy layers, the value
subnet and the advantage subnet. TODO: add `.to_device()` to Modules"""
self.fully_connected_1 = Linear(n_input, 256, bias=True)
self.fully_connected_2 = NoisyLayer(256, 256, bias=True, sigma=noise_sigma)
assert len(hidden_layers) == 4
fc_1, fc_2, value_size, advantage_size = hidden_layers
self.fully_connected_1 = Linear(n_input, fc_1, bias=True)
self.fully_connected_2 = NoisyLayer(fc_1, fc_2, bias=True, sigma=noise_sigma)
self.value_subnet = Sequential(
NoisyLayer(256, 64, bias=True, sigma=noise_sigma),
NoisyLayer(fc_2, value_size, bias=True, sigma=noise_sigma),
ReLU(),
Linear(64, 1, bias=True),
Linear(value_size, 1, bias=True),
)
self.advantage_subnet = Sequential(
NoisyLayer(256, 64, bias=True, sigma=noise_sigma),
NoisyLayer(fc_2, advantage_size, bias=True, sigma=noise_sigma),
ReLU(),
Linear(64, n_output, bias=True),
Linear(advantage_size, n_output, bias=True),
)

def forward(self, state):
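hidden_layers is expected to hold exactly four sizes, enforced by the assert: the two shared fully-connected layers, then the hidden widths of the value and advantage heads. A sketch of a custom configuration, with illustrative CartPole-like dimensions (not from the diff):

    from pydeeprecsys.rl.neural_networks.dueling import DuelingDDQN

    # hidden_layers = [fc_1, fc_2, value_size, advantage_size]
    network = DuelingDDQN(
        n_input=4,
        n_output=2,
        learning_rate=1e-4,
        hidden_layers=[128, 128, 32, 32],  # pass None to keep [256, 256, 64, 64]
        noise_sigma=0.017,
    )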
6 changes: 5 additions & 1 deletion pydeeprecsys/tests/unit/test_actor_critic.py
@@ -11,7 +11,11 @@ def test_reinforce_init():
def test_reinforce_interaction():
manager = CartpoleManager()
agent = ActorCriticAgent(
n_actions=2, state_size=4, discount_factor=0.95, learning_rate=0.001
n_actions=2,
state_size=4,
discount_factor=0.95,
actor_learning_rate=0.001,
critic_learning_rate=0.001,
)
learning_statistics = LearningStatistics()
manager.train(
32 changes: 32 additions & 0 deletions pydeeprecsys/tests/unit/test_manager.py
@@ -1,6 +1,38 @@
from pydeeprecsys.rl.manager import MovieLensFairnessManager
from pydeeprecsys.rl.agents.reinforce import ReinforceAgent


def test_movie_lens_manager():
manager = MovieLensFairnessManager()
assert manager.env is not None


def test_hyperparameter_search():
manager = MovieLensFairnessManager()
agent = ReinforceAgent

default_params = {
"n_actions": manager.env.action_space.n,
"state_size": manager.env.observation_space.shape[0],
"hidden_layers": [64, 64],
"discount_factor": 0.95,
"learning_rate": 0.0001,
}

optimize_params = {
"hidden_layers": [[64, 64], [128, 128], [256, 256]],
"discount_factor": [0.9, 0.95, 0.99],
"learning_rate": [0.00001, 0.0001, 0.001],
}

results = manager.hyperparameter_search(
agent=agent,
runs_per_combination=2,
episodes=10,
params=optimize_params,
default_params=default_params,
)

assert results is not None
assert len(results.items()) == 9
assert len(results["discount_factor=0.9"]) == 2
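Note that the search varies one hyperparameter at a time against default_params rather than taking a full Cartesian product, so three parameters with three values each yield nine result keys, each holding runs_per_combination entries. A sketch of the expected layout, reusing optimize_params and results from the test above:

    expected_keys = {
        f"{name}={value}"
        for name, values in optimize_params.items()
        for value in values
    }
    assert set(results.keys()) == expected_keys              # 9 single-parameter variations
    assert all(len(runs) == 2 for runs in results.values())  # runs_per_combination=2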