In [1]:
from pydeeprecsys.rl.manager import MovieLensFairnessManager
from pydeeprecsys.rl.agents.rainbow import RainbowDQNAgent
from pydeeprecsys.rl.learning_statistics import LearningStatistics
from pydeeprecsys.rl.agents.reinforce import ReinforceAgent
from pydeeprecsys.rl.agents.agent import RandomAgent
import pandas as pd

# Experiment configuration shared by every agent trained in this notebook.
# Both values are forwarded to run_full_training by the cells below:
# `training_iterations` as `trainings`, `training_episodes` as `episodes`.
training_iterations = 3
training_episodes = 2_000

# The parameter cells below read this manager's env (action_space.n and
# observation_space.shape[0]) to fill in the agents' size arguments.
# run_full_training builds its own manager for each run.
manager = MovieLensFairnessManager(slate_size=1)

In [4]:
def run_full_training(agent_class, params, trainings, episodes):
    """Train fresh agents several times and save the collected metrics to CSV.

    For each of ``trainings`` runs, a new ``MovieLensFairnessManager`` (slate
    size 1), a new ``LearningStatistics`` and a new ``agent_class(**params)``
    are created, and the manager trains the agent with ``max_episodes`` set to
    ``episodes``. The ``collected_metrics`` of every run are tagged with the
    agent class name in a ``model`` column, stacked, and written to
    ``output/<AgentClassName>_optimized_results.csv`` (without the index).

    Args:
        agent_class: Agent class to instantiate; its ``__name__`` labels the
            rows and names the output file.
        params: Keyword arguments forwarded to ``agent_class``.
        trainings: Number of independent training runs.
        episodes: Passed to ``manager.train`` as ``max_episodes``.

    Returns:
        None. The result is the CSV file written to disk.
    """
    import os

    model_name = agent_class.__name__
    # Create the target directory up front: failing on a missing folder only
    # after every training run has finished would throw all that work away.
    os.makedirs("output", exist_ok=True)

    statistics = []
    for _ in range(trainings):
        # Everything is rebuilt per run so that no state leaks between runs.
        manager = MovieLensFairnessManager(slate_size=1)
        stats = LearningStatistics()
        agent = agent_class(**params)
        manager.train(agent, max_episodes=episodes, statistics=stats)
        statistics.append(stats)

    # Build one frame per run and concatenate once, instead of repeatedly
    # concatenating onto a growing (initially empty) DataFrame.
    frames = []
    for run_stats in statistics:
        frame = pd.DataFrame(run_stats.collected_metrics)
        frame["model"] = model_name
        frames.append(frame)
    # pd.concat rejects an empty list, so keep the empty-frame result when
    # there were no training runs.
    metrics = pd.concat(frames) if frames else pd.DataFrame()
    metrics.to_csv(f"output/{model_name}_optimized_results.csv", index=False)

In [3]:
# Constructor arguments for ReinforceAgent. The two size arguments are read
# from the notebook-level manager's environment.
reinforce_params = dict(
    n_actions=manager.env.action_space.n,
    state_size=manager.env.observation_space.shape[0],
    hidden_layers=[128, 128],
    discount_factor=0.9,
    learning_rate=0.001,
)
run_full_training(
    agent_class=ReinforceAgent,
    params=reinforce_params,
    trainings=training_iterations,
    episodes=training_episodes,
)

Training...
Episode 1999 Mean Rewards 33.03 Last Reward 41.22		Training...
Episode 1999 Mean Rewards 34.60 Last Reward 48.30		Training...
Episode 1999 Mean Rewards 33.41 Last Reward 45.71		Training...
Episode 1999 Mean Rewards 38.50 Last Reward 47.29		Training...
Episode 1999 Mean Rewards 34.40 Last Reward 38.10		

In [4]:
from pydeeprecsys.rl.agents.actor_critic import ActorCriticAgent

# Constructor arguments for ActorCriticAgent. The two size arguments are read
# from the notebook-level manager's environment.
ac_params = dict(
    n_actions=manager.env.action_space.n,
    state_size=manager.env.observation_space.shape[0],
    actor_hidden_layers=[128, 128],
    critic_hidden_layers=[128, 64],
    discount_factor=0.99,
    actor_learning_rate=0.001,
    critic_learning_rate=0.0001,
)
# Second name bound to the very same dict object, as in the original cell.
default_params = ac_params
run_full_training(
    agent_class=ActorCriticAgent,
    params=ac_params,
    trainings=training_iterations,
    episodes=training_episodes,
)

Training...
Episode 1999 Mean Rewards 31.43 Last Reward 49.74		Training...
Episode 1999 Mean Rewards 26.90 Last Reward 11.60		Training...
Episode 1999 Mean Rewards 25.30 Last Reward 27.90		Training...
Episode 1999 Mean Rewards 22.09 Last Reward 49.78		Training...
Episode 1999 Mean Rewards 24.22 Last Reward 10.00		

In [5]:
from pydeeprecsys.rl.agents.rainbow import RainbowDQNAgent

# Constructor arguments for RainbowDQNAgent. The two size arguments are read
# from the notebook-level manager's environment.
dqn_params = dict(
    output_size=manager.env.action_space.n,
    input_size=manager.env.observation_space.shape[0],
    network_update_frequency=3,
    network_sync_frequency=300,
    priority_importance=0.4,
    # NOTE(review): the spelling "weigth" is kept on purpose. The key is
    # forwarded as a keyword argument name to RainbowDQNAgent, so it has to
    # match that constructor; verify there before renaming.
    priority_weigth_growth=0.01,
    buffer_size=10000,
    buffer_burn_in=1000,
    batch_size=32,
    noise_sigma=0.017,
    discount_factor=0.95,
    learning_rate=0.001,
    hidden_layers=[512, 512, 128, 128],
)
run_full_training(
    agent_class=RainbowDQNAgent,
    params=dqn_params,
    trainings=training_iterations,
    episodes=training_episodes,
)

Training...
Episode 1999 Mean Rewards 40.77 Last Reward 50.00		Training...
Episode 1999 Mean Rewards 36.29 Last Reward 42.23		Training...
Episode 1999 Mean Rewards 36.42 Last Reward 48.80		