In [16]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
import numpy as np
from pathlib import Path
from runner import Runner
import interest_evolution
from full_slate_q_agent import FullSlateQAgent

In [18]:
def create_dqn_agent(env, eval_mode=False, summary_writer=None):
    """Build a FullSlateQAgent wired to the spaces exposed by ``env``.

    This factory is what the notebook hands to ``Runner`` as
    ``create_agent_fn``.

    Args:
        env: Object providing ``observation_space`` and ``action_space``
            attributes; both are forwarded to the agent unchanged.
        eval_mode: Forwarded as the agent's ``eval_mode`` argument.
        summary_writer: Forwarded as the agent's ``summary_writer`` argument.

    Returns:
        The newly constructed ``FullSlateQAgent``.
    """
    agent_kwargs = dict(
        observation_space=env.observation_space,
        action_space=env.action_space,
        eval_mode=eval_mode,
        summary_writer=summary_writer,
    )
    return FullSlateQAgent(**agent_kwargs)

In [19]:
# Single seed value: it seeds NumPy's global RNG here and is also passed to
# the environment through env_config['seed'].
seed = 0
np.random.seed(seed)

# Settings later handed to interest_evolution.create_environment(...).
env_config = dict(
    num_candidates=10,
    slate_size=2,
    resample_documents=True,
    seed=seed,
)

In [20]:
import shutil
import os
from pathlib import Path

tmp_base_dir = './logs/dqn'

# Remove any existing 'train' and 'eval' folders under the base directory.
train_log_dir, eval_log_dir = (
    os.path.join(tmp_base_dir, subdir) for subdir in ('train', 'eval')
)
for log_dir in (train_log_dir, eval_log_dir):
    if not os.path.exists(log_dir):
        continue  # nothing to clean up for this folder
    shutil.rmtree(log_dir)

# Make sure the base log directory exists (no error if it already does).
Path(tmp_base_dir).mkdir(parents=True, exist_ok=True)

# --- Build the DQN runner on a freshly created environment ---
dqn_env = interest_evolution.create_environment(env_config)
runner_dqn = Runner(
    base_dir=tmp_base_dir,
    create_agent_fn=create_dqn_agent,
    env=dqn_env,
)

# --- Train first, then evaluate ---
runner_dqn.run_training(num_iterations=5, max_training_steps=50)
runner_dqn.run_evaluation(max_eval_episodes=2)


[TRAIN][Step 86] AvgLen: 86.00 | AvgRew: 154.83 | StdRew: 0.00 | Time/Step: 0.0012
[TRAIN][Step 172] AvgLen: 86.00 | AvgRew: 138.52 | StdRew: 0.00 | Time/Step: 0.0017
[TRAIN][Step 250] AvgLen: 78.00 | AvgRew: 163.14 | StdRew: 0.00 | Time/Step: 0.0021
[TRAIN][Step 319] AvgLen: 69.00 | AvgRew: 155.14 | StdRew: 0.00 | Time/Step: 0.0018
[TRAIN][Step 406] AvgLen: 87.00 | AvgRew: 141.24 | StdRew: 0.00 | Time/Step: 0.0022
[EVAL] ckpt_0.pkl | Episode 1 | Reward: 163.09 | Length: 79
[EVAL] ckpt_0.pkl | Episode 2 | Reward: 149.95 | Length: 61
[EVAL][Step 86] AvgLen: 70.00 | AvgRew: 156.52 | StdRew: 6.57 | Time/Step: 0.0000
[EVAL] ckpt_1.pkl | Episode 1 | Reward: 180.00 | Length: 64
[EVAL] ckpt_1.pkl | Episode 2 | Reward: 177.38 | Length: 82
[EVAL][Step 172] AvgLen: 73.00 | AvgRew: 178.69 | StdRew: 1.31 | Time/Step: 0.0000
[EVAL] ckpt_2.pkl | Episode 1 | Reward: 176.00 | Length: 98
[EVAL] ckpt_2.pkl | Episode 2 | Reward: 176.00 | Length: 94
[EVAL][Step 250] AvgLen: 96.00 | AvgRew: 176.00 | StdRew

In [21]:
from random_agent import RandomAgent
import shutil
import os
from pathlib import Path

def create_random_agent(env, **kwargs):
    """Build a RandomAgent that acts over ``env.action_space``.

    Args:
        env: Object providing an ``action_space`` attribute.
        **kwargs: Accepted and ignored, so this factory can be called with
            extra keyword arguments (presumably the same ones Runner passes
            to other agent factories; confirm against Runner).

    Returns:
        The newly constructed ``RandomAgent``.
    """
    space = env.action_space
    return RandomAgent(action_space=space)

tmp_base_dir = './logs/random'

# Remove any existing 'train' and 'eval' folders under the base directory.
train_log_dir, eval_log_dir = (
    os.path.join(tmp_base_dir, subdir) for subdir in ('train', 'eval')
)
for log_dir in (train_log_dir, eval_log_dir):
    if not os.path.exists(log_dir):
        continue  # nothing to clean up for this folder
    shutil.rmtree(log_dir)

# Make sure the base log directory exists (no error if it already does).
Path(tmp_base_dir).mkdir(parents=True, exist_ok=True)

# --- Build the RandomAgent runner on a freshly created environment ---
random_env = interest_evolution.create_environment(env_config)
runner_random = Runner(
    base_dir=tmp_base_dir,
    create_agent_fn=create_random_agent,
    env=random_env,
)

# --- Train first, then evaluate ---
runner_random.run_training(num_iterations=5, max_training_steps=50)
runner_random.run_evaluation(max_eval_episodes=2)


[TRAIN][Step 80] AvgLen: 80.00 | AvgRew: 168.00 | StdRew: 0.00 | Time/Step: 0.0001
[TRAIN][Step 162] AvgLen: 82.00 | AvgRew: 156.00 | StdRew: 0.00 | Time/Step: 0.0001
[TRAIN][Step 267] AvgLen: 105.00 | AvgRew: 181.00 | StdRew: 0.00 | Time/Step: 0.0001
[TRAIN][Step 347] AvgLen: 80.00 | AvgRew: 160.54 | StdRew: 0.00 | Time/Step: 0.0001
[TRAIN][Step 423] AvgLen: 76.00 | AvgRew: 151.22 | StdRew: 0.00 | Time/Step: 0.0001
[EVAL] CurrentAgent | Episode 1 | Reward: 160.42 | Length: 78
[EVAL] CurrentAgent | Episode 2 | Reward: 159.07 | Length: 69
[EVAL][Step 0] AvgLen: 73.50 | AvgRew: 159.75 | StdRew: 0.67 | Time/Step: 0.0000
