In [9]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
import numpy as np
from pathlib import Path
from runner import Runner
import interest_evolution
from full_slate_q_agent import FullSlateQAgent

In [11]:
def create_dqn_agent(env, eval_mode=False, summary_writer=None):
    """Build a FullSlateQAgent wired to the given environment.

    Args:
        env: Object exposing ``observation_space`` and ``action_space``
            attributes; both are forwarded to the agent unchanged.
        eval_mode: Forwarded to the agent as ``eval_mode`` (default False).
        summary_writer: Forwarded to the agent as ``summary_writer``
            (default None).

    Returns:
        The newly constructed FullSlateQAgent instance.
    """
    agent_kwargs = {
        'observation_space': env.observation_space,
        'action_space': env.action_space,
        'eval_mode': eval_mode,
        'summary_writer': summary_writer,
    }
    return FullSlateQAgent(**agent_kwargs)

In [12]:
# One seed shared by NumPy's global RNG and the environment configuration.
seed = 0
np.random.seed(seed)

# Settings passed to interest_evolution.create_environment by the run cells.
env_config = dict(
    num_candidates=20,
    slate_size=5,
    resample_documents=True,
    seed=seed,
)

In [14]:
import shutil
import os
from pathlib import Path

# Re-seed NumPy's global RNG so that re-running this cell (in any order relative
# to the other experiment cells) starts from the same random state. The
# environment itself additionally receives the seed through env_config.
np.random.seed(seed)

tmp_base_dir = './logs/dqn'

# Remove train/eval logs left over from a previous run of this cell so the
# new run does not mix with old output.
train_log_dir = os.path.join(tmp_base_dir, 'train')
eval_log_dir = os.path.join(tmp_base_dir, 'eval')

for log_dir in [train_log_dir, eval_log_dir]:
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)

# Make sure the base log directory exists before the runner is created.
Path(tmp_base_dir).mkdir(parents=True, exist_ok=True)

# --- Initialize training runner ---
# A fresh environment is built from the shared env_config for this run.
runner_dqn = Runner(
    base_dir=tmp_base_dir,
    create_agent_fn=create_dqn_agent,
    env=interest_evolution.create_environment(env_config),
)

# --- Run training + evaluation ---
runner_dqn.run_training(max_training_steps=50, num_iterations=5)
runner_dqn.run_evaluation(max_eval_episodes=2)


[TRAIN][Step 61] AvgLen: 61.00 | AvgRew: 188.00 | StdRew: 0.00 | Time/Step: 0.6684
[TRAIN][Step 127] AvgLen: 66.00 | AvgRew: 137.29 | StdRew: 0.00 | Time/Step: 0.7114
[TRAIN][Step 188] AvgLen: 61.00 | AvgRew: 188.00 | StdRew: 0.00 | Time/Step: 0.7150
[TRAIN][Step 242] AvgLen: 54.00 | AvgRew: 161.75 | StdRew: 0.00 | Time/Step: 0.7958
[TRAIN][Step 299] AvgLen: 57.00 | AvgRew: 156.00 | StdRew: 0.00 | Time/Step: 0.7095
[EVAL] ckpt_0.pkl | Episode 1 | Reward: 170.61 | Length: 62
[EVAL] ckpt_0.pkl | Episode 2 | Reward: 167.45 | Length: 54
[EVAL][Step 61] AvgLen: 58.00 | AvgRew: 169.03 | StdRew: 1.58 | Time/Step: 0.0000
[EVAL] ckpt_1.pkl | Episode 1 | Reward: 148.00 | Length: 53
[EVAL] ckpt_1.pkl | Episode 2 | Reward: 180.00 | Length: 64
[EVAL][Step 127] AvgLen: 58.50 | AvgRew: 164.00 | StdRew: 16.00 | Time/Step: 0.0000
[EVAL] ckpt_2.pkl | Episode 1 | Reward: 168.10 | Length: 73
[EVAL] ckpt_2.pkl | Episode 2 | Reward: 167.15 | Length: 67
[EVAL][Step 188] AvgLen: 70.00 | AvgRew: 167.63 | StdRe

In [24]:
from random_agent import RandomAgent
import shutil
import os
from pathlib import Path

def create_random_agent(env, **kwargs):
    """Build a RandomAgent for the given environment.

    Args:
        env: Object exposing an ``action_space`` attribute, which is passed
            to the agent.
        **kwargs: Accepted so the factory can be called with extra keyword
            arguments; they are ignored.

    Returns:
        The newly constructed RandomAgent instance.
    """
    del kwargs  # intentionally unused
    agent = RandomAgent(action_space=env.action_space)
    return agent

# Re-seed NumPy's global RNG so the random baseline starts from the same random
# state every time this cell is run, instead of whatever state earlier cells
# left behind. The environment additionally receives the seed via env_config.
np.random.seed(seed)

tmp_base_dir = './logs/random'

# Remove train/eval logs left over from a previous run of this cell so the
# new run does not mix with old output.
train_log_dir = os.path.join(tmp_base_dir, 'train')
eval_log_dir = os.path.join(tmp_base_dir, 'eval')

for log_dir in [train_log_dir, eval_log_dir]:
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)

# Make sure the base log directory exists before the runner is created.
Path(tmp_base_dir).mkdir(parents=True, exist_ok=True)

# --- Initialize runner for RandomAgent ---
# A fresh environment is built from the shared env_config for this run.
runner_random = Runner(
    base_dir=tmp_base_dir,
    create_agent_fn=create_random_agent,
    env=interest_evolution.create_environment(env_config),
)

# --- Run training + evaluation ---
runner_random.run_training(max_training_steps=50, num_iterations=5)
runner_random.run_evaluation(max_eval_episodes=2)


[TRAIN][Step 60] AvgLen: 60.00 | AvgRew: 192.00 | StdRew: 0.00 | Time/Step: 0.0001
[TRAIN][Step 116] AvgLen: 56.00 | AvgRew: 144.00 | StdRew: 0.00 | Time/Step: 0.0001
[TRAIN][Step 174] AvgLen: 58.00 | AvgRew: 182.55 | StdRew: 0.00 | Time/Step: 0.0001
[TRAIN][Step 227] AvgLen: 53.00 | AvgRew: 160.99 | StdRew: 0.00 | Time/Step: 0.0001
[TRAIN][Step 287] AvgLen: 60.00 | AvgRew: 162.22 | StdRew: 0.00 | Time/Step: 0.0001
[EVAL] CurrentAgent | Episode 1 | Reward: 160.88 | Length: 59
[EVAL] CurrentAgent | Episode 2 | Reward: 156.68 | Length: 68
[EVAL][Step 0] AvgLen: 63.50 | AvgRew: 158.78 | StdRew: 2.10 | Time/Step: 0.0000
