In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# the local project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [29]:
import random
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from ql import QL, QLP
from dqn import DQN, DQNP
from cartpole import CartPole
from lunarlander import LunarLander
from search_params import space
from simulation import compute_scores

In [31]:
import os
from pathlib import Path
import json

In [18]:
from skopt import dummy_minimize
from skopt.utils import use_named_args

---

In [47]:
# Build the Lunar Lander environment and a DQNP agent bound to it. In the cells
# shown here the agent is only used to read its default configuration.
env = LunarLander()
agent = DQNP(env)

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [48]:
# The agent's configuration as constructed with no overrides; used further down
# as the baseline that each run's saved agent.json is layered on top of.
default_config = agent.config
default_config

{'seed': 42,
 'lr_init': 0.005,
 'decay_freq': 200,
 'lr_decay': 0.1,
 'lr_min': 1e-05,
 'discount': 0.99,
 'history_len': 2,
 'idealization': 1,
 'policy': 'eps-greedy',
 'exploration_start': 1,
 'exploration_min': 0.01,
 'exploration_anneal_steps': 150,
 'exploration_temp': -1,
 'double': False,
 'target_update_freq': 0,
 'dueling': False,
 'streams_size': -1,
 'layer_sizes': (384, 192),
 'loss': 'mse',
 'hidden_activation': 'sigmoid',
 'out_activation': 'linear',
 'input_dropout': 0,
 'hidden_dropout': 0,
 'batch_normalization': False,
 'weights_init': 'lecun_uniform',
 'optimizer': 'adam',
 'q_clip': (-10000, 10000),
 'batch_size': 32,
 'n_epochs': 1,
 'memory_size': 50000,
 'min_mem_size': 1000,
 'normalize': False}

---

In [24]:
# Placeholder; the objective `f` overwrites this with the list of keyword-argument
# names it receives the first time it is called.
search_params = None

In [25]:
@use_named_args(space)
def f(**params):
    """Dummy objective that records which keyword arguments it was called with.

    The argument names are stored, in the order received, in the module-level
    ``search_params`` list. The values themselves are ignored.

    Returns:
        Always ``0``; the score is irrelevant, only the side effect matters.
    """
    global search_params
    # Unpacking a dict yields its keys in insertion order.
    search_params = [*params]
    return 0

In [26]:
# A single evaluation is enough: it invokes `f` once, which records the parameter
# names in `search_params`. The optimisation result itself is discarded.
_ = dummy_minimize(f, space, n_calls=1)

In [27]:
# Show the parameter names recorded by `f`.
search_params

['exploration_start',
 'exploration_min',
 'exploration_anneal_steps',
 'exploration_temp',
 'lr_init',
 'lr_decay',
 'decay_freq',
 'idealization',
 'discount',
 'history_len',
 'target_update_freq',
 'batch_size',
 'n_epochs',
 'normalize',
 'input_dropout',
 'batch_normalization',
 'streams_size',
 'loss',
 'hidden_activation',
 'out_activation',
 'weights_init',
 'optimizer',
 'layer_sizes',
 'memory_size',
 'q_clip']

---

In [13]:
# Root directory of saved runs; each run is expected to live in its own
# sub-directory (the loading loop reads OUTPUTS_DIR / name / 'agent.json').
OUTPUTS_DIR = Path('outputs')

# Keep only entries whose name mentions both "dqn" and "lander", ignoring case.
# os.listdir returns entries in arbitrary order, so the result is sorted to make
# the row order of everything derived from this list reproducible across
# machines and re-runs.
valid_names = sorted(
    entry for entry in os.listdir(OUTPUTS_DIR)
    if 'dqn' in entry.lower() and 'lander' in entry.lower()
)

In [35]:
# Number of entries in OUTPUTS_DIR that matched the name filter.
len(valid_names)

270

In [97]:
%%time
# Build one flat record per usable run: default config, overridden by the run's
# saved agent config, then its run statistics and the scores computed from its
# training log.
dicts = []
for name in valid_names:
    run_dir = OUTPUTS_DIR / name

    # Defaults go in first so that any key missing from agent.json keeps its
    # default value.
    run_dict = {'name': name, **default_config}

    # Entries without a readable, parseable agent.json are skipped. Only I/O
    # errors (missing file, not a directory, permissions) and decode/parse errors
    # (JSONDecodeError and UnicodeDecodeError are ValueError subclasses) are
    # caught, so KeyboardInterrupt and genuine programming errors propagate.
    # The handle is deliberately not called `f`: that would rebind the
    # module-level objective function `f` to a closed file object.
    try:
        with open(run_dir / 'agent.json') as agent_file:
            run_dict.update(json.load(agent_file))
    except (OSError, ValueError):
        continue

    # Skip runs whose saved config contains a 'prioritize_replay' key.
    if 'prioritize_replay' in run_dict:
        continue

    # Unlike agent.json, a missing or malformed stats.json is not tolerated and
    # raises, exactly as before.
    with open(run_dir / 'stats.json') as stats_file:
        run_dict.update(json.load(stats_file))

    # Drop runs whose recorded 'time' is below 1000.
    # NOTE(review): the unit of 'time' is not visible here - confirm.
    if run_dict['time'] < 1000:
        continue

    train_df = pd.read_csv(run_dir / 'train.csv')
    run_dict.update(compute_scores(train_df=train_df, eval_df=None))

    dicts.append(run_dict)

CPU times: user 407 ms, sys: 34.5 ms, total: 442 ms
Wall time: 442 ms


In [98]:
# Number of runs that passed every filter in the loading loop.
len(dicts)

190

In [99]:
# One row per kept run; columns are the union of the keys found in the run records.
df = pd.DataFrame(dicts)

In [100]:
# Target values: the 'aggregated' column.
# NOTE(review): presumably produced by compute_scores - confirm.
y = df.aggregated

In [101]:
# Inputs: only the columns named in search_params, in that order. Requires the
# cell that runs `f` to have been executed, otherwise search_params is still None.
x = df[search_params]

In [102]:
# Write the inputs to disk without the index column.
x.to_csv('prior-xs.csv', index=False)

In [103]:
# Write the targets to disk without the index; rows are in the same order as
# prior-xs.csv because both come from the same DataFrame.
# NOTE(review): whether Series.to_csv writes a header line by default depends on
# the pandas version - confirm what the consumer of this file expects.
y.to_csv('prior-ys.csv', index=False)