In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import gym
import numpy as np
import random
from collections import namedtuple, deque
import wandb

GAMMA=0.99

class QNetwork1(nn.Module):
    def __init__(self, state_size, action_size, seed, fc1_units=128, fc2_units=64):
        super(QNetwork1, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(state_size, fc1_units)
        self.fc2 = nn.Linear(fc1_units, fc2_units)
        self.fc_advantage = nn.Linear(fc2_units, action_size)
        self.fc_value = nn.Linear(fc2_units, 1)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        value = self.fc_value(x)
        advantage = self.fc_advantage(x)
        Q_values = value + (advantage - advantage.max(dim=1, keepdim=True)[0])
        return Q_values
    
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class ReplayBuffer:
    def __init__(self, action_size, buffer_size, batch_size, seed):
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
        self.seed = random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        e = self.experience(state, action, reward, next_state, done)
        self.memory.append(e)

    def sample(self):
        experiences = random.sample(self.memory, k=self.batch_size)
        states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(device)
        actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).long().to(device)
        rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(device)
        next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(device)
        dones = torch.from_numpy(np.vstack([e.done for e in experiences if e is not None]).astype(np.uint8)).float().to(device)
        return (states, actions, rewards, next_states, dones)

    def __len__(self):
        return len(self.memory)

class TutorialAgent1:
    def __init__(self, state_size, action_size, seed, lr, update_every, buffer_size, batch_size):
        self.state_size = state_size
        print(update_every)
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.qnetwork_local = QNetwork1(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork1(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=lr)
        self.memory = ReplayBuffer(action_size, buffer_size, batch_size, seed)
        self.t_step = 0
        self.update_every = update_every
        self.batch_size = batch_size

    def step(self, state, action, reward, next_state, done):
        self.memory.add(state, action, reward, next_state, done)
        if len(self.memory) >= self.batch_size:
            experiences = self.memory.sample()
            self.learn(experiences, GAMMA)
        self.t_step = (self.t_step + 1) % self.update_every
        if self.t_step == 0:
            self.qnetwork_target.load_state_dict(self.qnetwork_local.state_dict())

    def act(self, state, eps=0.):
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        self.qnetwork_local.eval()
        with torch.no_grad():
            action_values = self.qnetwork_local(state)
        self.qnetwork_local.train()
        if random.random() > eps:
            return np.argmax(action_values.cpu().data.numpy())
        else:
            return random.choice(np.arange(self.action_size))

    def learn(self, experiences, gamma):
        states, actions, rewards, next_states, dones = experiences
        Q_targets_next = self.qnetwork_target(next_states)
        next_state_values = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(1)
        Q_targets = rewards + (gamma * next_state_values * (1 - dones))
        Q_expected = self.qnetwork_local(states).gather(1, actions)
        loss = F.mse_loss(Q_expected, Q_targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
import matplotlib.pyplot as plt
def dqn(agent, env, n_episodes=10000, max_t=500, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    scores_window = deque(maxlen=100)
    all_scores = np.array([])
    moving_avg_scores = np.array([])
    eps = eps_start
    for i_episode in range(1, n_episodes+1):
        state,_ = env.reset()
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            next_state, reward, done, truncated,_ = env.step(action)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done or truncated:
                break
        scores_window.append(score)
        all_scores = np.append(all_scores, score)
        eps = max(eps_end, eps_decay * eps)
        moving_avg_scores = np.append(moving_avg_scores, np.mean(scores_window))
        if i_episode == 250:
            break
    # plt.plot(np.arange(len(moving_avg_scores)), moving_avg_scores)
    # plt.ylabel('Moving Average Scores')
    # plt.xlabel('Episode #')
    # plt.title('Moving Average Scores over Episodes')
    # plt.show()
    return moving_avg_scores, True

def train_agent(lr, update_every, buffer_size, batch_size):
    env = gym.make('CartPole-v1')
    print(update_every)
    print(buffer_size)
    print(batch_size)
    state_shape = env.observation_space.shape[0]
    action_shape = env.action_space.n
    agent = TutorialAgent1(state_size=state_shape, action_size=action_shape, seed=0, lr=lr,
                           update_every=update_every, buffer_size=buffer_size, batch_size=batch_size)
    all_scores_1, _ = dqn(agent, env)
    regret = 0
    for i in all_scores_1:
        if i > 195:
            break
        else:
            regret += 195 - i
    return regret,all_scores_1

def run_training():
    config_defaults = {
        "lr": 5e-4,
        "update_every": 50,
        "buffer_size": 1e5,
        "batch_size": 64
    }
    config = wandb.init(config=config_defaults, project="dueling_max_cartpole")
    lr = config.config["lr"]
    update_every = config.config['update_every']
    batch_size = config.config["batch_size"]
    buffer_size = config.config["buffer_size"]
    regret,all_scores= train_agent(lr, update_every, buffer_size, batch_size)
    for i in all_scores:
        wandb.log({"mean_scores":i})
    wandb.log({"regret": regret})

sweep_config = {
    "method": "bayes",
    "metric": {"name": "regret", "goal": "minimize"},
    "parameters": {
        "lr": {"min": 1e-5, "max": 1e-2},
        "update_every": {"values": [20, 50, 75, 100]},
        "buffer_size": {"values": [ 1e2,1e3, 1e5]},
        "batch_size": {"values": [32, 64, 128]}
    },
    "project": "dueling_max_cartpole",
    "early_terminate": {
        "type": "hyperband",
        "min_iter": 3,
        "max_iter": 100
    }
}

sweep_id = wandb.sweep(sweep_config)
wandb.agent(sweep_id, function=run_training)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 2ssb7pwi
Sweep URL: https://wandb.ai/rl_shobhith/dueling_max_cartpole/sweeps/2ssb7pwi


[34m[1mwandb[0m: Agent Starting Run: tvr15ov0 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.00918765475565439
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mshobhith-v[0m ([33mrl_shobhith[0m). Use [1m`wandb login --relogin`[0m to force relogin


75
100
128
75


  if not isinstance(terminated, (bool, np.bool8)):


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄▄▆███▇▇▇▇▆▆▆▆▆▆▆▆▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.71
regret,44571.12264


[34m[1mwandb[0m: Agent Starting Run: qiohigpi with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.006938038829321627
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▆▆▆▆▆▇▇█
regret,▁

0,1
mean_scores,100.84
regret,37680.33069


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: j4gnnu3x with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.006782591437216842
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▇▇▇▇▇███
regret,▁

0,1
mean_scores,57.72
regret,39598.70397


[34m[1mwandb[0m: Agent Starting Run: rp2ipcnt with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.006816045466212297
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▆▇█
regret,▁

0,1
mean_scores,98.68
regret,38849.72163


[34m[1mwandb[0m: Agent Starting Run: hyqbp1lg with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.006925787031892141
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▁▁▂▂▃▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▄▅▆▇██████
regret,▁

0,1
mean_scores,62.68
regret,39803.71671


[34m[1mwandb[0m: Agent Starting Run: riwf3wz0 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0023449658940296733
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▆▅▅▆▆▆▇▇██
regret,▁

0,1
mean_scores,90.92
regret,36866.45067


[34m[1mwandb[0m: Agent Starting Run: yua9wj0v with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0017770833987536556
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▅▅▅▅▅▆▆▆▆▆▇▆▆▇▇▇▇▇████
regret,▁

0,1
mean_scores,80.42
regret,36622.98997


[34m[1mwandb[0m: Agent Starting Run: mrfksgxy with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.005877820734947816
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇████
regret,▁

0,1
mean_scores,63.49
regret,39337.54923


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3eu7mdo9 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.006933035606962835
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇▇████
regret,▁

0,1
mean_scores,67.09
regret,39176.33306


[34m[1mwandb[0m: Agent Starting Run: r2i4fu4u with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.005378875846062624
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19432756739909615, max=1.…

0,1
mean_scores,▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█
regret,▁

0,1
mean_scores,74.57
regret,38009.09716


[34m[1mwandb[0m: Agent Starting Run: s0wm734w with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.003505753644476448
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂███▇▇▇▇▇▇▇▆▆▆▅▅▅▅▄▄▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.75
regret,44159.59876


[34m[1mwandb[0m: Agent Starting Run: 6mr6fhw2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0068894966596074235
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▇▇█
regret,▁

0,1
mean_scores,117.36
regret,37021.83016


[34m[1mwandb[0m: Agent Starting Run: 88e77ytj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00952416642990107
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇████
regret,▁

0,1
mean_scores,56.9
regret,39666.5899


[34m[1mwandb[0m: Agent Starting Run: fl354ri5 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.004298634552240046
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,█▇▆▆▅▄▄▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.97
regret,44130.30673


[34m[1mwandb[0m: Agent Starting Run: iyd2ya3k with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.008935053240910141
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▆▆▇▇▇██
regret,▁

0,1
mean_scores,72.12
regret,39473.04991


[34m[1mwandb[0m: Agent Starting Run: 8dhmmar3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.001462965709725362
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▆▆▆▆▆▆▇█▇█▇▇▇██
regret,▁

0,1
mean_scores,81.16
regret,36401.27456


[34m[1mwandb[0m: Agent Starting Run: xviev2b3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0050963961615036555
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▇▇█
regret,▁

0,1
mean_scores,65.89
regret,40235.2972


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3ubk0bap with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.002406939170688959
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▆▆▇▇█
regret,▁

0,1
mean_scores,93.44
regret,38747.32825


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ulexvgmz with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.009148315499766548
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▇██
regret,▁

0,1
mean_scores,95.87
regret,38106.70785


[34m[1mwandb[0m: Agent Starting Run: 5ofmrunw with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007764977150560176
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇██
regret,▁

0,1
mean_scores,81.54
regret,38337.75832


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: r1s6h7f7 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.005390258922189162
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▅▅▆▆▇█
regret,▁

0,1
mean_scores,103.01
regret,38460.66287


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: j8ndju4v with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.009009433725966626
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011455555555504463, max=1.0…

20
100000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇███████
regret,▁

0,1
mean_scores,66.48
regret,37966.86138


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7vgqrpws with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00031625501015446986
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

20
100000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇██
regret,▁

0,1
mean_scores,221.2
regret,26580.44392


[34m[1mwandb[0m: Agent Starting Run: z32boh5n with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0024789175125948157
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██
regret,▁

0,1
mean_scores,114.12
regret,33929.78352


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jq72cyz8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.004574061458806312
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▅▆▇▇▇█
regret,▁

0,1
mean_scores,98.39
regret,38578.40693


[34m[1mwandb[0m: Agent Starting Run: dkf7ntxb with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.009339707803269388
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▂▃▃▃▃▃▃▄▄▄▅▅▆▆▆▇▇▇█
regret,▁

0,1
mean_scores,132.67
regret,36011.20037


[34m[1mwandb[0m: Agent Starting Run: 3i45mpdw with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.001574286575927708
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇██
regret,▁

0,1
mean_scores,144.22
regret,32332.04169


[34m[1mwandb[0m: Agent Starting Run: 6f3i9blh with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.007606309598106814
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
1000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▄▅▅▅▅▅▅▅▅▄▅▆▅▆▇█
regret,▁

0,1
mean_scores,49.12
regret,40796.82058


[34m[1mwandb[0m: Agent Starting Run: c3afgtmc with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.009637108810894615
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇█
regret,▁

0,1
mean_scores,103.37
regret,36050.98743


[34m[1mwandb[0m: Agent Starting Run: sj7dsg5r with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0018097975171812424
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇█
regret,▁

0,1
mean_scores,103.05
regret,33906.48576


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: et9kikag with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0010655475412679235
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▇▇▇▇█
regret,▁

0,1
mean_scores,224.72
regret,28910.56183


[34m[1mwandb[0m: Agent Starting Run: nla9sl4f with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0013147502758937086
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄█▆▇▆▆▅▅▅▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.77
regret,44556.40969


[34m[1mwandb[0m: Agent Starting Run: ykl1ic5o with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00918412544918712
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▆▇▇▇███
regret,▁

0,1
mean_scores,62.11
regret,38561.43115


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: s608n6x1 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007963574352930521
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▇▇▇▇██
regret,▁

0,1
mean_scores,78.39
regret,39022.65451


[34m[1mwandb[0m: Agent Starting Run: 4yjw9opj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0025221500239376894
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▇▇▇███
regret,▁

0,1
mean_scores,183.93
regret,30620.14321


[34m[1mwandb[0m: Agent Starting Run: hejjs0au with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0027721584516742516
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▆▇▇▇███
regret,▁

0,1
mean_scores,110.8
regret,36561.88529


[34m[1mwandb[0m: Agent Starting Run: 35xlf74s with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.006097682202775469
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▇▇▇██
regret,▁

0,1
mean_scores,83.41
regret,38049.0013


[34m[1mwandb[0m: Agent Starting Run: 7n3dtx5v with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007999803976213794
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▇▇▇▇▇▇▇▇███
regret,▁

0,1
mean_scores,65.01
regret,38905.75826


[34m[1mwandb[0m: Agent Starting Run: 173ctx87 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0021005439662009576
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▆▆▆▇▇▇███▇███▇▇▇▇▇
regret,▁

0,1
mean_scores,75.97
regret,36110.44517


[34m[1mwandb[0m: Agent Starting Run: dmnda3ul with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.004151368370358136
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888844262, max=1.0…

20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇███
regret,▁

0,1
mean_scores,75.69
regret,38265.3769


[34m[1mwandb[0m: Agent Starting Run: g67dfic3 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0018452411459855677
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

100
1000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▄▄▅▅▆▆▆▇▇▇██████████▇
regret,▁

0,1
mean_scores,106.92
regret,32767.61317


[34m[1mwandb[0m: Agent Starting Run: 9k97ovdy with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.009668107676615093
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▇▇█
regret,▁

0,1
mean_scores,104.72
regret,37126.50617


[34m[1mwandb[0m: Agent Starting Run: 7y2qrfak with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.004998409285057371
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▇▇▇█
regret,▁

0,1
mean_scores,85.89
regret,38376.92495


[34m[1mwandb[0m: Agent Starting Run: vj4dy4vt with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0027688187670144413
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄▇▇█████▇▇█▇▇▇▇▆▆▆▅▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,12.52
regret,44421.69028


[34m[1mwandb[0m: Agent Starting Run: ylchs1d5 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.009538498825436236
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
100
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█
regret,▁

0,1
mean_scores,105.86
regret,35506.58005


[34m[1mwandb[0m: Agent Starting Run: mri1mzfl with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.005496016737152379
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██▇█████
regret,▁

0,1
mean_scores,58.86
regret,38457.29618


[34m[1mwandb[0m: Agent Starting Run: u7m5vcz0 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.003249520790704531
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

50
100000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▆▆▆▇▇▇██
regret,▁

0,1
mean_scores,92.18
regret,37332.48583


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9sosxiox with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.009266679911043685
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▃█▇█▇██▇▇▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
regret,▁

0,1
mean_scores,12.27
regret,44387.64825


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5fmbuts5 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00020264560539766505
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▇▇▇▇█
regret,▁

0,1
mean_scores,291.0
regret,28277.74197


[34m[1mwandb[0m: Agent Starting Run: kzgeyqi3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.003381158476360841
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▃▄▃▃▄▄▆▆▇██
regret,▁

0,1
mean_scores,108.64
regret,37240.9276


[34m[1mwandb[0m: Agent Starting Run: msxp371s with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.006417161322546265
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
100000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇██████▇██
regret,▁

0,1
mean_scores,47.76
regret,39852.62841


[34m[1mwandb[0m: Agent Starting Run: 3h0gxq1s with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007769719183809647
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888844262, max=1.0…

75
100000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▆▆▇▇▇▇█████
regret,▁

0,1
mean_scores,62.88
regret,39543.37539


[34m[1mwandb[0m: Agent Starting Run: 9zc6ajsv with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00735691092645845
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▃▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▆▆▆▆▆▇█████
regret,▁

0,1
mean_scores,71.94
regret,38704.94908


[34m[1mwandb[0m: Agent Starting Run: puu55jzd with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0008688644914040798
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▆▆▆▇▇▇███
regret,▁

0,1
mean_scores,225.0
regret,28404.62974


[34m[1mwandb[0m: Agent Starting Run: et5bju0e with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0013728493198895129
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888844262, max=1.0…

50
1000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▇▇▇▇▇█
regret,▁

0,1
mean_scores,132.18
regret,34031.4659


[34m[1mwandb[0m: Agent Starting Run: v7cf5xs4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007554368342274598
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▆▇▇▇▇██
regret,▁

0,1
mean_scores,59.82
regret,39454.99979


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: iwo25z1h with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.004610831804723192
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▇▇████
regret,▁

0,1
mean_scores,64.06
regret,39502.88286


[34m[1mwandb[0m: Agent Starting Run: 11bh4di6 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.006765954846496239
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▆▇▇▆▇▇▇▇██
regret,▁

0,1
mean_scores,51.69
regret,39335.71862


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sdr3mhsp with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.009949311156531929
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
128
100


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▃█▇▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
regret,▁

0,1
mean_scores,12.03
regret,44166.16953


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ua1eut66 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007794020433062061
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▇▇▇▇███
regret,▁

0,1
mean_scores,95.28
regret,37124.3688


[34m[1mwandb[0m: Agent Starting Run: l8e4dyff with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0032461864583683457
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
100000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▄▅▄▄▄▄▅▅▅▅▅▆▆▆▇▇██
regret,▁

0,1
mean_scores,79.76
regret,37855.62799


[34m[1mwandb[0m: Agent Starting Run: webkeb5m with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.005184521445336861
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▃▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▆▇▆▇▇▇▇▇███
regret,▁

0,1
mean_scores,83.46
regret,36927.70971


[34m[1mwandb[0m: Agent Starting Run: v8lfn4c7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0029317029267489796
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▂▂▂▃▃▄▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇██████
regret,▁

0,1
mean_scores,53.49
regret,39368.38743


[34m[1mwandb[0m: Agent Starting Run: r8wb2457 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007517205302631413
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▃▂▂▂▃▃▃▃▃▃▃▃▃▄▄▅▅▅▅▆▆▆▇▆▇▇▇▇█▇▇▇▆▇▇█
regret,▁

0,1
mean_scores,51.56
regret,39864.7587


[34m[1mwandb[0m: Agent Starting Run: z43nfyqk with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.009683405030071672
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

100
100000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇██████
regret,▁

0,1
mean_scores,54.36
regret,39829.83875


[34m[1mwandb[0m: Agent Starting Run: 03h34qbq with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0006881788174093433
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

20
100
128
20


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded\r'), FloatProgress(value=0.0, max=1.0)))

0,1
mean_scores,▅▇▇█▇▇▇▇▇▆▆▆▆▆▆▆▆▅▅▄▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁
regret,▁

0,1
mean_scores,11.69
regret,44538.97502


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0gvo7dzp with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0006593634202518402
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▆▆▆▇▇▇█████▇▇▇▇█
regret,▁

0,1
mean_scores,140.3
regret,30192.05769


[34m[1mwandb[0m: Agent Starting Run: 9co4ila7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0002082086154799102
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▇▇██
regret,▁

0,1
mean_scores,202.22
regret,30707.83324


[34m[1mwandb[0m: Agent Starting Run: pgsov2wj with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 3.157134607674301e-05
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▇█▆▅▅▅▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.63
regret,44759.27786


[34m[1mwandb[0m: Agent Starting Run: 6ubroteh with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.008151859576608477
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▆▆▆▇███
regret,▁

0,1
mean_scores,86.95
regret,38469.09409


[34m[1mwandb[0m: Agent Starting Run: alxqb5o6 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.00496905020679698
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
128
75


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄▇█▇▇▇▇▆▆▆▆▆▆▆▆▆▅▅▄▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,12.23
regret,44308.72132


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wabjlr5h with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0013033705073242972
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

20
100000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▆▆▇▇▇▇▇▇██████
regret,▁

0,1
mean_scores,156.19
regret,28393.76361


[34m[1mwandb[0m: Agent Starting Run: 96chn0h1 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0053178248797539004
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▃▂▂▂▃▃▄▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇██████
regret,▁

0,1
mean_scores,54.41
regret,39001.75528


[34m[1mwandb[0m: Agent Starting Run: b3o1puwp with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.008879103921774533
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
100
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▅▆▇▇▆▆▇▇▇███
regret,▁

0,1
mean_scores,109.02
regret,35434.55251


[34m[1mwandb[0m: Agent Starting Run: yu81gxn0 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.004698962198173614
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▇▇██
regret,▁

0,1
mean_scores,148.7
regret,33473.93


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d1aktqv4 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.006983119391112843
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄▇██▇▇▇▇▇▆▆▇▆▆▆▆▆▅▅▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.84
regret,44512.62285


[34m[1mwandb[0m: Agent Starting Run: uija8foi with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0004864935386964952
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▅▅▅▆▆▆▆▇▇▇███
regret,▁

0,1
mean_scores,243.91
regret,26738.21178


[34m[1mwandb[0m: Agent Starting Run: z25hqv8q with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0004027346257691348
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▆▆▇▇██
regret,▁

0,1
mean_scores,275.05
regret,27828.00997


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: b7f6yjfo with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.004015673259557164
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▄▅▆▆▇▇██
regret,▁

0,1
mean_scores,106.94
regret,37561.65255


[34m[1mwandb[0m: Agent Starting Run: dszrzt2m with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0015408773647575366
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▃▂▃▃▃▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▆▆▆▇▇▇█
regret,▁

0,1
mean_scores,79.37
regret,35921.46338


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: uwtuq483 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.008429307255276516
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▅▅▆▇▇▇███
regret,▁

0,1
mean_scores,74.97
regret,39168.44676


[34m[1mwandb[0m: Agent Starting Run: d542btjj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.005706373221212562
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▁▂▃▃▃▃▃▃▃▄▄▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇█████████
regret,▁

0,1
mean_scores,56.63
regret,38656.96191


[34m[1mwandb[0m: Agent Starting Run: vmjs08j3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00514788704564452
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▅▆▆▆▆▇▇▇▇▇▇▇█████
regret,▁

0,1
mean_scores,56.01
regret,39099.66894


[34m[1mwandb[0m: Agent Starting Run: 1g407ra0 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0018169240524684004
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888844262, max=1.0…

50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▇▇██████▇██
regret,▁

0,1
mean_scores,101.34
regret,34905.98258


[34m[1mwandb[0m: Agent Starting Run: yrx9k15z with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00199852960706272
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▃▄▄▄▄▄▄▄▄▄▄▄▄▅▆▇█
regret,▁

0,1
mean_scores,92.54
regret,38691.97028


[34m[1mwandb[0m: Agent Starting Run: b42tuw69 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.008903662015723408
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

20
100
128
20


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▇▇█▇▇▇▇▆▆▇▇▆▆▆▆▆▅▅▅▄▄▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.78
regret,44470.55065


[34m[1mwandb[0m: Agent Starting Run: cpcsflia with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0025577113244419397
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

50
100000
32
50


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded\r'), FloatProgress(value=0.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇███
regret,▁

0,1
mean_scores,89.27
regret,36115.31646


[34m[1mwandb[0m: Agent Starting Run: 7vez5uyi with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00486490594692939
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▇▇██
regret,▁

0,1
mean_scores,148.48
regret,34454.93837


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: f1r00ksd with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.005074106893798435
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇█▇▇▇███
regret,▁

0,1
mean_scores,60.8
regret,38283.95065


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: l8agn0we with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007631672449631111
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

20
100000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▅▅▅▆▇▇▇███
regret,▁

0,1
mean_scores,102.57
regret,37714.62796


[34m[1mwandb[0m: Agent Starting Run: n0gucyb6 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.008659289115882192
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇██
regret,▁

0,1
mean_scores,112.29
regret,36310.92511


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5hmw1ojo with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.003625978050340634
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
100
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄▆██▇▇▇▆▆▆▇▇▇▇▇▆▆▆▅▅▅▅▅▄▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁
regret,▁

0,1
mean_scores,12.11
regret,44574.76003


[34m[1mwandb[0m: Agent Starting Run: 17tlgb3p with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.007328407306337308
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇██
regret,▁

0,1
mean_scores,81.52
regret,36667.54318


[34m[1mwandb[0m: Agent Starting Run: pju5f55t with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.004076663723194383
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

50
100
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▁▁▂▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇██
regret,▁

0,1
mean_scores,129.97
regret,34780.01481


[34m[1mwandb[0m: Agent Starting Run: 8d55w5db with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.009562433458878626
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▃▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇██
regret,▁

0,1
mean_scores,70.8
regret,39565.641


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2u2p6urs with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.008080267482168968
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇███
regret,▁

0,1
mean_scores,57.95
regret,40349.48365


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h6b1p15n with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0018733652970474664
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888844262, max=1.0…

75
100
128
75


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▆▇▆▇▆█▇███▇▇▇▇▇▆▆▆▆▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁
regret,▁

0,1
mean_scores,11.89
regret,44568.92999


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pqaj1rx3 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.003048478063493847
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▇▇██
regret,▁

0,1
mean_scores,94.1
regret,37377.5676


[34m[1mwandb[0m: Agent Starting Run: gn142kdj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.009985619222092243
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▅▇▇█▇▇▇▆▆▇▇▇▇▇▇██▇▆▆▅▅▅▆▆▆▆▆▆▅▅▅▅▆█
regret,▁

0,1
mean_scores,34.5
regret,41670.493


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: exvjensm with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0008220874453105529
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

20
100
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄███▇▇▆▆▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,12.28
regret,44261.6985


[34m[1mwandb[0m: Agent Starting Run: uwsrvhqu with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.009867123481776978
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

75
100
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂██▇▇▇█▇▇▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.85
regret,44322.43865


[34m[1mwandb[0m: Agent Starting Run: byhbzckm with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0010106020339834892
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▆▆▇▇▇▇▇▇▇▇███
regret,▁

0,1
mean_scores,121.43
regret,33214.49072


[34m[1mwandb[0m: Agent Starting Run: e2ff6yrg with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0097008198557158
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▇▇▇███
regret,▁

0,1
mean_scores,70.96
regret,38883.1509


[34m[1mwandb[0m: Agent Starting Run: n66bmjy6 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.002393722347585692
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇█
regret,▁

0,1
mean_scores,82.53
regret,37896.32726


[34m[1mwandb[0m: Agent Starting Run: twm49rcj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0014973772789165043
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄█▇▆▇▇▇█▇▇▇▇▇▇▇▇▇▆▆▆▆▅▅▄▄▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁
regret,▁

0,1
mean_scores,12.19
regret,44497.97024


[34m[1mwandb[0m: Agent Starting Run: ejtpxg25 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0033020902196435387
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▅▅▄▅▅▅▆▆▇▇███
regret,▁

0,1
mean_scores,78.69
regret,38130.89778


[34m[1mwandb[0m: Agent Starting Run: rl6dwmg9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0010327115514178445
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▅▅▅▆▇▇█
regret,▁

0,1
mean_scores,156.01
regret,32957.81486


[34m[1mwandb[0m: Agent Starting Run: 48zym371 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0007271297109118611
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇████
regret,▁

0,1
mean_scores,194.81
regret,28162.11454


[34m[1mwandb[0m: Agent Starting Run: l2i98er6 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.006989231288664099
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▄▅▅▅▅▅▆▆▆▆▆▇▇▇████
regret,▁

0,1
mean_scores,74.63
regret,38170.69355


[34m[1mwandb[0m: Agent Starting Run: ubz481m3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.008768873885618879
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇██
regret,▁

0,1
mean_scores,91.32
regret,37211.55296


[34m[1mwandb[0m: Agent Starting Run: ggjems3v with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.009772698052466128
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇███
regret,▁

0,1
mean_scores,63.26
regret,39326.68399


[34m[1mwandb[0m: Agent Starting Run: oee6vr6l with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0002611997026223461
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▇▇▇█
regret,▁

0,1
mean_scores,283.58
regret,25772.09506


[34m[1mwandb[0m: Agent Starting Run: wvabtrao with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0068759114573321905
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

20
1000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▆▆▆▇██
regret,▁

0,1
mean_scores,116.42
regret,36933.56614


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vsss5uon with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0046854238822515056
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▆▆▇▇█
regret,▁

0,1
mean_scores,169.94
regret,34049.78249


[34m[1mwandb[0m: Agent Starting Run: oup5d6aq with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.00012477066265248653
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
100
128
50


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▃▆██▇▇▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.91
regret,44191.21095


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: j106hjm5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0032771622323862137
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

20
100
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▇▇████
regret,▁

0,1
mean_scores,129.76
regret,33302.0617


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gdl4ucpb with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0028373038334926245
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇█████
regret,▁

0,1
mean_scores,103.49
regret,34388.00981


[34m[1mwandb[0m: Agent Starting Run: tlpmlv6l with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.00332503305307313
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

50
100
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▇▇██
regret,▁

0,1
mean_scores,142.68
regret,33944.19036


[34m[1mwandb[0m: Agent Starting Run: jlx4isgm with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.005553319637896525
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▄▄▄▄▄▅▅▅▅▆▇▇▇██
regret,▁

0,1
mean_scores,62.06
regret,39471.46294


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sprub3f7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.002232161597644309
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
100
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▇▇██
regret,▁

0,1
mean_scores,154.83
regret,33243.18762


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 85fv59fh with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.007189596600735885
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▇▇██
regret,▁

0,1
mean_scores,105.98
regret,36589.13298


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 13ktqsm6 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.004163411586163366
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▃▃▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇████
regret,▁

0,1
mean_scores,70.08
regret,38793.59224


[34m[1mwandb[0m: Agent Starting Run: c419e7yi with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.001667585034462614
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
100000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇███
regret,▁

0,1
mean_scores,76.55
regret,37284.32413


[34m[1mwandb[0m: Agent Starting Run: k1bikyvy with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0003697723054254365
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▇▇███
regret,▁

0,1
mean_scores,253.42
regret,23755.96767


[34m[1mwandb[0m: Agent Starting Run: lfteeddj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00466490805953428
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▃▄▄▄▃▃▃▃▃▃▃▄▄▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▇███
regret,▁

0,1
mean_scores,63.3
regret,39375.40395


[34m[1mwandb[0m: Agent Starting Run: h9bwjeh9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.008748098784256118
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

75
100
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▆▆▇▇▇▇███
regret,▁

0,1
mean_scores,140.39
regret,33992.52005


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0quj9y0s with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.008146011340644636
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

20
100
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▅▅▅▅▆▇▇▇▇██
regret,▁

0,1
mean_scores,176.64
regret,32037.06272


[34m[1mwandb[0m: Agent Starting Run: 3wle3x3a with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.000924456007005696
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇█████▇▇▇▇
regret,▁

0,1
mean_scores,120.93
regret,30930.75643


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1n1fgcg7 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.005464345567594092
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▅▅▅▅▅▅▄▅▆▆▆▇▇▇▇▇▇██
regret,▁

0,1
mean_scores,73.97
regret,38021.30788


[34m[1mwandb[0m: Agent Starting Run: 25levhz5 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0033786089297287124
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇███
regret,▁

0,1
mean_scores,78.41
regret,37750.34705


[34m[1mwandb[0m: Agent Starting Run: 0qhtlmaw with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.005541761949314899
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇█
regret,▁

0,1
mean_scores,141.8
regret,34292.27052


[34m[1mwandb[0m: Agent Starting Run: jtfricsz with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.004096102607908466
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▇▇██
regret,▁

0,1
mean_scores,121.94
regret,35968.60889


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h0bt0nnk with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0018649979008181067
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

20
100
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄█▆▆▆▅▅▅▅▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.76
regret,44345.59208


[34m[1mwandb[0m: Agent Starting Run: rhqcvqfm with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0017826707726327717
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
1000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▂▂▂▃▃▄▃▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▅▆▆▆▆▆▆▆▇▇▇█
regret,▁

0,1
mean_scores,59.34
regret,38783.32365


[34m[1mwandb[0m: Agent Starting Run: 0bj6an6u with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.007322553691101912
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
1000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇██
regret,▁

0,1
mean_scores,82.88
regret,37617.74704


[34m[1mwandb[0m: Agent Starting Run: ic8yk1w9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00840754117293834
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▇▇▇██
regret,▁

0,1
mean_scores,119.45
regret,37091.96078


[34m[1mwandb[0m: Agent Starting Run: 8m9gimb1 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.005736129576003145
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
1000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇█
regret,▁

0,1
mean_scores,68.43
regret,39571.70116


[34m[1mwandb[0m: Agent Starting Run: 7ficrgqp with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.005126474197598268
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇███
regret,▁

0,1
mean_scores,67.81
regret,38769.09583


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cepak1vt with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0036654710875924424
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▇▇▇██
regret,▁

0,1
mean_scores,111.34
regret,36653.61178


[34m[1mwandb[0m: Agent Starting Run: cfrhp0ow with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.004128703716909184
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▇▇▇█▇▇███
regret,▁

0,1
mean_scores,53.92
regret,39671.08317


[34m[1mwandb[0m: Agent Starting Run: 366qkfst with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00932376078999943
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█▇█
regret,▁

0,1
mean_scores,57.41
regret,39366.08862


[34m[1mwandb[0m: Agent Starting Run: 8tjlxucj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0029971888447256614
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

75
100000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▆▇▇▇██
regret,▁

0,1
mean_scores,65.62
regret,38152.4757


[34m[1mwandb[0m: Agent Starting Run: 0iu7z79e with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.003837692063529374
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇██
regret,▁

0,1
mean_scores,126.99
regret,34100.74633


[34m[1mwandb[0m: Agent Starting Run: r1nforyr with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0040806447483608035
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇████▇
regret,▁

0,1
mean_scores,111.11
regret,33830.5742


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8pbrhdpy with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.008412448662019045
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇█
regret,▁

0,1
mean_scores,117.85
regret,35907.91137


[34m[1mwandb[0m: Agent Starting Run: 130yuww3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.008356644811895717
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄▆█▇▇▆▆▆▅▅▅▅▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.79
regret,44382.93899


[34m[1mwandb[0m: Agent Starting Run: a4yn7u0j with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.006366149296340714
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

20
100
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄███▇▇▆▆▆▆▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.52
regret,44503.64265


[34m[1mwandb[0m: Agent Starting Run: a5f9gmr0 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0034705842645895114
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▅▅▇▇████
regret,▁

0,1
mean_scores,136.62
regret,35561.94291


[34m[1mwandb[0m: Agent Starting Run: evl0ai50 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00025252950833014277
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇██
regret,▁

0,1
mean_scores,238.3
regret,26163.08346


[34m[1mwandb[0m: Agent Starting Run: sowivtfz with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0007417687260839119
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
regret,▁

0,1
mean_scores,213.26
regret,27852.86614


[34m[1mwandb[0m: Agent Starting Run: 610u8qjm with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0024913318518587097
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇██
regret,▁

0,1
mean_scores,97.74
regret,35783.44413


[34m[1mwandb[0m: Agent Starting Run: glwvdk7x with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.002432776877811402
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

20
1000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▆▇▇█
regret,▁

0,1
mean_scores,108.7
regret,36646.4527


[34m[1mwandb[0m: Agent Starting Run: rz1kjh7w with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.004728419721482625
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

20
100
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▅█▇▇▇▆▆▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.95
regret,44082.377


[34m[1mwandb[0m: Agent Starting Run: v5582617 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.004991763437935504
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂███▇▇▆▆▆▆▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.89
regret,44374.1838


[34m[1mwandb[0m: Agent Starting Run: 76ku45d9 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.000914804272466021
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▅▅▅▆▆▆▆▇▇▇▇█
regret,▁

0,1
mean_scores,231.92
regret,28911.51199


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tbs03kii with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0019667306163714446
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▆▆▆▇▇▇▇██
regret,▁

0,1
mean_scores,95.79
regret,36814.86337


[34m[1mwandb[0m: Agent Starting Run: rjpjgjcn with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0056382318761760185
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▃▄▄▅▅▅▆▆▆▇▇█
regret,▁

0,1
mean_scores,79.86
regret,39246.15174


[34m[1mwandb[0m: Agent Starting Run: tg1yb1ky with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.001266164978792679
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇▇████
regret,▁

0,1
mean_scores,168.71
regret,29807.75175


[34m[1mwandb[0m: Agent Starting Run: txeid0jx with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007178203736571228
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▃▄▄▄▄▃▄▄▄▅▅▅▅▅▅▅▅▅▅▇▇█▇███████████▇▇▇
regret,▁

0,1
mean_scores,42.32
regret,39878.49109


[34m[1mwandb[0m: Agent Starting Run: vi2v7rhe with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0025488504348795326
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇█▇███
regret,▁

0,1
mean_scores,168.33
regret,29799.01105


[34m[1mwandb[0m: Agent Starting Run: 8oqmf9zw with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00502186838411891
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇███
regret,▁

0,1
mean_scores,83.95
regret,37613.62741


[34m[1mwandb[0m: Agent Starting Run: pmlta0wy with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0046592440431185685
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

50
100
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▇▇▇▇█
regret,▁

0,1
mean_scores,123.43
regret,35092.56465


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5u017iob with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0001632622234884345
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▇▇▇███
regret,▁

0,1
mean_scores,240.97
regret,27171.47481


[34m[1mwandb[0m: Agent Starting Run: hfuf431d with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0009595916069725244
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇████
regret,▁

0,1
mean_scores,122.04
regret,31917.3733


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jp4jbxst with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00969258324823211
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇█▇█
regret,▁

0,1
mean_scores,56.45
regret,39777.5109


[34m[1mwandb[0m: Agent Starting Run: t9wuj2jz with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 7.125592099171656e-05
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▄▄▅▆▆▇▇█
regret,▁

0,1
mean_scores,99.67
regret,41017.02702


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8zltcb4a with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0007529800848825589
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
100
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇███
regret,▁

0,1
mean_scores,217.56
regret,28924.59777


[34m[1mwandb[0m: Agent Starting Run: 5k358uru with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.002414293827893998
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▇▇▇███
regret,▁

0,1
mean_scores,129.12
regret,34376.23569


[34m[1mwandb[0m: Agent Starting Run: 0wbj31nk with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.007097955627249845
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▆▇▇▇▇██
regret,▁

0,1
mean_scores,93.21
regret,37341.88585


[34m[1mwandb[0m: Agent Starting Run: 27kytb6z with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0033851372393312626
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▇▇▇▇██
regret,▁

0,1
mean_scores,129.68
regret,34639.93306


[34m[1mwandb[0m: Agent Starting Run: 1wqngpc2 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0010543150487297226
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇████
regret,▁

0,1
mean_scores,108.07
regret,34470.32601


[34m[1mwandb[0m: Agent Starting Run: vvo9ndwo with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0006047660201799364
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇█
regret,▁

0,1
mean_scores,244.72
regret,27516.58015


[34m[1mwandb[0m: Agent Starting Run: qprbqtq2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.006295313741733877
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▇▇██
regret,▁

0,1
mean_scores,139.09
regret,34814.77565


[34m[1mwandb[0m: Agent Starting Run: an3aac0z with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.008916164464892481
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
128
50
