In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import gym
import numpy as np
import random
from collections import namedtuple, deque
import wandb

# Discount factor passed to TutorialAgent1.learn() when building the TD target.
GAMMA=0.99

class QNetwork1(nn.Module):
    """Dueling Q-network with mean-subtracted advantages.

    A two-layer ReLU trunk feeds two linear heads: a scalar state value and
    one advantage per action.  The heads are combined as
    ``Q = V + (A - mean(A))``, where the mean is taken over the action axis.

    Args:
        state_size: Number of features in a state vector.
        action_size: Number of discrete actions (width of the output).
        seed: Value passed to ``torch.manual_seed`` before the layers are
            created, so two networks built with the same seed start with
            identical weights.
        fc1_units: Width of the first hidden layer.
        fc2_units: Width of the second hidden layer.
    """

    def __init__(self, state_size, action_size, seed, fc1_units=128, fc2_units=64):
        super().__init__()
        # Seeds the global torch RNG; the returned Generator is kept on the module.
        self.seed = torch.manual_seed(seed)
        # Layer creation order is kept fixed because it determines which
        # random numbers each layer's initial weights are drawn from.
        self.fc1 = nn.Linear(state_size, fc1_units)
        self.fc2 = nn.Linear(fc1_units, fc2_units)
        self.fc_advantage = nn.Linear(fc2_units, action_size)
        self.fc_value = nn.Linear(fc2_units, 1)

    def forward(self, state):
        """Return Q-values for ``state``.

        Args:
            state: Float tensor whose last dimension is ``state_size``; it may
                be a single state ``(state_size,)`` or a batch
                ``(batch, state_size)``.

        Returns:
            Tensor with the same leading dimensions as ``state`` and a last
            dimension of ``action_size``.
        """
        hidden = F.relu(self.fc1(state))
        hidden = F.relu(self.fc2(hidden))
        value = self.fc_value(hidden)
        advantage = self.fc_advantage(hidden)
        # Average over the last (action) axis rather than a hard-coded dim=1,
        # so unbatched 1-D states work as well as batches.
        return value + (advantage - advantage.mean(dim=-1, keepdim=True))
    
# Torch device shared by the networks and by tensors sampled from the replay
# buffer: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class ReplayBuffer:
    """Bounded FIFO store of transitions with uniform random minibatch sampling.

    Args:
        action_size: Number of discrete actions (stored, not otherwise used here).
        buffer_size: Maximum number of transitions kept; the oldest are
            dropped first.  Integer-valued floats such as ``1e5`` are accepted.
        batch_size: Number of transitions returned by :meth:`sample`.
        seed: Seed applied to Python's global ``random`` module.
    """

    def __init__(self, action_size, buffer_size, batch_size, seed):
        self.action_size = action_size
        # deque(maxlen=...) and random.sample(k=...) both reject floats, yet
        # sizes are commonly written as 1e5 / 1e2, so normalise them to int.
        self.memory = deque(maxlen=int(buffer_size))
        self.batch_size = int(batch_size)
        self.experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"],
        )
        # random.seed() returns None, so seed first and keep the value itself.
        random.seed(seed)
        self.seed = seed

    def add(self, state, action, reward, next_state, done):
        """Append one transition, evicting the oldest when the buffer is full."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Draw ``batch_size`` transitions uniformly at random, without replacement.

        Returns:
            Tuple ``(states, actions, rewards, next_states, dones)`` of tensors
            on the module-level ``device``.  Each field is stacked with
            ``np.vstack`` so there is one row per transition.  ``actions`` is
            ``long``; the rest are ``float`` (``dones`` holds 0.0 / 1.0).

        Raises:
            ValueError: From ``random.sample`` when the buffer holds fewer than
                ``batch_size`` transitions.
        """
        batch = random.sample(self.memory, k=self.batch_size)

        def column(field):
            # One row per sampled transition for the requested field.
            return np.vstack([getattr(transition, field) for transition in batch])

        states = torch.from_numpy(column("state")).float().to(device)
        actions = torch.from_numpy(column("action")).long().to(device)
        rewards = torch.from_numpy(column("reward")).float().to(device)
        next_states = torch.from_numpy(column("next_state")).float().to(device)
        # Booleans go through uint8 first so from_numpy yields a numeric tensor.
        dones = torch.from_numpy(column("done").astype(np.uint8)).float().to(device)
        return (states, actions, rewards, next_states, dones)

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.memory)

class TutorialAgent1:
    """DQN agent with a dueling Q-network, replay buffer and hard target updates.

    The local network is trained on every environment step once the buffer
    holds at least one batch; the target network is overwritten with the local
    weights every ``update_every`` steps.

    Args:
        state_size: Number of features in a state vector.
        action_size: Number of discrete actions.
        seed: Seed for Python's ``random`` module and for both networks.
        lr: Adam learning rate for the local network.
        update_every: Number of ``step`` calls between target-network syncs.
        buffer_size: Capacity of the replay buffer.
        batch_size: Minibatch size used for each learning update.
    """

    def __init__(self, state_size, action_size, seed, lr, update_every, buffer_size, batch_size):
        self.state_size = state_size
        # Debug trace kept so the constructor's console output is unchanged.
        print(update_every)
        self.action_size = action_size
        # random.seed() returns None, so seed first and keep the value itself.
        random.seed(seed)
        self.seed = seed
        # Both networks use the same seed, so they start with identical weights.
        self.qnetwork_local = QNetwork1(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork1(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=lr)
        self.memory = ReplayBuffer(action_size, buffer_size, batch_size, seed)
        self.t_step = 0
        self.update_every = update_every
        self.batch_size = batch_size

    def step(self, state, action, reward, next_state, done):
        """Store a transition, train once if possible, and maybe sync the target.

        Args:
            state: State the action was taken in.
            action: Action index that was taken.
            reward: Reward received.
            next_state: State observed after the action.
            done: Whether ``next_state`` is terminal (disables bootstrapping).
        """
        self.memory.add(state, action, reward, next_state, done)
        if len(self.memory) >= self.batch_size:
            self.learn(self.memory.sample(), GAMMA)
        # t_step wraps to 0 every `update_every` calls; that is the sync point.
        self.t_step = (self.t_step + 1) % self.update_every
        if self.t_step == 0:
            self.qnetwork_target.load_state_dict(self.qnetwork_local.state_dict())

    def act(self, state, eps=0.):
        """Pick an action epsilon-greedily from the local network.

        Args:
            state: NumPy array holding a single state.
            eps: Probability of choosing a uniformly random action.

        Returns:
            Index of the chosen action.
        """
        # Draw first: an exploratory move does not need a forward pass.  The
        # random-number consumption matches drawing after the forward pass.
        if random.random() <= eps:
            return random.choice(np.arange(self.action_size))
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        self.qnetwork_local.eval()
        with torch.no_grad():
            action_values = self.qnetwork_local(state)
        self.qnetwork_local.train()
        return np.argmax(action_values.cpu().data.numpy())

    def learn(self, experiences, gamma):
        """Run one gradient step on the local network using a TD(0) target.

        Args:
            experiences: Tuple ``(states, actions, rewards, next_states, dones)``
                of tensors with one row per transition; ``actions`` must be an
                integer tensor usable as a ``gather`` index.
            gamma: Discount factor applied to the bootstrapped value.
        """
        states, actions, rewards, next_states, dones = experiences
        # The target is a constant for this update: a single forward pass
        # through the target network, with no autograd graph.
        with torch.no_grad():
            next_state_values = self.qnetwork_target(next_states).max(1, keepdim=True)[0]
        # (1 - dones) zeroes the bootstrap term for terminal transitions.
        Q_targets = rewards + (gamma * next_state_values * (1 - dones))
        Q_expected = self.qnetwork_local(states).gather(1, actions)
        loss = F.mse_loss(Q_expected, Q_targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
import matplotlib.pyplot as plt
def dqn(agent, env, n_episodes=10000, max_t=500, eps_start=1.0, eps_end=0.01, eps_decay=0.995,
        episode_cap=250):
    """Train ``agent`` on ``env`` with epsilon-greedy exploration.

    Args:
        agent: Object exposing ``act(state, eps)`` and
            ``step(state, action, reward, next_state, done)``.
        env: Environment whose ``reset()`` returns ``(state, info)`` and whose
            ``step(action)`` returns
            ``(next_state, reward, done, truncated, info)``.
        n_episodes: Requested number of training episodes.
        max_t: Maximum number of steps per episode.
        eps_start: Initial exploration rate.
        eps_end: Lower bound on the exploration rate.
        eps_decay: Factor applied to the exploration rate after each episode.
        episode_cap: Hard limit on the number of episodes actually run.  The
            default of 250 reproduces the previously hard-coded early stop;
            pass ``None`` to run all ``n_episodes``.

    Returns:
        Tuple ``(moving_avg_scores, True)`` where ``moving_avg_scores`` is a
        float array holding, for each episode run, the mean score over the
        most recent (up to 100) episodes.
    """
    if episode_cap is not None:
        n_episodes = min(n_episodes, episode_cap)

    scores_window = deque(maxlen=100)
    # Collected in a list: np.append copies the whole array on every call.
    moving_avg_scores = []
    eps = eps_start
    for _episode in range(n_episodes):
        state, _ = env.reset()
        score = 0
        for _t in range(max_t):
            action = agent.act(state, eps)
            next_state, reward, done, truncated, _ = env.step(action)
            # Only `done` is stored: a truncated episode ends the rollout but
            # is not recorded as a terminal transition.
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done or truncated:
                break
        scores_window.append(score)
        eps = max(eps_end, eps_decay * eps)
        moving_avg_scores.append(np.mean(scores_window))
    return np.asarray(moving_avg_scores, dtype=np.float64), True

def train_agent(lr, update_every, buffer_size, batch_size):
    """Train a fresh agent on CartPole-v1 and compute its regret.

    Regret is the accumulated shortfall ``195 - avg`` over the per-episode
    moving-average scores, counted up to (but excluding) the first episode
    whose moving average exceeds 195.

    Args:
        lr: Adam learning rate for the agent.
        update_every: Number of agent steps between target-network syncs.
        buffer_size: Replay buffer capacity (integer-valued floats accepted).
        batch_size: Minibatch size (integer-valued floats accepted).

    Returns:
        Tuple ``(regret, moving_avg_scores)``.
    """
    env = gym.make('CartPole-v1')
    try:
        print(update_every)
        print(buffer_size)
        print(batch_size)
        state_shape = env.observation_space.shape[0]
        action_shape = env.action_space.n
        # Sizes are cast because the replay buffer's deque and random.sample
        # only accept ints, while configs may carry values such as 1e5.
        agent = TutorialAgent1(state_size=state_shape, action_size=action_shape, seed=0, lr=lr,
                               update_every=update_every, buffer_size=int(buffer_size),
                               batch_size=int(batch_size))
        moving_avg_scores, _ = dqn(agent, env)
    finally:
        # Release the environment even when training raises.
        env.close()

    regret = 0
    for avg in moving_avg_scores:
        if avg > 195:
            break
        regret += 195 - avg
    return regret, moving_avg_scores

def run_training():
    """Run one W&B-tracked training job and log its scores and regret.

    Hyperparameters are read from the run's config, which starts from the
    defaults below.  The moving-average score is logged once per episode
    under ``mean_scores`` after training has finished, followed by a single
    ``regret`` value.
    """
    config_defaults = {
        "lr": 5e-4,
        "update_every": 50,
        "buffer_size": 1e5,
        "batch_size": 64
    }
    # The context manager finishes the run on exit, including when training
    # raises, so the function is also safe to call outside wandb.agent.
    with wandb.init(config=config_defaults, project="dueling_mean_cartpole_new") as run:
        lr = run.config["lr"]
        update_every = run.config["update_every"]
        # Cast: the default buffer size is written as the float 1e5, but the
        # replay buffer needs integer sizes.
        batch_size = int(run.config["batch_size"])
        buffer_size = int(run.config["buffer_size"])
        regret, moving_avg_scores = train_agent(lr, update_every, buffer_size, batch_size)
        for avg in moving_avg_scores:
            wandb.log({"mean_scores": avg})
        wandb.log({"regret": regret})

# Bayesian hyperparameter search that minimises the "regret" metric logged by
# run_training().
sweep_config = {
    "method": "bayes",
    "metric": {"name": "regret", "goal": "minimize"},
    "parameters": {
        "lr": {"min": 1e-5, "max": 1e-2},
        "update_every": {"values": [20, 50, 75, 100]},
        # Integer literals: these values end up as deque(maxlen=...) in the
        # replay buffer, which rejects floats such as 1e2.
        "buffer_size": {"values": [100, 1000, 100000]},
        "batch_size": {"values": [32, 64, 128]}
    },
    "project": "dueling_mean_cartpole_new",
    # NOTE(review): run_training() logs all metrics only after training has
    # finished, so this early-termination rule presumably never stops a run
    # mid-training -- confirm against the W&B Hyperband documentation.
    "early_terminate": {
        "type": "hyperband",
        "min_iter": 3,
        "max_iter": 100
    }
}

# Register the sweep, then run jobs from it in this process; no `count` is
# given to wandb.agent.
sweep_id = wandb.sweep(sweep_config)
wandb.agent(sweep_id, function=run_training)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: i3l5pda3
Sweep URL: https://wandb.ai/rl_shobhith/dueling_mean_cartpole_new/sweeps/i3l5pda3


[34m[1mwandb[0m: Agent Starting Run: ydd4tm6h with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0006851826099915179
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mshobhith-v[0m ([33mrl_shobhith[0m). Use [1m`wandb login --relogin`[0m to force relogin


20
1000
64
20


  if not isinstance(terminated, (bool, np.bool8)):


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▅▅▅▅▆▆▆▇▇▇███
regret,▁

0,1
mean_scores,203.64
regret,28980.01009


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rt5mngc3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0032114565270325514
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▃▆██▇▇▇▇▆▆▆▇▆▆▆▆▆▅▅▄▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁
regret,▁

0,1
mean_scores,11.98
regret,44529.23154


[34m[1mwandb[0m: Agent Starting Run: 1sq8baai with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.008704841934067646
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▃▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇█▇▇▇▇▇
regret,▁

0,1
mean_scores,55.19
regret,39821.07025


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: yd6m4v3k with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.002345010618645681
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇████▇▇
regret,▁

0,1
mean_scores,49.5
regret,38964.9253


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: en1zp7u1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.004465484605114804
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
regret,▁

0,1
mean_scores,58.67
regret,39097.86398


[34m[1mwandb[0m: Agent Starting Run: o01a4ibb with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00723971295164426
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▃▄▄▄▄▅▅▅▆▆▆▆▇▇█
regret,▁

0,1
mean_scores,56.38
regret,40339.59412


[34m[1mwandb[0m: Agent Starting Run: shokchq8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.001777142252491941
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▄▅▅▅▆▆▆▆▆▆▆▆▆█
regret,▁

0,1
mean_scores,85.09
regret,38008.62287


[34m[1mwandb[0m: Agent Starting Run: kkb8w32y with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0002325893627361668
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▅▅▆▆▆▇▇██
regret,▁

0,1
mean_scores,262.53
regret,27450.37947


[34m[1mwandb[0m: Agent Starting Run: d2920c55 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.00751447191619619
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▅▆▆▇▇▇▇█
regret,▁

0,1
mean_scores,103.91
regret,37065.50347


[34m[1mwandb[0m: Agent Starting Run: vfw60obe with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.009658203236841696
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▃▃▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▅▆▆▆▇▇██
regret,▁

0,1
mean_scores,86.39
regret,38550.36704


[34m[1mwandb[0m: Agent Starting Run: pn5s4cmj with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.008496605554377486
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇█
regret,▁

0,1
mean_scores,108.92
regret,36756.49289


[34m[1mwandb[0m: Agent Starting Run: ke2dlrhc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.003396786365105716
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

50
1000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▃▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▅▅▆▆▆▇▇▆▇▇█
regret,▁

0,1
mean_scores,56.76
regret,39509.08865


[34m[1mwandb[0m: Agent Starting Run: 52rvyd1g with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0005151386353916433
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▆▆▆▇▇▇██████
regret,▁

0,1
mean_scores,193.84
regret,26517.80913


[34m[1mwandb[0m: Agent Starting Run: yvjkgas4 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0013449326679941954
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂█▇▆▆▆▅▅▅▅▅▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▁▂▁▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,12.04
regret,44447.15858


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: euedi5z3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.009190858869097618
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▆▆▇▇▇█
regret,▁

0,1
mean_scores,85.67
regret,39746.6689


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ns9gb3gz with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.007739190682329302
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

20
1000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▅▅▇█
regret,▁

0,1
mean_scores,98.96
regret,40161.52323


[34m[1mwandb[0m: Agent Starting Run: 3tiyvne2 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.006173134461609443
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇█████
regret,▁

0,1
mean_scores,46.14
regret,40696.88639


[34m[1mwandb[0m: Agent Starting Run: y7tvblv5 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0006514616147518169
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▇▇██
regret,▁

0,1
mean_scores,209.23
regret,29080.02891


[34m[1mwandb[0m: Agent Starting Run: 655qbbg9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.004469333760464303
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▆▇███
regret,▁

0,1
mean_scores,96.46
regret,38177.96226


[34m[1mwandb[0m: Agent Starting Run: hih5meja with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0007997395274187735
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
128
50


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄▇█▇▇▇▇▇▇▆▇▇▇▆▇▆▆▆▆▅▅▅▄▄▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.66
regret,44312.12195


[34m[1mwandb[0m: Agent Starting Run: o0ciiee4 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.005225729024239809
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
128
75


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▃█▆▇▇▇█▇▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
regret,▁

0,1
mean_scores,12.18
regret,44291.9706


[34m[1mwandb[0m: Agent Starting Run: si7ycj56 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0017823653174291233
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▆▆▇▇▇▇█
regret,▁

0,1
mean_scores,141.2
regret,34401.66861


[34m[1mwandb[0m: Agent Starting Run: xo91c9vu with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.002825991976609428
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇███
regret,▁

0,1
mean_scores,78.68
regret,37677.88122


[34m[1mwandb[0m: Agent Starting Run: e1ihvrf7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0019417509628047432
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▇▇▆▇▇███
regret,▁

0,1
mean_scores,75.44
regret,38078.2794


[34m[1mwandb[0m: Agent Starting Run: fmyxapza with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.003371481855079034
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888844262, max=1.0…

20
100
128
20


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄█▇▇▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,12.07
regret,44226.38378


[34m[1mwandb[0m: Agent Starting Run: 1hp9r7wc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0007390511850477141
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▅▆▆▇▇█
regret,▁

0,1
mean_scores,166.3
regret,31141.96351


[34m[1mwandb[0m: Agent Starting Run: d43obbtr with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00017357913722509247
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▆▆▆▇▇██
regret,▁

0,1
mean_scores,310.22
regret,27622.82926


[34m[1mwandb[0m: Agent Starting Run: a2b0o9ds with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.002110195074195025
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▅▅▆▆▇▇█████
regret,▁

0,1
mean_scores,162.21
regret,31479.72735


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ieve7rh9 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.001730343427321439
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▇▇▇▇██
regret,▁

0,1
mean_scores,136.15
regret,34356.80053


[34m[1mwandb[0m: Agent Starting Run: h9eb81zt with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.001731824434925777
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▇▇█
regret,▁

0,1
mean_scores,161.64
regret,32825.38895


[34m[1mwandb[0m: Agent Starting Run: o2s6ot5r with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0019382809502013667
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇█
regret,▁

0,1
mean_scores,109.02
regret,35095.08801


[34m[1mwandb[0m: Agent Starting Run: r8g1fvrj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.006133617838882902
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▃▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇████▇▇█▇
regret,▁

0,1
mean_scores,60.79
regret,38889.51235


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: yxrj0q1q with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.007450825810098412
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▆▆▆▇▇▇▇█████▇▇▆
regret,▁

0,1
mean_scores,56.27
regret,38637.64906


[34m[1mwandb[0m: Agent Starting Run: 2jq5hn0q with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0020558235424734273
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇█▇█
regret,▁

0,1
mean_scores,75.64
regret,37548.29856


[34m[1mwandb[0m: Agent Starting Run: j75s4osn with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0027058168837618603
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▄▄▅▅▅▅▅▅▅▅▅▆▆▇▇▇██
regret,▁

0,1
mean_scores,57.7
regret,39718.90779


[34m[1mwandb[0m: Agent Starting Run: ba6lftz2 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.007021718709497665
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄▃▆▇████▇▇▇▇▆▆▆▆▆▆▅▅▄▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.71
regret,44298.65122


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zt0yt3gw with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.009220391722586362
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
100
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▇▇▇██
regret,▁

0,1
mean_scores,121.76
regret,36281.05253


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: inr02oln with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.009398901464181111
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▆▇█
regret,▁

0,1
mean_scores,112.99
regret,37807.79226


[34m[1mwandb[0m: Agent Starting Run: n8zmqkbf with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.006527239450863962
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▃▃▄▅▅▅▅▅▅▆▆▇▇██
regret,▁

0,1
mean_scores,66.86
regret,39563.54119


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 96i6kiw7 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0027252979683216015
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▆▇▇▇████
regret,▁

0,1
mean_scores,118.8
regret,34490.5362


[34m[1mwandb[0m: Agent Starting Run: wo7uaka4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0033167296376270163
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▃▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▆▅▅▆▆▇▇▇▇██
regret,▁

0,1
mean_scores,66.25
regret,39181.52773


[34m[1mwandb[0m: Agent Starting Run: symtk4no with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0043403348846601285
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▃▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▆▆▇▇▇▇████
regret,▁

0,1
mean_scores,78.99
regret,37870.62121


[34m[1mwandb[0m: Agent Starting Run: fmydav0y with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.004926805007310783
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▃▃▃▄▄▄▅▅▆▇▇▇▇██
regret,▁

0,1
mean_scores,50.77
regret,41187.25102


[34m[1mwandb[0m: Agent Starting Run: icfxjpto with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0011150875533473515
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

20
1000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇██
regret,▁

0,1
mean_scores,206.12
regret,28207.10725


[34m[1mwandb[0m: Agent Starting Run: 9jyli1cu with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.000912189313615243
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▆▆▇▇▇▇▇█
regret,▁

0,1
mean_scores,180.12
regret,30991.06007


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gewpm8wk with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0014856430896902147
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

20
1000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇██
regret,▁

0,1
mean_scores,110.03
regret,35348.70945


[34m[1mwandb[0m: Agent Starting Run: 2jqf17nd with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0008801592519657341
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇██
regret,▁

0,1
mean_scores,177.81
regret,31276.72669


[34m[1mwandb[0m: Agent Starting Run: s82i69ku with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0010314618834549852
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▇▆▆▇▇▇▇█
regret,▁

0,1
mean_scores,121.09
regret,34351.75193


[34m[1mwandb[0m: Agent Starting Run: glmrdf6s with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.008667209863158887
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
100000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▆▇█████
regret,▁

0,1
mean_scores,54.1
regret,39705.34982


[34m[1mwandb[0m: Agent Starting Run: c3ogm36s with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0005799344642108979
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇██
regret,▁

0,1
mean_scores,242.37
regret,26336.12334


[34m[1mwandb[0m: Agent Starting Run: pl1648k3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.009032338003313916
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇██████
regret,▁

0,1
mean_scores,56.65
regret,39498.89914


[34m[1mwandb[0m: Agent Starting Run: ggcnph54 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.007556615511705749
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▅▅▆▇███
regret,▁

0,1
mean_scores,107.76
regret,38042.61911


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c883vrhg with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.008260886166014331
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888844262, max=1.0…

20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▁▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▇▇▇▇▇█████
regret,▁

0,1
mean_scores,73.97
regret,37538.23435


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nu969y6j with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.007917211505474923
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▃▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇██
regret,▁

0,1
mean_scores,55.64
regret,39883.53872


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0fiu3ooz with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.002385700189458746
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▄▄▅▅▅▅▆▆▆▇▇▇█
regret,▁

0,1
mean_scores,116.95
regret,35217.01095


[34m[1mwandb[0m: Agent Starting Run: 7bh2jyg7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.001392286387403521
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▅▅▅▆▅▅▅▅▅▆▆▇▇█
regret,▁

0,1
mean_scores,93.16
regret,36354.83743


[34m[1mwandb[0m: Agent Starting Run: 1nmukub7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.009525763411473352
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▂▂▂▂▂▂▃▃▃▃▃▃▄▅▅▆▇██
regret,▁

0,1
mean_scores,61.95
regret,40764.2535


[34m[1mwandb[0m: Agent Starting Run: qpzcuxqi with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.009851190153139283
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▅▆▆▆▆▆▆▇▇███████
regret,▁

0,1
mean_scores,49.24
regret,40718.75222


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ilxgosb7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.00901774052885355
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇██
regret,▁

0,1
mean_scores,71.17
regret,38796.94878


[34m[1mwandb[0m: Agent Starting Run: nftlpqdn with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.005432454061313299
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▃▃▃▄▄▄▄▄▃▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▆▆▇███
regret,▁

0,1
mean_scores,52.91
regret,40043.60692


[34m[1mwandb[0m: Agent Starting Run: akiovnjt with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0020794679296550754
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇██
regret,▁

0,1
mean_scores,87.07
regret,37788.76714


[34m[1mwandb[0m: Agent Starting Run: f1ovw714 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0050839159884247176
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▅▆▆▇▇██
regret,▁

0,1
mean_scores,145.37
regret,35450.29802


[34m[1mwandb[0m: Agent Starting Run: 722upfol with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0030984107136464907
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▆▆▆▇▇▇█
regret,▁

0,1
mean_scores,94.81
regret,37444.08961


[34m[1mwandb[0m: Agent Starting Run: 3v3q38mp with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.005296169588419341
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
1000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▆▆▆▆▇▇▇▇█████
regret,▁

0,1
mean_scores,56.04
regret,39779.98252


[34m[1mwandb[0m: Agent Starting Run: 89e5vxkd with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.006218524963276204
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▅▅▅▆▇▇█████
regret,▁

0,1
mean_scores,69.47
regret,39076.83346


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: il81u0j9 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.009582724883387832
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

75
100000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▁▂▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▃▄▄▄▅▅▅▅▆▆▆▆▇▇██
regret,▁

0,1
mean_scores,58.11
regret,40233.65105


[34m[1mwandb[0m: Agent Starting Run: tvoykrhf with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0061145072012611995
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

50
100
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇█████
regret,▁

0,1
mean_scores,77.95
regret,37423.08621


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g1kgdc2t with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.006696512034417471
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄▆██▇▇▇▇▇███▇▇▇▆▆▆▅▅▅▄▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.97
regret,44452.17424


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kxjrjka4 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.009492182827210496
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
128
20


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▅▆██▇▇▇████▇▇▇▆▆▆▅▅▄▄▄▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.94
regret,44281.94797


[34m[1mwandb[0m: Agent Starting Run: 8b3zke0y with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.005365033443403923
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

20
1000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▅▅▆▇█
regret,▁

0,1
mean_scores,119.75
regret,38388.92125


[34m[1mwandb[0m: Agent Starting Run: tjkyr9mf with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0008639928126762852
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01128888888876342, max=1.0)…

100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▄▅▅▅▆▆▆▆▇██
regret,▁

0,1
mean_scores,164.18
regret,33100.26699


[34m[1mwandb[0m: Agent Starting Run: hi1ikgrp with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00014791991399687023
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▆▆▇▇▇█
regret,▁

0,1
mean_scores,287.59
regret,29149.74624


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: yth4f0e9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.007983480926704872
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
1000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▅▅▅▅▅▅▇▇█
regret,▁

0,1
mean_scores,77.84
regret,39493.65972


[34m[1mwandb[0m: Agent Starting Run: qz05jci8 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0025394779128268103
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

20
100
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▆▆▇▇▇██
regret,▁

0,1
mean_scores,172.72
regret,32795.10315


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kauhk95m with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.001055025787359861
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▅▅▆▇▇▇███
regret,▁

0,1
mean_scores,94.98
regret,36766.43566


[34m[1mwandb[0m: Agent Starting Run: 7tsxvgnu with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0019536534674092615
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▅▅▆▆▆▆▇▆▇▇▇▇██████████
regret,▁

0,1
mean_scores,74.99
regret,36308.37444


[34m[1mwandb[0m: Agent Starting Run: r35gqgs3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0016516250313500789
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▆▆▆▆▇█████
regret,▁

0,1
mean_scores,119.01
regret,34990.83421


[34m[1mwandb[0m: Agent Starting Run: brh5mj86 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.004255821992998639
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▂▃▂▃▄▄▄▄▅▅▅▅▆▆▆▇▇█
regret,▁

0,1
mean_scores,112.15
regret,38024.38665


[34m[1mwandb[0m: Agent Starting Run: u0igxeje with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.008680349469585019
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▇█
regret,▁

0,1
mean_scores,65.44
regret,39677.38962


[34m[1mwandb[0m: Agent Starting Run: r70gn3c6 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.008918223516626725
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

75
1000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▃▄▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▅▄▄▄▄▃▄▄▄▅▅▆▆▆▆▆▇▇█
regret,▁

0,1
mean_scores,47.81
regret,41020.66509


[34m[1mwandb[0m: Agent Starting Run: nnnio92p with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0037889122187152497
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▃▃▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇██
regret,▁

0,1
mean_scores,59.38
regret,39836.91197


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jwcjjh1r with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.003963548792661359
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
100
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▃▃▅▅▇█▇▇▇▇▆▆▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁
regret,▁

0,1
mean_scores,12.09
regret,44661.17681


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hctkmc1q with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.007440875758733937
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▃▄▃▃▄▄▃▃▃▃▃▃▃▃▃▃▄▄▆▇▇▇▇██
regret,▁

0,1
mean_scores,57.63
regret,40274.35268


[34m[1mwandb[0m: Agent Starting Run: pmihds8y with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.002615683089344257
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▃▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▇▇██
regret,▁

0,1
mean_scores,100.46
regret,36499.70991


[34m[1mwandb[0m: Agent Starting Run: 0o5f0xk5 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.003541477854830792
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▆▆▆▆▆▇███
regret,▁

0,1
mean_scores,125.41
regret,35362.97475


[34m[1mwandb[0m: Agent Starting Run: uc8op6af with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007920438020963882
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇██
regret,▁

0,1
mean_scores,61.33
regret,39189.84334


[34m[1mwandb[0m: Agent Starting Run: 7j64s4i8 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.007098280126336056
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▂▁▁▁▁▁▁▁▂▁▁▂▂▁▁▁▁▁▁▁▁▂▃▃▃▅▅▅▆▆▆▆▇▇███▇▇
regret,▁

0,1
mean_scores,62.67
regret,38454.46001


[34m[1mwandb[0m: Agent Starting Run: sl2yd4om with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00640952245341068
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▅▆▆▆▇▇▇██
regret,▁

0,1
mean_scores,111.15
regret,37841.63106


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: w09qm0b5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.001583902049364988
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
100000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▁▁▁▁▁▁▂▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇█
regret,▁

0,1
mean_scores,80.99
regret,37901.96778


[34m[1mwandb[0m: Agent Starting Run: l41b54bn with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.006288843458512607
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▅▅▅▅▅▅▅▅▅▅▅▆▆▆▇▇███████
regret,▁

0,1
mean_scores,57.81
regret,39416.88693


[34m[1mwandb[0m: Agent Starting Run: xo3ch02u with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0032549964520660517
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▆▆███
regret,▁

0,1
mean_scores,112.72
regret,37719.40524


[34m[1mwandb[0m: Agent Starting Run: gx6kz6hl with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.008121615110489293
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

20
1000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▅▆▆▆▆▆▇▇▇▇██
regret,▁

0,1
mean_scores,104.45
regret,36664.75353


[34m[1mwandb[0m: Agent Starting Run: qzg1myro with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.005618647027881016
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▇▇▇▇▇██
regret,▁

0,1
mean_scores,101.9
regret,36083.80583


[34m[1mwandb[0m: Agent Starting Run: w55mw4pz with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.001534629919099832
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▇▇██
regret,▁

0,1
mean_scores,95.66
regret,37290.29823


[34m[1mwandb[0m: Agent Starting Run: qq7b28og with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.003284795359350802
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▆▆▆▆▇▇▇███
regret,▁

0,1
mean_scores,99.06
regret,36341.05669


[34m[1mwandb[0m: Agent Starting Run: vamw7n0a with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.006292391743825648
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▃▃▄▃▄▄▃▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█████
regret,▁

0,1
mean_scores,44.56
regret,40325.77786


[34m[1mwandb[0m: Agent Starting Run: ekyxfk1v with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.003192795176303071
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

100
100
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇█
regret,▁

0,1
mean_scores,116.91
regret,37096.11258


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qm0eqjzf with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0014929179344533227
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
1000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▃▃▄▄▃▃▃▄▄▄▄▄▄▅▆▆▇▇█
regret,▁

0,1
mean_scores,114.64
regret,36527.88709


[34m[1mwandb[0m: Agent Starting Run: drf74elc with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.008253009613784015
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▃▄▃▄▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇███
regret,▁

0,1
mean_scores,47.9
regret,40450.56829


[34m[1mwandb[0m: Agent Starting Run: v9mgk4yj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.005179611876280867
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▆▆▇▇████
regret,▁

0,1
mean_scores,97.85
regret,37016.58685


[34m[1mwandb[0m: Agent Starting Run: 0x9fwowr with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 2.43609431342868e-05
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄█▆▆▆▆▆▆▆▇▇▆▆▆▆▆▆▆▅▅▅▅▅▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁
regret,▁

0,1
mean_scores,11.65
regret,44438.92762


[34m[1mwandb[0m: Agent Starting Run: qswfxiri with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0048017543889781805
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇████
regret,▁

0,1
mean_scores,89.94
regret,37660.28892


[34m[1mwandb[0m: Agent Starting Run: p56jss2n with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00966827252048157
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
100000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▆▆▇▇█████▇▇▇▇▆▆▆
regret,▁

0,1
mean_scores,33.22
regret,41211.08448


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cr093pfc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0030618954687902464
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▅▅▆▆▆▆▆▆▆▇▇▇███
regret,▁

0,1
mean_scores,114.48
regret,34424.58426


[34m[1mwandb[0m: Agent Starting Run: bolnyrm1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0010968548783854678
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇█
regret,▁

0,1
mean_scores,120.61
regret,33781.12647


[34m[1mwandb[0m: Agent Starting Run: mkng31ev with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0010646016910626122
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
100
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▇▇▇███
regret,▁

0,1
mean_scores,185.18
regret,31801.33567


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jqffha00 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.004957193307173671
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
1000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▅▆▇▇█
regret,▁

0,1
mean_scores,73.12
regret,39112.04864


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9i7qfkvn with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.006560515036593351
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▅▅▅▅▅▅▆▇▇███
regret,▁

0,1
mean_scores,93.41
regret,37612.12609


[34m[1mwandb[0m: Agent Starting Run: fowxi8c8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0023784154266129507
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇██▇▇▇
regret,▁

0,1
mean_scores,91.83
regret,35731.13831


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mrf20bp2 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.004820310532264384
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

20
1000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▅▆▆▆█
regret,▁

0,1
mean_scores,81.26
regret,39070.7099


[34m[1mwandb[0m: Agent Starting Run: 9fc5tmes with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.001854876288047977
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
100000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▆▅▆▇██
regret,▁

0,1
mean_scores,69.52
regret,39128.63144


[34m[1mwandb[0m: Agent Starting Run: hdy0zvpn with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0006624859685558806
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇██
regret,▁

0,1
mean_scores,112.27
regret,33023.35809


[34m[1mwandb[0m: Agent Starting Run: gfzkwwie with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.006842409198330649
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
100
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▄▅▅▅▅▆▆▆▇▇▇▇██▇███
regret,▁

0,1
mean_scores,72.96
regret,37966.75264


[34m[1mwandb[0m: Agent Starting Run: t8233vce with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.004202623758158164
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▅▅▅▅▅▆▆▇▇▇▇█████
regret,▁

0,1
mean_scores,80.93
regret,37288.87708


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ofmj7j2l with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0031332656948000422
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

100
1000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▂▂▂▃▃▃▄▄▄▅▅▆▇██
regret,▁

0,1
mean_scores,79.93
regret,39290.01009


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: iv5h4ryd with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00891628258008152
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇██
regret,▁

0,1
mean_scores,88.32
regret,37406.52176


[34m[1mwandb[0m: Agent Starting Run: 4gutybyc with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.003501625460743185
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

75
1000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▅▅▆▆▇█▇▇▇█
regret,▁

0,1
mean_scores,62.54
regret,39489.65873


[34m[1mwandb[0m: Agent Starting Run: jtj9ncgz with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0007752994657057732
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▅▅▅▆▆▇▇██▇█▇▇▇████
regret,▁

0,1
mean_scores,163.39
regret,27376.72554


[34m[1mwandb[0m: Agent Starting Run: 1ed66i8f with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0020933062837398785
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇█▇██
regret,▁

0,1
mean_scores,89.55
regret,36538.88891


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: odlqzvz8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.001292671966209452
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▆▆▆▇▇██
regret,▁

0,1
mean_scores,145.83
regret,33757.22101


[34m[1mwandb[0m: Agent Starting Run: 4n7pivkj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.002362967591509321
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
100
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▇█▇▇▇▆▇▆▆▆▆▆▆▆▅▆▅▅▄▄▄▄▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁
regret,▁

0,1
mean_scores,12.01
regret,44486.0675


[34m[1mwandb[0m: Agent Starting Run: 4qrujf01 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.009866210985075684
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
100
128
50


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,█▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,12.03
regret,44184.20414


[34m[1mwandb[0m: Agent Starting Run: 9np2k1ty with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0035376908516391425
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▃▃▃▃▃▅▅▅▅▅▅▅▅▅▅▅▅▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇███
regret,▁

0,1
mean_scores,37.68
regret,40922.71636


[34m[1mwandb[0m: Agent Starting Run: h92nwuxv with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.006117960849745429
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▄▇████▇▇▇▇▇▆▆▆▆▆▅▅▄▄▄▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
regret,▁

0,1
mean_scores,12.24
regret,44338.46122


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fztyc6cu with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.006018425525428786
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
1000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▆▆▆▇▇▇▇▇███
regret,▁

0,1
mean_scores,95.83
regret,36941.44156


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fjd9fkcm with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.004028524398097918
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▅▅▄▄▅▅▆▆▇▇███
regret,▁

0,1
mean_scores,75.89
regret,39279.47743


[34m[1mwandb[0m: Agent Starting Run: emiyhwqu with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00017359775196670158
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

75
1000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇██
regret,▁

0,1
mean_scores,228.52
regret,29343.68803


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: s3spw48t with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.008351274692785581
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▂▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▇▇▇█
regret,▁

0,1
mean_scores,93.38
regret,37305.71487


[34m[1mwandb[0m: Agent Starting Run: 4jor92dy with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0004340305311347882
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

50
100000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▆▆▇▇█████
regret,▁

0,1
mean_scores,183.96
regret,29715.86451


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 33yfpol5 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.008787434561636491
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▇▇▇██
regret,▁

0,1
mean_scores,71.92
regret,39402.82157


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gz352tic with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0008833015992669701
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
128
20


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▃▇█▇▇▇▆▆▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,12.07
regret,44411.49895


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 23s8ff1j with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.002846894322813061
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇█████▇▇█
regret,▁

0,1
mean_scores,47.39
regret,40184.05858


[34m[1mwandb[0m: Agent Starting Run: kl2jnpug with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0035418945556648907
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆████
regret,▁

0,1
mean_scores,98.24
regret,36484.86045


[34m[1mwandb[0m: Agent Starting Run: ozstw2k0 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0023886832065822287
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▃▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▆▆▇▇███
regret,▁

0,1
mean_scores,85.53
regret,38259.43385


[34m[1mwandb[0m: Agent Starting Run: 4bw6nddd with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0009094990089538792
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▆▆▆▇▇▇██
regret,▁

0,1
mean_scores,199.9
regret,29892.67326


[34m[1mwandb[0m: Agent Starting Run: tn69cwn2 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.009901148077295788
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▅▅▅▆▆▇▇▇██
regret,▁

0,1
mean_scores,117.12
regret,37100.48517


[34m[1mwandb[0m: Agent Starting Run: 1oz59a13 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0008187839803882974
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▄▄▄▄▄▅▅▆▆▆▇▇▇▇██▇▇▇██
regret,▁

0,1
mean_scores,158.3
regret,29411.71728


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k5aueb8y with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00293478529443532
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
1000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▇██
regret,▁

0,1
mean_scores,59.98
regret,40920.37211


[34m[1mwandb[0m: Agent Starting Run: 7053uopz with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.008233332198605111
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▆▆▇▇▇▇███
regret,▁

0,1
mean_scores,80.75
regret,38541.77444


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fszszy4l with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.005113242885316164
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▇▇▇▇█
regret,▁

0,1
mean_scores,105.13
regret,37300.12726


[34m[1mwandb[0m: Agent Starting Run: 2s2u1xdw with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0024696711620961816
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
128
75


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▅▆▇██▇▇▇▇▇▇▇▆▆▆▆▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,11.84
regret,44054.69207


[34m[1mwandb[0m: Agent Starting Run: 2tg8a9d4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.008887722179427071
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▂▂▂▂▃▃▃▃▃▃▄▄▅▅▅▆▇██
regret,▁

0,1
mean_scores,86.64
regret,39275.5336


[34m[1mwandb[0m: Agent Starting Run: 3tn12n77 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00709332155868664
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▇▇▇▇▇██▇▇█▇█████▇
regret,▁

0,1
mean_scores,34.47
regret,41491.11699


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ie03rx0x with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0033408144611094463
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▇▇██
regret,▁

0,1
mean_scores,101.49
regret,36733.64645


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: oemoa7r6 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.004054070224967912
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
100
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▇█
regret,▁

0,1
mean_scores,122.12
regret,36539.79515


[34m[1mwandb[0m: Agent Starting Run: cr6kzhky with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00673844563485382
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

20
100000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▆▇▇███
regret,▁

0,1
mean_scores,123.94
regret,37208.9861


[34m[1mwandb[0m: Agent Starting Run: 64z00d58 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.008766197304890271
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

20
100
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▃▄▄▄▄▄▄▄▄▄▄▄▄▅▅▅▆▆▆▇█
regret,▁

0,1
mean_scores,94.19
regret,38442.34627


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vlt6w9xp with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00015210237012853992
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
128
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
regret,▁

0,1
mean_scores,218.52
regret,27957.72048


[34m[1mwandb[0m: Agent Starting Run: bz1x8lmw with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0018632408464163523
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▆▆▆▇▇▇██
regret,▁

0,1
mean_scores,54.13
regret,38914.62226


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: se1r8oup with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.008054392824892408
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
32
100
