In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import gym
import numpy as np
import random
from collections import namedtuple, deque
import wandb

GAMMA=0.99

class QNetwork1(nn.Module):
    def __init__(self, state_size, action_size, seed, fc1_units=128, fc2_units=64):
        super(QNetwork1, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(state_size, fc1_units)
        self.fc2 = nn.Linear(fc1_units, fc2_units)
        self.fc_advantage = nn.Linear(fc2_units, action_size)
        self.fc_value = nn.Linear(fc2_units, 1)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        value = self.fc_value(x)
        advantage = self.fc_advantage(x)
        Q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))
        return Q_values
    
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class ReplayBuffer:
    def __init__(self, action_size, buffer_size, batch_size, seed):
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
        self.seed = random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        e = self.experience(state, action, reward, next_state, done)
        self.memory.append(e)

    def sample(self):
        experiences = random.sample(self.memory, k=self.batch_size)
        states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(device)
        actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).long().to(device)
        rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(device)
        next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(device)
        dones = torch.from_numpy(np.vstack([e.done for e in experiences if e is not None]).astype(np.uint8)).float().to(device)
        return (states, actions, rewards, next_states, dones)

    def __len__(self):
        return len(self.memory)

class TutorialAgent1:
    def __init__(self, state_size, action_size, seed, lr, update_every, buffer_size, batch_size):
        self.state_size = state_size
        print(update_every)
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.qnetwork_local = QNetwork1(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork1(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=lr)
        self.memory = ReplayBuffer(action_size, buffer_size, batch_size, seed)
        self.t_step = 0
        self.update_every = update_every
        self.batch_size = batch_size

    def step(self, state, action, reward, next_state, done):
        self.memory.add(state, action, reward, next_state, done)
        if len(self.memory) >= self.batch_size:
            experiences = self.memory.sample()
            self.learn(experiences, GAMMA)
        self.t_step = (self.t_step + 1) % self.update_every
        if self.t_step == 0:
            self.qnetwork_target.load_state_dict(self.qnetwork_local.state_dict())

    def act(self, state, eps=0.):
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        self.qnetwork_local.eval()
        with torch.no_grad():
            action_values = self.qnetwork_local(state)
        self.qnetwork_local.train()
        if random.random() > eps:
            return np.argmax(action_values.cpu().data.numpy())
        else:
            return random.choice(np.arange(self.action_size))

    def learn(self, experiences, gamma):
        states, actions, rewards, next_states, dones = experiences
        Q_targets_next = self.qnetwork_target(next_states)
        next_state_values = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(1)
        Q_targets = rewards + (gamma * next_state_values * (1 - dones))
        Q_expected = self.qnetwork_local(states).gather(1, actions)
        loss = F.mse_loss(Q_expected, Q_targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
import matplotlib.pyplot as plt
def dqn(agent, env, n_episodes=10000, max_t=500, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    scores_window = deque(maxlen=100)
    all_scores = np.array([])
    moving_avg_scores = np.array([])
    eps = eps_start
    for i_episode in range(1, n_episodes+1):
        state,_ = env.reset()
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            next_state, reward, done, truncated,_ = env.step(action)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done or truncated:
                break
        scores_window.append(score)
        all_scores = np.append(all_scores, score)
        eps = max(eps_end, eps_decay * eps)
        moving_avg_scores = np.append(moving_avg_scores, np.mean(scores_window))
        if i_episode == 250:
            break
    # plt.plot(np.arange(len(moving_avg_scores)), moving_avg_scores)
    # plt.ylabel('Moving Average Scores')
    # plt.xlabel('Episode #')
    # plt.title('Moving Average Scores over Episodes')
    # plt.show()
    return moving_avg_scores, True

def train_agent(lr, update_every, buffer_size, batch_size):
    env = gym.make('Acrobot-v1')
    print(update_every)
    print(buffer_size)
    print(batch_size)
    state_shape = env.observation_space.shape[0]
    action_shape = env.action_space.n
    agent = TutorialAgent1(state_size=state_shape, action_size=action_shape, seed=0, lr=lr,
                           update_every=update_every, buffer_size=buffer_size, batch_size=batch_size)
    all_scores_1, _ = dqn(agent, env)
    regret = -np.sum(all_scores_1)
    return regret,all_scores_1

def run_training():
    config_defaults = {
        "lr": 5e-4,
        "update_every": 50,
        "buffer_size": 1e5,
        "batch_size": 64
    }
    config = wandb.init(config=config_defaults, project="Acrobot_ddqn_mean")
    lr = config.config["lr"]
    update_every = config.config['update_every']
    batch_size = config.config["batch_size"]
    buffer_size = config.config["buffer_size"]
    regret,all_scores= train_agent(lr, update_every, buffer_size, batch_size)
    for i in all_scores:
        wandb.log({"mean_scores":i})
    wandb.log({"regret": regret})

sweep_config = {
    "method": "bayes",
    "metric": {"name": "regret", "goal": "minimize"},
    "parameters": {
        "lr": {"min": 1e-5, "max": 1e-2},
        "update_every": {"values": [20, 50, 75, 100]},
        "buffer_size": {"values": [ 1e2,1e3, 1e5]},
        "batch_size": {"values": [32, 64, 128]}
    },
    "project": "Acrobot_ddqn_mean",
    "early_terminate": {
        "type": "hyperband",
        "min_iter": 3,
        "max_iter": 100
    }
}

sweep_id = wandb.sweep(sweep_config)
wandb.agent(sweep_id, function=run_training)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: i9ujsbuw
Sweep URL: https://wandb.ai/rl_shobhith/Acrobot_ddqn_mean/sweeps/i9ujsbuw


[34m[1mwandb[0m: Agent Starting Run: vi6koe58 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0028189111457887607
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mshobhith-v[0m ([33mrl_shobhith[0m). Use [1m`wandb login --relogin`[0m to force relogin


50
100
128
50


  if not isinstance(terminated, (bool, np.bool8)):


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▄▃▃▃▃▂▄▅▅▆▆▆▆▇▇▇▇▇▇███▆█▇▇▆▆▇▅▅▅▅▆▆
regret,▁

0,1
mean_scores,-490.53
regret,122437.67452


[34m[1mwandb[0m: Agent Starting Run: 5hgqxvem with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.009213925723640798
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇█████████
regret,▁

0,1
mean_scores,-143.7
regret,74595.27575


[34m[1mwandb[0m: Agent Starting Run: kekyw6pv with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0016944582926936038
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▄▄▄▄▄▄▅▆▆▆▅▅▇▇██▇▇▇▆▆▆▇█████▆
regret,▁

0,1
mean_scores,-493.02
regret,123640.11825


[34m[1mwandb[0m: Agent Starting Run: pv7de6tf with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.009945687592281422
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
mean_scores,-500.0
regret,125000.0


[34m[1mwandb[0m: Agent Starting Run: 1xytlcu1 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00655876532826482
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇███
regret,▁

0,1
mean_scores,-196.71
regret,96600.13121


[34m[1mwandb[0m: Agent Starting Run: 7sc4x7hr with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.004483833179064859
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇▇███████
regret,▁

0,1
mean_scores,-188.48
regret,94751.67591


[34m[1mwandb[0m: Agent Starting Run: jg60fwnz with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.004173730924433378
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇▇▇▇▇███████
regret,▁

0,1
mean_scores,-139.57
regret,83551.11219


[34m[1mwandb[0m: Agent Starting Run: yi630xmp with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0003318367851917449
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅▅▅▆▆▆▇▇▇▇▇▇█████████
regret,▁

0,1
mean_scores,-146.1
regret,79398.69845


[34m[1mwandb[0m: Agent Starting Run: gfxy6u5g with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.003389793954061795
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇█████████
regret,▁

0,1
mean_scores,-153.71
regret,76968.3779


[34m[1mwandb[0m: Agent Starting Run: 5lr402yk with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0061376405367062955
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▃▃▃▃▄▄▅▅▆▆▆▇▇▇▇▇▇███████
regret,▁

0,1
mean_scores,-214.09
regret,91322.2466


[34m[1mwandb[0m: Agent Starting Run: ltws1dg3 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.007982360255409125
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅█
regret,▁

0,1
mean_scores,-491.55
regret,124894.75


[34m[1mwandb[0m: Agent Starting Run: lazkfisn with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.006990756927103932
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▆▆▅▅▅▅▅▆▇▇▇██▇█▇▇▇▇
regret,▁

0,1
mean_scores,-488.35
regret,123216.92101


[34m[1mwandb[0m: Agent Starting Run: g8xm626q with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.001874508892749604
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

50
100
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇███████
regret,▁

0,1
mean_scores,-182.43
regret,90441.38746


[34m[1mwandb[0m: Agent Starting Run: edz7oesr with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0030857495022028622
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▃▃▃▂▃▃▃▄▄▅▄▅▇▇▇▇▇▇▇▇▇▇█▇▇▇▆▇▅▅▅▅▆▆
regret,▁

0,1
mean_scores,-491.8
regret,123326.77725


[34m[1mwandb[0m: Agent Starting Run: j4hxd7r8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.003404885964008001
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██████
regret,▁

0,1
mean_scores,-129.57
regret,74398.42769


[34m[1mwandb[0m: Agent Starting Run: nzzxw1g1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.002536372433314217
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇█████████
regret,▁

0,1
mean_scores,-129.56
regret,72791.00153


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3f0acjuz with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00876309735043097
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
100000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇████████
regret,▁

0,1
mean_scores,-140.42
regret,74567.10126


[34m[1mwandb[0m: Agent Starting Run: e4rcyucy with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.002528163608939952
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▂▂▂▃▃▃▃▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇██████
regret,▁

0,1
mean_scores,-130.27
regret,77071.79651


[34m[1mwandb[0m: Agent Starting Run: 0zchzh42 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.002908152709373024
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

20
1000
32
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇▇▇████████
regret,▁

0,1
mean_scores,-146.26
regret,85440.40277


[34m[1mwandb[0m: Agent Starting Run: tvvxv1q6 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.006965966966823368
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████████
regret,▁

0,1
mean_scores,-141.46
regret,74768.63005


[34m[1mwandb[0m: Agent Starting Run: c5d0ecnv with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007084366298357717
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇█████████
regret,▁

0,1
mean_scores,-128.48
regret,71737.31053


[34m[1mwandb[0m: Agent Starting Run: kvsf643m with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.004338641535146351
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▃▄▄▄▄▅▅▆▆▇▇███
regret,▁

0,1
mean_scores,-315.43
regret,113905.21


[34m[1mwandb[0m: Agent Starting Run: rcwi2qs5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.005218344610691144
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇███████████
regret,▁

0,1
mean_scores,-154.32
regret,73936.22523


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1j7zbnpn with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.007290264735914558
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇██████████
regret,▁

0,1
mean_scores,-180.8
regret,80497.27505


[34m[1mwandb[0m: Agent Starting Run: l0cgn654 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.002148018017201482
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

100
100000
32
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇▇████████
regret,▁

0,1
mean_scores,-139.83
regret,75537.64342


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sr040wcg with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007474459610234559
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

100
100000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇█████
regret,▁

0,1
mean_scores,-140.69
regret,98389.82968


[34m[1mwandb[0m: Agent Starting Run: dl9o4n8h with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0022080847970562343
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

20
100000
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▂▂▂▃▃▃▃▃▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇█████████
regret,▁

0,1
mean_scores,-127.98
regret,74962.35036


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zq8cm87o with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.007355837620433925
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

100
100
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▃▄▄▅▅▆▆▇███▇▇▇▇
regret,▁

0,1
mean_scores,-370.54
regret,112512.94912


[34m[1mwandb[0m: Agent Starting Run: jfhpv1ab with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.003576562772063777
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇█████████
regret,▁

0,1
mean_scores,-127.45
regret,72654.63306


[34m[1mwandb[0m: Agent Starting Run: 7y2garas with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.007159797517929318
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇████████
regret,▁

0,1
mean_scores,-136.0
regret,73795.36706


[34m[1mwandb[0m: Agent Starting Run: u5zko85b with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0005454814642580215
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100
64
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇██████
regret,▁

0,1
mean_scores,-151.41
regret,79026.01519


[34m[1mwandb[0m: Agent Starting Run: ziru33fc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.004823320797416782
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇▇▇▇▇██████
regret,▁

0,1
mean_scores,-196.72
regret,91958.17654


[34m[1mwandb[0m: Agent Starting Run: vk9y8vcx with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.003802541418292805
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

75
100000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▄▅▅▅▆▆▇▇▇▇▇███████
regret,▁

0,1
mean_scores,-154.93
regret,90159.79322


[34m[1mwandb[0m: Agent Starting Run: ue8h1266 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.003865588527510987
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

50
100000
64
50


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23135931267259896, max=1.…

0,1
mean_scores,▁▁▁▁▁▁▁▂▂▂▃▃▃▄▄▄▄▄▅▅▆▆▆▇▇▇▇▇▇▇▇█████████
regret,▁

0,1
mean_scores,-135.03
regret,73666.93332


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d3iqyq0w with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.00974283199013514
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
100000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇▇███████
regret,▁

0,1
mean_scores,-144.36
regret,90019.76489


[34m[1mwandb[0m: Agent Starting Run: hm2i9jqw with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.009400022700866636
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▄▅▅▆▆▇▇██
regret,▁

0,1
mean_scores,-190.93
regret,110047.17


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c4is9cd9 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0005973748633163494
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
100000
32
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇████████
regret,▁

0,1
mean_scores,-124.22
regret,72005.48982


[34m[1mwandb[0m: Agent Starting Run: 9jdwj6i6 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0072481585646986015
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▃▄▄▅▅▆▆▆▇▇███████
regret,▁

0,1
mean_scores,-152.55
regret,95018.8228


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 539ovgph with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.007556394995580481
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
1000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▂▂▂▃▃▃▄▄▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇████████
regret,▁

0,1
mean_scores,-161.95
regret,78055.39682


[34m[1mwandb[0m: Agent Starting Run: 53iab7hm with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.00019802534864585245
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

75
1000
64
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▂▂▂▃▃▃▄▄▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇████████
regret,▁

0,1
mean_scores,-128.9
regret,71669.81768


[34m[1mwandb[0m: Agent Starting Run: 7375cl12 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.008945805520328112
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
32
50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇████
regret,▁

0,1
mean_scores,-211.22
regret,105335.95295


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8ln9t88i with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.0038559968503223897
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100
1000
128
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇▇█████████
regret,▁

0,1
mean_scores,-142.85
regret,72985.40341


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5os213ho with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0007849064462111682
[34m[1mwandb[0m: 	update_every: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

100
100000
64
100


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇▇████████
regret,▁

0,1
mean_scores,-127.9
regret,74758.09559


[34m[1mwandb[0m: Agent Starting Run: adg3o8v3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	buffer_size: 1000
[34m[1mwandb[0m: 	lr: 0.002680687004135181
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
1000
64
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇█████████
regret,▁

0,1
mean_scores,-149.91
regret,76970.72313


[34m[1mwandb[0m: Agent Starting Run: ig7klcpt with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100
[34m[1mwandb[0m: 	lr: 0.0004350150225981163
[34m[1mwandb[0m: 	update_every: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


20
100
128
20


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,█▂▂▁▁▃▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▃▃▃▃▃▃▃▄▄▄▄▃▃
regret,▁

0,1
mean_scores,-492.23
regret,123152.83227


[34m[1mwandb[0m: Agent Starting Run: evjfuijp with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.008762546727525834
[34m[1mwandb[0m: 	update_every: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


75
100000
128
75


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
mean_scores,▁▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇████████
regret,▁

0,1
mean_scores,-143.3
regret,78451.20747


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sar5pp7e with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	buffer_size: 100000
[34m[1mwandb[0m: 	lr: 0.0049343198011501145
[34m[1mwandb[0m: 	update_every: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


50
100000
128
50
