In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import gym
import wandb
import numpy as np
from tqdm import tqdm

class Policy(nn.Module):
    def __init__(self, state_dim, hidden_dim, action_dim):
        super(Policy, self).__init__()
        self.fc1 = torch.nn.Linear(state_dim, hidden_dim)
        self.fc2 = torch.nn.Linear(hidden_dim, action_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return F.softmax(self.fc2(x), dim=1)

class Value(nn.Module):
    def __init__(self, observation_space):
        super(Value, self).__init__()
        self.input_layer = nn.Linear(observation_space, 128)
        self.output_layer = nn.Linear(128, 1)

    def forward(self, x):
        x = self.input_layer(x)
        x = F.relu(x)
        state_value = self.output_layer(x)
        return state_value

class REINFORCE:
    def __init__(self, state_dim, hidden_dim, action_dim, learning_rate, gamma,
                 device, optimizer_type, v_lr):
        self.policy_net = Policy(state_dim, hidden_dim, action_dim).to(device)
        self.value_net = Value(state_dim).to(device)
        self.gamma = gamma
        self.device = device
        self.v_lr = v_lr

        if optimizer_type == 'adam':
            self.optimizer = torch.optim.Adam(self.policy_net.parameters(),
                                              lr=learning_rate)
            self.voptimizer = torch.optim.Adam(self.value_net.parameters(),
                                               lr=self.v_lr)
        elif optimizer_type == 'sgd':
            self.optimizer = torch.optim.SGD(self.policy_net.parameters(),
                                             lr=learning_rate)
            self.voptimizer = torch.optim.SGD(self.value_net.parameters(),
                                              lr=self.v_lr)
        else:
            raise ValueError("Unsupported optimizer type")

    def take_action(self, state):
        state = torch.tensor(state.reshape(1, -1), dtype=torch.float).to(self.device) 
        probs = self.policy_net(state)
        action_dist = torch.distributions.Categorical(probs) 
        action = action_dist.sample()
        return action.item()

    def update(self, transition_dict):
        reward_list = transition_dict['rewards']
        state_list = transition_dict['states']
        action_list = transition_dict['actions']

        self.optimizer.zero_grad()
        self.voptimizer.zero_grad()
        
        for i in range(len(reward_list)):  # Iterate over transitions
            reward = reward_list[i]
            state = torch.tensor(state_list[i].reshape(1, -1),
                                dtype=torch.float).to(self.device)
            action = torch.tensor(action_list[i]).view(-1, 1).to(self.device)
            
            # Calculate next state value
            next_state = torch.tensor(state_list[i+1].reshape(1, -1),
                                    dtype=torch.float).to(self.device) if i < len(reward_list) - 1 else None
            next_state_value = self.value_net(next_state) if next_state is not None else torch.tensor([[0]], dtype=torch.float).to(self.device)
            
            # Calculate target value for the value network (TD(0) update)
            target_value = reward + self.gamma * next_state_value
            
            # Get current state value
            state_value = self.value_net(state)

            # Value loss
            value_loss = F.mse_loss(state_value, target_value)

            # Accumulate gradients for value network
            value_loss.backward(retain_graph=True)

        # Perform optimization steps for both networks
        self.optimizer.step()
        self.voptimizer.step()

# agent params
gamma = 0.99
num_pbar = 10
num_episodes = 300
num_seeds = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env_name = "Acrobot-v1"

# Define the sweep configuration
sweep_config = {
    "method": "bayes",
    "metric": {"name": "regret", "goal": "minimize"},
    "parameters": {
        "learning_rate": {"min": 1e-5, "max": 1e-3},
        "v_lr": {"min": 1e-4, "max": 1e-2},
        "hidden_dim": {"values": [32,64, 128, 256]},
        "optimizer_type": {"values": ["adam", "sgd"]}
    },
    "project": "acrobot_with_baseline",
    "early_terminate": {
        "type": "hyperband",
        "min_iter": 3,
        "max_iter": 100
    }
}
# Initialize the sweep
sweep_id = wandb.sweep(sweep_config)

def train(learning_rate, v_lr, hidden_dim, optimizer_type):
    env = gym.make(env_name)
    env.reset(seed=0)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    agent = REINFORCE(state_dim, hidden_dim, action_dim, learning_rate, gamma,
                      device, optimizer_type, v_lr)
    return_list = []
    for i_episode in range(num_episodes):
        episode_return = 0
        transition_dict = {
            "states": [],
            "actions": [],
            "next_states": [],
            "rewards": [],
            "dones": []
        }
        state, _ = env.reset()
        terminated, truncated = False, False
        while not terminated and not truncated:
            action = agent.take_action(state)
            next_state, reward, terminated, truncated, _ = env.step(action)
            transition_dict["states"].append(state)
            transition_dict["actions"].append(action)
            transition_dict["next_states"].append(next_state)
            transition_dict["rewards"].append(reward)
            transition_dict["dones"].append(terminated)
            state = next_state
            episode_return += reward
        return_list.append(episode_return)
        agent.update(transition_dict)

    avg = []
    for i in range(num_episodes):
        if i < 100:
            avg_return = np.mean(return_list[:i+1])
        else:
            avg_return = np.mean(return_list[i-100:i+1])
        wandb.log({"avg_scores": avg_return})
        avg.append(avg_return)

    regret = np.sum(avg)

    return -regret

def run_training():
    config_defaults = {
        "learning_rate": 1e-3,
        "v_lr": 0.005,
        "hidden_dim": 128,
        "optimizer_type": "adam"
    }
    config = wandb.init(config=config_defaults)
    learning_rate = config.config["learning_rate"]
    v_lr = config.config['v_lr']
    hidden_dim = config.config["hidden_dim"]
    optimizer_type = config.config["optimizer_type"]
    regret = train(learning_rate, v_lr, hidden_dim, optimizer_type)
    wandb.log({"regret": regret})

# Run the sweep
wandb.agent(sweep_id, function=run_training)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: jxrppr7j
Sweep URL: https://wandb.ai/rl_shobhith/acrobot_with_baseline_td0/sweeps/jxrppr7j


[34m[1mwandb[0m: Agent Starting Run: q8w92x6d with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0007002305629469535
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.008147603237404374
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mshobhith-v[0m ([33mrl_shobhith[0m). Use [1m`wandb login --relogin`[0m to force relogin


  if not isinstance(terminated, (bool, np.bool8)):


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▇▁▆▆█▅▃▅▅▆▅▅▅▅▄▄▅▅▆▇▇▅▆▇▇▇███▇▆▇▇▇▇▇▆▆▆▆
regret,▁

0,1
avg_scores,-378.24752
regret,113383.97091


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bnr39763 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0007120269350918952
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.008733320082496818
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁██████████
regret,▁

0,1
avg_scores,-499.35644
regret,149951.08911


[34m[1mwandb[0m: Agent Starting Run: 2rcdzy8t with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0006864281687371852
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.0024386535209900023
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█████████████▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,149941.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 23xqxp0m with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0006323788897782303
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.00499084309748483
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19359850839030454, max=1.…

0,1
avg_scores,▆▇▆▆▅▇█▆▇▅▅▅▅▅▄▃▃▃▁▁▂▂▂▂▂▁▂▂▃▄▃▅▇▇█▅▇██▇
regret,▁

0,1
avg_scores,-454.50495
regret,137869.57951


[34m[1mwandb[0m: Agent Starting Run: yqg1y1nq with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0003224857084934717
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.009650604813243204
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pwft6x6w with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0007461654248531522
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.0029313137658310098
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19372378437160168, max=1.…

0,1
avg_scores,▃▅▂▂▁▄▃▂▂▁▂▂▂▂▃▃▃▄▄▄▅▅▆▆▆▆▇▆▇▇█▇▇▆▆▇▆▆▆▄
regret,▁

0,1
avg_scores,-476.59406
regret,143013.79163


[34m[1mwandb[0m: Agent Starting Run: 37pf0jfj with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008932896044875784
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.008570566083803452
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▇▂▅▄▄▃▅▅▆▆▆▆▆▆▆▆▆▆▇█▆▇▇▇▇▇▇▆▆▅▅▅▄▄▄▅▅▄▄
regret,▁

0,1
avg_scores,-426.62376
regret,125918.6266


[34m[1mwandb[0m: Agent Starting Run: fwm9ece7 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006488842859180981
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.007436332614543275
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,█▂▃▅▅▄▃▁▁▂▂▂▁▂▂▁▂▂▂▃▄▅▅▅▅▆▆▆▆▅▅▄▄▄▃▄▄▄▄▃
regret,▁

0,1
avg_scores,-370.06931
regret,110417.71857


[34m[1mwandb[0m: Agent Starting Run: mymbqx0g with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006207838677259286
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.007809631522223578
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19904229848363927, max=1.…

0,1
avg_scores,█▁▃▃▂▂▁▂▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▆▅▆▆▆▆▅▅▅▄▄▂▂
regret,▁

0,1
avg_scores,-451.10891
regret,131859.80766


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xzid5gae with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001407959143827823
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.008632543896729463
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▂█▁▄▄▃▄▄▅▄▄▄▄▄▄▅▆▆▆▅▆▇▆▇▇▇▇▇▇▅▅▆▅▅▄▄▄▄▅▅
regret,▁

0,1
avg_scores,-364.12871
regret,109397.62966


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7fbnscqa with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0005526895115906171
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.0012512939932745268
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁█▆▅▅▆▅▆▆▆▅▅▅▅▅▅▅▅▅▅▅▅▄▄▄▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆
regret,▁

0,1
avg_scores,-458.29703
regret,140134.68882


[34m[1mwandb[0m: Agent Starting Run: amq3f06k with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00011953254412546264
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.0013718007161361582
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19894703254626675, max=1.…

0,1
avg_scores,▆█▄▂▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▃▃▄▄▃▃▃▃
regret,▁

0,1
avg_scores,-425.9604
regret,127540.68107


[34m[1mwandb[0m: Agent Starting Run: e77w1r57 with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0007983805109329486
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.007782777667826105
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁▁▁▆▅█▇▆▆▅▅▅▄▄▄▆▅▅▆▅▅▅▅▅▅▆▆▆▅▅▄▄▂▂▂▂▂▄▄
regret,▁

0,1
avg_scores,-496.50495
regret,148623.04855


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nus0ajq4 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001072859897259602
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.0039585174060560065
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.194357855361596, max=1.0)…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: dc75wrvz with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005058585152369206
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.0021799099090512863
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01128888888876342, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19438815276695245, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: j8sawl4u with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0003860930362570451
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.009956619344493168
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19381411252719927, max=1.…

0,1
avg_scores,▁▁██▆▅▅▅▆▇▇▆▇▆▇▆▆▆▆▅▅▄▄▃▃▂▂▂▂▂▂▄▄▅▅▆▅▅▅▅
regret,▁

0,1
avg_scores,-493.0099
regret,148178.89103


[34m[1mwandb[0m: Agent Starting Run: 82i1r6i4 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008639502555093318
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.006797794272424045
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19438815276695245, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 51ozravx with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0008991175425529412
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.004124063559696455
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19369369369369369, max=1.…

0,1
avg_scores,▂▃▁▃▂▂▃▄▄▄▄▄▅▅▄▄▄▅▆▆▆▆▆▇█▇███▇█▇▇▇▇▆▇▇▆▅
regret,▁

0,1
avg_scores,-485.58416
regret,145631.38098


[34m[1mwandb[0m: Agent Starting Run: bcvg96je with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0007995460849672135
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.0004567390165696663
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▇█▆▆▅▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▅▅▄▅▅▅▆▅▅▆▆▆▆▆▆▆▆
regret,▁

0,1
avg_scores,-489.08911
regret,146967.37924


[34m[1mwandb[0m: Agent Starting Run: v29gpbcx with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0005396690409485756
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.003679053995486746
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19378399378399377, max=1.…

0,1
avg_scores,▁█▇▅▄▄▄▄▄▄▅▄▅▄▄▄▅▅▅▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▄▄▃▄▃▃
regret,▁

0,1
avg_scores,-455.42574
regret,134365.11847


[34m[1mwandb[0m: Agent Starting Run: o5z1kz60 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0007275524804056399
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.008567801595157104
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1937538844002486, max=1.0…

0,1
avg_scores,▁▁▁▁▁▁█▇▆██▇▇▆▆▆▆▇▇▃▄▄▄▂▂▂▂▂▂▂▂▂▂▄▄▄▄▃▃▃
regret,▁

0,1
avg_scores,-498.20792
regret,149314.60334


[34m[1mwandb[0m: Agent Starting Run: 40dg4iyc with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0003183055625297829
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.009003732563859957
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19904229848363927, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 7rahhze2 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0008607614254708846
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.00876015633497032
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19453978159126364, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 2bie2z3n with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 9.276705843091504e-05
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.001117216443962356
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19365868821883742, max=1.…

0,1
avg_scores,▁▇▆█▇▇█▇█████████████▇▇▇▇▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆
regret,▁

0,1
avg_scores,-464.21782
regret,137456.2479


[34m[1mwandb[0m: Agent Starting Run: vhwaj73k with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00033815589041512616
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.009063236962740945
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19366361236216803, max=1.…

0,1
avg_scores,▁▄▇▇▆▇▇▇█▇▇▇▇▇▇▇▇██▇▇▇▇██████▇▇▇▇▇▇▇▇▇▇▇
regret,▁

0,1
avg_scores,-326.54455
regret,98327.72933


[34m[1mwandb[0m: Agent Starting Run: 77m9tsmo with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00033020790845325815
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.002081931456299344
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19904229848363927, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: b9hlnjaq with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0007498729394833997
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.004360008747492271
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 1l5124g7 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 4.698266422271527e-05
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.00800015873660566
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19907407407407407, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 8nyu0dw3 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.000751539755691872
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.005357312024684903
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1991058598115919, max=1.0…

0,1
avg_scores,▁▆▆▇▄▄▃▂▂▂▃▄▃▄▄▄▄▅▆▆▇███▆▇▇▇▇▆▅▆▅▅▅▅▆█▇█
regret,▁

0,1
avg_scores,-457.56436
regret,139839.08479


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1q4knu2t with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 4.617214441857386e-05
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.0038264907580693136
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,█▃▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▂▃▃▃▃▄▄▄▃▃▃▃▃▃▃▃▃▃▂▂
regret,▁

0,1
avg_scores,-366.29703
regret,108556.84257


[34m[1mwandb[0m: Agent Starting Run: ewi2b8j7 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006296103455289107
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.00998123696587658
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19369369369369369, max=1.…

0,1
avg_scores,▁▁▁▁▂▂▃▃▃▂▂▂▄▄▄▅▅▅▄▄▄▄▄▄▄▄▂▃▂▄▅▅▅▅▇█████
regret,▁

0,1
avg_scores,-496.46535
regret,149542.23672


[34m[1mwandb[0m: Agent Starting Run: vp9uwpif with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00014616181421122948
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.002793026273804274
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.194357855361596, max=1.0)…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4apqyog5 with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0004816355151504588
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.007756391931924099
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19907407407407407, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 8t70ttnm with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00020919580975621552
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.0020586750863492214
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1989787777245891, max=1.0…

0,1
avg_scores,▁▁█▆▅▆▅▅▅▅▅▆▅▅▅▄▄▄▄▄▄▄▃▃▃▃▄▃▃▃▄▄▄▄▅▅▇▅▆▅
regret,▁

0,1
avg_scores,-497.07921
regret,149267.9951


[34m[1mwandb[0m: Agent Starting Run: ykwizxwk with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0007137064893214592
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.004076056619201766
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19438815276695245, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: yurhk03m with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0007446825340731723
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.008416266784862273
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19907407407407407, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: wnnws1kf with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0003745020038301387
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.007578173330376824
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1990105330354293, max=1.0…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: dmrbntjs with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005754843127009831
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.0012634731211569098
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19366361236216803, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄▄███████████▅
regret,▁

0,1
avg_scores,-499.70297
regret,149956.75248


[34m[1mwandb[0m: Agent Starting Run: d3ar1fbw with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005262586266752514
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.0035318360459026055
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1989787777245891, max=1.0…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 9k85ihkq with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0007884689787944072
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.007803267194509761
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19907407407407407, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁█████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,149938.86876


[34m[1mwandb[0m: Agent Starting Run: 9poo1qcx with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0007216210673666851
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.005092184788864998
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19441845961958215, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: nduzyk3i with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000732353030897374
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.006085010925009025
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19904229848363927, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gdnxnuqw with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000278718437145008
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.004379046118587397
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 8duh8w6n with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00033634278472444645
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.004475142679914155
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19432756739909615, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: m7pqetw1 with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0006370572800646154
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.00764756365007584
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19378399378399377, max=1.…

0,1
avg_scores,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▁▁▁▁▁▂▂▂▂▂▁▁▁▂
regret,▁

0,1
avg_scores,-493.85149
regret,147889.30538


[34m[1mwandb[0m: Agent Starting Run: gp3jyoab with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 8.582385349576916e-05
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.005529940174794339
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19411581569115816, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁█▇▇▇▇▇▇▇▇▇▇▇▇▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,149946.88545


[34m[1mwandb[0m: Agent Starting Run: vk280fxm with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0007625746234305957
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.004615586446963871
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19384424063422975, max=1.…

0,1
avg_scores,███▅▃▄▃▃▂▂▂▂▂▃▂▁▁▁▁▂▂▂▃▃▃▃▂▃▃▃▄▃▃▃▂▂▂▃▃▄
regret,▁

0,1
avg_scores,-310.87129
regret,94480.57495


[34m[1mwandb[0m: Agent Starting Run: n00ldr3g with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0007938449906321044
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.0009134057762326588
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1990105330354293, max=1.0…

0,1
avg_scores,▃▁▁▁▁▁▂▂▂▂▃▃▃▄▅▆▆▆▆▆▆▇▇▆▆▆▆▄▄▅▄▅▅▅▅▆▇▇█▇
regret,▁

0,1
avg_scores,-461.18812
regret,142174.77425


[34m[1mwandb[0m: Agent Starting Run: kx7vfup0 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008614481942348435
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.007521082099028074
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1937538844002486, max=1.0…

0,1
avg_scores,▁▁▁▁▁▁▇▆▅▇▆▆▅▅▅████▅▅▅▄▅▅▅▅▅▂▃▄▅▅▆▆▆▅▅▅▅
regret,▁

0,1
avg_scores,-496.59406
regret,148993.02693


[34m[1mwandb[0m: Agent Starting Run: 31zcancz with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0008195396540721597
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.007018972088549681
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded\r'), FloatProgress(value=0.6238119059529765, max=1.0…

0,1
avg_scores,▁▁▁▇▅▄▅▄▄▄▄▃▃▃▃▄▃▃▃▃▃▃▃▃▄▅▅▅▄▄▄▄▄▆▇██▇▆▇
regret,▁

0,1
avg_scores,-494.30693
regret,148996.66391


[34m[1mwandb[0m: Agent Starting Run: m9a2alve with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001170453540925848
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.004151848829433186
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1937538844002486, max=1.0…

0,1
avg_scores,▅█▄▂▁▁▁▁▂▂▂▁▂▂▂▁▂▃▃▃▄▃▃▃▄▄▃▃▃▂▂▂▂▁▂▃▃▃▃▃
regret,▁

0,1
avg_scores,-474.75248
regret,143019.9219


[34m[1mwandb[0m: Agent Starting Run: 4rc9qxhc with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0009334664975148778
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.008379338306475066
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19366361236216803, max=1.…

0,1
avg_scores,▄█▆▃▄▃▄▄▃▃▂▂▂▃▃▃▄▃▃▃▃▃▃▄▄▅▄▃▃▂▂▃▃▃▃▃▂▂▁▁
regret,▁

0,1
avg_scores,-479.54455
regret,140860.51161


[34m[1mwandb[0m: Agent Starting Run: g04xqnxw with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00041635626207467673
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.0026053424201807934
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1989787777245891, max=1.0…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁█▇▇▇▇▇▇▇▇▇▇▇▇▁▁▁▁▁▁▁▁▁▁▁▁▁▁▆▆
regret,▁

0,1
avg_scores,-499.46535
regret,149930.34633


[34m[1mwandb[0m: Agent Starting Run: l88xwog0 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 8.786996152073315e-05
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.0003579877920824529
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19413690940277561, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2ctanytu with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005381840212855779
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.008023642690594272
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19438815276695245, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 4or9h3b5 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 9.708297018947209e-05
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.005601847638746059
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19378399378399377, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄▄▄▄▄▄▄▄▄▄▄▄██▅▅▅▅▅▅▅▅▅▅
regret,▁

0,1
avg_scores,-498.55446
regret,149784.79208


[34m[1mwandb[0m: Agent Starting Run: um48p460 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0008057473121510967
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.008536562439395698
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁▃▂▂▂▂▂▂▁▁▁▁▁▅▆▆▆▆▆▆▆█████▄▃▃▃▃▅▅▇▄▄▄▄▄
regret,▁

0,1
avg_scores,-497.0297
regret,149180.0443


[34m[1mwandb[0m: Agent Starting Run: 7u4x6y3m with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0008373942533662296
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.00487989331997694
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19907407407407407, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: ep5oh1jz with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0003157509194719833
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.009578671476994318
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 9hege9l5 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008428779128931453
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.0011812009958700102
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1990105330354293, max=1.0…

0,1
avg_scores,▁▁▁▁▁▁█▇▆▆▅▅▅▄▄▄▅▅▅▂▂▂▂▂▃▃▃▇▇▇▆▆▆▆▆▆▆▅▅▅
regret,▁

0,1
avg_scores,-498.42574
regret,149553.17311


[34m[1mwandb[0m: Agent Starting Run: 4vokl2mc with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0008267802289900728
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.008570091039979124
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19366361236216803, max=1.…

0,1
avg_scores,▁▅█▆▆▅▅▅▅▅▅▅▆▆▆▆▆▅▆▅▆▆▆▅▅▅▅▅▄▄▅▅▅▅▅▅▅▅▅▄
regret,▁

0,1
avg_scores,-420.76238
regret,123847.00194


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wt1qbu1o with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0007682094229006119
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.009157628424339552
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19444877592390458, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: ngbs93b2 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0008437499050469759
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.004691076315064133
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01128888888876342, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1990105330354293, max=1.0…

0,1
avg_scores,▁▁▄▆▄▆▇▇██▇▆▇▆▆▆▆▇▇▅▅▄▄▄▅▅▅▆▆▇▇▆▆██▇█▇█▇
regret,▁

0,1
avg_scores,-345.40594
regret,105774.88668


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5cxuj5ps with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 6.806545271131044e-05
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.003262981239857313
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19384424063422975, max=1.…

0,1
avg_scores,▁▃█▆▄▃▅▄▅▅▅▄▅▄▃▄▅▆▅▅▆▆▆▆▅▄▄▅▄▃▂▃▂▂▃▃▁▄▃▆
regret,▁

0,1
avg_scores,-406.57426
regret,124004.61223


[34m[1mwandb[0m: Agent Starting Run: fey256mu with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 1.5819729424472997e-05
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.006135865691368024
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1990105330354293, max=1.0…

0,1
avg_scores,█▄▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▂▂▁▁▁▂▂▂▂▂▂▂▂▁▁▁▂▂▁▁▁▁▁
regret,▁

0,1
avg_scores,-497.29703
regret,148105.51522


[34m[1mwandb[0m: Agent Starting Run: h23ikhl2 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009919988467486712
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.003368102688907324
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: fyo14qyy with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00028804256618742677
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.001853565504850176
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▃▂▂▁▁▂▃▂▃▃▃▄▄▄▅▆▇▇▇▆▇███▇▇▇▅▆▅▅▆▆▆▆▆▆▆▆▇
regret,▁

0,1
avg_scores,-332.53465
regret,103127.48293


[34m[1mwandb[0m: Agent Starting Run: m5ndfic4 with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.00014975512904585658
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.0024931991190460636
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1989787777245891, max=1.0…

0,1
avg_scores,█▆▄▆▇▇▅▄▃▃▃▃▄▄▃▃▃▁▁▂▂▂▂▃▂▂▂▂▂▃▄▄▄▄▅▅▅▆▅▆
regret,▁

0,1
avg_scores,-439.13861
regret,134402.44177


[34m[1mwandb[0m: Agent Starting Run: t4mtwnlm with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000856312283084551
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.0012110622577055172
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
regret,▁

0,1
avg_scores,-499.83168
regret,149999.32673


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tkqpadil with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0004507882596497935
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.008499462277182037
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19438815276695245, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 5nd06uxt with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0009025262917692004
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.001324836483333241
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19904229848363927, max=1.…

0,1
avg_scores,█▇▄▃▄▄▃▃▃▃▃▃▃▃▂▃▃▂▂▂▂▃▃▂▂▂▂▂▁▂▂▂▂▂▁▁▁▁▂▂
regret,▁

0,1
avg_scores,-457.85149
regret,135936.66442


[34m[1mwandb[0m: Agent Starting Run: 3wrj7lyb with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0009185713165353147
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.001926303339092321
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁█▆▅▄▃▃▃▃▂▂▂▂▂▁▁▁▁▂▂▃▃▃▃▃▄▅▅▆▆▆▇▆▅▆▇▇▇▇
regret,▁

0,1
avg_scores,-491.29703
regret,148727.70675


[34m[1mwandb[0m: Agent Starting Run: dqurl5bp with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00039143853466448904
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.003114251823217429
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19366361236216803, max=1.…

0,1
avg_scores,▂▁▂▃▅▅▅▅▅▆▆▆▆▆▇▇▇▆▆▇▇▇▆▆▆▆▇▆▆▇█▇▇▇▇████▇
regret,▁

0,1
avg_scores,-407.9604
regret,124900.92985


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: b86zt7i7 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005711005690484794
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.0049547625153138364
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19369369369369369, max=1.…

0,1
avg_scores,▇▁█▆▅▃▅▅▆▅▅▅▄▄▅▄▄▅▅▅▅▃▃▃▂▂▃▃▃▃▃▄▅▅▄▅▅▅▆▅
regret,▁

0,1
avg_scores,-419.50495
regret,126903.86167


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k6o23gbu with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00024099231291722096
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.007963216386964204
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.194357855361596, max=1.0)…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: s3383ppo with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006159095363940249
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.0009723892660374294
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1990105330354293, max=1.0…

0,1
avg_scores,▁▁▁▁▁▅▄▄▇▇▆▆▅▅▅▅▅▅▄▄▄▄▁▁▁▂▂▂▃▄▄▄▅▆▇▇▇███
regret,▁

0,1
avg_scores,-492.34653
regret,148923.44139


[34m[1mwandb[0m: Agent Starting Run: 25vn4wjp with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0002412004125633464
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.006762550998167215
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19369369369369369, max=1.…

0,1
avg_scores,▄▁▆█▇▆▆▆▆▆▆▆▆▅▅▄▄▅▆▅▅▅▄▄▃▄▅▄▅▅▅▅▅▅▆▅▆▇▆▆
regret,▁

0,1
avg_scores,-410.33663
regret,124208.68213


[34m[1mwandb[0m: Agent Starting Run: qbx6wint with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00026483116908717204
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.005129398787171496
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19438815276695245, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 1g00ie2w with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.000819975346956006
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.00948496684360797
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19381411252719927, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▄▄▄▄▄▄▃▃▅▅▇▇█▇▆▆▆▆▆▆▆▄▅▃▃▄▃▄▄▄▄▄▄▄
regret,▁

0,1
avg_scores,-497.87129
regret,149316.48626


[34m[1mwandb[0m: Agent Starting Run: hgsnn48w with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0005968316316612836
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.004466680268455534
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19372378437160168, max=1.…

0,1
avg_scores,▁▃▂▂▂▁▄▃███▇█▇█▇▇▇▇▇▇▆▅▅▆▆▅▅▅▆▆▇▇▆▆▅▆▅▅▅
regret,▁

0,1
avg_scores,-495.08911
regret,148393.56422


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3ktvjbdj with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0006028065592810503
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.00625224508315932
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19444877592390458, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: ijuo4fgh with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0009722320633322308
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.0020917000475362186
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19888268156424582, max=1.…

0,1
avg_scores,▄█▄▄▃▃▄▆▇▆▇▇▇▆▇████▇▆▇▅▃▃▃▄▃▂▁▁▁▂▁▁▁▂▁▁▁
regret,▁

0,1
avg_scores,-314.83168
regret,91301.62543


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 71mpq46x with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0006088245399185074
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.005006834599379041
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁▁▃▃▂▃▄▄▄▄▅▄▄▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▇▆▇█▇▇▇▇▇▇
regret,▁

0,1
avg_scores,-492.48515
regret,148526.64218


[34m[1mwandb[0m: Agent Starting Run: x3t8yxd1 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00025174047503916417
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.003783436153888993
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01128888888876342, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19378399378399377, max=1.…

0,1
avg_scores,▄▃▅█▅▄▄▃▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▂▂▂▂▁▁▁▂▂▃▃▃▃▄▄
regret,▁

0,1
avg_scores,-280.9505
regret,86617.32844


[34m[1mwandb[0m: Agent Starting Run: 7l9qijzo with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.000729495939676903
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.00222899831138715
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19378399378399377, max=1.…

0,1
avg_scores,▁▇▅▄▅▅▆▅▆▅▅▅▅▅▆▆▇▇▇▆▆▆▆▆▇▇▆▆▆▆▆▇▆▇██▇▇▇▇
regret,▁

0,1
avg_scores,-466.69307
regret,142000.93461


[34m[1mwandb[0m: Agent Starting Run: 3u8s6xuo with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0007942845367823624
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.003219821363117849
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▃█▆▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-490.41584
regret,145719.55581


[34m[1mwandb[0m: Agent Starting Run: dwocs5g9 with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0004689292793303726
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.005311124842069651
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19372378437160168, max=1.…

0,1
avg_scores,▁▁▁▁▁▄▄▃▃▃▃▃▂▂▂▂██▇▇▇▇▇▇▇▇▇▇▇▁▁▇▇▇▇▇▇▇▇▇
regret,▁

0,1
avg_scores,-498.59406
regret,149719.0511


[34m[1mwandb[0m: Agent Starting Run: 4zq4ndqi with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.000494637542574013
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.007204755392223756
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19907407407407407, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 5shllb07 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00013610869316096465
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.002291922946589119
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▂█▅▅▄▄▂▃▃▂▁▂▃▃▂▃▂▂▂▃▃▆▆▆▆▅▅▆▆▆▅█▇▆▅▄▅▄▆▄
regret,▁

0,1
avg_scores,-476.56436
regret,143128.28158


[34m[1mwandb[0m: Agent Starting Run: zwmmqhxl with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00046333776424524746
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.006537695557784315
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19438815276695245, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 8ytirbws with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0007852523789166988
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	v_lr: 0.007761572603305624
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▁█▆▅▄▃▃▃▃▃▃▂▂▂▁▁▁▁▁▁▁▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,149370.74562


[34m[1mwandb[0m: Agent Starting Run: hs9ega80 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0004918318977319041
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.0035816550284165864
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19904229848363927, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Agent Starting Run: 948db8i6 with config:
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0005728554444161877
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.0064864815532692105
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
avg_scores,▁▃▄▄▅▆▆▅▆▇▇▇▇▇▇████████▇▆▆▆▇▆▆▅▅▅▆▆▆▆▆▆▆
regret,▁

0,1
avg_scores,-470.19802
regret,140405.54476


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7w2xyjur with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0004756093031086585
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.007465189325806406
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19444877592390458, max=1.…

0,1
avg_scores,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
regret,▁

0,1
avg_scores,-500.0
regret,150000.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rbcgypxm with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001318502158022645
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	v_lr: 0.004904228283436049
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
