## Dice Game - Parameter Optimisation Using WandB
### This notebook is a reference for parameter tuning only

In [1]:
%pip install wandb

Note: you may need to restart the kernel to use updated packages.


In [2]:
from dice_game import DiceGame
import numpy as np

In [3]:
from abc import ABC, abstractmethod

class DiceGameAgent(ABC):
    """Abstract base class for agents that choose actions in a dice game.

    Concrete agents must implement :meth:`play`; the class itself cannot be
    instantiated.
    """

    def __init__(self, game):
        """Keep a reference to the game object the agent will play."""
        self.game = game

    @abstractmethod
    def play(self, state):
        """Return the action to take for ``state`` (supplied by subclasses)."""

def play_game_with_agent(agent, game, verbose=False):
    """Play one complete game with ``agent`` and return the game's score.

    The game is reset, then the agent is asked for an action for the current
    dice and that action is passed to ``game.roll`` until the roll reports
    that the game is over.

    :param agent: object exposing ``play(state)`` that returns an action
    :param game: object exposing ``reset()``, ``roll(action)`` and ``score``
    :param verbose: when truthy, print every action and intermediate dice
    :return: ``game.score`` after the final roll
    """
    dice = game.reset()

    if verbose:
        print(f"Testing agent: \n\t{type(agent).__name__}")
        print(f"Starting dice: \n\t{dice}\n")

    turn = 0
    while True:
        turn += 1
        chosen = agent.play(dice)
        if verbose:
            print(f"Action {turn}: \t{chosen}")

        # roll() yields (ignored value, new dice, game-over flag)
        _, dice, finished = game.roll(chosen)
        if finished:
            break
        if verbose:
            print(f"Dice: \t\t{dice}")

    if verbose:
        print(f"\nFinal dice: {dice}, score: {game.score}")

    return game.score

In [4]:

class MyAgent(DiceGameAgent):
    """Agent that plays the greedy policy obtained by value iteration.

    The value table is built once in ``__init__`` and stored in
    ``self.values`` as ``{state: [value, best_action]}``; :meth:`play` then
    only performs a lookup.

    The game object is expected to expose ``states``, ``actions`` and
    ``get_next_states(action, state)``, whose result is unpacked as
    ``(next_states, end, reward, probabilities)``.
    """

    def __init__(self, game, theta=0.2, gamma=1):
        """
        :param game: game object handed to ``DiceGameAgent``
        :param theta: convergence threshold for value iteration
        :param gamma: discount factor applied to the value of next states
        :raises ValueError: if ``theta ** 2`` is not strictly positive
        """
        super().__init__(game)
        self.gamma = gamma
        self.theta = theta
        # The stopping test compares squared magnitudes (delta**2 < theta**2).
        self.thSquared = theta**2
        self.values = self.valueIteration()

    def _expectedActionValue(self, values, action, state):
        """Expected value of taking ``action`` in ``state`` under ``values``.

        Terms are accumulated one by one, in the order the next states are
        returned, starting from 0.
        """
        nextStates, end, reward, probabilities = self.getNextStates(action, state)
        total = 0
        for nextState, probability in zip(nextStates, probabilities):
            total += self.stateValue(nextState, probability, values, end, reward)
        return total

    #MDP implementation. Calculate action value (expected)
    def calcActionValue(self, action, state):
        """Expected value of ``action`` in ``state`` under the converged table.

        Uses the same one-step look-ahead as value iteration, evaluated
        against ``self.values``; it therefore requires the agent to be fully
        constructed.
        """
        return self._expectedActionValue(self.values, action, state)

    #Get next states from game object
    def getNextStates(self,key,state):
        """Return ``self.game.get_next_states(key, state)`` unchanged."""
        return self.game.get_next_states(key,state)

    #Calculate the value of a state
    def stateValue(self, state, prob, dict,end,reward):
        """Probability-weighted contribution of one possible next state.

        :param state: the next state being evaluated
        :param prob: probability of reaching ``state``
        :param dict: value table ``{state: [value, action]}`` (the name
            shadows the builtin; kept so keyword callers keep working)
        :param end: truthy when the action ends the game
        :param reward: reward returned by the game for the action
        :return: ``reward * prob`` when ``end`` is truthy, otherwise
            ``(reward + gamma * dict[state][0]) * prob``
        """
        if end:
            return reward * prob
        return (reward + self.gamma * dict[state][0]) * prob

    #One step look ahead for all actions
    def nextActionsList(self,dictionary,state):
        """Map every action in ``self.game.actions`` to its expected value.

        :param dictionary: value table ``{state: [value, action]}``
        :param state: the state the actions are taken from
        :return: ``{action: expected value}`` in ``self.game.actions`` order
        """
        return {
            action: self._expectedActionValue(dictionary, action, state)
            for action in self.game.actions
        }

    def valueIteration(self):
        """Run in-place value iteration until the largest update is small.

        Each sweep updates every state to the value of its best action
        (first maximum in ``self.game.actions`` order) and records that
        action. Iteration stops once the largest absolute change in a sweep,
        squared, is below ``self.thSquared``.

        :return: ``{state: [value, best_action]}``; empty if the game has no
            states
        :raises ValueError: if ``self.thSquared`` is not strictly positive,
            because the stopping test could then never succeed
        """
        if not self.thSquared > 0:
            raise ValueError(
                "theta must be non-zero (and theta**2 must not underflow to 0), "
                "otherwise value iteration cannot terminate"
            )

        valuesDictionary = {s: [0, None] for s in self.game.states}
        if not valuesDictionary:
            # Nothing to evaluate: the empty table is already converged.
            return valuesDictionary

        while True:
            delta = 0
            #Loop through all states
            for state in self.game.states:
                actionList = self.nextActionsList(valuesDictionary, state)
                #Get best action (first one reaching the maximum value)
                bestAction = max(actionList, key=actionList.get)
                bestActionValue = actionList[bestAction]
                #Track the largest change seen in this sweep
                delta = max(delta, abs(valuesDictionary[state][0] - bestActionValue))
                #Update value
                valuesDictionary[state] = [bestActionValue, bestAction]
            if delta ** 2 < self.thSquared:
                return valuesDictionary

    def play(self, state):
        """Return the stored best action for ``state``.

        :raises KeyError: if ``state`` is not in the value table
        """
        return self.values[state][1]

In [5]:
# Import and log in to wandb
import wandb
!wandb login # You will be prompted to paste an API key here

wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\Beast/.netrc
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmichalis[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
# Sweep definition: the search method, the metric to optimise and the
# parameter values to iterate over.
sweep_config = {
    "method": "grid",
    # Must match the key logged by the training function.
    "metric": {
        "name": "Avg Score",
        "goal": "maximize",
    },
    "parameters": {
        "gamma": {
            "values": [
                0.955, 0.96, 0.965, 0.97, 0.975,
                0.98, 0.985, 0.99, 0.995, 1,
            ],
        },
        "theta": {
            "values": [0.1, 0.01, 0.001, 0.05, 0.005],
        },
    },
}

In [7]:
import time

def train(n=10000, seed=10):
    """
    The core train function that the wandb sweep agent runs once for each
    parameter combination in the sweep config.

    Builds a ``MyAgent`` from the run's ``theta`` and ``gamma``, plays ``n``
    games and logs the average score together with the average CPU time per
    game. The time spent constructing the agent is included in the total
    before averaging.

    :param n: number of games to play; must be positive
    :param seed: seed passed to ``np.random.seed`` so every run in the sweep
        starts from the same NumPy global random state
    :raises ValueError: if ``n`` is not positive
    """
    # Validate before opening a wandb run so a bad call does not leave one behind.
    if n <= 0:
        raise ValueError("n must be a positive number of games")

    wandb.init()
    config = wandb.config
    np.random.seed(seed)

    game = DiceGame()
    total_score = 0
    total_time = 0

    # Agent construction is timed as well.
    start_time = time.process_time()
    test_agent = MyAgent(game, config.theta, config.gamma)
    total_time += time.process_time() - start_time

    for _ in range(n):
        start_time = time.process_time()
        score = play_game_with_agent(test_agent, game, verbose=False)
        total_time += time.process_time() - start_time
        total_score += score

    avg_score = total_score / n
    wandb.log({"Theta": config.theta, "Gamma": config.gamma, "Avg Score": avg_score, "Avg Time": total_time / n})
    print(f"Average score: {avg_score}")

In [8]:
# A sweep is the name of the iterative parameter selection process.
# Register the sweep described by `sweep_config` under the 'DiceGameNew'
# project; the returned ID is what the agent below uses to fetch jobs.
sweep_id = wandb.sweep(sweep_config, project='DiceGameNew')

# Run the sweep: the agent calls `train` for the configurations it receives.
# NOTE(review): this appears to run until the sweep has no jobs left — confirm.
wandb.agent(sweep_id, function=train)



Create sweep with ID: dw19vts6
Sweep URL: https://wandb.ai/michalis/DiceGameNew/sweeps/dw19vts6


[34m[1mwandb[0m: Agent Starting Run: tcn24amj with config:
[34m[1mwandb[0m: 	gamma: 0.955
[34m[1mwandb[0m: 	theta: 0.1


Average score: 13.0183


Exception in thread ChkStopThr:
Traceback (most recent call last):
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2032.0_x64__qbz5n2kfra8p0\lib\threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2032.0_x64__qbz5n2kfra8p0\lib\threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\Beast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\wandb\sdk\wandb_run.py", line 190, in check_status
    status_response = self._interface.communicate_stop_status()
  File "C:\Users\Beast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\wandb\sdk\interface\interface.py", line 128, in communicate_stop_status
    resp = self._communicate_stop_status(status)
  File "C:\Users\Beast\AppData\Local\Packages\

0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0183
Avg Time,0.00013
Gamma,0.955
Theta,0.1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: a5w54ai7 with config:
[34m[1mwandb[0m: 	gamma: 0.955
[34m[1mwandb[0m: 	theta: 0.01


Average score: 13.0183


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0183
Avg Time,0.00013
Gamma,0.955
Theta,0.01


[34m[1mwandb[0m: Agent Starting Run: w33xj8yp with config:
[34m[1mwandb[0m: 	gamma: 0.955
[34m[1mwandb[0m: 	theta: 0.001


Average score: 13.0183


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0183
Avg Time,0.00015
Gamma,0.955
Theta,0.001


[34m[1mwandb[0m: Agent Starting Run: c05ha1y1 with config:
[34m[1mwandb[0m: 	gamma: 0.955
[34m[1mwandb[0m: 	theta: 0.05


Average score: 13.0183


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0183
Avg Time,0.00013
Gamma,0.955
Theta,0.05


[34m[1mwandb[0m: Agent Starting Run: l71ua0fn with config:
[34m[1mwandb[0m: 	gamma: 0.955
[34m[1mwandb[0m: 	theta: 0.005


Average score: 13.0183


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0183
Avg Time,0.00013
Gamma,0.955
Theta,0.005


[34m[1mwandb[0m: Agent Starting Run: l3ge8dnl with config:
[34m[1mwandb[0m: 	gamma: 0.96
[34m[1mwandb[0m: 	theta: 0.1


Average score: 13.0183


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0183
Avg Time,0.00011
Gamma,0.96
Theta,0.1


[34m[1mwandb[0m: Agent Starting Run: wl6wza46 with config:
[34m[1mwandb[0m: 	gamma: 0.96
[34m[1mwandb[0m: 	theta: 0.01


Average score: 13.0183


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0183
Avg Time,0.00013
Gamma,0.96
Theta,0.01


[34m[1mwandb[0m: Agent Starting Run: kr23ietz with config:
[34m[1mwandb[0m: 	gamma: 0.96
[34m[1mwandb[0m: 	theta: 0.001


Average score: 13.0183


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0183
Avg Time,0.00015
Gamma,0.96
Theta,0.001


[34m[1mwandb[0m: Agent Starting Run: 56uqrd7l with config:
[34m[1mwandb[0m: 	gamma: 0.96
[34m[1mwandb[0m: 	theta: 0.05


Average score: 13.0183


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0183
Avg Time,0.00012
Gamma,0.96
Theta,0.05


[34m[1mwandb[0m: Agent Starting Run: jvhczysr with config:
[34m[1mwandb[0m: 	gamma: 0.96
[34m[1mwandb[0m: 	theta: 0.005


Average score: 13.0183


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0183
Avg Time,0.00013
Gamma,0.96
Theta,0.005


[34m[1mwandb[0m: Agent Starting Run: 8r8tbvub with config:
[34m[1mwandb[0m: 	gamma: 0.965
[34m[1mwandb[0m: 	theta: 0.1


Average score: 13.0701


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0701
Avg Time,0.00012
Gamma,0.965
Theta,0.1


[34m[1mwandb[0m: Agent Starting Run: 8pdo23y8 with config:
[34m[1mwandb[0m: 	gamma: 0.965
[34m[1mwandb[0m: 	theta: 0.01


Average score: 13.0701


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0701
Avg Time,0.00013
Gamma,0.965
Theta,0.01


[34m[1mwandb[0m: Agent Starting Run: 7fruhk5r with config:
[34m[1mwandb[0m: 	gamma: 0.965
[34m[1mwandb[0m: 	theta: 0.001


Average score: 13.0701


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0701
Avg Time,0.00015
Gamma,0.965
Theta,0.001


[34m[1mwandb[0m: Agent Starting Run: g96gd79b with config:
[34m[1mwandb[0m: 	gamma: 0.965
[34m[1mwandb[0m: 	theta: 0.05


Average score: 13.0701


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0701
Avg Time,0.00012
Gamma,0.965
Theta,0.05


[34m[1mwandb[0m: Agent Starting Run: 6m5y1a1z with config:
[34m[1mwandb[0m: 	gamma: 0.965
[34m[1mwandb[0m: 	theta: 0.005


Average score: 13.0701


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.0701
Avg Time,0.00014
Gamma,0.965
Theta,0.005


[34m[1mwandb[0m: Agent Starting Run: tps69yzg with config:
[34m[1mwandb[0m: 	gamma: 0.97
[34m[1mwandb[0m: 	theta: 0.1


Average score: 13.1981


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.1981
Avg Time,0.00013
Gamma,0.97
Theta,0.1


[34m[1mwandb[0m: Agent Starting Run: zoqho2i6 with config:
[34m[1mwandb[0m: 	gamma: 0.97
[34m[1mwandb[0m: 	theta: 0.01


Average score: 13.1981


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.1981
Avg Time,0.00013
Gamma,0.97
Theta,0.01


[34m[1mwandb[0m: Agent Starting Run: k4tiumlf with config:
[34m[1mwandb[0m: 	gamma: 0.97
[34m[1mwandb[0m: 	theta: 0.001


Average score: 13.1981


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.1981
Avg Time,0.00015
Gamma,0.97
Theta,0.001


[34m[1mwandb[0m: Agent Starting Run: zre8uh28 with config:
[34m[1mwandb[0m: 	gamma: 0.97
[34m[1mwandb[0m: 	theta: 0.05


Average score: 13.1981


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.1981
Avg Time,0.00012
Gamma,0.97
Theta,0.05


[34m[1mwandb[0m: Agent Starting Run: b4qdmmhq with config:
[34m[1mwandb[0m: 	gamma: 0.97
[34m[1mwandb[0m: 	theta: 0.005


Average score: 13.1981


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.1981
Avg Time,0.00013
Gamma,0.97
Theta,0.005


[34m[1mwandb[0m: Agent Starting Run: 7v6kn5ye with config:
[34m[1mwandb[0m: 	gamma: 0.975
[34m[1mwandb[0m: 	theta: 0.1


Average score: 13.3003


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3003
Avg Time,0.00013
Gamma,0.975
Theta,0.1


[34m[1mwandb[0m: Agent Starting Run: nzf5w2ql with config:
[34m[1mwandb[0m: 	gamma: 0.975
[34m[1mwandb[0m: 	theta: 0.01


Average score: 13.3082


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3082
Avg Time,0.00015
Gamma,0.975
Theta,0.01


[34m[1mwandb[0m: Agent Starting Run: 4zodgkvx with config:
[34m[1mwandb[0m: 	gamma: 0.975
[34m[1mwandb[0m: 	theta: 0.001


Average score: 13.3082


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3082
Avg Time,0.00016
Gamma,0.975
Theta,0.001


[34m[1mwandb[0m: Agent Starting Run: tzn7s4aa with config:
[34m[1mwandb[0m: 	gamma: 0.975
[34m[1mwandb[0m: 	theta: 0.05


Average score: 13.3003


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3003
Avg Time,0.00013
Gamma,0.975
Theta,0.05


[34m[1mwandb[0m: Agent Starting Run: wcr31vnh with config:
[34m[1mwandb[0m: 	gamma: 0.975
[34m[1mwandb[0m: 	theta: 0.005


Average score: 13.3082


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3082
Avg Time,0.00015
Gamma,0.975
Theta,0.005


[34m[1mwandb[0m: Agent Starting Run: 0pf6zv1z with config:
[34m[1mwandb[0m: 	gamma: 0.98
[34m[1mwandb[0m: 	theta: 0.1


Average score: 13.3349


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3349
Avg Time,0.00013
Gamma,0.98
Theta,0.1


[34m[1mwandb[0m: Agent Starting Run: ojhhqjhc with config:
[34m[1mwandb[0m: 	gamma: 0.98
[34m[1mwandb[0m: 	theta: 0.01


Average score: 13.3349


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3349
Avg Time,0.00015
Gamma,0.98
Theta,0.01


[34m[1mwandb[0m: Agent Starting Run: c2mas2dd with config:
[34m[1mwandb[0m: 	gamma: 0.98
[34m[1mwandb[0m: 	theta: 0.001


Average score: 13.3349


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3349
Avg Time,0.00017
Gamma,0.98
Theta,0.001


[34m[1mwandb[0m: Agent Starting Run: r9ypq8fv with config:
[34m[1mwandb[0m: 	gamma: 0.98
[34m[1mwandb[0m: 	theta: 0.05


Average score: 13.3349


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3349
Avg Time,0.00014
Gamma,0.98
Theta,0.05


[34m[1mwandb[0m: Agent Starting Run: 48l11wyd with config:
[34m[1mwandb[0m: 	gamma: 0.98
[34m[1mwandb[0m: 	theta: 0.005


Average score: 13.3349


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3349
Avg Time,0.00016
Gamma,0.98
Theta,0.005


[34m[1mwandb[0m: Agent Starting Run: 5tf2ia9c with config:
[34m[1mwandb[0m: 	gamma: 0.985
[34m[1mwandb[0m: 	theta: 0.1


Average score: 13.3372


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3372
Avg Time,0.00013
Gamma,0.985
Theta,0.1


[34m[1mwandb[0m: Agent Starting Run: 9lw1isa0 with config:
[34m[1mwandb[0m: 	gamma: 0.985
[34m[1mwandb[0m: 	theta: 0.01


Average score: 13.3372


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3372
Avg Time,0.00015
Gamma,0.985
Theta,0.01


[34m[1mwandb[0m: Agent Starting Run: vefxidzy with config:
[34m[1mwandb[0m: 	gamma: 0.985
[34m[1mwandb[0m: 	theta: 0.001


Average score: 13.3372


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3372
Avg Time,0.00017
Gamma,0.985
Theta,0.001


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: x9wyw78r with config:
[34m[1mwandb[0m: 	gamma: 0.985
[34m[1mwandb[0m: 	theta: 0.05


Average score: 13.3372


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3372
Avg Time,0.00015
Gamma,0.985
Theta,0.05


[34m[1mwandb[0m: Agent Starting Run: qiolt1em with config:
[34m[1mwandb[0m: 	gamma: 0.985
[34m[1mwandb[0m: 	theta: 0.005


Average score: 13.3372


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3372
Avg Time,0.00017
Gamma,0.985
Theta,0.005


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 248fsx1m with config:
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	theta: 0.1


Average score: 13.3372


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3372
Avg Time,0.00014
Gamma,0.99
Theta,0.1


[34m[1mwandb[0m: Agent Starting Run: kwjk0npn with config:
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	theta: 0.01


Average score: 13.3372


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3372
Avg Time,0.00015
Gamma,0.99
Theta,0.01


[34m[1mwandb[0m: Agent Starting Run: 9vjj6znx with config:
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	theta: 0.001


Average score: 13.3372


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3372
Avg Time,0.00017
Gamma,0.99
Theta,0.001


[34m[1mwandb[0m: Agent Starting Run: 18iez77m with config:
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	theta: 0.05


Average score: 13.3372


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3372
Avg Time,0.00014
Gamma,0.99
Theta,0.05


[34m[1mwandb[0m: Agent Starting Run: i9w8dsfi with config:
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	theta: 0.005


Average score: 13.3372


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3372
Avg Time,0.00017
Gamma,0.99
Theta,0.005


[34m[1mwandb[0m: Agent Starting Run: 6db69nvw with config:
[34m[1mwandb[0m: 	gamma: 0.995
[34m[1mwandb[0m: 	theta: 0.1


Average score: 13.3687


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3687
Avg Time,0.00014
Gamma,0.995
Theta,0.1


[34m[1mwandb[0m: Agent Starting Run: a3716ker with config:
[34m[1mwandb[0m: 	gamma: 0.995
[34m[1mwandb[0m: 	theta: 0.01


Average score: 13.3687


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3687
Avg Time,0.0002
Gamma,0.995
Theta,0.01


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 885ukd0l with config:
[34m[1mwandb[0m: 	gamma: 0.995
[34m[1mwandb[0m: 	theta: 0.001


Average score: 13.3687


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3687
Avg Time,0.00017
Gamma,0.995
Theta,0.001


[34m[1mwandb[0m: Agent Starting Run: pqfkxln1 with config:
[34m[1mwandb[0m: 	gamma: 0.995
[34m[1mwandb[0m: 	theta: 0.05


Average score: 13.3687


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3687
Avg Time,0.00015
Gamma,0.995
Theta,0.05


[34m[1mwandb[0m: Agent Starting Run: 6uw87zgw with config:
[34m[1mwandb[0m: 	gamma: 0.995
[34m[1mwandb[0m: 	theta: 0.005


Average score: 13.3687


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3687
Avg Time,0.00017
Gamma,0.995
Theta,0.005


[34m[1mwandb[0m: Agent Starting Run: m9zqi009 with config:
[34m[1mwandb[0m: 	gamma: 1
[34m[1mwandb[0m: 	theta: 0.1


Average score: 13.3687


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3687
Avg Time,0.00014
Gamma,1.0
Theta,0.1


[34m[1mwandb[0m: Agent Starting Run: b0xfhbfq with config:
[34m[1mwandb[0m: 	gamma: 1
[34m[1mwandb[0m: 	theta: 0.01


Average score: 13.3677


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3677
Avg Time,0.00016
Gamma,1.0
Theta,0.01


[34m[1mwandb[0m: Agent Starting Run: xevff8rl with config:
[34m[1mwandb[0m: 	gamma: 1
[34m[1mwandb[0m: 	theta: 0.001


Average score: 13.3677


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3677
Avg Time,0.00019
Gamma,1.0
Theta,0.001


[34m[1mwandb[0m: Agent Starting Run: dions0t4 with config:
[34m[1mwandb[0m: 	gamma: 1
[34m[1mwandb[0m: 	theta: 0.05


Average score: 13.3687


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3687
Avg Time,0.00015
Gamma,1.0
Theta,0.05


[34m[1mwandb[0m: Agent Starting Run: ad1ojp0w with config:
[34m[1mwandb[0m: 	gamma: 1
[34m[1mwandb[0m: 	theta: 0.005


Average score: 13.3677


0,1
Avg Score,▁
Avg Time,▁
Gamma,▁
Theta,▁

0,1
Avg Score,13.3677
Avg Time,0.00017
Gamma,1.0
Theta,0.005


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


Error in callback <function _WandbInit._pause_backend at 0x0000023B82CD2290> (for post_run_cell):


ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host