In [1]:
import torch
import tensorflow
import gym
import keras
import random

import numpy as np
from gym import Env

from gym.spaces import Discrete, Box

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# get training envs
import pandas as pd
all_puzzles = pd.read_csv("sudoku.csv")
all_puzzles_df = pd.DataFrame(all_puzzles)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Reshape
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

import time

cuda


In [2]:
class board_player_env(Env):
    def __init__(self, game_no = np.random.randint(100000)):
        super(board_player_env, self).__init__()
        # creating the action space
        # self.game_no = game_no
        random.seed(time.perf_counter())
        game_no = np.random.randint(100000)
        
        print("game being played now:", game_no)
        self.action_space_list = ['up', 'down', 'left', 'right', 1, 2, 3, 4, 5, 6, 7, 8, 9]
        self.action_space = Discrete(len(self.action_space_list))
        # number of valid actions
        self.n_actions = len(self.action_space_list)
        self.n_features = 2

        # observation space
        self.observation_space = Box(low=0, high=9, shape=(9, 9), dtype=int)

        # initializing environment
        self.puzzle = all_puzzles_df.iloc[game_no]['quizzes']
        self.puzzle_arr = np.array(list(self.puzzle), dtype=int).reshape((9, 9))

        # initializing perfect solution
        self.solution = all_puzzles_df.iloc[game_no]['solutions']
        self.solution_arr = np.array(list(self.solution), dtype=int).reshape((9, 9))

        # initializing the agent at the first index
        self.agent = [0, 0]

        # initializing rewards
        self.rewards = 0
        self.done = False

    def render(self):
        print("\nsolved now: \n")
        print(self.puzzle_arr)

    def reset(self):
        # set the agent at first location
        # self.state = [0, 0]
        
        # maybe it shoulf be there/maybe not
        # self.rewards = 0
        # self.done = False
        return [0, 0]

    def step(self, action):

        # current agent location
        state = self.agent
        # print("self.agent: ", self.agent)
        # print("state: ", state[0])
        # print("action: ", action, type(action))

        # rewards at each step
        # reward += 1 for reaching empty
        # reward += 3 for filling one spot correctly
        # reward += 10 for jackpot of full puzzle
        # punishment += -2 for filling wrong

        # action 0, 1, 2, 3
        # imply only some filling in rules of the sudoku game, so that the board doesn't change
        if (action == 0):
            # print("\nup")
            if(state[0] > 0):
                self.agent = [state[0] - 1, state[1]]
                if(self.puzzle_arr[state[0] - 1, state[1]] == 0):
                    print("\nreward for up")
                    self.rewards += 1
                else:
                    print("\npunishment for up")
                    self.rewards -= 1

        elif (action == 1):
            # print("\ndown")
            if(state[0] < 8):
                self.agent = [state[0] + 1, state[1]]
                if(self.puzzle_arr[state[0] + 1, state[1]] == 0):
                    print("\nreward for down")
                    self.rewards += 1
                else:
                    print("\npunishment for down")
                    self.rewards -= 1

        elif (action == 2):
            # print("\nleft")
            if(state[1] > 0):
                self.agent = [state[0], state[1] - 1]
                if(self.puzzle_arr[state[0], state[1] - 1] == 0):
                    print("\nreward for left")
                    self.rewards += 1
                else:
                    print("\npunishment for left")
                    self.rewards -= 1

        elif (action == 3):
            # print("\nright")
            if(state[1] < 8):
                self.agent = [state[0], state[1] + 1]
                if(self.puzzle_arr[state[0], state[1] + 1] == 0):
                    print("\nreward for right")
                    self.rewards += 1
                else:
                    print("\npunishment for right")
                    self.rewards -= 1

        # action 5 means fill 2
        elif (action > 3):
            if(self.puzzle_arr[state[0], state[1]] == 0):
                if(self.action_space_list[action] == self.solution_arr[state[0], state[1]]):
                    self.puzzle_arr[state[0], state[1]] = self.action_space_list[action]
                    print("\nreward for action:", self.action_space_list[action])
                    self.rewards += 3
                else:
                    print("\npunishment for action:", self.action_space_list[action])
                    self.rewards -= 2

        if(np.array_equal(self.solution_arr, self.puzzle_arr)):
            self.rewards += 10
            self.done = True
            self.render()
            # maybe/maybe not
            self.__init__()
            

        return self.agent, self.rewards, bool(self.done), {}

In [3]:
def build_model(states, actions):
    model = Sequential()
    # model.add(Reshape(target_shape=(actions,), input_shape=states))
    model.add(Dense(24, activation='relu', input_shape=(1, 2, )))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions))
    model.add(Flatten())
    return model

In [4]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-3)
    return dqn

In [5]:
env = board_player_env()
states = env.observation_space.shape
actions = env.action_space.n

game being played now: 77410


del model

In [6]:
model = build_model(states, actions)

In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1, 24)             72        
_________________________________________________________________
dense_1 (Dense)              (None, 1, 24)             600       
_________________________________________________________________
dense_2 (Dense)              (None, 1, 13)             325       
_________________________________________________________________
flatten (Flatten)            (None, 13)                0         
Total params: 997
Trainable params: 997
Non-trainable params: 0
_________________________________________________________________


In [8]:
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(board_player_env(), nb_steps=50000, visualize=False, verbose=1)

game being played now: 75072
Training for 50000 steps ...
Interval 1 (0 steps performed)





punishment for action: 7
    1/10000 [..............................] - ETA: 1:07:34 - reward: -2.0000
punishment for action: 5

punishment for action: 2

reward for action: 3

punishment for down

punishment for up




   12/10000 [..............................] - ETA: 22:42 - reward: -4.4167  
punishment for down
   18/10000 [..............................] - ETA: 15:15 - reward: -4.6667
reward for down

punishment for action: 1

punishment for action: 4

reward for right

punishment for action: 9
   24/10000 [..............................] - ETA: 11:38 - reward: -5.3750
punishment for down

reward for down
   29/10000 [..............................] - ETA: 9:52 - reward: -6.3103 
punishment for action: 9

reward for action: 2

reward for down
   36/10000 [..............................] - ETA: 8:09 - reward: -6.8889
punishment for action: 2

punishment for action: 7

punishment for action: 7

punishment for action: 4

punishment for action: 9

punishment for action: 2
   42/10000 [..............................] - ETA: 7:10 - reward: -8.0476
punishment for action: 8

punishment for action: 2

punishment for action: 4

punishment for action: 5

punishment for action: 4
   47/10000 [..............


punishment for action: 4
  854/10000 [=>............................] - ETA: 1:28 - reward: -89.7892
punishment for action: 4

punishment for action: 7

punishment for action: 4

punishment for action: 4

punishment for action: 9

punishment for action: 4

punishment for action: 7
  861/10000 [=>............................] - ETA: 1:28 - reward: -90.6690
punishment for action: 7

punishment for action: 4

punishment for action: 4

punishment for action: 4

punishment for action: 7

punishment for down
  939/10000 [=>............................] - ETA: 1:25 - reward: -100.9702
punishment for down
  971/10000 [=>............................] - ETA: 1:24 - reward: -104.7549
punishment for down
  987/10000 [=>............................] - ETA: 1:24 - reward: -106.5684
punishment for down
 1019/10000 [==>...........................] - ETA: 1:23 - reward: -110.0658
punishment for down

punishment for down
 1152/10000 [==>...........................] - ETA: 1:19 - reward: -122.7552
punis


punishment for up
 1702/10000 [====>.........................] - ETA: 1:10 - reward: -222.8954
reward for right

reward for down
 1709/10000 [====>.........................] - ETA: 1:10 - reward: -224.0445
reward for down

punishment for down

punishment for right

punishment for right

reward for down

punishment for right
 1715/10000 [====>.........................] - ETA: 1:10 - reward: -225.0198
reward for down

reward for right

reward for right

reward for right

punishment for right
 1865/10000 [====>.........................] - ETA: 1:08 - reward: -247.2177
reward for left

reward for action: 8

reward for left

reward for left

reward for left

punishment for action: 8

reward for action: 7
 1872/10000 [====>.........................] - ETA: 1:08 - reward: -248.1474
punishment for left

reward for left

punishment for action: 8
 1877/10000 [====>.........................] - ETA: 1:08 - reward: -248.8023
punishment for left
 1884/10000 [====>.........................] - ETA: 1


reward for right
punishment for left
punishment for left
punishment for down
punishment for up

punishment for up
reward for up

reward for right

punishment for action: 6

punishment for action: 7

punishment for action: 7
reward for down

punishment for action: 8

punishment for action: 4

punishment for action: 2

reward for up

punishment for action: 9
reward for action: 8

reward for down

punishment for action: 8

punishment for action: 5

punishment for action: 9

punishment for action: 8
punishment for action: 3

punishment for down

reward for right
punishment for down
reward for left

punishment for action: 6
punishment for action: 1

punishment for action: 6

punishment for action: 9

punishment for action: 2

punishment for left
punishment for up
punishment for down

reward for right

punishment for action: 7
reward for action: 4

punishment for left

punishment for left
punishment for left
punishment for left

punishment for up

punishment for up
punishment for up

reward

punishment for action: 1

punishment for action: 4

reward for action: 2

punishment for down

punishment for up
punishment for down
punishment for down

punishment for left
punishment for up
punishment for right

punishment for down
reward for down

punishment for action: 3

punishment for up
punishment for left

punishment for left

punishment for up

punishment for up
punishment for up
punishment for down
punishment for left
punishment for left

punishment for down

punishment for right
punishment for right
punishment for right

punishment for down
punishment for right
reward for down

punishment for action: 6

punishment for down
reward for down

punishment for up

reward for up

punishment for action: 9
punishment for up

punishment for up
punishment for down
reward for down

punishment for up
reward for down

punishment for action: 3

reward for action: 7
punishment for up
punishment for up

punishment for left

punishment for right

punishment for down
punishment for left

punis

reward for left

punishment for right
reward for down

punishment for action: 7

punishment for action: 9
punishment for down

punishment for down
reward for down

punishment for action: 2
punishment for action: 5

punishment for action: 2

punishment for right

punishment for right

punishment for down
punishment for up

punishment for down
punishment for up

punishment for right

punishment for down
punishment for left

punishment for right

punishment for left
punishment for right

punishment for down
punishment for up
punishment for down
reward for left

punishment for action: 3

punishment for action: 7

punishment for action: 4

punishment for action: 5

punishment for action: 3
punishment for action: 6

punishment for up

reward for down

punishment for action: 8
reward for down

reward for up

punishment for action: 3

punishment for action: 9

punishment for action: 3

punishment for action: 7
punishment for action: 9

punishment for action: 9

punishment for action: 3

punish

punishment for right

punishment for left

reward for left
punishment for action: 1

punishment for action: 3

reward for left

punishment for action: 7
punishment for action: 8

punishment for action: 7

punishment for down
reward for right

punishment for left

reward for up

punishment for action: 7

punishment for action: 6
punishment for left
punishment for up

punishment for right
punishment for up
reward for left

punishment for left

punishment for up

punishment for down
punishment for up

punishment for down

reward for right

punishment for action: 1
punishment for action: 2

punishment for left

punishment for down
punishment for up

reward for right

punishment for action: 5

punishment for action: 4
punishment for up

punishment for left
punishment for right
punishment for up
punishment for right

reward for up

punishment for action: 5

punishment for action: 7
punishment for right

punishment for up
punishment for right

reward for down

punishment for action: 1

punish

punishment for up
punishment for left
punishment for right
punishment for down

punishment for left

punishment for down
punishment for up

punishment for right
punishment for up

punishment for left
punishment for left

punishment for up

punishment for down

reward for down

punishment for action: 8

reward for action: 9
punishment for right

punishment for right

punishment for up

punishment for right
punishment for up
punishment for down

punishment for up

punishment for up
punishment for down
punishment for down

punishment for down

punishment for left
punishment for right
punishment for down

punishment for down
punishment for left

punishment for up

punishment for down
punishment for right
punishment for up

punishment for left

punishment for right

punishment for left
punishment for right

punishment for down
punishment for up

punishment for left
punishment for up

punishment for down
punishment for left

punishment for down
reward for left

punishment for down
reward for

punishment for right
punishment for left

punishment for right

punishment for down
punishment for left
punishment for up

punishment for right
punishment for down

punishment for up
punishment for up

reward for right

punishment for left
punishment for down
punishment for left

punishment for right

punishment for up

punishment for down
punishment for right

punishment for down
reward for right

punishment for action: 1
reward for action: 2

punishment for up
punishment for right

reward for down
punishment for action: 4

punishment for action: 1

punishment for up
punishment for up

punishment for down
punishment for left

punishment for right

punishment for up

punishment for right
punishment for left

punishment for right

punishment for up
punishment for up

reward for left

punishment for right

punishment for down
punishment for left

reward for up

punishment for action: 3
punishment for right

reward for left

punishment for action: 8
punishment for down

punishment for dow

punishment for down

punishment for down
reward for left

punishment for action: 4

punishment for action: 6

punishment for action: 2

punishment for up
reward for down

punishment for action: 7

reward for action: 8
punishment for down

punishment for right

punishment for right

punishment for left
punishment for up
punishment for right

punishment for up

punishment for right

punishment for right
punishment for down

punishment for down
punishment for down

punishment for up
punishment for up

punishment for right
punishment for up
punishment for up
punishment for down

punishment for down
punishment for down
punishment for down
punishment for left
punishment for left

punishment for down
punishment for up
punishment for down
reward for left

punishment for right

reward for left

punishment for right
punishment for right

reward for down
punishment for action: 4

punishment for action: 9

reward for action: 5
punishment for up

punishment for down
reward for right

punishment for


punishment for right
punishment for down

punishment for left
punishment for up

punishment for down

punishment for up
punishment for up

punishment for right
punishment for right

punishment for left

punishment for right
punishment for up

punishment for left

punishment for right
punishment for left
punishment for down
punishment for up

punishment for down
punishment for down

punishment for left
punishment for up

punishment for down

punishment for left
punishment for up
punishment for up
punishment for down
punishment for down
punishment for up
punishment for right

punishment for up
punishment for down

punishment for left
punishment for up

punishment for right

punishment for left
punishment for right
punishment for left

punishment for right

punishment for down
punishment for up

punishment for left
punishment for right
punishment for down
punishment for left
punishment for up
punishment for down
punishment for down
punishment for up
punishment for up

punishment for righ

punishment for left
punishment for down

punishment for up

punishment for up
punishment for down

punishment for left

punishment for down
punishment for down

punishment for left
punishment for left
punishment for left

punishment for down
punishment for left

punishment for down
punishment for up

punishment for down

punishment for right

punishment for right
punishment for right

punishment for up
punishment for left

punishment for left

punishment for down
punishment for right

punishment for up

punishment for up
punishment for left

punishment for up
punishment for left

punishment for right
punishment for left

punishment for down

punishment for right
punishment for left

punishment for up
punishment for up
punishment for right

punishment for down

punishment for left
punishment for right

punishment for down

punishment for up
punishment for down

punishment for up
punishment for down

punishment for up

punishment for up
punishment for up

punishment for up
punishment for

punishment for right
punishment for left

punishment for up
punishment for down
punishment for up

punishment for down
punishment for down
punishment for down

punishment for down
punishment for up

punishment for right

punishment for right
punishment for left

punishment for right

punishment for up
punishment for up
punishment for left

punishment for up

punishment for down

punishment for left
punishment for right
punishment for left
punishment for up

punishment for right
punishment for down

punishment for up
punishment for left
punishment for down
punishment for right

punishment for right
punishment for down
punishment for right
punishment for right

punishment for left

punishment for left

punishment for right
punishment for right

punishment for down
punishment for up
punishment for up

punishment for right

reward for down

reward for action: 2
punishment for up

punishment for right
punishment for right
punishment for down

punishment for up
punishment for right
punishmen

punishment for down

punishment for left
punishment for right

punishment for down
punishment for left

punishment for right

punishment for left
punishment for right

punishment for up
punishment for up

punishment for down
punishment for left

punishment for down
punishment for left

punishment for up
punishment for right
punishment for up

punishment for left
punishment for down
punishment for down
punishment for up

punishment for right

punishment for up
punishment for left
punishment for right
punishment for left
punishment for up
punishment for down

punishment for up
punishment for up

punishment for right

punishment for up

punishment for up
punishment for left

punishment for right
punishment for left
punishment for down

punishment for down
punishment for down

punishment for left
punishment for right
punishment for left
punishment for down

reward for down
punishment for action: 4

punishment for action: 6

punishment for up
punishment for right
punishment for down
reward 

punishment for left
punishment for right
punishment for down

punishment for up

punishment for right
punishment for down

punishment for up
punishment for down

punishment for right
punishment for down
punishment for up
punishment for down

punishment for right
punishment for left

punishment for down
punishment for right

punishment for up
punishment for up
punishment for down

punishment for down
punishment for up

punishment for down

punishment for down
punishment for left

punishment for right
punishment for right
punishment for left
punishment for left

punishment for left
punishment for up

punishment for right
punishment for down

punishment for up
punishment for down

punishment for right
punishment for left

punishment for left

punishment for down
punishment for down
punishment for left

punishment for right
punishment for up
punishment for left
punishment for up

punishment for right
punishment for right

reward for down

punishment for action: 1
punishment for action: 9



punishment for up

punishment for left
punishment for right

punishment for up
punishment for right
punishment for right

punishment for left
punishment for left
punishment for right

punishment for up

punishment for left

punishment for up
punishment for right

punishment for right
punishment for down
punishment for up
punishment for up

punishment for down

punishment for left
punishment for down

punishment for left

punishment for up
punishment for right

punishment for left

punishment for up

punishment for left
punishment for left

punishment for down
punishment for right

punishment for left

punishment for up

punishment for up
punishment for down

punishment for up

punishment for down
punishment for down

punishment for down
punishment for left

punishment for down
punishment for right
punishment for left
punishment for right

punishment for right

punishment for right
punishment for up

punishment for right
punishment for up
punishment for up
punishment for left

punishmen


punishment for left
punishment for down

punishment for up
punishment for left

punishment for down
punishment for down
punishment for up

punishment for down
punishment for right
punishment for left
punishment for up
punishment for right

punishment for down
punishment for right
punishment for left

punishment for left

punishment for right
punishment for up

punishment for left
punishment for up
punishment for right
punishment for right

punishment for down

punishment for left

punishment for right
punishment for down

punishment for left
punishment for up

punishment for right

punishment for left

punishment for up

punishment for down
punishment for left
punishment for down
punishment for up

punishment for down
punishment for right
punishment for up
punishment for right
punishment for down
punishment for right
punishment for right

punishment for right
punishment for right
punishment for up

punishment for down
punishment for up

punishment for up

punishment for up
punishment 

punishment for right
punishment for right
punishment for left

punishment for left
punishment for right

punishment for left
punishment for right
punishment for up
punishment for up

punishment for right

punishment for up
punishment for right

punishment for right
punishment for left

punishment for down
punishment for left

punishment for right

punishment for up
punishment for right
punishment for down

punishment for up

punishment for up
punishment for up
punishment for right

punishment for down
punishment for left

punishment for up

punishment for down
punishment for down

punishment for left
punishment for up
punishment for right

punishment for down
punishment for down
punishment for up

punishment for right

punishment for up
punishment for down

punishment for up

punishment for left
punishment for up

punishment for left

punishment for right
punishment for down
punishment for right
punishment for up
punishment for left

punishment for right
punishment for up

punishment f

  266/10000 [..............................] - ETA: 1:24 - reward: -2698.9662
punishment for right
  270/10000 [..............................] - ETA: 1:24 - reward: -2699.4519
punishment for down

punishment for left
  275/10000 [..............................] - ETA: 1:25 - reward: -2700.0618
punishment for down

punishment for down

punishment for down
  279/10000 [..............................] - ETA: 1:26 - reward: -2700.5699
punishment for left
  282/10000 [..............................] - ETA: 1:27 - reward: -2700.9681
punishment for up
  287/10000 [..............................] - ETA: 1:27 - reward: -2701.6237
punishment for right

punishment for right
  293/10000 [..............................] - ETA: 1:27 - reward: -2702.4232
punishment for down
  305/10000 [..............................] - ETA: 1:26 - reward: -2703.9770
punishment for right
  311/10000 [..............................] - ETA: 1:27 - reward: -2704.7170
punishment for right
  318/10000 [..................

  740/10000 [=>............................] - ETA: 1:22 - reward: -2760.8311
punishment for up

punishment for down
  746/10000 [=>............................] - ETA: 1:22 - reward: -2761.6810
punishment for down

punishment for right
  753/10000 [=>............................] - ETA: 1:22 - reward: -2762.6853
punishment for right

punishment for up

punishment for left
  767/10000 [=>............................] - ETA: 1:21 - reward: -2764.6845
punishment for right

punishment for up

punishment for right
  773/10000 [=>............................] - ETA: 1:21 - reward: -2765.5369
punishment for right

punishment for left
  779/10000 [=>............................] - ETA: 1:21 - reward: -2766.3954
punishment for down

punishment for left

punishment for down
  785/10000 [=>............................] - ETA: 1:21 - reward: -2767.2637
punishment for down

punishment for right
  791/10000 [=>............................] - ETA: 1:21 - reward: -2768.1403
punishment for up

punishm

 1244/10000 [==>...........................] - ETA: 1:18 - reward: -2830.8031
punishment for left

punishment for down
 1250/10000 [==>...........................] - ETA: 1:18 - reward: -2831.6216
punishment for right

punishment for left
 1256/10000 [==>...........................] - ETA: 1:18 - reward: -2832.4419
punishment for up
 1261/10000 [==>...........................] - ETA: 1:18 - reward: -2833.1253
punishment for left
 1268/10000 [==>...........................] - ETA: 1:17 - reward: -2834.0749
punishment for right

punishment for up

punishment for down
 1274/10000 [==>...........................] - ETA: 1:17 - reward: -2834.8948
punishment for left
 1281/10000 [==>...........................] - ETA: 1:17 - reward: -2835.8478
punishment for left
 1296/10000 [==>...........................] - ETA: 1:17 - reward: -2837.8742
punishment for left

punishment for up

punishment for right

punishment for right
 1303/10000 [==>...........................] - ETA: 1:17 - reward: -283


punishment for right

punishment for up

punishment for down
 1744/10000 [====>.........................] - ETA: 1:11 - reward: -2899.2385
punishment for down

punishment for right

punishment for left
 1751/10000 [====>.........................] - ETA: 1:11 - reward: -2900.2176
punishment for up

punishment for right
 1758/10000 [====>.........................] - ETA: 1:11 - reward: -2901.2014
punishment for left
 1772/10000 [====>.........................] - ETA: 1:11 - reward: -2903.1569
punishment for down

punishment for right
 1787/10000 [====>.........................] - ETA: 1:11 - reward: -2905.2350
punishment for left

punishment for right
 1794/10000 [====>.........................] - ETA: 1:11 - reward: -2906.1990
punishment for left

punishment for right
 1801/10000 [====>.........................] - ETA: 1:11 - reward: -2907.1621
punishment for left
 1808/10000 [====>.........................] - ETA: 1:11 - reward: -2908.1256
punishment for up

punishment for down

punis


punishment for down
 2270/10000 [=====>........................] - ETA: 1:06 - reward: -2971.4000
punishment for left

punishment for up
 2277/10000 [=====>........................] - ETA: 1:06 - reward: -2972.3434
punishment for down

punishment for right

punishment for right
 2284/10000 [=====>........................] - ETA: 1:06 - reward: -2973.2890
punishment for up

punishment for up
 2291/10000 [=====>........................] - ETA: 1:06 - reward: -2974.2375
punishment for right

punishment for down

punishment for down
 2297/10000 [=====>........................] - ETA: 1:06 - reward: -2975.0522
punishment for up
 2304/10000 [=====>........................] - ETA: 1:06 - reward: -2976.0004
punishment for down

punishment for up

punishment for left

punishment for down
 2311/10000 [=====>........................] - ETA: 1:06 - reward: -2976.9511
punishment for left
 2318/10000 [=====>........................] - ETA: 1:06 - reward: -2977.9034
punishment for down

punishment f


punishment for action: 9
punishment for action: 7

punishment for action: 8

punishment for action: 9

punishment for down
reward for down

punishment for action: 7

reward for left

punishment for action: 8

punishment for action: 4
punishment for action: 5

punishment for action: 6

punishment for down

punishment for right

punishment for left

punishment for right

punishment for left
punishment for right

reward for right

punishment for up

punishment for right

reward for right
punishment for action: 2

reward for action: 3

punishment for up
reward for up

reward for action: 4

punishment for down
punishment for down
reward for down

punishment for action: 3

punishment for action: 8

punishment for down
reward for up

punishment for action: 9

punishment for action: 9

reward for action: 6
punishment for left

reward for left

punishment for action: 9
punishment for action: 7

punishment for action: 4

punishment for left

reward for down

reward for down

punishment for acti

punishment for up

punishment for up
reward for left

reward for action: 4

punishment for right
punishment for down

punishment for left
punishment for up

punishment for down
punishment for right

punishment for left

punishment for right
punishment for down

reward for right

punishment for action: 5

reward for down

punishment for down
punishment for right

punishment for up
punishment for down
punishment for up
punishment for down
punishment for up

punishment for down

punishment for up
reward for left

punishment for action: 1

reward for up
punishment for right

reward for up
punishment for action: 2

reward for action: 8
punishment for down
punishment for up

punishment for down
punishment for down
punishment for down

punishment for down
punishment for left
punishment for down
punishment for left
punishment for up
punishment for left

punishment for up
punishment for down

punishment for down
punishment for left

punishment for right
punishment for up

punishment for left

p

punishment for down

punishment for down

reward for right

punishment for action: 2
punishment for up
punishment for left

punishment for up
reward for right

punishment for action: 3

punishment for action: 5

punishment for action: 2

punishment for down
reward for down

punishment for up
punishment for left

punishment for up
punishment for down
punishment for up

punishment for down

punishment for down
reward for down

punishment for action: 8

punishment for action: 7

punishment for action: 5

punishment for action: 7

punishment for action: 7

punishment for down
reward for up

punishment for right

punishment for right
punishment for left
reward for up

punishment for right
punishment for down

punishment for up
punishment for up

punishment for up

punishment for right
punishment for left
reward for left

reward for action: 8

punishment for right
punishment for right

punishment for down

punishment for up
punishment for left
punishment for right

punishment for down

punis

punishment for down

punishment for left
punishment for up

punishment for right
punishment for down

punishment for down
punishment for right

punishment for down

punishment for right
reward for right

punishment for action: 3

reward for action: 8

punishment for up
reward for right

punishment for action: 9

punishment for action: 5

punishment for left
punishment for down
reward for down

punishment for action: 5

punishment for action: 4

punishment for action: 5
punishment for action: 1

punishment for action: 1

punishment for action: 5

punishment for action: 5

punishment for action: 4

punishment for down
punishment for down

punishment for right
punishment for left
reward for left

punishment for action: 8

punishment for action: 1

punishment for action: 8

punishment for action: 9

punishment for down
punishment for left

punishment for left

punishment for right
punishment for right
punishment for left

punishment for up
punishment for left

punishment for right
reward f

punishment for up

punishment for up

punishment for right
punishment for left

punishment for right

punishment for up
punishment for left
punishment for left

punishment for down

punishment for up
punishment for up
punishment for up

punishment for right
punishment for down

punishment for down

punishment for up
punishment for left
punishment for left
punishment for up

punishment for right
punishment for down

punishment for up
punishment for down

punishment for up

punishment for right
punishment for down

punishment for left
punishment for up

punishment for down
punishment for left

punishment for up
punishment for right
punishment for down

punishment for down
punishment for right
punishment for left
punishment for right
punishment for right

punishment for left

punishment for up

punishment for right
punishment for right

punishment for right

punishment for up
punishment for down

punishment for up
punishment for down

punishment for right

punishment for left
punishment f


reward for left

punishment for up
punishment for right

reward for up

punishment for action: 6

punishment for action: 1
punishment for action: 5

punishment for action: 2

punishment for action: 1

punishment for action: 1

reward for left

reward for action: 6
reward for right

punishment for action: 3

punishment for action: 2

punishment for action: 1

punishment for action: 1
punishment for action: 3

punishment for action: 1

punishment for right

punishment for up

reward for up
punishment for action: 8

reward for action: 2

punishment for right

reward for right
punishment for left

punishment for left
punishment for up

punishment for up
punishment for up
punishment for left
punishment for left

punishment for left
punishment for down
punishment for left

punishment for up

punishment for right
punishment for right
punishment for down

punishment for left
punishment for down

punishment for down

punishment for right
punishment for down

punishment for left
punishment for 

punishment for down
punishment for right

punishment for down

punishment for left
punishment for right

punishment for down
punishment for up
punishment for down
punishment for right
punishment for right

punishment for up

reward for right

punishment for action: 4
punishment for action: 5

reward for action: 7

punishment for down
punishment for right
punishment for up

punishment for up
punishment for down

punishment for right
punishment for right

reward for up

punishment for action: 8
punishment for action: 9

punishment for action: 8

punishment for left

punishment for down
punishment for right

punishment for down
punishment for right
punishment for down

punishment for left
punishment for up
punishment for up

punishment for right
punishment for left
punishment for left

punishment for up

reward for right

punishment for action: 3

punishment for action: 4
punishment for action: 2

punishment for action: 7

punishment for action: 1

punishment for left
punishment for down


punishment for right

punishment for right
punishment for up
punishment for down

punishment for up
punishment for up

punishment for left
punishment for up

punishment for down
punishment for down

punishment for right
punishment for left

punishment for left
punishment for left
punishment for down

punishment for right

punishment for right
punishment for left

punishment for left
reward for left

reward for action: 2
punishment for up

punishment for right
punishment for up

punishment for right

punishment for up

punishment for left
punishment for down
punishment for left

punishment for up

punishment for down
punishment for left

punishment for right
punishment for up

punishment for down
punishment for left
punishment for up

punishment for up

punishment for left

punishment for right
punishment for left

punishment for right
punishment for down

punishment for down
punishment for right
punishment for right

punishment for left
punishment for right

punishment for left

punish

punishment for down

punishment for down

punishment for down
punishment for down
punishment for up
punishment for up

punishment for up
punishment for up

punishment for down
punishment for right

punishment for right

punishment for down
punishment for up
punishment for up
punishment for left

punishment for down
punishment for right

punishment for right

punishment for left
punishment for left
punishment for down
punishment for up
punishment for up
punishment for left
punishment for up
punishment for up

punishment for up
punishment for down

punishment for down
punishment for right

punishment for left
punishment for up
punishment for down
punishment for up
punishment for down

punishment for right

punishment for left
punishment for down
punishment for right
punishment for down

punishment for left
punishment for up
punishment for up
punishment for up
punishment for up

punishment for right

punishment for up

punishment for left
punishment for right

punishment for left
punishme

punishment for up
punishment for down

punishment for left

punishment for up
punishment for right
punishment for right
punishment for down

punishment for down

punishment for right
punishment for down

punishment for down

punishment for up
punishment for up
punishment for down

punishment for up
punishment for down

punishment for right
punishment for left
punishment for right
punishment for right
punishment for right
punishment for down
reward for right

reward for up
punishment for up
reward for up

punishment for action: 2
punishment for action: 6

punishment for action: 7
punishment for down

reward for up
punishment for action: 4

punishment for left

punishment for up
punishment for left

punishment for right
punishment for left
punishment for left
punishment for right
punishment for right

punishment for right

reward for down

reward for action: 1
punishment for right

punishment for down
punishment for up
punishment for down

punishment for left

punishment for left
punishm

punishment for up

reward for left

punishment for action: 3

punishment for right
reward for up

punishment for action: 9

punishment for down
punishment for right

punishment for up
reward for right

punishment for action: 8

punishment for action: 5

reward for right

punishment for action: 7

punishment for action: 6

punishment for action: 7
reward for left

reward for right

punishment for action: 3

reward for left

punishment for action: 2

punishment for action: 3
punishment for action: 9

punishment for action: 9

punishment for action: 2

reward for right

reward for left

punishment for action: 5
punishment for down
punishment for left

punishment for right

reward for down

reward for left
punishment for action: 7

punishment for action: 2

reward for action: 8

punishment for down
reward for left

punishment for right

punishment for up

reward for left
punishment for action: 5

punishment for action: 8

punishment for action: 1

punishment for up

punishment for right
pu


reward for down

reward for up
punishment for up

reward for up

punishment for right
punishment for down

punishment for left

reward for down

punishment for action: 8
punishment for action: 6

punishment for action: 3

punishment for action: 1

punishment for action: 3

reward for action: 5

punishment for up
reward for up

punishment for action: 6

reward for action: 7
punishment for down
punishment for right

reward for down

punishment for action: 8

punishment for left
reward for right

reward for action: 4
punishment for up

punishment for up

punishment for right
punishment for down
punishment for up

punishment for down

reward for right
punishment for action: 5

punishment for action: 6

reward for up

punishment for action: 9
punishment for action: 6

punishment for action: 4

punishment for action: 1

reward for action: 2
punishment for left
punishment for left

punishment for left
punishment for down

punishment for right
punishment for down
reward for right

punishment 

punishment for action: 4

punishment for action: 6

punishment for action: 1

punishment for action: 4

punishment for action: 3

punishment for action: 9

punishment for action: 4
punishment for action: 3

punishment for action: 1

punishment for down

punishment for down
punishment for up

reward for up

punishment for action: 4
punishment for action: 9

punishment for action: 7

punishment for left
reward for right

punishment for action: 9

reward for action: 2
punishment for left

punishment for left

reward for left

punishment for action: 2

punishment for action: 7

punishment for up
reward for up

reward for up

reward for left
punishment for action: 8

punishment for action: 1

punishment for action: 1

punishment for action: 6

punishment for up

punishment for right
punishment for left

punishment for left
reward for down

reward for left
punishment for action: 7

reward for right

punishment for action: 6

punishment for action: 7

reward for right

punishment for action: 

punishment for right
punishment for down

punishment for right
punishment for left
reward for down

reward for right

punishment for action: 9

punishment for action: 9
punishment for action: 3

punishment for action: 8

reward for left

punishment for action: 8

punishment for up

punishment for up
punishment for up
punishment for down

punishment for left
punishment for left

punishment for right

punishment for up
punishment for right

punishment for up
punishment for down

punishment for up

punishment for right

reward for down
punishment for action: 5

punishment for up

punishment for left

punishment for left
punishment for left

reward for left

punishment for down
punishment for down

punishment for left

punishment for right
punishment for right

punishment for up
punishment for right
punishment for down

punishment for right

punishment for up
punishment for left
punishment for right

punishment for left
punishment for right

punishment for down

punishment for right
reward

reward for left

punishment for action: 5

punishment for action: 6

punishment for up
punishment for right

punishment for left
punishment for right
punishment for right
punishment for left
punishment for left

reward for down
punishment for action: 6

punishment for action: 2

punishment for up
reward for down

punishment for right

punishment for up
punishment for right

punishment for down

punishment for down
punishment for up

punishment for up

punishment for down

punishment for left
punishment for right

punishment for right

punishment for left

punishment for right
punishment for up

punishment for left

punishment for left
punishment for right

punishment for right
punishment for left

punishment for right

punishment for down
punishment for right
punishment for up

punishment for down
punishment for up

punishment for down
punishment for up
punishment for down

punishment for left

punishment for down
reward for down

punishment for up

reward for down
punishment for actio

punishment for right

punishment for down

punishment for down

punishment for down
punishment for down

reward for down

punishment for action: 4
reward for action: 5

punishment for left

punishment for right
punishment for up

punishment for down
punishment for right
reward for down

punishment for action: 6

punishment for action: 6

punishment for action: 7

reward for left
reward for left

reward for right

punishment for action: 2

punishment for action: 1

punishment for down
reward for up

punishment for action: 2

punishment for down
reward for up

punishment for up
reward for down

punishment for action: 2

punishment for action: 5

punishment for action: 1

punishment for action: 2

reward for action: 7
punishment for up

punishment for up
punishment for left
punishment for right
reward for right
punishment for action: 5

punishment for action: 3

punishment for action: 2

punishment for action: 2

punishment for action: 6

punishment for left

punishment for down
punishmen

punishment for up

punishment for right

punishment for down
punishment for right

punishment for up

punishment for right
punishment for down

punishment for left
punishment for left

punishment for up
punishment for down

punishment for left
punishment for left

punishment for up

punishment for down
punishment for right
punishment for right

punishment for left

punishment for down

punishment for down
punishment for right

punishment for down
punishment for right
punishment for left
punishment for left
punishment for up
punishment for up

punishment for right
punishment for left
punishment for right
punishment for down

punishment for right
punishment for down

punishment for right
punishment for down
punishment for up

punishment for right
punishment for left

punishment for right

punishment for up

punishment for left
punishment for down

punishment for right
punishment for up
Interval 3 (20000 steps performed)
    1/10000 [..............................] - ETA: 2:52 - reward: -

  343/10000 [>.............................] - ETA: 1:25 - reward: -893.2507
punishment for right

punishment for left

punishment for left
  363/10000 [>.............................] - ETA: 1:25 - reward: -895.7438
punishment for left

punishment for right
  369/10000 [>.............................] - ETA: 1:25 - reward: -896.4607
punishment for left

punishment for down
  375/10000 [>.............................] - ETA: 1:25 - reward: -897.1920
punishment for down

punishment for up

punishment for down
  382/10000 [>.............................] - ETA: 1:25 - reward: -898.0602
punishment for up
  388/10000 [>.............................] - ETA: 1:25 - reward: -898.8093
punishment for left

punishment for down

punishment for right
  394/10000 [>.............................] - ETA: 1:25 - reward: -899.5736
punishment for up

punishment for right

punishment for left
  400/10000 [>.............................] - ETA: 1:25 - reward: -900.3550
punishment for up

punishment for le


punishment for right
  818/10000 [=>............................] - ETA: 1:22 - reward: -956.6553
punishment for left
  828/10000 [=>............................] - ETA: 1:22 - reward: -958.1280
punishment for right

punishment for up
  833/10000 [=>............................] - ETA: 1:22 - reward: -958.8631
punishment for up
  839/10000 [=>............................] - ETA: 1:22 - reward: -959.7426
punishment for up

punishment for left
  851/10000 [=>............................] - ETA: 1:22 - reward: -961.4865
punishment for up

punishment for left
  857/10000 [=>............................] - ETA: 1:22 - reward: -962.3489
punishment for down
  863/10000 [=>............................] - ETA: 1:22 - reward: -963.2121
punishment for up

punishment for down
  869/10000 [=>............................] - ETA: 1:22 - reward: -964.0794
punishment for down
  875/10000 [=>............................] - ETA: 1:22 - reward: -964.9429
punishment for up
  881/10000 [=>.................

 1285/10000 [==>...........................] - ETA: 1:18 - reward: -1027.8016
punishment for right
 1292/10000 [==>...........................] - ETA: 1:18 - reward: -1028.9218
punishment for down

punishment for left
 1299/10000 [==>...........................] - ETA: 1:18 - reward: -1030.0400
punishment for right
 1306/10000 [==>...........................] - ETA: 1:18 - reward: -1031.1508
punishment for left

punishment for down

punishment for left
 1314/10000 [==>...........................] - ETA: 1:18 - reward: -1032.4178
punishment for right

punishment for up
 1321/10000 [==>...........................] - ETA: 1:18 - reward: -1033.5329
punishment for down
 1328/10000 [==>...........................] - ETA: 1:17 - reward: -1034.6378
punishment for up

punishment for up

punishment for left

punishment for right
 1335/10000 [===>..........................] - ETA: 1:17 - reward: -1035.7461
punishment for right

punishment for right
 1342/10000 [===>..........................] - E


punishment for up
 1755/10000 [====>.........................] - ETA: 1:12 - reward: -1102.7322
punishment for right

punishment for up
 1761/10000 [====>.........................] - ETA: 1:12 - reward: -1103.7087
punishment for down

punishment for down
 1767/10000 [====>.........................] - ETA: 1:11 - reward: -1104.6848
punishment for right

punishment for down
 1773/10000 [====>.........................] - ETA: 1:11 - reward: -1105.6599
punishment for up

punishment for down
 1779/10000 [====>.........................] - ETA: 1:11 - reward: -1106.6346
punishment for left

punishment for up
 1787/10000 [====>.........................] - ETA: 1:11 - reward: -1107.9373
punishment for down
 1793/10000 [====>.........................] - ETA: 1:11 - reward: -1108.9091
punishment for up

punishment for right
 1806/10000 [====>.........................] - ETA: 1:11 - reward: -1111.0089
punishment for left

punishment for left

punishment for right

punishment for left
 1814/10000 


reward for right

punishment for action: 1

punishment for action: 4

punishment for up
 2270/10000 [=====>........................] - ETA: 1:07 - reward: -1182.9969
punishment for left

punishment for up
 2277/10000 [=====>........................] - ETA: 1:07 - reward: -1184.0988
punishment for right
 2291/10000 [=====>........................] - ETA: 1:06 - reward: -1186.2955
punishment for down

punishment for right
 2298/10000 [=====>........................] - ETA: 1:06 - reward: -1187.3882
punishment for right

punishment for left

punishment for up
 2305/10000 [=====>........................] - ETA: 1:06 - reward: -1188.4837
punishment for down

punishment for right

punishment for left

punishment for left
 2312/10000 [=====>........................] - ETA: 1:06 - reward: -1189.5822
punishment for left
 2333/10000 [=====>........................] - ETA: 1:06 - reward: -1192.8616
punishment for down

punishment for up
punishment for down

punishment for up

punishment for down

reward for right

punishment for action: 4

punishment for action: 9

punishment for left
punishment for up

punishment for left

punishment for down

punishment for right
punishment for up
punishment for right

punishment for up

punishment for down
punishment for left

punishment for up
punishment for left
punishment for left
reward for down

punishment for action: 2

punishment for right

punishment for down

punishment for left
reward for up

punishment for down

punishment for right

punishment for right
reward for right

punishment for action: 7

punishment for action: 9

punishment for action: 2
punishment for action: 9

punishment for action: 9

punishment for action: 6

punishment for action: 9
punishment for action: 7

punishment for down
punishment for right
punishment for left
punishment for right

punishment for up
reward for right

punishment for action: 5

punishment for action: 9

punishment for action: 3

reward for action: 1
punishment for left

punishment for down

p

punishment for right

reward for down

punishment for action: 5

punishment for action: 5

punishment for action: 2

punishment for right
punishment for right
punishment for right
punishment for up

punishment for down

punishment for right
punishment for up

punishment for left

punishment for up
punishment for down
punishment for left
punishment for right
punishment for right

punishment for up

punishment for up
punishment for up

punishment for right
punishment for up
punishment for left

punishment for down
punishment for right
punishment for down
punishment for down

punishment for up

punishment for left

punishment for up
punishment for right
punishment for left

punishment for down
punishment for right

punishment for down

punishment for left
punishment for left

punishment for left
punishment for up

punishment for right

punishment for right
punishment for up
punishment for left

punishment for right
punishment for up
punishment for up

punishment for left

punishment for u

punishment for up

reward for right
punishment for action: 9

punishment for action: 7

reward for action: 6
punishment for left

reward for up

punishment for action: 9

punishment for action: 9
punishment for right

reward for left

punishment for right

punishment for up

punishment for left
reward for down

punishment for down

punishment for right
punishment for right

punishment for right

punishment for left
punishment for down

reward for left

punishment for action: 6
punishment for action: 5

punishment for action: 4

punishment for action: 6

punishment for action: 1

punishment for left
reward for right

punishment for action: 6

punishment for action: 9

punishment for action: 4

reward for action: 8
punishment for left

reward for down

punishment for action: 6
punishment for up

reward for down
punishment for action: 6

punishment for action: 7

punishment for action: 1

punishment for action: 5
punishment for action: 6

punishment for down

reward for down

punishment f


punishment for right

punishment for up
punishment for left
punishment for right

punishment for down
punishment for up

punishment for right
punishment for down
reward for down

punishment for action: 2

reward for right

reward for left

reward for action: 4
reward for down

punishment for up

punishment for up

reward for right

punishment for action: 9
punishment for action: 4

punishment for action: 3

punishment for up
punishment for up

punishment for down
reward for right

punishment for action: 7

punishment for action: 8

reward for right

reward for action: 3
reward for left
reward for up

punishment for action: 5

punishment for action: 5

punishment for action: 4

reward for action: 6
punishment for left

reward for left

punishment for action: 5

punishment for up
punishment for left

punishment for right

punishment for left
punishment for left
punishment for right

punishment for up

punishment for left
punishment for down

punishment for right
punishment for up

punis


punishment for up
punishment for up

reward for right

reward for action: 7
punishment for down
punishment for right

punishment for left

punishment for up
punishment for down

punishment for up
punishment for down
punishment for left

punishment for down
punishment for right
punishment for left
punishment for left
punishment for right
punishment for up

punishment for left

punishment for right

punishment for down
punishment for right

punishment for up
punishment for down

punishment for right
punishment for left
punishment for right
punishment for left
punishment for right

punishment for left
punishment for up

punishment for left

punishment for up

punishment for up
punishment for right
punishment for down
punishment for up

punishment for down
punishment for down

punishment for right
punishment for down
punishment for up
punishment for down
punishment for up

punishment for down
punishment for left

punishment for up
punishment for up
punishment for up

punishment for right


punishment for down
punishment for right
reward for right

reward for right

punishment for action: 2

punishment for action: 3
reward for action: 1

reward for right

punishment for action: 1

punishment for action: 7
reward for up

reward for down

punishment for action: 5

punishment for down
reward for down

punishment for action: 6

punishment for action: 6

punishment for up

reward for right
punishment for action: 8

reward for right

punishment for action: 3

punishment for action: 4

punishment for action: 9

reward for left

punishment for action: 3
reward for action: 4

reward for right

punishment for action: 6

punishment for action: 6

punishment for action: 5

punishment for action: 3

punishment for action: 4
reward for action: 2

punishment for down

punishment for up
punishment for down

punishment for up
reward for up

reward for action: 9
punishment for left

punishment for down
punishment for up
reward for left

punishment for action: 1

punishment for right
punish

punishment for down

punishment for right
punishment for down
punishment for left
punishment for right
punishment for left

punishment for right
punishment for left

punishment for up
punishment for down
punishment for up

punishment for left

punishment for left
punishment for left
punishment for up

punishment for left

reward for down
reward for action: 7
punishment for right

punishment for up

punishment for left

punishment for left
punishment for left

punishment for right

punishment for right

punishment for left
punishment for down

punishment for left

punishment for right

reward for down
punishment for action: 4

punishment for action: 8

reward for left

punishment for action: 6

reward for action: 1

punishment for up
punishment for down

punishment for up

punishment for down

punishment for up
punishment for left
reward for down
punishment for action: 6

punishment for action: 1

punishment for action: 5

punishment for up
reward for down

reward for action: 8

punishm

punishment for left

punishment for up

punishment for up
punishment for up

punishment for right
punishment for left
reward for up

punishment for left

punishment for left
punishment for right

punishment for down

reward for left

punishment for right
reward for left

punishment for right
punishment for up

punishment for left

punishment for up
punishment for right

punishment for down

punishment for down
reward for left

punishment for down

punishment for down

punishment for up
punishment for right

punishment for left

punishment for right

punishment for right
punishment for right

punishment for up

punishment for up

reward for up

punishment for action: 5
punishment for right

reward for left

punishment for up
punishment for right

punishment for up
punishment for up

punishment for left

punishment for right
punishment for left

punishment for right

punishment for left

punishment for right
punishment for left
punishment for right

punishment for left

punishment for do

punishment for down

punishment for right

punishment for left

punishment for down

punishment for left

punishment for left
punishment for right
punishment for left
punishment for down
punishment for up
punishment for down

reward for down

punishment for action: 5

punishment for action: 1

punishment for action: 5
punishment for right

punishment for down

punishment for left

reward for up
punishment for action: 5

punishment for down

reward for up

punishment for action: 2

punishment for action: 7
punishment for action: 2

punishment for action: 2

punishment for action: 2

punishment for action: 7

punishment for action: 8

punishment for right
punishment for right

punishment for right

punishment for down
punishment for up

punishment for left

punishment for up

punishment for right
punishment for up

punishment for down

punishment for down
reward for right

punishment for action: 4

punishment for action: 3

punishment for action: 4

punishment for action: 2

punishment f

punishment for right
punishment for right

punishment for up

punishment for down
punishment for down

punishment for up
punishment for left

punishment for left
punishment for up

punishment for up
punishment for right

punishment for up

reward for left

punishment for action: 1
punishment for down

punishment for left

punishment for down
punishment for up

punishment for left

punishment for right
punishment for up

punishment for left
punishment for right
punishment for up
punishment for up
punishment for up
punishment for down

punishment for down
punishment for right

reward for down

punishment for action: 3
punishment for action: 4

punishment for action: 6

punishment for action: 6

reward for action: 2

punishment for right
punishment for left

punishment for right

reward for up

punishment for down
punishment for left

punishment for up

punishment for up

punishment for left
punishment for up

punishment for right

punishment for right
punishment for right

punishment for

punishment for left

reward for up

punishment for action: 7

reward for action: 5
punishment for down

punishment for right

punishment for left
punishment for right

punishment for up

punishment for left
punishment for left

punishment for down
punishment for down

punishment for up

punishment for down

punishment for left

punishment for right
punishment for right

punishment for up

punishment for right
punishment for left
punishment for down

punishment for left

punishment for down
punishment for left

punishment for up

punishment for left
punishment for down
punishment for up

punishment for up
punishment for up
punishment for left
punishment for right

punishment for down
reward for right
punishment for right

punishment for right
punishment for up

punishment for right
punishment for down
punishment for left
punishment for left

reward for left

punishment for action: 5
reward for action: 4

reward for up

punishment for action: 1

punishment for right

punishment for right

punishment for up

punishment for up

punishment for right
punishment for right

punishment for down

punishment for right
punishment for right

punishment for down
punishment for left
punishment for right

punishment for right
punishment for up

punishment for left
punishment for left

punishment for right

punishment for left
punishment for up

punishment for up
punishment for right
punishment for left

punishment for down
punishment for up

punishment for down
punishment for right

punishment for right
punishment for left

punishment for up

punishment for left
punishment for left
punishment for left
punishment for right
punishment for right

punishment for left
punishment for left

punishment for right
punishment for down

punishment for up
punishment for right
punishment for down
punishment for right
punishment for up

punishment for left
punishment for right
punishment for right
punishment for down

punishment for down
punishment for up

punishment for left
punishment for up
puni

punishment for right
punishment for right

punishment for down

punishment for left
punishment for down
punishment for left

punishment for up

punishment for right
punishment for left

punishment for right
punishment for right
punishment for down
punishment for left

punishment for up
punishment for up
punishment for down
punishment for up

punishment for left
punishment for right
punishment for left

punishment for right

punishment for right
punishment for left
punishment for down

punishment for down

punishment for up

punishment for up

punishment for left
punishment for right
punishment for right

punishment for left

punishment for right
punishment for left

punishment for right
punishment for right
punishment for left

punishment for left
punishment for right

punishment for down
punishment for right

punishment for up

punishment for left
punishment for right
punishment for right

punishment for left
punishment for down

punishment for left
punishment for right

punishment fo

punishment for up

punishment for left
punishment for down

punishment for left

punishment for right
punishment for right

punishment for left

punishment for right
punishment for down

punishment for up
punishment for left

punishment for left
punishment for left

punishment for right

punishment for left
punishment for left
punishment for right

punishment for up
punishment for left

punishment for left
punishment for right

punishment for left
punishment for left

punishment for left
punishment for left

punishment for right
punishment for left
punishment for down

punishment for down

punishment for right
punishment for up

punishment for down
punishment for left

punishment for right

punishment for up

punishment for down
punishment for up
punishment for up

punishment for down
punishment for right

punishment for left

punishment for up
punishment for left
punishment for down
punishment for up
punishment for down
punishment for down
punishment for down
punishment for down
punis

punishment for left

punishment for up

punishment for down

punishment for up
punishment for down

punishment for right

punishment for left
punishment for down

punishment for up

punishment for right

punishment for left
punishment for down
punishment for up

punishment for right
punishment for left

punishment for down

punishment for right
punishment for down

punishment for right

punishment for down

punishment for right
punishment for up

punishment for down

punishment for down
punishment for down

reward for left

reward for action: 7

solved now: 

[[6 4 3 2 9 8 5 1 7]
 [5 7 2 3 4 1 8 6 9]
 [1 9 8 5 6 7 3 4 2]
 [7 6 5 8 2 3 1 9 4]
 [9 8 1 6 7 4 2 5 3]
 [3 2 4 1 5 9 7 8 6]
 [2 3 9 4 8 5 6 7 1]
 [4 5 6 7 1 2 9 3 8]
 [8 1 7 9 3 6 4 2 5]]
game being played now: 15351

punishment for action: 6
punishment for action: 3

punishment for action: 9

reward for action: 1
punishment for down

punishment for up
punishment for down
punishment for up
punishment for right
punishment for lef

punishment for action: 3

punishment for action: 6

reward for up

punishment for action: 1

punishment for action: 7

reward for right
punishment for action: 5

punishment for action: 1

punishment for action: 8

punishment for down
punishment for down
reward for left

punishment for action: 8

reward for left

punishment for action: 6

punishment for action: 3

reward for down
punishment for down

punishment for down
reward for left

punishment for action: 6

punishment for action: 8

punishment for action: 5

reward for down

reward for up
punishment for action: 3

punishment for action: 6

punishment for action: 8

reward for action: 2
reward for down

punishment for action: 6

punishment for action: 8

punishment for action: 1

punishment for up

punishment for right
reward for down

reward for left

reward for action: 9
reward for right

punishment for action: 5

reward for down

punishment for action: 1

punishment for action: 1

punishment for action: 9
punishment for action: 3

  314/10000 [..............................] - ETA: 1:12 - reward: -188.3535
punishment for up

reward for up
  321/10000 [..............................] - ETA: 1:12 - reward: -189.0966
punishment for action: 6

reward for left

punishment for action: 1

punishment for action: 4

reward for down

punishment for action: 5
  328/10000 [..............................] - ETA: 1:12 - reward: -189.8689
punishment for action: 3

punishment for right

reward for up

punishment for action: 8

punishment for down

reward for up
  335/10000 [>.............................] - ETA: 1:12 - reward: -190.7403
punishment for down

reward for up

punishment for action: 5

punishment for down
  342/10000 [>.............................] - ETA: 1:12 - reward: -191.6111
reward for left

reward for up

punishment for action: 2

reward for down
  349/10000 [>.............................] - ETA: 1:12 - reward: -192.4699
reward for up

reward for action: 6

punishment for left

punishment for left

punishmen

  738/10000 [=>............................] - ETA: 1:09 - reward: -248.0081
punishment for action: 6

punishment for up

reward for left

punishment for right
  746/10000 [=>............................] - ETA: 1:09 - reward: -249.1850
reward for left
  753/10000 [=>............................] - ETA: 1:09 - reward: -250.1952
reward for down

punishment for action: 3

punishment for action: 2

punishment for action: 3

punishment for action: 9

reward for up

punishment for action: 1
  760/10000 [=>............................] - ETA: 1:09 - reward: -251.2171
punishment for action: 1

reward for action: 5

punishment for left
  774/10000 [=>............................] - ETA: 1:09 - reward: -253.2765
punishment for right

punishment for left
  777/10000 [=>............................] - ETA: 1:09 - reward: -253.7130
punishment for down
  784/10000 [=>............................] - ETA: 1:09 - reward: -254.7270
reward for right

punishment for action: 4

punishment for action: 4

p

 1215/10000 [==>...........................] - ETA: 1:06 - reward: -320.0337
punishment for up

punishment for down
 1222/10000 [==>...........................] - ETA: 1:06 - reward: -321.0245
punishment for down

punishment for left
 1229/10000 [==>...........................] - ETA: 1:06 - reward: -322.0138
reward for down

punishment for left

reward for up

punishment for action: 4

reward for action: 1
 1236/10000 [==>...........................] - ETA: 1:06 - reward: -322.9951
punishment for up

punishment for down

punishment for left
 1243/10000 [==>...........................] - ETA: 1:06 - reward: -323.9678
punishment for down

punishment for right

reward for right

punishment for action: 5

punishment for action: 4
 1250/10000 [==>...........................] - ETA: 1:06 - reward: -324.9480
punishment for action: 4

punishment for down
 1257/10000 [==>...........................] - ETA: 1:06 - reward: -325.9499
reward for right

punishment for action: 1

punishment for acti

 1698/10000 [====>.........................] - ETA: 1:02 - reward: -391.0024
punishment for down
 1706/10000 [====>.........................] - ETA: 1:02 - reward: -392.2210
punishment for right

punishment for up

punishment for right
 1713/10000 [====>.........................] - ETA: 1:02 - reward: -393.2855
punishment for left

punishment for up
 1720/10000 [====>.........................] - ETA: 1:02 - reward: -394.3512
punishment for right
 1727/10000 [====>.........................] - ETA: 1:02 - reward: -395.4146
punishment for up

punishment for up

punishment for down
 1734/10000 [====>.........................] - ETA: 1:02 - reward: -396.4787
punishment for down

punishment for down

punishment for up
 1741/10000 [====>.........................] - ETA: 1:02 - reward: -397.5428
punishment for up
 1749/10000 [====>.........................] - ETA: 1:02 - reward: -398.7604
punishment for left

punishment for up
 1756/10000 [====>.........................] - ETA: 1:02 - reward: 

 2147/10000 [=====>........................] - ETA: 59s - reward: -461.6022
punishment for right

punishment for up

punishment for left
 2154/10000 [=====>........................] - ETA: 59s - reward: -462.7493
punishment for down
 2162/10000 [=====>........................] - ETA: 59s - reward: -464.0537
punishment for down

punishment for up
 2169/10000 [=====>........................] - ETA: 59s - reward: -465.1913
punishment for down

punishment for left

punishment for down

punishment for down
 2176/10000 [=====>........................] - ETA: 59s - reward: -466.3323
punishment for left

punishment for up
 2183/10000 [=====>........................] - ETA: 59s - reward: -467.4773
punishment for right

punishment for left

punishment for down
 2190/10000 [=====>........................] - ETA: 59s - reward: -468.6233
punishment for up
 2198/10000 [=====>........................] - ETA: 58s - reward: -469.9286
punishment for left
 2212/10000 [=====>........................] - ET


punishment for action: 1

reward for action: 8
reward for right

punishment for action: 2

reward for action: 7

punishment for left
punishment for left
punishment for up

punishment for up

reward for up
punishment for action: 9

punishment for action: 1

punishment for action: 2

punishment for action: 7

punishment for action: 5

punishment for action: 2

punishment for left
punishment for down

reward for left
punishment for action: 3

punishment for left

punishment for up
punishment for down

punishment for up
punishment for right
reward for down

punishment for right

punishment for up
punishment for up

punishment for down

punishment for left

punishment for left

punishment for down
punishment for down

punishment for down
punishment for left

punishment for right
reward for right

punishment for action: 7
punishment for action: 8

reward for up

punishment for action: 1

punishment for action: 9

reward for down

punishment for action: 3

punishment for action: 3
punishment

punishment for down

punishment for left

punishment for up
punishment for up
punishment for up
punishment for right
punishment for up
punishment for left

punishment for right

punishment for up
punishment for left
punishment for up
punishment for up

punishment for right

punishment for down

punishment for up
punishment for right
punishment for down

punishment for left
punishment for up
punishment for right
punishment for down

punishment for left

punishment for up
punishment for down

punishment for up

punishment for down

punishment for left
punishment for right

punishment for right

punishment for right
punishment for down

punishment for down
punishment for up
punishment for up
punishment for right

punishment for up
punishment for left
punishment for right
punishment for down
punishment for down

punishment for down
punishment for down
punishment for left
punishment for down

punishment for up
punishment for left

punishment for down
punishment for right

punishment for lef

punishment for right

punishment for up
punishment for right
punishment for right

punishment for down

punishment for right
punishment for left

punishment for right
punishment for down

punishment for right
punishment for right
punishment for up

punishment for down
reward for right

punishment for left

punishment for up
punishment for up

punishment for down

punishment for up
reward for left

punishment for action: 3

punishment for action: 3

punishment for action: 4

punishment for left
punishment for down

punishment for right
reward for up

reward for action: 6
punishment for up

punishment for up
punishment for up

punishment for right

punishment for down
punishment for down

punishment for right
punishment for down

reward for down

punishment for action: 4

punishment for action: 9
punishment for action: 6

reward for action: 1

punishment for right

punishment for up
punishment for down
punishment for up
punishment for up
punishment for down
punishment for left

punishmen

punishment for up

punishment for left
punishment for down

punishment for up

punishment for up

punishment for up
punishment for left
punishment for up

punishment for up
punishment for up
punishment for down

punishment for up
punishment for up

punishment for down
punishment for down

punishment for down
punishment for down
punishment for down

punishment for up
punishment for up

punishment for down

punishment for right
punishment for down

punishment for right

punishment for right

punishment for down
punishment for right

punishment for down

punishment for left
punishment for right

punishment for up

punishment for down
punishment for up

punishment for down

punishment for up

punishment for right

punishment for left
punishment for up

punishment for left
punishment for down

punishment for right
punishment for down

punishment for left

punishment for up
punishment for up
punishment for up
punishment for up
punishment for left
punishment for right
punishment for right

pu

punishment for left

punishment for up
punishment for down

punishment for down

punishment for down
punishment for down

punishment for left

punishment for down
punishment for right

punishment for right

punishment for up
punishment for right

punishment for left

punishment for down

punishment for left
punishment for up
punishment for up

punishment for up

punishment for up
punishment for up

punishment for right

punishment for up

punishment for left

punishment for right
punishment for left
punishment for right

punishment for left
punishment for left
punishment for right

punishment for right

punishment for right
punishment for left

punishment for right
punishment for left

punishment for left
punishment for left
punishment for right
punishment for left

punishment for right
punishment for right

punishment for down

punishment for down

punishment for down
punishment for left
punishment for left
punishment for down

punishment for up
punishment for right

punishment for up

punishment for left
punishment for right

punishment for right

punishment for up

punishment for up

punishment for down
punishment for down

punishment for right

punishment for left
punishment for left
punishment for down
punishment for right

punishment for right
punishment for up

punishment for down
punishment for left

punishment for right

punishment for up

punishment for left
punishment for right

punishment for right

punishment for up
punishment for up
punishment for left

punishment for down
punishment for down
punishment for left
punishment for left

punishment for left

punishment for right
punishment for left
punishment for down

punishment for up

punishment for right

punishment for left
punishment for up

punishment for left

punishment for down
punishment for up

punishment for left

punishment for right
punishment for left

punishment for right

punishment for right
punishment for down
punishment for up
punishment for down

punishment for left

punishment for up
pu

punishment for down

punishment for up
punishment for right

punishment for down

punishment for right
punishment for up

punishment for left
punishment for down

punishment for up

punishment for down

punishment for up
punishment for up

punishment for up

punishment for right

punishment for up

punishment for down
punishment for right
punishment for right

punishment for left

punishment for right
punishment for right

punishment for up

punishment for right
punishment for up
punishment for left

punishment for down
punishment for up

punishment for left

punishment for right
punishment for up

punishment for right
punishment for down
punishment for left

punishment for left

punishment for down
punishment for down

punishment for left

punishment for right

punishment for left
punishment for down

punishment for left
punishment for up

punishment for down

punishment for down

punishment for down
punishment for left

punishment for down

punishment for up

punishment for left

pun

punishment for action: 3

punishment for action: 7

reward for down

punishment for action: 8

punishment for action: 1

reward for down

punishment for action: 1
punishment for action: 8

punishment for right
punishment for up

punishment for up

punishment for down

punishment for up
reward for left

punishment for action: 1

punishment for action: 6

reward for down
punishment for action: 2

punishment for action: 6

punishment for action: 7

punishment for action: 2

punishment for action: 2

punishment for action: 3

punishment for action: 7
punishment for action: 8

punishment for action: 9

reward for down

punishment for action: 3

reward for down

reward for action: 7
punishment for down

punishment for down
punishment for right

punishment for left
punishment for up

punishment for up
punishment for right

punishment for down
punishment for up

punishment for left
punishment for right

reward for right

punishment for up

punishment for left
reward for left

punishment for ac

punishment for right

punishment for left
reward for left

punishment for action: 3

punishment for action: 9

punishment for action: 9

reward for action: 1
reward for left

punishment for action: 4

punishment for action: 1

punishment for action: 9

punishment for left

reward for right

punishment for action: 6
punishment for right

reward for up

punishment for action: 8

reward for action: 2

punishment for left

punishment for up
punishment for down

reward for down

punishment for action: 5

punishment for action: 3
punishment for action: 3

punishment for action: 4

punishment for action: 5

punishment for action: 5

punishment for action: 5

punishment for right
reward for left

punishment for action: 4

punishment for up

reward for down

punishment for left
reward for right

reward for action: 8
punishment for right

punishment for left
punishment for up

punishment for right
punishment for up

punishment for right

punishment for right
punishment for up

reward for right



punishment for left

punishment for right
punishment for left

punishment for right
punishment for down

punishment for right

punishment for left
punishment for right

punishment for left
punishment for left

punishment for right
punishment for down

punishment for right
punishment for down

reward for left
punishment for action: 4

reward for action: 2

punishment for up
punishment for right

punishment for up

punishment for right

reward for right

punishment for action: 9

punishment for action: 3
punishment for right

reward for left

reward for down

punishment for action: 6

punishment for action: 8

punishment for action: 4
punishment for down

punishment for right

reward for down

punishment for left

punishment for left

punishment for right
reward for right

punishment for action: 5

punishment for action: 8

punishment for action: 8

punishment for action: 7
punishment for action: 2

reward for action: 9

punishment for up

punishment for left

punishment for left
punishm

punishment for down

punishment for down
punishment for right

punishment for right
punishment for left

punishment for up
punishment for right
punishment for left
punishment for left

punishment for up

punishment for right
punishment for left

punishment for right
punishment for right

punishment for right
punishment for up

reward for up

punishment for down
punishment for down

punishment for down
punishment for up

punishment for up

punishment for right
punishment for up

reward for left

punishment for action: 4
punishment for action: 2

punishment for left

punishment for up
reward for right

punishment for action: 2

punishment for action: 8

punishment for action: 8

punishment for action: 7

punishment for left
punishment for left

punishment for right

punishment for down

punishment for down
punishment for up

reward for right

punishment for action: 5
punishment for action: 4

punishment for right

punishment for up

punishment for up
reward for left

reward for left

rew

punishment for left

punishment for left
punishment for down

punishment for right

punishment for left

punishment for up
punishment for down

punishment for up

reward for up

punishment for right
punishment for up

reward for up

reward for action: 2
punishment for down

punishment for down
punishment for right

punishment for up

punishment for left
punishment for down

punishment for up

punishment for right
punishment for left

punishment for right

punishment for right

punishment for left

punishment for up
punishment for up
reward for up

punishment for action: 7

punishment for right
punishment for down
punishment for up

reward for left

punishment for right
reward for left

punishment for right

punishment for down
punishment for left

reward for up

reward for up

punishment for action: 1

punishment for left

punishment for left
punishment for down
punishment for left

punishment for down
punishment for left

reward for up

punishment for action: 2

punishment for down

p

punishment for down

punishment for left

punishment for right
punishment for left

punishment for up

punishment for left

punishment for up
punishment for up

punishment for left
punishment for down

punishment for right
punishment for left

punishment for up
punishment for down

punishment for up
punishment for down

punishment for up
punishment for down
punishment for up
punishment for right
punishment for down

punishment for right
punishment for down

punishment for up
punishment for down

punishment for left
punishment for right

punishment for down

punishment for right
punishment for right

punishment for down
punishment for down

punishment for right

punishment for up

punishment for down

punishment for left
punishment for up

punishment for right

punishment for right
punishment for down

punishment for left
punishment for right
punishment for right

punishment for left
punishment for down

punishment for left

punishment for down
punishment for right

punishment for left


punishment for up

punishment for down

punishment for left
punishment for down

punishment for up

punishment for right

punishment for down
punishment for right
reward for right

punishment for action: 9

punishment for action: 4

punishment for right
punishment for down

punishment for down
punishment for down

punishment for down

reward for right
punishment for up

reward for down

punishment for up

punishment for left
punishment for right

punishment for left

punishment for up

punishment for up
punishment for left

punishment for right

punishment for right

punishment for left
punishment for right

punishment for down
punishment for up

punishment for up

punishment for left
punishment for up
punishment for down

punishment for down

punishment for left
punishment for down

punishment for right
punishment for left
punishment for right
punishment for up

punishment for up

punishment for right
punishment for down

punishment for down
punishment for left

punishment for down
pu

punishment for action: 2

punishment for action: 2

punishment for action: 4

punishment for action: 5

reward for action: 7

punishment for down
punishment for left

punishment for up

punishment for up
punishment for down

punishment for left

punishment for down
punishment for down

punishment for down
punishment for down

punishment for down

punishment for left
punishment for down

punishment for down

punishment for right
punishment for left
punishment for up

punishment for down

punishment for right
punishment for up

punishment for down
punishment for right
punishment for left

punishment for left
punishment for left
punishment for right
punishment for right

punishment for right
punishment for up

punishment for up
punishment for right

punishment for right

punishment for left
punishment for left
punishment for down

punishment for up

punishment for up
punishment for left
punishment for left
punishment for right
punishment for down

punishment for up
punishment for down
pun

punishment for left

punishment for right

punishment for up

punishment for right
punishment for down

punishment for up
punishment for right

punishment for left
punishment for up

punishment for down

punishment for right
punishment for down

punishment for up
punishment for left

punishment for left
punishment for up

punishment for right

punishment for left
punishment for up
punishment for up

punishment for down
punishment for left

punishment for left

punishment for down

punishment for left

punishment for up
punishment for down

punishment for up

punishment for up
punishment for right
Interval 5 (40000 steps performed)
    1/10000 [..............................] - ETA: 1:58 - reward: -1285.0000
punishment for left

punishment for right

punishment for right
    8/10000 [..............................] - ETA: 1:19 - reward: -1286.5000
punishment for down

punishment for down

punishment for left
   15/10000 [..............................] - ETA: 1:17 - reward: -1287.8000
p

  471/10000 [>.............................] - ETA: 1:14 - reward: -1339.4183
punishment for down
  478/10000 [>.............................] - ETA: 1:14 - reward: -1340.2657
punishment for right
  485/10000 [>.............................] - ETA: 1:14 - reward: -1341.1093
punishment for up
  492/10000 [>.............................] - ETA: 1:14 - reward: -1341.9370
punishment for left
  499/10000 [>.............................] - ETA: 1:13 - reward: -1342.7615
punishment for up

punishment for up
  506/10000 [>.............................] - ETA: 1:13 - reward: -1343.5830
punishment for down

punishment for right
  513/10000 [>.............................] - ETA: 1:13 - reward: -1344.3996
punishment for down

punishment for left
  520/10000 [>.............................] - ETA: 1:13 - reward: -1345.2250
punishment for up

punishment for down

punishment for up
  527/10000 [>.............................] - ETA: 1:13 - reward: -1346.0664
punishment for up
  534/10000 [>.........

  997/10000 [=>............................] - ETA: 1:09 - reward: -1401.8526
punishment for down

punishment for down
 1004/10000 [==>...........................] - ETA: 1:09 - reward: -1402.7161
punishment for left

punishment for right

punishment for left
 1018/10000 [==>...........................] - ETA: 1:09 - reward: -1404.4538
punishment for up

punishment for left

punishment for right
 1025/10000 [==>...........................] - ETA: 1:09 - reward: -1405.3229
punishment for left

punishment for left

punishment for right

punishment for right
 1032/10000 [==>...........................] - ETA: 1:09 - reward: -1406.2025
punishment for down
 1039/10000 [==>...........................] - ETA: 1:09 - reward: -1407.0876
punishment for up

punishment for right
 1046/10000 [==>...........................] - ETA: 1:09 - reward: -1407.9685
punishment for left

punishment for up
 1053/10000 [==>...........................] - ETA: 1:09 - reward: -1408.8547
punishment for up

punishme

 1516/10000 [===>..........................] - ETA: 1:05 - reward: -1471.2962
punishment for right
 1524/10000 [===>..........................] - ETA: 1:05 - reward: -1472.3563
punishment for left

punishment for down

punishment for down

punishment for up
 1531/10000 [===>..........................] - ETA: 1:05 - reward: -1473.2900
punishment for left
 1538/10000 [===>..........................] - ETA: 1:05 - reward: -1474.2237
punishment for up

punishment for down
 1566/10000 [===>..........................] - ETA: 1:04 - reward: -1477.9163
punishment for down

punishment for down
 1573/10000 [===>..........................] - ETA: 1:04 - reward: -1478.8258
punishment for left

punishment for left
 1580/10000 [===>..........................] - ETA: 1:04 - reward: -1479.7348
punishment for left

punishment for right
 1587/10000 [===>..........................] - ETA: 1:04 - reward: -1480.6440
punishment for down

punishment for left

punishment for right

punishment for left
 1602/1


punishment for up
 2051/10000 [=====>........................] - ETA: 1:00 - reward: -1542.6290
punishment for right
 2058/10000 [=====>........................] - ETA: 1:00 - reward: -1543.5855
punishment for right

punishment for left
 2065/10000 [=====>........................] - ETA: 1:00 - reward: -1544.5390
punishment for down

punishment for left
 2072/10000 [=====>........................] - ETA: 1:00 - reward: -1545.4932
punishment for down

punishment for down

punishment for right
 2079/10000 [=====>........................] - ETA: 1:00 - reward: -1546.4497
punishment for left

punishment for right
 2087/10000 [=====>........................] - ETA: 1:00 - reward: -1547.5443
punishment for up

punishment for left

punishment for up

punishment for down
 2094/10000 [=====>........................] - ETA: 1:00 - reward: -1548.5067
punishment for up

punishment for right

punishment for right
 2101/10000 [=====>........................] - ETA: 1:00 - reward: -1549.4736
punishm

punishment for action: 6

punishment for action: 2

punishment for action: 3

punishment for action: 1

punishment for action: 1

punishment for action: 3
punishment for action: 2

reward for action: 9
punishment for right

punishment for left

punishment for right
reward for down

reward for action: 1

punishment for up
punishment for left
punishment for left

reward for down

punishment for action: 8

punishment for action: 5

reward for action: 3
punishment for down

punishment for up
reward for right

punishment for down

reward for up

punishment for left

reward for left

punishment for action: 4
punishment for action: 2

reward for action: 7

punishment for up

punishment for down
punishment for down

punishment for up
punishment for up
punishment for right

punishment for right
punishment for left

punishment for down

punishment for left

punishment for up
punishment for down

punishment for down

reward for down
punishment for action: 5

reward for action: 6
punishment for do

punishment for down

punishment for right
punishment for down

punishment for up
punishment for down

reward for down

punishment for up

reward for right

punishment for action: 5
punishment for action: 3

punishment for action: 8

punishment for up
punishment for left
punishment for down

reward for down

punishment for action: 7

punishment for action: 2
reward for action: 9

reward for down

punishment for action: 5

punishment for action: 9

reward for action: 4
punishment for left
punishment for right
punishment for up
punishment for up

punishment for up
punishment for right
punishment for up

punishment for down

punishment for up
reward for left

punishment for right

reward for up

punishment for action: 8

punishment for action: 9
punishment for action: 2

punishment for action: 6

punishment for action: 6

reward for right

punishment for action: 9

reward for action: 7
reward for left

punishment for down

punishment for right

punishment for up
punishment for down
reward 

punishment for right

punishment for right

punishment for left
punishment for up
punishment for left
punishment for right

punishment for left
reward for down

punishment for action: 4
punishment for action: 7

punishment for up

punishment for right

punishment for left

punishment for right
punishment for down

punishment for right

punishment for left
punishment for up

punishment for left
punishment for right

punishment for right

punishment for down
punishment for up

punishment for up

punishment for down

punishment for up

punishment for up
punishment for down

punishment for down
punishment for right
reward for down

punishment for action: 8
reward for action: 3

punishment for right

punishment for left
punishment for left
punishment for left
punishment for up

punishment for down
punishment for up

punishment for right
punishment for right
punishment for down
punishment for up

punishment for right
punishment for up

punishment for right
punishment for up

punishment for d

punishment for down

punishment for up

punishment for up
punishment for up

punishment for down
punishment for down

punishment for down
punishment for up

punishment for right

punishment for down

punishment for left
punishment for up

punishment for up

punishment for up
punishment for right

punishment for left

punishment for down
punishment for right

reward for right

punishment for action: 4

punishment for action: 3
punishment for action: 1

punishment for action: 1

punishment for action: 8

punishment for action: 5

punishment for action: 9

punishment for action: 5

punishment for action: 6
punishment for action: 5

punishment for action: 6

punishment for action: 5

punishment for action: 4

punishment for action: 5

punishment for left

punishment for left
punishment for down

punishment for right

punishment for left
punishment for up

punishment for right

punishment for left
punishment for right

punishment for down
punishment for right

punishment for left
punishment


reward for up

punishment for left
punishment for left

punishment for left
punishment for up
punishment for left

punishment for up
punishment for left

punishment for right
punishment for left
punishment for right

punishment for left
reward for down

punishment for down

reward for up

punishment for action: 5
reward for action: 2

punishment for right

punishment for up
punishment for left

punishment for right
punishment for left
punishment for left
punishment for left
punishment for down

punishment for up
punishment for down

punishment for up

punishment for right
punishment for down
punishment for right

punishment for up

punishment for left
punishment for left
punishment for down

punishment for down
punishment for up
punishment for up

punishment for right
punishment for left

punishment for right
punishment for right
punishment for down
punishment for up
punishment for down
punishment for left
punishment for right

punishment for left

punishment for left

punishment for 

punishment for up
punishment for right

punishment for right
punishment for right

punishment for left
punishment for right
punishment for left

reward for up

punishment for down

reward for up
punishment for action: 5

punishment for action: 9

punishment for action: 5

reward for right
punishment for action: 3

reward for left

punishment for action: 6

punishment for left

punishment for up
reward for right

reward for down
reward for up

punishment for left

punishment for down
punishment for down
punishment for up

punishment for up
reward for right

punishment for action: 3

punishment for right
reward for left

punishment for up
reward for right

punishment for down
reward for left

reward for down

punishment for down
punishment for right

reward for up

punishment for up

reward for down

punishment for action: 5

punishment for action: 6
punishment for action: 2

reward for left

punishment for action: 3

punishment for action: 8

punishment for left
punishment for up
punish

punishment for up

punishment for up

punishment for up

punishment for up
punishment for right

punishment for up
punishment for up

punishment for right
punishment for down

reward for down
punishment for right

punishment for right
punishment for right

punishment for right

punishment for down

punishment for down

punishment for up
punishment for right
punishment for down

punishment for up
punishment for left

punishment for up

punishment for down
punishment for left

punishment for up
punishment for up

punishment for left
punishment for up
punishment for down
punishment for right

punishment for left

punishment for up
punishment for right

punishment for up

punishment for down
punishment for down

punishment for down

punishment for left
punishment for right
punishment for up

punishment for right
punishment for down
punishment for right

reward for up

punishment for action: 8

reward for right
punishment for action: 4

reward for action: 9

reward for left

punishment for 

punishment for down

punishment for down
punishment for down

punishment for left
punishment for down
punishment for left
punishment for down

punishment for right
punishment for left
punishment for down
punishment for up

punishment for up

punishment for left
reward for up

punishment for action: 7

punishment for action: 5

punishment for action: 3

punishment for left
reward for right

punishment for action: 5

punishment for action: 9

punishment for action: 2

punishment for up
punishment for left
punishment for left

punishment for up
punishment for up
punishment for down
punishment for up
punishment for right

punishment for left
punishment for down
punishment for up
punishment for right
punishment for down
punishment for down

punishment for left

punishment for up
punishment for down
punishment for right
punishment for up

punishment for up
punishment for down

punishment for right
punishment for up
punishment for left
punishment for right
punishment for left
punishment for d

punishment for up
punishment for left

punishment for up
punishment for down
punishment for left
punishment for up
punishment for right
punishment for up

punishment for left
punishment for right

punishment for up

reward for right

punishment for action: 9

punishment for action: 4

punishment for action: 9
punishment for down

reward for up

reward for action: 7
punishment for down

punishment for up
punishment for left
punishment for right
punishment for down

punishment for down

punishment for down
punishment for down
punishment for left
punishment for down

punishment for right

punishment for down

punishment for left
punishment for up
punishment for down

punishment for right
punishment for down
punishment for left
reward for down

punishment for action: 1
reward for right

punishment for up

punishment for left
punishment for left

punishment for right
punishment for left

punishment for left

punishment for right

punishment for up
punishment for right

punishment for up

pu

punishment for left

punishment for up

punishment for right

punishment for right
punishment for down

punishment for up
punishment for left

punishment for up

punishment for up

punishment for up
punishment for left

punishment for up
punishment for right

punishment for right

punishment for down
punishment for left

punishment for down

punishment for down
punishment for left
punishment for left
punishment for up

punishment for left
punishment for up
reward for left

punishment for left

punishment for down
punishment for right

punishment for down

punishment for right

punishment for right
punishment for down
punishment for down
punishment for up

punishment for right
punishment for up
punishment for right
punishment for up
punishment for right

punishment for down
punishment for right
punishment for up
punishment for up
punishment for left

punishment for right
punishment for up

punishment for up
punishment for left

punishment for right
punishment for left

punishment for le

punishment for down

punishment for left

punishment for down
punishment for down
punishment for down
punishment for up

punishment for up
punishment for down

punishment for down
punishment for up

punishment for down
punishment for down

punishment for right
punishment for down

punishment for left

punishment for down
punishment for down
punishment for up

punishment for up
punishment for down

punishment for down
punishment for right

punishment for up
punishment for left

punishment for up
punishment for down
punishment for up

punishment for up
punishment for down
punishment for up
punishment for up
punishment for down
punishment for right

punishment for up

punishment for left

punishment for down
punishment for down
punishment for down
punishment for down

punishment for right
punishment for left

punishment for right
punishment for right
punishment for left
punishment for left
punishment for right

punishment for left
punishment for right

punishment for up
punishment for dow

punishment for up
punishment for left

punishment for up
punishment for right
punishment for down
punishment for left

punishment for left
punishment for up
punishment for right

punishment for right
punishment for left

punishment for left

punishment for left
punishment for left
punishment for right
punishment for down
punishment for down
punishment for up

punishment for right
punishment for left
punishment for down
punishment for up

punishment for right
punishment for left

punishment for down
punishment for right
punishment for right

punishment for down

punishment for right
punishment for down

punishment for down

punishment for left
punishment for down
punishment for left
punishment for up

punishment for down
punishment for left

punishment for up

punishment for down
punishment for up
punishment for right

punishment for right
punishment for up
punishment for up
punishment for left

punishment for down
punishment for right

punishment for right
punishment for up

punishment

reward for up

punishment for up

punishment for up
punishment for down

punishment for right
punishment for left

punishment for right
punishment for down

punishment for up
punishment for left

punishment for up

punishment for right
punishment for up

punishment for right

punishment for left

punishment for down
punishment for left
punishment for down

reward for down

punishment for action: 5

punishment for right
punishment for right

punishment for up
punishment for up

punishment for left

punishment for right

punishment for left
punishment for left

punishment for left

punishment for up
punishment for down
punishment for left

punishment for down
punishment for down
punishment for up

punishment for right
punishment for right
punishment for left
punishment for down

punishment for left
punishment for down
punishment for down

punishment for down
punishment for up

punishment for right
punishment for right

punishment for down

punishment for up

punishment for up
reward for 

punishment for right
punishment for down
punishment for right
punishment for left

punishment for up
punishment for left

punishment for up
punishment for right

punishment for left
punishment for left

punishment for down

punishment for up

punishment for up
punishment for up

punishment for up
punishment for right
punishment for left

punishment for left
punishment for left

punishment for up
punishment for down

punishment for left
punishment for right
punishment for left

punishment for left

punishment for right

punishment for up
punishment for up
punishment for left

punishment for right

punishment for up
punishment for down
punishment for down

punishment for down
punishment for right

punishment for down

punishment for up

punishment for down
punishment for up
punishment for down

punishment for up
punishment for down

punishment for right

punishment for up

punishment for right
punishment for right
punishment for down
punishment for up
punishment for right
punishment for 

punishment for left

punishment for down

punishment for down
punishment for down
punishment for left

punishment for up

punishment for left
punishment for right

punishment for up

punishment for left
punishment for down

punishment for right
punishment for up

punishment for down
punishment for up

punishment for down

punishment for down
punishment for left
punishment for left

punishment for right

punishment for down

punishment for down
punishment for left

punishment for up

punishment for down
punishment for down

punishment for up

punishment for right
punishment for down

punishment for up

punishment for down

punishment for left
punishment for left
punishment for left
punishment for left

punishment for up
punishment for left
punishment for right

punishment for down

punishment for right
punishment for up
punishment for down

punishment for left
punishment for left
punishment for up
punishment for up

punishment for down
punishment for right

punishment for left
punishmen

reward for down

punishment for action: 9

punishment for action: 3

punishment for action: 7

punishment for action: 8

punishment for action: 4

reward for action: 1

solved now: 

[[4 6 9 2 3 8 5 1 7]
 [7 3 2 1 9 5 6 8 4]
 [5 8 1 7 4 6 3 2 9]
 [6 2 4 9 5 3 1 7 8]
 [8 9 5 6 1 7 4 3 2]
 [1 7 3 4 8 2 9 5 6]
 [3 4 6 8 7 1 2 9 5]
 [2 1 7 5 6 9 8 4 3]
 [9 5 8 3 2 4 7 6 1]]
game being played now: 79078
punishment for down

reward for right

punishment for action: 9

reward for down

punishment for action: 5
punishment for action: 2

punishment for action: 3

reward for action: 7

punishment for down
reward for right

punishment for action: 9

punishment for action: 6

punishment for action: 2

punishment for action: 1

reward for action: 5
punishment for up

reward for right
punishment for action: 7

punishment for down

reward for up

punishment for action: 4
punishment for action: 1

punishment for action: 9

punishment for action: 6

punishment for action: 8

punishment for action: 9

p


reward for up

reward for action: 6
punishment for left

reward for down
done, took 423.755 seconds


<keras.callbacks.History at 0x7f1867c15490>

In [9]:
dqn.save_weights('dqn_weights.h5f', overwrite=True)

In [10]:
import numpy as np
q_table = np.zeros([1000, env.action_space.n])

https://blog.paperspace.com/getting-started-with-openai-gym/

https://github.com/openai/gym/blob/master/gym/spaces/box.py

https://lilianweng.github.io/lil-log/2018/05/05/implementing-deep-reinforcement-learning-models.html