In [21]:
#### Imports
import math
import random
import numpy as np
from tkinter import *

## Game Environment Developmet

In [22]:
random.seed()
class Game:
    # This is the main game class that is used by both the GUI module and the AI module
    def __init__(self, N=4, DownSideRatio=3, SleepTime=5, R=15, r=5, Height=400, Halfwidth=200,
                 GlobalHeight=600, GlobalWidth=800, Thickness=15, RandomTreshold=0.2, RandomStep=1,
                 RandomVertTreshold=0.2, RandomVertStep=1, MaxScore=None):
        
        #Initialized Variables 
        
        self.N = N     # number of falling objects
        self.DownSideRatio = DownSideRatio     # ratio fall speed/left-right speed (integer)
        self.SleepTime = SleepTime     # delay time between steps, game is progressing slower for higher values
        self.R = R     # radius of the blue half circle
        self.r = r     # radius of the falling objects
        self.treshold = (R +r/2)**2      # treshold to indicate the contact for game over
        self.Height = Height     # height of the white structure
        self.Halfwidth = Halfwidth     # half width of the white structure
        self.GlobalHeight = GlobalHeight     # height of the game window
        self.GlobalWidth = GlobalWidth     # width of the game window
        self.Thickness = Thickness     # thickness of the white walls
        

        self.RandomTreshold = RandomTreshold     # probability of left/right noise for falling objects
        self.RandomStep = RandomStep     # intensity of left/right noise for falling objects
        self.RandomVertTreshold = RandomVertTreshold     # probability of up/down noise for falling objects
        self.RandomVertStep = RandomVertStep     # intensity of up/down noise for falling objects
        

        self.MaxScore = MaxScore     # Maximum Score before terminating the game (None for infinity)
        self.retry=False
        self.Direction = random.choice(['L','R'])     # setting the initial direction
        self.steps, self.counter = 0, 0     # total pixel moves (time) and total score
        self.asteroids = []     # relative coordinates of the falling objects
       
        for i in range(N):         # initialize the falling objects' coordinates
            t = random.random()
            if t < 0.5:
                x = (-1)*(Halfwidth + R)/2 - t*(Halfwidth-R)
            else:
                x = (Halfwidth + R)/2 + (t-0.5)*(Halfwidth-R)
            self.asteroids.append([x,2*Height/3+(i+1)*Height/(3*N)])
   

    def ChangeDirection(self, direction):
        if direction == 'L':
            self.Direction = 'L'
        if direction == 'R':
            self.Direction = 'R'

            
    def GameOver(self):
        # Testing for game over
        
        for aster in self.asteroids:
            if aster[0]**2 + aster[1]**2 < self.treshold:
                return True
        if not self.MaxScore == None and self.counter >= self.MaxScore:
            return True
        return False

    
    def Destroy(self):
        # updating falling objects when one gets destroyed (and testing for that scenario)
        Kill = False
        for i in range(self.N):
            if self.asteroids[i][0] <= (-1)*self.Halfwidth or self.asteroids[i][0] >= self.Halfwidth or self.asteroids[i][1] <= 0:
                Kill = True
                self.asteroids.pop(i)
                self.asteroids.append([(2*random.random()-1)*self.Halfwidth, self.Height])
                self.counter += 1
        return Kill


    def UpdateStep(self):
        # Updating locations of falling objects at pixel moves.
        # Returns triple to determine:
            # 1. whether one needs to force refresh the screen due to destroyed objects or because noise was added;
            # 2. whether a falling object was destroyed and one point obtained;
            # 3. whether the game is over;
        self.steps += 1
        for i in range(self.N):          
            self.asteroids[i][1] -= self.DownSideRatio
            if self.Direction == 'L':
                self.asteroids[i][0] += 1
            if self.Direction == 'R':
                self.asteroids[i][0] -= 1

        Update = False
        for i in range(self.N):          #  Adding the noise to falling objects
            t1, t2 = random.random(), random.random()
            if t1 < self.RandomTreshold/2:
                Update = True
                self.asteroids[i][0] += self.RandomStep
            elif t1 < self.RandomTreshold:
                Update = True
                self.asteroids[i][0] -= self.RandomStep
            if t2 < self.RandomVertTreshold/2:
                Update = True
                self.asteroids[i][1] += self.RandomVertStep
            elif t2 < self.RandomVertTreshold:
                Update = True
                self.asteroids[i][1] -= self.RandomVertStep

        Kill = self.Destroy()
        if Kill:
            Update = True
        Over = self.GameOver()
        return (Update, Kill, Over)

## Selection Screen

In [23]:
class Choose:
    # Window to chose Human/AI player
    def __init__(self):
        self.master = Tk()
        self.master.title('Obstacle Avoiding AI')
        self.frame = Frame(self.master)
        self.frame.pack()

        howmany_options, howfast_options = [6,5,4], ['fast', 'medium', 'slow']
        self.howmany, self.howfast = IntVar(), StringVar()
        self.howmany.set(howmany_options[0])
        self.howfast.set(howfast_options[0])
        self.menu1=OptionMenu(self.master,self.howmany,*howmany_options)
        self.menu2=OptionMenu(self.master,self.howfast,*howfast_options)
        Label(font=("Purisa", 13),text="Number of objects").pack()
        self.menu1.pack()
        Label(font=("Purisa", 13),text="Game speed").pack()
        self.menu2.pack()

        Label(font=("Purisa", 13),text="Player").pack()
        self.button1 = Button(self.master, font=("Purisa", 12), text = "Human", command=self.human)
        self.button2 = Button(self.master, font=("Purisa", 12), text = "AI pre train", command=self.dumb_ai)
        self.button3 = Button(self.master, font=("Purisa", 12), text = "AI post train", command=self.ai)
        self.button1.pack(side=LEFT)
        self.button2.pack(side=LEFT)
        self.button3.pack(side=LEFT)

        self.who = None

        
    def human(self):
        self.who = 'human'
        self.master.destroy()


    def dumb_ai(self):
        self.who = 'dumb_ai'
        self.master.destroy()


    def ai(self):
        self.who = 'ai'
        self.master.destroy()

## Super Class Play for both human and AI

In [24]:
class Play:
    # General class implementing common features for both Human and AI player
    def __init__(self, GameParameters):        
        self.game = Game(**GameParameters)
        self.x = self.game.GlobalWidth/2
        self.y = self.game.GlobalHeight - self.game.Thickness

        self.master = Tk()
        self.master.title('Obstacle Avoiding AI')
        self.canvas=Canvas(self.master, bg="black", width=self.game.GlobalWidth, height=self.game.GlobalHeight)
        self.canvas.pack()

        # Next draw permanent and variable (falling) game objects. Draw three copies of everything for smooth screen wrapping. 
        
        for i in range(15):  
              
            cx, cy = self.x+(i-1)*self.game.GlobalWidth, self.y
            self.canvas.create_oval(cx-self.game.R, cy+self.game.R, cx+self.game.R, cy-self.game.R, fill="blue",
                                    width = 0, tag = 'S')
            self.canvas.create_rectangle(cx-self.game.Halfwidth-self.game.Thickness-self.game.r, cy,
                                         cx+self.game.Halfwidth+self.game.Thickness+self.game.r,
                                         cy+self.game.Thickness, fill="white", width = 0, tag = 'S')
            self.canvas.create_rectangle(cx-self.game.Halfwidth-self.game.Thickness-self.game.r, cy-self.game.Height,
                                         cx-self.game.Halfwidth-self.game.r, cy+self.game.Thickness,
                                         fill="white", width = 0, tag = 'S')
            self.canvas.create_rectangle(cx+self.game.Halfwidth+self.game.r, cy-self.game.Height,
                                         cx+self.game.Halfwidth+self.game.Thickness+self.game.r, cy+self.game.Thickness,
                                         fill="white", width = 0, tag = 'S')
            
            

        for aster in self.game.asteroids:
            for i in range(3):
                cx, cy = self.x+(i-1)*self.game.GlobalWidth + aster[0], self.y - aster[1]
                self.canvas.create_oval(cx-self.game.r, cy+self.game.r, cx+self.game.r, cy-self.game.r,
                                        fill="white", width = 0, tag = 'A')            

        self.canvas.focus_set()

        
    def RunStep(self):
        # Moves objects on the screen
        if self.game.Direction == 'L':
            self.canvas.move('S', -1, 0)
            self.x -= 1
            if self.x < (-1)*self.game.GlobalWidth/2:
                self.x += self.game.GlobalWidth
                self.canvas.move('S', self.game.GlobalWidth, 0)     

        if self.game.Direction == 'R':
            self.canvas.move('S', 1, 0)
            self.x += 1
            if self.x > 3*self.game.GlobalWidth/2:
                self.x -= self.game.GlobalWidth
                self.canvas.move('S', (-1)*self.game.GlobalWidth, 0)



        self.canvas.move('A', 0, self.game.DownSideRatio)
        (Update, Kill, Over) = self.game.UpdateStep()
        if Update:
            self.canvas.itemconfig(self.text_id, text="             Score: "+str(self.game.counter))
            self.canvas.delete('A')
            
            for aster in self.game.asteroids:
                for i in range(3):
                    cx, cy = self.x+(i-1)*self.game.GlobalWidth + aster[0], self.y - aster[1]
                    self.canvas.create_oval(cx-self.game.r, cy+self.game.r, cx+self.game.r, cy-self.game.r,
                                            fill="red", width = 0, tag = 'A')
        self.master.update()
        self.master.after(self.game.SleepTime)
        return Over

## Human Class

In [25]:
class PlayHuman(Play):

    # Play subclass for a human player
    def __init__(self, GameParameters):
        Play.__init__(self, GameParameters)
            
        self.text_id = self.canvas.create_text(0,0,anchor="nw",fill="white",font=("Purisa", 14),
                                               text="         Space to start, Left/Right to change direction",tag='Text')
        self.canvas.bind("<Left>", self.left)
        self.canvas.bind("<Right>", self.right)
        self.canvas.bind("<space>", self.Run)            
            
                   
    def left(self, event):
        self.game.ChangeDirection('L')

        
    def right(self, event):
        self.game.ChangeDirection('R')

        
    def Run(self, event):
        Over = False
        while not Over:
            Over = Play.RunStep(self)
            
            if(Over):
                self.master.after(2000,self.master.config(bg='white'))
                self.master.after(100, self.master.destroy())
                app=PlayHuman(GameParameters)
                app.master.mainloop()
                print( "Human Score:", app.game.counter)
                

## Pre and Post Trained AI

In [31]:
class PlayAI(Play):
    # Play subclass for an AI player
    def __init__(self, Theta1, Theta2, GameParameters):
        Play.__init__(self, GameParameters)
        self.bot = Bot(Theta1, Theta2, self.game)

        self.text_id = self.canvas.create_text(0,0,anchor="nw",fill="white",font=("Purisa", 14),text="                     Space to start",tag='Text')
        self.canvas.bind("<space>", self.Run)            


    def Run(self, event):
        Over = False
        while not Over:
            self.bot.TestStep()                    
            Over = Play.RunStep(self)

#             #Resume Game Playing
#             if(Over):
#                 self.master.after(2000,self.master.config(bg='white'))
#                 self.master.after(100, self.master.destroy())
#                 if(who=='ai'):
#                     arrays = np.load(FileToOpen)
#                     GameParameters = arrays['GameParameters'][()]
#                     GameParameters['SleepTime'] = howfast
#                     Theta1 = arrays['Theta1']
#                     Theta2 = arrays['Theta2']
#                     app = PlayAI(Theta1, Theta2, GameParameters)
#                     app.master.mainloop()
#                     print( "AI Score:", app.game.counter)

#                 if(who=='dumb_ai'):
#                     arrays = np.load(FileToOpen)
#                     GameParameters = arrays['GameParameters'][()]
#                     GameParameters['SleepTime'] = howfast
#                     Theta1 = arrays['Theta1']
#                     Theta2 = arrays['Theta2']

#                     Theta1 = np.random.uniform(-1.0, 1.0, Theta1.shape)
#                     Theta2 = np.random.uniform(-1.0, 1.0, Theta2.shape)

#                     app = PlayAI(Theta1, Theta2, GameParameters)
#                     app.master.mainloop()

#                     print( "Dumb AI Score:", app.game.counter)
                    

In [27]:
###################################################################################################################

# Training AI

In [28]:
class Bot:
    # A bot that just plays the game using the values Theta1 and Theta2 for neural network parameters
    def __init__(self, Theta1, Theta2, game):
        self.Theta1 = Theta1
        self.Theta2 = Theta2
        
        self.theta1_values=[]
        self.theta2_values=[]
        
        self.game = game


    def Sigmoid(self, x):
        return (1 + math.exp((-1)*x))**(-1)

    
    def PreProcess(self, action):
        # Use the relative coordinates of the falling objects to generate the input numpy vector
        # the neural network (exploit game symmetry to use only one net)
        state_new = []
        for aster in self.game.asteroids:          # Scaling input values
            state_new.append(aster[0]/(self.game.Halfwidth+0.0))
            state_new.append(aster[1]/(self.game.Height+0.0))
        state_new.append(1)     # Add the bias term
        
        if action == 'L':
            for i in range(self.game.N):
                state_new[2*i] *= -1
        layer1 = np.empty([2*self.game.N+1,1])
        for i in range(2*self.game.N+1):
            layer1[i, 0] = state_new[i]
        return layer1


    def ForwardPropagate(self, action):
        # Evalue the neural network for the current game state with the given L/R action;
        # Returns triple of values/vectors (one for each layer)
        layer1 = self.PreProcess(action)
        layer2_temp = np.dot(np.transpose(self.Theta1), layer1)
        
        for i in range(layer2_temp.shape[0]):
            layer2_temp[i,0] = self.Sigmoid(layer2_temp[i,0])
        layer2 = np.append(layer2_temp, [[1]], axis=0)
        layer3 = np.dot(np.transpose(self.Theta2), layer2)
        result = self.Sigmoid(layer3[0,0])
        return (layer1, layer2, result)

    
    def TestStep(self):
        # Determines the optimal direction in the next move by using the given Theta1, Theta2 parameters
        outputL = self.ForwardPropagate('L')
        outputR = self.ForwardPropagate('R')
        if outputL[-1] < outputR[-1]:
            self.game.ChangeDirection('L')
        else:
            self.game.ChangeDirection('R')
        result = self.game.GameOver()
        return result


In [29]:

class BotTrain(Bot):
    # A bot that performs reinforcement learning to opitmize the
    # Theta1, Theta2 parameters in the neural network
    
    
    def __init__(self, GameParameters, HiddenSize=12, gamma=0.9995, GameOverCost=1,
                 NSim=500, NTest=100, TestTreshold=200, NumberOfSessions=None, Inertia=0.8,
                 p=0.0, a=1.0, epsilon=0.2, epsilon_decay_rate=1, discount = 0.999, p_decay_rate=0.5):
        
        
        Theta1 = np.random.uniform(-1.0, 1.0, (2*GameParameters["N"]+1, HiddenSize))
        Theta2 = np.random.uniform(-1.0, 1.0, (HiddenSize+1, 1))        
        game = Game(**GameParameters)
        
        
        Bot.__init__(self, Theta1, Theta2, game)

        self.GameParameters = GameParameters        
        self.HiddenSize = HiddenSize     # Size of the neural network hidden layer 
        self.gamma = gamma     # gamma parameter in the game cost function E[gamma^N]
        self.GameOverCost = GameOverCost     # Game Over Cost (set to 1.0 for standard game cost function E[gamma^N])
        self.NSim = NSim     # Number of consecutive learning games
        self.NTest = NTest     # Number of consecutive test games
        self.TestTreshold = TestTreshold    # Stop when median score over TestTreshold (None for fixed number of sessions)
        self.NumberOfSessions = NumberOfSessions     # Number of learn train/test session (active only if TestTreshold = None)
        self.Inertia = Inertia     # (1 - Inertia) is the probability of resampling the game direction while learning
        self.p = p     # Probability of chosing learned move in reinforcement learning        
        self.a = a     # Reinforcement learning rate (set to 1.0 since it can be absorbed into gradient descent step factor)
        self.epsilon = epsilon     # Initial gradient descent step factor
        self.epsilon_decay_rate = epsilon_decay_rate     # Exponent in power decay for the gradient descent step factor
        self.discount = discount    # Discount exponent in reinforcement learning
        self.p_decay_rate = p_decay_rate    # Exponent in power decay for the policy greedines parameter

        self.counter = []    # Container for average and median test scores
        self.best_score = 0    # Best score among all training sessions

        
    def BackPropagate(self, output, expected, layer1, layer2):
        # Backpropagation algorithm for neural network;
        # computes the partial derivatives with respect to parameters and performs the stochastic gradient descent
        delta3 = output - expected
        delta2 = delta3*self.Theta2
        for i in range(self.HiddenSize):
            delta2[i,0] *= layer2[i,0]*(1-layer2[i,0])
        for i in range(2*self.game.N+1):
            for j in range(self.HiddenSize):
                self.Theta1[i,j] -= self.epsilon*layer1[i,0]*delta2[j,0]
        for i in range(self.HiddenSize+1):
            self.Theta2[i,0] -= self.epsilon*delta3*layer2[i,0]
                   
            
    def ReinforcedLearningStep(self):
        # Performs one step of reinforcement learning
        t = random.random()
        if t < 1-self.p:
            tt = random.random()
            if tt < self.Inertia:
                output = self.ForwardPropagate(self.game.Direction)
            else:
                new_direction = random.choice(['L','R'])
                output = self.ForwardPropagate(new_direction)
                self.game.ChangeDirection(new_direction)
        else:
            outputL = self.ForwardPropagate('L')
            outputR = self.ForwardPropagate('R')
            if outputL[-1] < outputR[-1]:
                output = outputL
                self.game.ChangeDirection('L')
            else:
                output = outputR
                self.game.ChangeDirection('R')
                
#         if random.random()<0.00002:
            # Occasionally prints out the current value of the network
            # useful for adjusting various learning parameters, especially gamma
#             print( output[-1])
            
        result = self.game.UpdateStep()
        if result[-1]:
            estimate = self.GameOverCost
        else:
            estimateL = self.ForwardPropagate('L')
            estimateR = self.ForwardPropagate('R')
            estimate = min(estimateL[-1], estimateR[-1])**self.discount
            if result[1]:
                estimate *= self.gamma
        expected = (1-self.a)*output[-1] + self.a*estimate
        self.BackPropagate(output[-1], expected, output[0], output[1])
        return result

    
    def Training(self):
        # Run NSim consecutive training games
        train_scores = []
        for i in range(self.NSim):
            stop = False
            while not stop:
                (update, kill, stop) = self.ReinforcedLearningStep()
                
            train_scores.append(self.game.counter)
            self.game = Game(**self.GameParameters)
        return train_scores

            
    def Testing(self):
        # Run NTest consecutive test games to evaluate learned performance;
        # prints out all the test values and records average and median values
        s = 0
        alist = []
        for i in range(self.NTest):
            stop = False
            while not stop:
                stop = self.TestStep()
                self.game.UpdateStep()
            alist.append(self.game.counter)
            self.game = Game(**self.GameParameters)
        m1 = sum(alist)/(len(alist)+0.0)
        m2 = np.median(alist)
        self.counter.append((m1,m2))
        if m1 > self.best_score:
            self.best_score = m1
            np.savez("Data/parameters_best", GameParameters = self.GameParameters, Theta1 = self.Theta1, Theta2 = self.Theta2)


    def TrainSession(self):
        # Performs a learning session until median scores achieves TestTreshold or for fixed number of learn/test sessions
        self.Testing()
        keep_going = True
        i = 0
        
        trainscore_values=[]
        
        while keep_going:
            i += 1
            print('')
            print( "No of Asteroids:", self.game.N)
            print( "RunSession:", i)
            
            
            train_scores = self.Training()
            print( "Train average and median score:", sum(train_scores)/(len(train_scores)+0.0), np.median(train_scores))
            trainscore_values.append(sum(train_scores)/(len(train_scores)+0.0))
            
            print("Theta1:",self.Theta1)
            print(self.Theta1.shape)
            print("Theta2:",self.Theta2)
            print(self.Theta2.shape)
            
            self.Testing()
            print( "Test Results:", self.counter)            
            new, old = self.counter[-1][-1], self.counter[-2][-1]
            
            self.epsilon *= (old/new)**self.epsilon_decay_rate
            print( "Gradient Learning Rate:", self.epsilon)
            
            
            self.p = 1 - (1-self.p)*((old/new)**self.p_decay_rate)
            if self.p < 0:
                self.p = 0.0
            print("Probalistic Values:", self.p)
            print('')
            
            
            if self.TestTreshold == None and not self.NumberOfSessions == None:
                if i >= self.NumberOfSessions:
                    keep_going = False
                    
            elif not self.TestTreshold == None:                 
                if self.counter[-1][-1] >= self.TestTreshold:
                    keep_going = False
  


## Main Application

In [33]:
choose = Choose()
choose.master.mainloop()
who = choose.who
howmany = choose.howmany.get()
FileDict = {4:'Data/parameters4.npz', 5:'Data/parameters5.npz', 6:'Data/parameters6.npz'}
FileToOpen = FileDict[howmany]
SpeedDict = {'fast':5, 'medium':10, 'slow':15}
howfast = SpeedDict[choose.howfast.get()]

if who == 'human':
    GameParameters = {'N':howmany, 'DownSideRatio':3, 'SleepTime':howfast, 'R':25, 'r':5, 'Height':400, 'Halfwidth':200,
                      'GlobalHeight':600, 'GlobalWidth':800, 'Thickness':20, 'RandomTreshold':0.2, 'RandomStep':1,
                      'RandomVertTreshold':0.2, 'RandomVertStep':1, 'MaxScore':None}


    app=PlayHuman(GameParameters)
    app.master.mainloop()
    print( "Human Score:", app.game.counter)

    
if who == 'ai':
    arrays = np.load(FileToOpen)
    GameParameters = arrays['GameParameters'][()]
    GameParameters['SleepTime'] = howfast
    Theta1 = arrays['Theta1']
    Theta2 = arrays['Theta2']

    app = PlayAI(Theta1, Theta2, GameParameters)
    app.master.mainloop()

    print( "AI Score:", app.game.counter)

    
if who == 'dumb_ai':
    arrays = np.load(FileToOpen)
    GameParameters = arrays['GameParameters'][()]
    GameParameters['SleepTime'] = howfast
    Theta1 = arrays['Theta1']
    Theta2 = arrays['Theta2']

    Theta1 = np.random.uniform(-1.0, 1.0, Theta1.shape)
    Theta2 = np.random.uniform(-1.0, 1.0, Theta2.shape)

    app = PlayAI(Theta1, Theta2, GameParameters)
    app.master.mainloop()

    print( "Dumb AI Score:", app.game.counter)


AI Score: 566


Exception in Tkinter callback
Traceback (most recent call last):
  File "/home/kamalshrestha/anaconda3/lib/python3.7/tkinter/__init__.py", line 1705, in __call__
    return self.func(*args)
  File "<ipython-input-31-ac1a95f63d41>", line 15, in Run
    Over = Play.RunStep(self)
  File "<ipython-input-24-5c6d58944aae>", line 44, in RunStep
    self.canvas.move('S', -1, 0)
  File "/home/kamalshrestha/anaconda3/lib/python3.7/tkinter/__init__.py", line 2591, in move
    self.tk.call((self._w, 'move') + args)
_tkinter.TclError: invalid command name ".!canvas"


## Training Sessions

In [20]:
GameParameters = {'N': 5, 'DownSideRatio': 3, 'SleepTime': 5, 'R': 25, 'r': 5, 'Height': 400, 'Halfwidth': 200,
                  'GlobalHeight': 600, 'GlobalWidth': 800, 'Thickness': 20, 'RandomTreshold': 0.2, 'RandomStep': 1,
                  'RandomVertTreshold': 0.2, 'RandomVertStep': 1, 'MaxScore': None}
LearnParameters = {'HiddenSize': 12, 'gamma': 0.8, 'GameOverCost': 1, 'NSim': 400, 'NTest': 400, 'TestTreshold': None,
                   'NumberOfSessions': 50, 'Inertia': 0.8, 'p': 0.0, 'a': 1.0, 'epsilon': 0.05,
                   'epsilon_decay_rate': 0.0, 'discount': 0.999, 'p_decay_rate': 0.5}

bot = BotTrain(GameParameters = GameParameters, **LearnParameters)


bot.TrainSession()

print( bot.Theta1)
print( bot.Theta2)
np.savez("Data/parameters", GameParameters = GameParameters, Theta1 = bot.Theta1, Theta2 = bot.Theta2)




No of Asteroids: 5
RunSession: 1
Train average and median score: 11.18 9.0
Theta1: [[-0.91983019  0.06247182 -0.69830679 -0.63622636  0.4522967  -0.440984
   0.15508653 -0.24986123  0.01714275  0.71011332 -0.14441746  0.14916769]
 [-0.65115349  1.06361803 -0.83347522 -0.8556173   0.48671242  0.07907865
   1.34438867  0.25130786 -0.88626264  0.42265492 -0.27250408  0.75229928]
 [-0.87347575 -0.09194692  0.71941694  0.68511854  0.44019504 -0.4337904
   0.6270246   0.62334526  0.89434345  0.70940217  0.21458486  0.5303813 ]
 [ 0.13332272 -0.30786717 -0.22422625  0.05723031  0.59390754  0.68643836
   0.37957259  0.13626889 -0.13840825 -0.84863474  0.4236853  -0.120301  ]
 [ 0.76861762 -0.02079779 -0.49484651  0.44057054 -0.78526467  0.38035008
  -0.50232994  0.55212959 -0.35576401  0.45015711  0.8409309  -0.0076629 ]
 [ 0.22845874  0.28726815  0.15998693 -0.11084849 -0.21046061 -0.58228931
   0.84934453 -0.66810249 -0.00503687 -0.79353949 -0.162043    0.00220546]
 [ 0.28409968 -0.46292439

Train average and median score: 11.2075 9.0
Theta1: [[-0.94100771  0.034222   -1.20180161 -0.6076794   0.29214656 -0.20737101
  -0.02202598 -0.61017851  0.30145821  0.42162531 -0.03083917 -0.01303133]
 [-0.6578938   0.9852459  -0.7838003  -0.88533371  0.45894039  0.0809484
   1.34206949  0.28244081 -0.91945758  0.45606924 -0.27272315  0.68897482]
 [-0.94566039 -0.10976128  0.87014319  0.73130644  0.2607967  -0.30980902
   0.73026853  0.68771925  1.08866328  0.41136913  0.11860236  0.28261507]
 [ 0.10381679 -0.27145122 -0.29325751 -0.0944461   0.48948571  0.88785585
   0.73853156  0.21598485 -0.32994618 -0.83673116  0.37261405  0.19731656]
 [ 0.76509081 -0.02690137 -0.46926211  0.48200936 -0.6571353   0.28928913
  -0.4283061   0.56922006 -0.40344667  0.37719153  0.69344387  0.00608405]
 [ 0.19438357  0.29305765  0.05852966 -0.22401631 -0.27890576 -0.47230619
   1.03056968 -0.65767535 -0.17593393 -0.73216908 -0.21010397  0.29300027]
 [ 0.30993766 -0.31177069 -0.70848407 -0.65159364 -0.01

Test Results: [(12.05, 10.0), (13.01, 11.0), (11.375, 9.0), (9.9775, 9.0), (10.175, 8.0), (12.4375, 10.0), (15.225, 12.0), (13.365, 11.0), (11.42, 9.0)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.05719095841793653


No of Asteroids: 5
RunSession: 9
Train average and median score: 11.365 9.0
Theta1: [[-9.21535174e-01  4.09420936e-02 -3.43584000e+00 -5.72776157e-01
   2.72250679e-01 -2.35221126e-01 -7.08316230e-02 -2.65304285e+00
   5.80165723e-01  3.51885057e-01  6.89416593e-02 -1.12432354e-01]
 [-6.32846983e-01  8.81550537e-01 -4.35060080e-01 -7.69906073e-01
   5.42701177e-01 -1.10036272e-01  1.13805468e+00  3.17516610e-01
  -8.47012899e-01  5.16507267e-01 -2.23235608e-01  4.51964625e-01]
 [-1.00800834e+00 -1.13802846e-01  3.28093269e-01  8.25375389e-01
   1.50178224e-01 -4.39572730e-01  1.26546812e+00  3.19787984e-01
   1.38421646e+00  2.94610127e-01  4.68563556e-02  2.32409765e-01]
 [ 1.09999432e-01 -3.11644049e-01 -9.59171863e-02 -4.54115543e-02
   5.46638199e-01  8.30347218

Train average and median score: 15.26 12.0
Theta1: [[-8.66087071e-01  1.22352178e-01 -6.21630078e+00 -4.81852920e-01
   2.89779794e-01 -1.26184019e-01  2.02218029e-01 -5.98070250e+00
   1.93569767e-01  3.27172404e-01  2.58506967e-02  4.19345775e-02]
 [-6.32915180e-01  8.33931621e-01 -4.06317494e-01 -7.46102837e-01
   5.33986871e-01 -1.56915918e-01  1.03131456e+00  2.07631314e-01
  -8.46329254e-01  5.32002977e-01 -2.16155939e-01  3.61342220e-01]
 [-1.01529565e+00 -7.55433761e-02 -1.68048416e-01  1.00117660e+00
   1.28119576e-01 -6.55576790e-01  3.04970072e+00 -1.28708333e-01
   2.88233267e+00  1.86997356e-01 -5.25757708e-02  2.80218843e-01]
 [ 9.79717395e-02 -2.26872130e-01 -2.02808583e-01 -9.37231594e-02
   5.20882977e-01  1.00988211e+00  6.77444896e-01  3.87381822e-01
  -5.91281098e-01 -8.26662533e-01  3.21861612e-01  3.57163028e-01]
 [ 7.54729607e-01 -2.55388673e-03 -1.31801372e-01  4.79364921e-01
  -5.28480652e-01  1.96552589e-01  3.43467453e-02 -8.19558467e-02
  -2.78103544e-02  3.

Train average and median score: 18.1125 14.0
Theta1: [[-8.07706138e-01  2.26439050e-01 -8.34441334e+00 -4.01523391e-01
   3.16385354e-01 -2.88621893e-02  1.56756764e-01 -8.22800978e+00
   4.08704287e-02  3.50217513e-01  2.09725646e-02  2.17838354e-01]
 [-6.41179840e-01  7.63608530e-01 -6.88663126e-01 -7.04652665e-01
   5.42397354e-01 -2.70664104e-01  5.82944781e-01  3.71301742e-01
  -6.46678025e-01  5.68857673e-01 -1.83398141e-01  1.71298764e-01]
 [-1.02197249e+00 -1.20513405e-02 -1.29724257e-01  1.03145869e+00
   1.22281217e-01 -7.52713295e-01  4.78480185e+00 -1.72632674e-01
   4.81136211e+00  1.49813123e-01 -9.45843360e-02  3.33470001e-01]
 [ 8.73820646e-02 -1.90339428e-01 -1.04328723e-01 -1.33459362e-01
   5.03250069e-01  1.11102250e+00  6.55340802e-01  9.92398994e-02
  -7.30834053e-01 -8.37170550e-01  3.15881070e-01  4.33073416e-01]
 [ 7.28579690e-01 -9.34172067e-03 -6.30763024e-02  5.25634450e-01
  -4.86783629e-01  1.39406355e-01  2.05646805e-01 -1.03149346e-01
   1.61523190e-01  

Train average and median score: 18.85 14.0
Theta1: [[-7.65710937e-01  2.53157326e-01 -1.02854204e+01 -3.24100570e-01
   3.57108670e-01 -6.90725003e-02  8.11282733e-02 -1.02648217e+01
   3.69544727e-02  4.19513392e-01  3.78748059e-02  2.28166269e-01]
 [-6.42036895e-01  7.59008739e-01 -1.37027146e+00 -7.10619728e-01
   5.42870638e-01 -2.52271096e-01  2.88388173e-01  9.80418042e-01
  -3.92383817e-01  5.82718582e-01 -1.72178087e-01  8.49391890e-02]
 [-1.02272131e+00  4.09752282e-02 -1.48943663e-01  9.78663370e-01
   1.12627919e-01 -6.85308730e-01  6.06643868e+00 -1.43398389e-01
   6.36246182e+00  1.34134082e-01 -1.11345671e-01  3.71001741e-01]
 [ 8.73043682e-02 -1.47773681e-01 -1.00053258e-01 -1.85675999e-01
   4.76456956e-01  1.25502698e+00  8.69033724e-01  1.64502556e-01
  -8.41494508e-01 -8.64962094e-01  3.03113807e-01  5.33850545e-01]
 [ 7.05930157e-01 -3.03478341e-02 -8.80159532e-02  5.56114859e-01
  -4.32540782e-01  1.22595579e-01  1.33366240e-01 -6.80751711e-02
   6.33683668e-02  1.

Test Results: [(12.05, 10.0), (13.01, 11.0), (11.375, 9.0), (9.9775, 9.0), (10.175, 8.0), (12.4375, 10.0), (15.225, 12.0), (13.365, 11.0), (11.42, 9.0), (9.96, 8.0), (12.9025, 10.0), (18.4875, 15.0), (16.485, 13.0), (16.495, 13.0), (21.295, 16.0), (14.45, 12.0), (19.7675, 15.5), (18.67, 14.0), (29.7225, 22.0), (28.31, 20.0), (32.235, 23.0)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.4102321753804117


No of Asteroids: 5
RunSession: 21
Train average and median score: 26.3825 18.0
Theta1: [[-7.14474005e-01  3.09938742e-01 -1.19304043e+01 -2.72859433e-01
   3.99361713e-01 -5.29077062e-02  1.88355625e-01 -1.21843926e+01
   1.65339986e-01  4.66585036e-01  1.84838845e-02  3.50862987e-01]
 [-6.62188503e-01  6.73149121e-01 -2.01976233e+00 -6.84068389e-01
   5.76738053e-01 -4.23939580e-01 -1.23964449e-01  1.62011587e+00
   1.96362914e-01  6.73372914e-01 -1.15340088e-01 -2.52426912e-01]
 [-1.01869559e+00  6.69450648e-02 -1.49480873e-01  9.30467026e-01
   1.09713608e-01 -5.95679851e-01  7

Test Results: [(12.05, 10.0), (13.01, 11.0), (11.375, 9.0), (9.9775, 9.0), (10.175, 8.0), (12.4375, 10.0), (15.225, 12.0), (13.365, 11.0), (11.42, 9.0), (9.96, 8.0), (12.9025, 10.0), (18.4875, 15.0), (16.485, 13.0), (16.495, 13.0), (21.295, 16.0), (14.45, 12.0), (19.7675, 15.5), (18.67, 14.0), (29.7225, 22.0), (28.31, 20.0), (32.235, 23.0), (26.63, 20.0), (24.9825, 19.0), (67.2775, 47.0)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.5874315014964828


No of Asteroids: 5
RunSession: 24
Train average and median score: 41.1225 28.0
Theta1: [[-6.64939265e-01  3.50631557e-01 -1.33125385e+01 -2.30247711e-01
   5.02720527e-01 -6.74180444e-02  1.21985294e-01 -1.39450692e+01
   1.68991562e-01  5.53137718e-01  1.32149509e-02  4.29532653e-01]
 [-6.98068446e-01  5.77943761e-01 -2.44090419e+00 -6.61964152e-01
   6.89051921e-01 -6.67912535e-01 -5.47811072e-01  2.22599299e+00
   5.33631311e-01  8.61561873e-01 -2.86601351e-03 -7.61446153e-01]
 [-9.92144215e-01  9.89375346e-02 -5.88496008e-02  8.

Test Results: [(12.05, 10.0), (13.01, 11.0), (11.375, 9.0), (9.9775, 9.0), (10.175, 8.0), (12.4375, 10.0), (15.225, 12.0), (13.365, 11.0), (11.42, 9.0), (9.96, 8.0), (12.9025, 10.0), (18.4875, 15.0), (16.485, 13.0), (16.495, 13.0), (21.295, 16.0), (14.45, 12.0), (19.7675, 15.5), (18.67, 14.0), (29.7225, 22.0), (28.31, 20.0), (32.235, 23.0), (26.63, 20.0), (24.9825, 19.0), (67.2775, 47.0), (76.475, 52.0), (29.4475, 21.0), (63.4325, 51.5)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.6058682887342829


No of Asteroids: 5
RunSession: 27
Train average and median score: 48.465 36.0
Theta1: [[ -0.61185894   0.38119877 -14.51365017  -0.2046359    0.60202314
   -0.05482304   0.17397904 -15.38894961   0.11881868   0.65836294
    0.02117227   0.56643682]
 [ -0.74281935   0.50577669  -2.93023396  -0.65449033   0.8690906
   -0.9137398   -0.51607613   2.73016567   0.44079867   1.09261844
    0.15242334  -1.34624253]
 [ -0.96202315   0.09008656  -0.0165695    0.88391507   0.12124395
   -0.5707

Test Results: [(12.05, 10.0), (13.01, 11.0), (11.375, 9.0), (9.9775, 9.0), (10.175, 8.0), (12.4375, 10.0), (15.225, 12.0), (13.365, 11.0), (11.42, 9.0), (9.96, 8.0), (12.9025, 10.0), (18.4875, 15.0), (16.485, 13.0), (16.495, 13.0), (21.295, 16.0), (14.45, 12.0), (19.7675, 15.5), (18.67, 14.0), (29.7225, 22.0), (28.31, 20.0), (32.235, 23.0), (26.63, 20.0), (24.9825, 19.0), (67.2775, 47.0), (76.475, 52.0), (29.4475, 21.0), (63.4325, 51.5), (26.9675, 20.0), (29.1625, 21.0), (23.105, 17.0)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.31400565942996483


No of Asteroids: 5
RunSession: 30
Train average and median score: 29.035 20.0
Theta1: [[-5.68521645e-01  3.88399421e-01 -1.56186951e+01 -1.82469434e-01
   7.56838284e-01 -9.45209256e-02  6.69126399e-02 -1.65060739e+01
   6.59440505e-02  7.81458679e-01  4.25228441e-02  5.69970209e-01]
 [-7.19318338e-01  5.22854945e-01 -3.58359816e+00 -6.53726856e-01
   8.77670423e-01 -8.66976470e-01 -4.02194817e-01  3.43920602e+00
   3.65281204e-01  1

Test Results: [(12.05, 10.0), (13.01, 11.0), (11.375, 9.0), (9.9775, 9.0), (10.175, 8.0), (12.4375, 10.0), (15.225, 12.0), (13.365, 11.0), (11.42, 9.0), (9.96, 8.0), (12.9025, 10.0), (18.4875, 15.0), (16.485, 13.0), (16.495, 13.0), (21.295, 16.0), (14.45, 12.0), (19.7675, 15.5), (18.67, 14.0), (29.7225, 22.0), (28.31, 20.0), (32.235, 23.0), (26.63, 20.0), (24.9825, 19.0), (67.2775, 47.0), (76.475, 52.0), (29.4475, 21.0), (63.4325, 51.5), (26.9675, 20.0), (29.1625, 21.0), (23.105, 17.0), (16.4525, 13.0), (58.95, 39.5), (71.905, 53.0)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.6114856550570944


No of Asteroids: 5
RunSession: 33
Train average and median score: 55.0275 37.0
Theta1: [[-5.25073648e-01  4.12099941e-01 -1.65371085e+01 -1.57327405e-01
   7.82357115e-01 -6.50996283e-02  1.64908416e-01 -1.75645741e+01
   1.65674160e-01  8.16927685e-01  5.26882758e-02  6.47213386e-01]
 [-7.33283896e-01  4.87047721e-01 -3.78147949e+00 -6.52112221e-01
   1.09137310e+00 -1.00532422e+00 -6.0

Test Results: [(12.05, 10.0), (13.01, 11.0), (11.375, 9.0), (9.9775, 9.0), (10.175, 8.0), (12.4375, 10.0), (15.225, 12.0), (13.365, 11.0), (11.42, 9.0), (9.96, 8.0), (12.9025, 10.0), (18.4875, 15.0), (16.485, 13.0), (16.495, 13.0), (21.295, 16.0), (14.45, 12.0), (19.7675, 15.5), (18.67, 14.0), (29.7225, 22.0), (28.31, 20.0), (32.235, 23.0), (26.63, 20.0), (24.9825, 19.0), (67.2775, 47.0), (76.475, 52.0), (29.4475, 21.0), (63.4325, 51.5), (26.9675, 20.0), (29.1625, 21.0), (23.105, 17.0), (16.4525, 13.0), (58.95, 39.5), (71.905, 53.0), (65.1225, 44.5), (17.025, 13.0), (15.9275, 13.0)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.21553545944726404


No of Asteroids: 5
RunSession: 36
Train average and median score: 21.765 15.0
Theta1: [[-4.90409838e-01  4.21857646e-01 -1.74132480e+01 -1.40502938e-01
   8.79348016e-01 -5.83116317e-02  6.07794639e-02 -1.82954237e+01
   5.19907174e-02  8.94021411e-01  7.26429385e-02  5.82668583e-01]
 [-6.79580216e-01  5.73943651e-01 -4.59848541e+00 -6.4

Test Results: [(12.05, 10.0), (13.01, 11.0), (11.375, 9.0), (9.9775, 9.0), (10.175, 8.0), (12.4375, 10.0), (15.225, 12.0), (13.365, 11.0), (11.42, 9.0), (9.96, 8.0), (12.9025, 10.0), (18.4875, 15.0), (16.485, 13.0), (16.495, 13.0), (21.295, 16.0), (14.45, 12.0), (19.7675, 15.5), (18.67, 14.0), (29.7225, 22.0), (28.31, 20.0), (32.235, 23.0), (26.63, 20.0), (24.9825, 19.0), (67.2775, 47.0), (76.475, 52.0), (29.4475, 21.0), (63.4325, 51.5), (26.9675, 20.0), (29.1625, 21.0), (23.105, 17.0), (16.4525, 13.0), (58.95, 39.5), (71.905, 53.0), (65.1225, 44.5), (17.025, 13.0), (15.9275, 13.0), (16.27, 13.0), (18.6, 14.0), (52.925, 34.0)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.5149287499273343


No of Asteroids: 5
RunSession: 39
Train average and median score: 52.2825 34.0
Theta1: [[-4.35162875e-01  4.38190868e-01 -1.83824989e+01 -1.33189905e-01
   8.89455552e-01 -3.50232295e-02  1.46211592e-01 -1.92506220e+01
   1.85548041e-01  9.23234066e-01  2.98823290e-02  6.67011933e-01]
 [-7.0146

Test Results: [(12.05, 10.0), (13.01, 11.0), (11.375, 9.0), (9.9775, 9.0), (10.175, 8.0), (12.4375, 10.0), (15.225, 12.0), (13.365, 11.0), (11.42, 9.0), (9.96, 8.0), (12.9025, 10.0), (18.4875, 15.0), (16.485, 13.0), (16.495, 13.0), (21.295, 16.0), (14.45, 12.0), (19.7675, 15.5), (18.67, 14.0), (29.7225, 22.0), (28.31, 20.0), (32.235, 23.0), (26.63, 20.0), (24.9825, 19.0), (67.2775, 47.0), (76.475, 52.0), (29.4475, 21.0), (63.4325, 51.5), (26.9675, 20.0), (29.1625, 21.0), (23.105, 17.0), (16.4525, 13.0), (58.95, 39.5), (71.905, 53.0), (65.1225, 44.5), (17.025, 13.0), (15.9275, 13.0), (16.27, 13.0), (18.6, 14.0), (52.925, 34.0), (84.1625, 57.0), (67.4375, 43.0), (23.06, 18.0)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.3333333333333336


No of Asteroids: 5
RunSession: 42
Train average and median score: 32.1875 20.0
Theta1: [[-3.04034832e-01  4.57680941e-01 -1.89792365e+01 -1.09607288e-01
   9.68016991e-01  1.07699543e-02  1.82267027e-01 -1.98882731e+01
   1.72697600e-01  9.689667

Test Results: [(12.05, 10.0), (13.01, 11.0), (11.375, 9.0), (9.9775, 9.0), (10.175, 8.0), (12.4375, 10.0), (15.225, 12.0), (13.365, 11.0), (11.42, 9.0), (9.96, 8.0), (12.9025, 10.0), (18.4875, 15.0), (16.485, 13.0), (16.495, 13.0), (21.295, 16.0), (14.45, 12.0), (19.7675, 15.5), (18.67, 14.0), (29.7225, 22.0), (28.31, 20.0), (32.235, 23.0), (26.63, 20.0), (24.9825, 19.0), (67.2775, 47.0), (76.475, 52.0), (29.4475, 21.0), (63.4325, 51.5), (26.9675, 20.0), (29.1625, 21.0), (23.105, 17.0), (16.4525, 13.0), (58.95, 39.5), (71.905, 53.0), (65.1225, 44.5), (17.025, 13.0), (15.9275, 13.0), (16.27, 13.0), (18.6, 14.0), (52.925, 34.0), (84.1625, 57.0), (67.4375, 43.0), (23.06, 18.0), (56.7275, 39.0), (53.3025, 32.0), (59.2875, 41.0)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.5582738957006139


No of Asteroids: 5
RunSession: 45
Train average and median score: 53.875 34.5
Theta1: [[-1.32281476e-01  4.84080357e-01 -1.95677932e+01 -9.66764310e-02
   9.26777901e-01  5.13669663e-02  1.674593

Test Results: [(12.05, 10.0), (13.01, 11.0), (11.375, 9.0), (9.9775, 9.0), (10.175, 8.0), (12.4375, 10.0), (15.225, 12.0), (13.365, 11.0), (11.42, 9.0), (9.96, 8.0), (12.9025, 10.0), (18.4875, 15.0), (16.485, 13.0), (16.495, 13.0), (21.295, 16.0), (14.45, 12.0), (19.7675, 15.5), (18.67, 14.0), (29.7225, 22.0), (28.31, 20.0), (32.235, 23.0), (26.63, 20.0), (24.9825, 19.0), (67.2775, 47.0), (76.475, 52.0), (29.4475, 21.0), (63.4325, 51.5), (26.9675, 20.0), (29.1625, 21.0), (23.105, 17.0), (16.4525, 13.0), (58.95, 39.5), (71.905, 53.0), (65.1225, 44.5), (17.025, 13.0), (15.9275, 13.0), (16.27, 13.0), (18.6, 14.0), (52.925, 34.0), (84.1625, 57.0), (67.4375, 43.0), (23.06, 18.0), (56.7275, 39.0), (53.3025, 32.0), (59.2875, 41.0), (81.69, 59.0), (97.765, 63.0), (94.84, 68.5)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.6582569369132957


No of Asteroids: 5
RunSession: 48
Train average and median score: 63.4825 39.5
Theta1: [[-1.56096598e-02  4.58545826e-01 -2.00500540e+01 -8.63690498e

Test Results: [(12.05, 10.0), (13.01, 11.0), (11.375, 9.0), (9.9775, 9.0), (10.175, 8.0), (12.4375, 10.0), (15.225, 12.0), (13.365, 11.0), (11.42, 9.0), (9.96, 8.0), (12.9025, 10.0), (18.4875, 15.0), (16.485, 13.0), (16.495, 13.0), (21.295, 16.0), (14.45, 12.0), (19.7675, 15.5), (18.67, 14.0), (29.7225, 22.0), (28.31, 20.0), (32.235, 23.0), (26.63, 20.0), (24.9825, 19.0), (67.2775, 47.0), (76.475, 52.0), (29.4475, 21.0), (63.4325, 51.5), (26.9675, 20.0), (29.1625, 21.0), (23.105, 17.0), (16.4525, 13.0), (58.95, 39.5), (71.905, 53.0), (65.1225, 44.5), (17.025, 13.0), (15.9275, 13.0), (16.27, 13.0), (18.6, 14.0), (52.925, 34.0), (84.1625, 57.0), (67.4375, 43.0), (23.06, 18.0), (56.7275, 39.0), (53.3025, 32.0), (59.2875, 41.0), (81.69, 59.0), (97.765, 63.0), (94.84, 68.5), (73.0625, 47.5), (52.36, 39.5), (64.98, 45.5)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.5806860653112328

[[-1.04294740e-02  4.61194550e-01 -2.03149097e+01 -7.95564269e-02
   9.13705040e-01  5.77286828e-03  2.