In [65]:
#### Imports
import math
import random
import numpy as np
from tkinter import *

## Game Environment Developmet

In [66]:
random.seed()
class Game:
    # This is the main game class that is used by both the GUI module and the AI module
    def __init__(self, N=4, DownSideRatio=3, SleepTime=5, R=15, r=5, Height=400, Halfwidth=200,
                 GlobalHeight=600, GlobalWidth=800, Thickness=15, RandomTreshold=0.2, RandomStep=1,
                 RandomVertTreshold=0.2, RandomVertStep=1, MaxScore=None):
        
        #Initialized Variables 
        
        self.N = N     # number of falling objects
        self.DownSideRatio = DownSideRatio     # ratio fall speed/left-right speed (integer)
        self.SleepTime = SleepTime     # delay time between steps, game is progressing slower for higher values
        self.R = R     # radius of the blue half circle
        self.r = r     # radius of the falling objects
        self.treshold = (R +r/2)**2      # treshold to indicate the contact for game over
        self.Height = Height     # height of the white structure
        self.Halfwidth = Halfwidth     # half width of the white structure
        self.GlobalHeight = GlobalHeight     # height of the game window
        self.GlobalWidth = GlobalWidth     # width of the game window
        self.Thickness = Thickness     # thickness of the white walls
        

        self.RandomTreshold = RandomTreshold     # probability of left/right noise for falling objects
        self.RandomStep = RandomStep     # intensity of left/right noise for falling objects
        self.RandomVertTreshold = RandomVertTreshold     # probability of up/down noise for falling objects
        self.RandomVertStep = RandomVertStep     # intensity of up/down noise for falling objects
        

        self.MaxScore = MaxScore     # Maximum Score before terminating the game (None for infinity)
        self.retry=False
        self.Direction = random.choice(['L','R'])     # setting the initial direction
        self.steps, self.counter = 0, 0     # total pixel moves (time) and total score
        self.asteroids = []     # relative coordinates of the falling objects
       
        for i in range(N):         # initialize the falling objects' coordinates
            t = random.random()
            if t < 0.5:
                x = (-1)*(Halfwidth + R)/2 - t*(Halfwidth-R)
            else:
                x = (Halfwidth + R)/2 + (t-0.5)*(Halfwidth-R)
            self.asteroids.append([x,2*Height/3+(i+1)*Height/(3*N)])
   

    def ChangeDirection(self, direction):
        if direction == 'L':
            self.Direction = 'L'
        if direction == 'R':
            self.Direction = 'R'

            
    def GameOver(self):
        # Testing for game over
        
        for aster in self.asteroids:
            if aster[0]**2 + aster[1]**2 < self.treshold:
                return True
        if not self.MaxScore == None and self.counter >= self.MaxScore:
            return True
        return False

    
    def Destroy(self):
        # updating falling objects when one gets destroyed (and testing for that scenario)
        Kill = False
        for i in range(self.N):
            if self.asteroids[i][0] <= (-1)*self.Halfwidth or self.asteroids[i][0] >= self.Halfwidth or self.asteroids[i][1] <= 0:
                Kill = True
                self.asteroids.pop(i)
                self.asteroids.append([(2*random.random()-1)*self.Halfwidth, self.Height])
                self.counter += 1
        return Kill


    def UpdateStep(self):
        # Updating locations of falling objects at pixel moves.
        # Returns triple to determine:
            # 1. whether one needs to force refresh the screen due to destroyed objects or because noise was added;
            # 2. whether a falling object was destroyed and one point obtained;
            # 3. whether the game is over;
        self.steps += 1
        for i in range(self.N):          
            self.asteroids[i][1] -= self.DownSideRatio
            if self.Direction == 'L':
                self.asteroids[i][0] += 1
            if self.Direction == 'R':
                self.asteroids[i][0] -= 1

        Update = False
        for i in range(self.N):          #  Adding the noise to falling objects
            t1, t2 = random.random(), random.random()
            if t1 < self.RandomTreshold/2:
                Update = True
                self.asteroids[i][0] += self.RandomStep
            elif t1 < self.RandomTreshold:
                Update = True
                self.asteroids[i][0] -= self.RandomStep
            if t2 < self.RandomVertTreshold/2:
                Update = True
                self.asteroids[i][1] += self.RandomVertStep
            elif t2 < self.RandomVertTreshold:
                Update = True
                self.asteroids[i][1] -= self.RandomVertStep

        Kill = self.Destroy()
        if Kill:
            Update = True
        Over = self.GameOver()
        return (Update, Kill, Over)

## Selection Screen

In [67]:
class Choose:
    # Window to chose Human/AI player
    def __init__(self):
        self.master = Tk()
        self.master.title('Obstacle Avoiding AI')
        self.frame = Frame(self.master)
        self.frame.pack()

        howmany_options, howfast_options = [6,5,4], ['fast', 'medium', 'slow']
        self.howmany, self.howfast = IntVar(), StringVar()
        self.howmany.set(howmany_options[0])
        self.howfast.set(howfast_options[0])
        self.menu1=OptionMenu(self.master,self.howmany,*howmany_options)
        self.menu2=OptionMenu(self.master,self.howfast,*howfast_options)
        Label(font=("Purisa", 13),text="Number of objects").pack()
        self.menu1.pack()
        Label(font=("Purisa", 13),text="Game speed").pack()
        self.menu2.pack()

        Label(font=("Purisa", 13),text="Player").pack()
        self.button1 = Button(self.master, font=("Purisa", 12), text = "Human", command=self.human)
        self.button2 = Button(self.master, font=("Purisa", 12), text = "AI pre train", command=self.dumb_ai)
        self.button3 = Button(self.master, font=("Purisa", 12), text = "AI post train", command=self.ai)
        self.button1.pack(side=LEFT)
        self.button2.pack(side=LEFT)
        self.button3.pack(side=LEFT)

        self.who = None

        
    def human(self):
        self.who = 'human'
        self.master.destroy()


    def dumb_ai(self):
        self.who = 'dumb_ai'
        self.master.destroy()


    def ai(self):
        self.who = 'ai'
        self.master.destroy()

## Super Class Play for both human and AI

In [68]:
class Play:
    # General class implementing common features for both Human and AI player
    def __init__(self, GameParameters):        
        self.game = Game(**GameParameters)
        self.x = self.game.GlobalWidth/2
        self.y = self.game.GlobalHeight - self.game.Thickness

        self.master = Tk()
        self.master.title('Obstacle Avoiding AI')
        self.canvas=Canvas(self.master, bg="black", width=self.game.GlobalWidth, height=self.game.GlobalHeight)
        self.canvas.pack()

        # Next draw permanent and variable (falling) game objects. Draw three copies of everything for smooth screen wrapping. 
        
        for i in range(15):  
              
            cx, cy = self.x+(i-1)*self.game.GlobalWidth, self.y
            self.canvas.create_oval(cx-self.game.R, cy+self.game.R, cx+self.game.R, cy-self.game.R, fill="blue",
                                    width = 0, tag = 'S')
            self.canvas.create_rectangle(cx-self.game.Halfwidth-self.game.Thickness-self.game.r, cy,
                                         cx+self.game.Halfwidth+self.game.Thickness+self.game.r,
                                         cy+self.game.Thickness, fill="white", width = 0, tag = 'S')
            self.canvas.create_rectangle(cx-self.game.Halfwidth-self.game.Thickness-self.game.r, cy-self.game.Height,
                                         cx-self.game.Halfwidth-self.game.r, cy+self.game.Thickness,
                                         fill="white", width = 0, tag = 'S')
            self.canvas.create_rectangle(cx+self.game.Halfwidth+self.game.r, cy-self.game.Height,
                                         cx+self.game.Halfwidth+self.game.Thickness+self.game.r, cy+self.game.Thickness,
                                         fill="white", width = 0, tag = 'S')
            
            

        for aster in self.game.asteroids:
            for i in range(3):
                cx, cy = self.x+(i-1)*self.game.GlobalWidth + aster[0], self.y - aster[1]
                self.canvas.create_oval(cx-self.game.r, cy+self.game.r, cx+self.game.r, cy-self.game.r,
                                        fill="white", width = 0, tag = 'A')            

        self.canvas.focus_set()

        
    def RunStep(self):
        # Moves objects on the screen
        if self.game.Direction == 'L':
            self.canvas.move('S', -1, 0)
            self.x -= 1
            if self.x < (-1)*self.game.GlobalWidth/2:
                self.x += self.game.GlobalWidth
                self.canvas.move('S', self.game.GlobalWidth, 0)     

        if self.game.Direction == 'R':
            self.canvas.move('S', 1, 0)
            self.x += 1
            if self.x > 3*self.game.GlobalWidth/2:
                self.x -= self.game.GlobalWidth
                self.canvas.move('S', (-1)*self.game.GlobalWidth, 0)



        self.canvas.move('A', 0, self.game.DownSideRatio)
        (Update, Kill, Over) = self.game.UpdateStep()
        if Update:
            self.canvas.itemconfig(self.text_id, text="             Score: "+str(self.game.counter))
            self.canvas.delete('A')
            
            for aster in self.game.asteroids:
                for i in range(3):
                    cx, cy = self.x+(i-1)*self.game.GlobalWidth + aster[0], self.y - aster[1]
                    self.canvas.create_oval(cx-self.game.r, cy+self.game.r, cx+self.game.r, cy-self.game.r,
                                            fill="red", width = 0, tag = 'A')
        self.master.update()
        self.master.after(self.game.SleepTime)
        return Over

## Human Class

In [69]:
class PlayHuman(Play):

    # Play subclass for a human player
    def __init__(self, GameParameters):
        Play.__init__(self, GameParameters)
            
        self.text_id = self.canvas.create_text(0,0,anchor="nw",fill="white",font=("Purisa", 14),
                                               text="         Space to start, Left/Right to change direction",tag='Text')
        self.canvas.bind("<Left>", self.left)
        self.canvas.bind("<Right>", self.right)
        self.canvas.bind("<space>", self.Run)            
            
                   
    def left(self, event):
        self.game.ChangeDirection('L')

        
    def right(self, event):
        self.game.ChangeDirection('R')

        
    def Run(self, event):
        Over = False
        while not Over:
            Over = Play.RunStep(self)
            
            if(Over):
                self.master.after(2000,self.master.config(bg='white'))
                self.master.after(100, self.master.destroy())
                app=PlayHuman(GameParameters)
                app.master.mainloop()
                print( "Human Score:", app.game.counter)
                

## Pre and Post Trained AI

In [70]:
class PlayAI(Play):
    # Play subclass for an AI player
    def __init__(self, Theta1, Theta2, GameParameters):
        Play.__init__(self, GameParameters)
        self.bot = Bot(Theta1, Theta2, self.game)

        self.text_id = self.canvas.create_text(0,0,anchor="nw",fill="white",font=("Purisa", 14),text="                     Space to start",tag='Text')
        self.canvas.bind("<space>", self.Run)            


    def Run(self, event):
        Over = False
        while not Over:
            self.bot.TestStep()                    
            Over = Play.RunStep(self)

            #Resume Game Playing
            if(Over):
                self.master.after(2000,self.master.config(bg='white'))
                self.master.after(100, self.master.destroy())
                if(who=='ai'):
                    arrays = np.load(FileToOpen)
                    GameParameters = arrays['GameParameters'][()]
                    GameParameters['SleepTime'] = howfast
                    Theta1 = arrays['Theta1']
                    Theta2 = arrays['Theta2']
                    app = PlayAI(Theta1, Theta2, GameParameters)
                    app.master.mainloop()
                    print( "AI Score:", app.game.counter)

                if(who=='dumb_ai'):
                    arrays = np.load(FileToOpen)
                    GameParameters = arrays['GameParameters'][()]
                    GameParameters['SleepTime'] = howfast
                    Theta1 = arrays['Theta1']
                    Theta2 = arrays['Theta2']

                    Theta1 = np.random.uniform(-1.0, 1.0, Theta1.shape)
                    Theta2 = np.random.uniform(-1.0, 1.0, Theta2.shape)

                    app = PlayAI(Theta1, Theta2, GameParameters)
                    app.master.mainloop()

                    print( "Dumb AI Score:", app.game.counter)
                    

In [71]:
###################################################################################################################

# Training AI

In [None]:
class Bot:
    # A bot that just plays the game using the values Theta1 and Theta2 for neural network parameters
    def __init__(self, Theta1, Theta2, game):
        self.Theta1 = Theta1
        self.Theta2 = Theta2
        
        self.theta1_values=[]
        self.theta2_values=[]
        
        self.game = game


    def Sigmoid(self, x):
        return (1 + math.exp((-1)*x))**(-1)

    
    def PreProcess(self, action):
        # Use the relative coordinates of the falling objects to generate the input numpy vector
        # the neural network (exploit game symmetry to use only one net)
        state_new = []
        for aster in self.game.asteroids:          # Scaling input values
            state_new.append(aster[0]/(self.game.Halfwidth+0.0))
            state_new.append(aster[1]/(self.game.Height+0.0))
        state_new.append(1)     # Add the bias term
        
        if action == 'L':
            for i in range(self.game.N):
                state_new[2*i] *= -1
        layer1 = np.empty([2*self.game.N+1,1])
        for i in range(2*self.game.N+1):
            layer1[i, 0] = state_new[i]
        return layer1


    def ForwardPropagate(self, action):
        # Evalue the neural network for the current game state with the given L/R action;
        # Returns triple of values/vectors (one for each layer)
        layer1 = self.PreProcess(action)
        layer2_temp = np.dot(np.transpose(self.Theta1), layer1)
        
        for i in range(layer2_temp.shape[0]):
            layer2_temp[i,0] = self.Sigmoid(layer2_temp[i,0])
        layer2 = np.append(layer2_temp, [[1]], axis=0)
        layer3 = np.dot(np.transpose(self.Theta2), layer2)
        result = self.Sigmoid(layer3[0,0])
        return (layer1, layer2, result)

    
    def TestStep(self):
        # Determines the optimal direction in the next move by using the given Theta1, Theta2 parameters
        outputL = self.ForwardPropagate('L')
        outputR = self.ForwardPropagate('R')
        if outputL[-1] < outputR[-1]:
            self.game.ChangeDirection('L')
        else:
            self.game.ChangeDirection('R')
        result = self.game.GameOver()
        return result


In [None]:

class BotTrain(Bot):
    # A bot that performs reinforcement learning to opitmize the
    # Theta1, Theta2 parameters in the neural network
    
    
    def __init__(self, GameParameters, HiddenSize=12, gamma=0.9995, GameOverCost=1,
                 NSim=500, NTest=100, TestTreshold=200, NumberOfSessions=None, Inertia=0.8,
                 p=0.0, a=1.0, epsilon=0.2, epsilon_decay_rate=1, discount = 0.999, p_decay_rate=0.5):
        
        
        Theta1 = np.random.uniform(-1.0, 1.0, (2*GameParameters["N"]+1, HiddenSize))
        Theta2 = np.random.uniform(-1.0, 1.0, (HiddenSize+1, 1))        
        game = Game(**GameParameters)
        
        
        Bot.__init__(self, Theta1, Theta2, game)

        self.GameParameters = GameParameters        
        self.HiddenSize = HiddenSize     # Size of the neural network hidden layer 
        self.gamma = gamma     # gamma parameter in the game cost function E[gamma^N]
        self.GameOverCost = GameOverCost     # Game Over Cost (set to 1.0 for standard game cost function E[gamma^N])
        self.NSim = NSim     # Number of consecutive learning games
        self.NTest = NTest     # Number of consecutive test games
        self.TestTreshold = TestTreshold    # Stop when median score over TestTreshold (None for fixed number of sessions)
        self.NumberOfSessions = NumberOfSessions     # Number of learn train/test session (active only if TestTreshold = None)
        self.Inertia = Inertia     # (1 - Inertia) is the probability of resampling the game direction while learning
        self.p = p     # Probability of chosing learned move in reinforcement learning        
        self.a = a     # Reinforcement learning rate (set to 1.0 since it can be absorbed into gradient descent step factor)
        self.epsilon = epsilon     # Initial gradient descent step factor
        self.epsilon_decay_rate = epsilon_decay_rate     # Exponent in power decay for the gradient descent step factor
        self.discount = discount    # Discount exponent in reinforcement learning
        self.p_decay_rate = p_decay_rate    # Exponent in power decay for the policy greedines parameter

        self.counter = []    # Container for average and median test scores
        self.best_score = 0    # Best score among all training sessions

        
    def BackPropagate(self, output, expected, layer1, layer2):
        # Backpropagation algorithm for neural network;
        # computes the partial derivatives with respect to parameters and performs the stochastic gradient descent
        delta3 = output - expected
        delta2 = delta3*self.Theta2
        for i in range(self.HiddenSize):
            delta2[i,0] *= layer2[i,0]*(1-layer2[i,0])
        for i in range(2*self.game.N+1):
            for j in range(self.HiddenSize):
                self.Theta1[i,j] -= self.epsilon*layer1[i,0]*delta2[j,0]
        for i in range(self.HiddenSize+1):
            self.Theta2[i,0] -= self.epsilon*delta3*layer2[i,0]
                   
            
    def ReinforcedLearningStep(self):
        # Performs one step of reinforcement learning
        t = random.random()
        if t < 1-self.p:
            tt = random.random()
            if tt < self.Inertia:
                output = self.ForwardPropagate(self.game.Direction)
            else:
                new_direction = random.choice(['L','R'])
                output = self.ForwardPropagate(new_direction)
                self.game.ChangeDirection(new_direction)
        else:
            outputL = self.ForwardPropagate('L')
            outputR = self.ForwardPropagate('R')
            if outputL[-1] < outputR[-1]:
                output = outputL
                self.game.ChangeDirection('L')
            else:
                output = outputR
                self.game.ChangeDirection('R')
                
        if random.random()<0.00002:
            # Occasionally prints out the current value of the network
            # useful for adjusting various learning parameters, especially gamma
#             print( output[-1])
            
        result = self.game.UpdateStep()
        if result[-1]:
            estimate = self.GameOverCost
        else:
            estimateL = self.ForwardPropagate('L')
            estimateR = self.ForwardPropagate('R')
            estimate = min(estimateL[-1], estimateR[-1])**self.discount
            if result[1]:
                estimate *= self.gamma
        expected = (1-self.a)*output[-1] + self.a*estimate
        self.BackPropagate(output[-1], expected, output[0], output[1])
        return result

    
    def Training(self):
        # Run NSim consecutive training games
        train_scores = []
        for i in range(self.NSim):
            stop = False
            while not stop:
                (update, kill, stop) = self.ReinforcedLearningStep()
                
            train_scores.append(self.game.counter)
            self.game = Game(**self.GameParameters)
        return train_scores

            
    def Testing(self):
        # Run NTest consecutive test games to evaluate learned performance;
        # prints out all the test values and records average and median values
        s = 0
        alist = []
        for i in range(self.NTest):
            stop = False
            while not stop:
                stop = self.TestStep()
                self.game.UpdateStep()
            alist.append(self.game.counter)
            self.game = Game(**self.GameParameters)
        m1 = sum(alist)/(len(alist)+0.0)
        m2 = np.median(alist)
        self.counter.append((m1,m2))
        if m1 > self.best_score:
            self.best_score = m1
            np.savez("Data/parameters_best", GameParameters = self.GameParameters, Theta1 = self.Theta1, Theta2 = self.Theta2)


    def TrainSession(self):
        # Performs a learning session until median scores achieves TestTreshold or for fixed number of learn/test sessions
        self.Testing()
        keep_going = True
        i = 0
        
        trainscore_values=[]
        
        while keep_going:
            i += 1
            print('')
            print( "No of Asteroids:", self.game.N)
            print( "RunSession:", i)
            
            
            train_scores = self.Training()
            print( "Train average and median score:", sum(train_scores)/(len(train_scores)+0.0), np.median(train_scores))
            trainscore_values.append(sum(train_scores)/(len(train_scores)+0.0))
            
            print("Theta1:",self.Theta1)
            print(self.Theta1.shape)
            print("Theta2:",self.Theta2)
            print(self.Theta2.shape)
            
            self.Testing()
            print( "Test Results:", self.counter)            
            new, old = self.counter[-1][-1], self.counter[-2][-1]
            
            self.epsilon *= (old/new)**self.epsilon_decay_rate
            print( "Gradient Learning Rate:", self.epsilon)
            
            
            self.p = 1 - (1-self.p)*((old/new)**self.p_decay_rate)
            if self.p < 0:
                self.p = 0.0
            print("Probalistic Values:", self.p)
            print('')
            
            
            if self.TestTreshold == None and not self.NumberOfSessions == None:
                if i >= self.NumberOfSessions:
                    keep_going = False
                    
            elif not self.TestTreshold == None:                 
                if self.counter[-1][-1] >= self.TestTreshold:
                    keep_going = False
  


## Main Application

In [53]:
choose = Choose()
choose.master.mainloop()
who = choose.who
howmany = choose.howmany.get()
FileDict = {4:'Data/parameters4.npz', 5:'Data/parameters5.npz', 6:'Data/parameters6.npz'}
FileToOpen = FileDict[howmany]
SpeedDict = {'fast':5, 'medium':10, 'slow':15}
howfast = SpeedDict[choose.howfast.get()]

if who == 'human':
    GameParameters = {'N':howmany, 'DownSideRatio':3, 'SleepTime':howfast, 'R':25, 'r':5, 'Height':400, 'Halfwidth':200,
                      'GlobalHeight':600, 'GlobalWidth':800, 'Thickness':20, 'RandomTreshold':0.2, 'RandomStep':1,
                      'RandomVertTreshold':0.2, 'RandomVertStep':1, 'MaxScore':None}


    app=PlayHuman(GameParameters)
    app.master.mainloop()
    print( "Human Score:", app.game.counter)

    
if who == 'ai':
    arrays = np.load(FileToOpen)
    GameParameters = arrays['GameParameters'][()]
    GameParameters['SleepTime'] = howfast
    Theta1 = arrays['Theta1']
    Theta2 = arrays['Theta2']

    app = PlayAI(Theta1, Theta2, GameParameters)
    app.master.mainloop()

    print( "AI Score:", app.game.counter)

    
if who == 'dumb_ai':
    arrays = np.load(FileToOpen)
    GameParameters = arrays['GameParameters'][()]
    GameParameters['SleepTime'] = howfast
    Theta1 = arrays['Theta1']
    Theta2 = arrays['Theta2']

    Theta1 = np.random.uniform(-1.0, 1.0, Theta1.shape)
    Theta2 = np.random.uniform(-1.0, 1.0, Theta2.shape)

    app = PlayAI(Theta1, Theta2, GameParameters)
    app.master.mainloop()

    print( "Dumb AI Score:", app.game.counter)


## Training Sessions

In [103]:
GameParameters = {'N': 5, 'DownSideRatio': 3, 'SleepTime': 5, 'R': 25, 'r': 5, 'Height': 400, 'Halfwidth': 200,
                  'GlobalHeight': 600, 'GlobalWidth': 800, 'Thickness': 20, 'RandomTreshold': 0.2, 'RandomStep': 1,
                  'RandomVertTreshold': 0.2, 'RandomVertStep': 1, 'MaxScore': None}
LearnParameters = {'HiddenSize': 12, 'gamma': 0.8, 'GameOverCost': 1, 'NSim': 400, 'NTest': 400, 'TestTreshold': None,
                   'NumberOfSessions': 10, 'Inertia': 0.8, 'p': 0.0, 'a': 1.0, 'epsilon': 0.05,
                   'epsilon_decay_rate': 0.0, 'discount': 0.999, 'p_decay_rate': 0.5}

bot = BotTrain(GameParameters = GameParameters, **LearnParameters)


bot.TrainSession()

print( bot.Theta1)
print( bot.Theta2)
np.savez("Data/parameters", GameParameters = GameParameters, Theta1 = bot.Theta1, Theta2 = bot.Theta2)




No of Asteroids: 5
RunSession: 1
0.15471166767492237
0.13216752143494953
0.11185219054517033
0.1426405729383512
0.20156046992237625
Train average and median score: 11.1275 9.0
Theta1: [[-7.14411603e-01  2.27109420e-01 -4.47645380e-01 -6.24904573e-01
  -2.56133098e-02  3.85483005e-01  7.95041885e-01  2.88377583e-03
   6.23815714e-01  6.40761028e-01 -7.31474682e-01  6.09074667e-01]
 [-7.26373852e-02 -1.28527426e-01 -2.86730559e-01  2.93321418e-01
   4.51124302e-01  2.08884439e-01 -1.12783149e+00 -8.99194917e-01
  -6.25333836e-01 -1.14266800e-01 -4.94355345e-02  7.51472821e-01]
 [ 6.46609017e-01 -8.98072328e-01  8.54369204e-01  6.09219214e-01
  -4.65374540e-01  3.79200601e-02  2.73697694e-02 -4.91651241e-01
  -3.72407013e-01 -7.74154327e-02  2.09516576e-01  8.25319422e-01]
 [ 1.95443580e-02 -1.97094076e-02 -6.85533458e-01  1.17358314e+00
   2.29286268e-01  2.51009970e-01  2.92044464e-01 -6.34610674e-01
  -4.04887789e-01 -4.99199401e-01 -1.46550603e-01  6.24376181e-01]
 [ 7.40956072e-01 -

0.23049884033489915
0.11350588901599823
0.09743918076217271
Train average and median score: 11.2475 9.0
Theta1: [[-1.13779881  0.72063411 -0.48923127 -0.51933355  0.04215254  0.05772419
   1.33816698 -0.00386072  0.31412245  0.23028891 -0.76080635  0.83114425]
 [-0.18145608  0.02555865 -0.33119721  0.62933856  0.53064054  0.36029907
  -1.38430647 -0.90297788 -0.73320647  0.08695011 -0.08023159  0.85237052]
 [ 0.55941121 -1.40418316  0.83351518  0.78977178 -0.4499474   0.05346941
   0.05320571 -0.48568746 -0.27502696 -0.06141912  0.21809147  0.7611972 ]
 [-0.06587353  0.09611327 -0.74138117  1.4233447   0.28869699  0.37669846
   0.09392349 -0.64632592 -0.52151792 -0.3396079  -0.18419581  0.65903792]
 [ 0.71749752  0.13512452  0.82840627 -0.08027552  0.42279842 -0.33946206
  -0.76602944  0.739524   -0.49363451  0.43430574  0.60822765 -0.66853033]
 [ 0.74768227  0.06784865  0.55541122  1.15027847  0.68613895 -0.26072069
  -0.36489286 -0.49555189 -0.91623052  0.54489514 -0.59545579 -0.2678

Test Results: [(11.265, 10.0), (10.1675, 9.0), (12.5075, 10.0), (10.6, 9.0), (11.3225, 9.0), (10.9975, 9.0), (11.3025, 9.0), (11.0625, 9.0)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.0


No of Asteroids: 5
RunSession: 8
0.2081631519685193
0.1444574543763683
Train average and median score: 11.175 9.0
Theta1: [[-4.03433080e+00  1.62575369e+00 -5.12959569e-01 -7.02850470e-01
   2.33668181e-03 -5.04832749e-02  5.67831665e+00 -3.66265401e-02
   2.92567209e-01 -1.06981289e-01 -8.20490551e-01  3.77468843e+00]
 [-7.11964636e-01  6.43885153e-01 -3.06359238e-01  1.24236942e+00
   7.35409146e-01  7.52630335e-01 -1.38321739e+00 -8.89023138e-01
  -8.48325021e-01  4.08620075e-01 -2.06137883e-02  1.39426311e+00]
 [-9.68651654e-02 -2.37002136e+00  8.23495716e-01  1.32145019e+00
  -4.94147208e-01  1.25570408e-01  3.86047348e-01 -5.09768334e-01
  -2.52515280e-01 -7.88296841e-03  1.93692036e-01  6.69156933e-01]
 [-9.21365807e-03  3.32380879e-01 -7.67996890e-01  1.39399784e+00
   3.38712603e-01  

Test Results: [(11.265, 10.0), (10.1675, 9.0), (12.5075, 10.0), (10.6, 9.0), (11.3225, 9.0), (10.9975, 9.0), (11.3025, 9.0), (11.0625, 9.0), (12.8125, 11.0), (15.3375, 12.0), (11.965, 10.0)]
Gradient Learning Rate: 0.05
Probalistic Values: 0.05131670194948623

[[-5.62144323e+00  2.48885553e-01 -5.10318692e-01 -1.65472413e-01
  -8.23171938e-02 -9.27599034e-02  7.45917908e+00 -3.61695432e-02
   3.30425759e-01 -2.26671516e-01 -8.35219328e-01  5.43951586e+00]
 [-5.32110845e-01  6.41104906e-01 -3.11393992e-01  1.09105421e+00
   7.36187920e-01  5.78756716e-01 -7.64866828e-01 -8.91984387e-01
  -7.75759786e-01  3.40206255e-01 -2.38734010e-02  1.16909044e+00]
 [-2.15421109e-01 -4.30213644e+00  8.94817533e-01  3.24147811e+00
  -4.76820180e-01  3.04310600e-01  2.39383387e-01 -5.07377242e-01
  -3.46928030e-01  1.47476628e-01  2.52788653e-01  3.74911689e-01]
 [ 3.18214236e-01  5.90993158e-01 -7.75285447e-01  1.27841841e+00
   3.47270913e-01  5.19082377e-01  3.21625828e-01 -6.51530815e-01
  -5.18159