In [1]:
# Scratch check of an exploration schedule: start epsilon at 1, multiply it by
# decayTest once per episode, and print what is left after episodesTest episodes.
decayTest = 0.99977
episodesTest = 20000
epsilonTest = 1
for i in range(episodesTest):
    epsilonTest = epsilonTest * decayTest
print(epsilonTest)
# To continue a previous run, define:
# LOAD_MODEL, epsilon, decay, episodeStart

0.010046518914696807


In [2]:
# Imports
import os #Setup which GPU to use before everything else to avoid conflicts
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import sys
import numpy as np
import pandas as pd
import time
import random
import math
import datetime
import csv
import copy
import seaborn as sns
import matplotlib.pyplot as plt
import keras
from PIL import Image, ImageDraw
import keras.backend.tensorflow_backend as backend
from keras.models import Sequential, Model
from keras.utils import plot_model
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten, Input
from keras import optimizers
from keras.callbacks import TensorBoard
import tensorflow as tf
from collections import deque
from tqdm import tqdm
from contextlib import redirect_stdout

# Misc parameters
USE_SCOTT_MODEL = False # When True, a shared GLOBAL_REPLAY_MEMORY deque is created and ACTION_SPACE_SIZE grows by 1 (see the end of this cell)
CHECK_DQN_INPUT = False
ID = int(time.time()) # Run identifier: Unix time in whole seconds when this cell executes (also part of the recordings/ folder name)
LOAD_MODEL = None # Copy-paste the full model name, enclosed in quotes, to load the models of all agents. Otherwise, set to None
SAVE_MODEL_EVERY = 1000 # Episodes. Set to -1 for no saves
MODEL_NAME = 'Merge_Dynamic' # Name shared by all saved files that belong to this run
RANDOM_SEED = -1 # Seed for all random number generators. -1 to turn off
ACTION_SPACE_SIZE = 4 # Roomba.action handles choices 0-3 (up, right, down, left)

# Model settings
DISCOUNT = 0.90
REPLAY_MEMORY_SIZE = 20_000  # How many of the most recent steps to keep for model training
MIN_REPLAY_MEMORY_SIZE = 1200 # Minimum number of steps in memory before training starts
MINIBATCH_SIZE = 32  # How many steps (samples) to use for training
UPDATE_TARGET_EVERY = 10  # Terminal states (end of episodes)
LEARNING_RATE = 0.0001

# Agent view settings
REMOVE_PLAYER_FROM_LOCAL_VIEW = True # Drops one channel from the local-view input shape below
WALLS_BLOCK_VIEW = True
VIEW_TYPE = "Merge" # "Relative", "Local" or "Merge"

# Exploration settings
epsilon = 1  # Starting epsilon value. Not a constant, going to be decayed
EPSILON_DECAY = 0.9998
MIN_EPSILON = 0.001

# Record parameters
# Saved to a text file titled the same as the logs
RECORD_PARAMETERS = True
str_model_summary = "No Summary"

PRINT_CSV = True

# Environment settings
START_EPISODE = 1
END_EPISODE = 25000
OBS_RANGE = 3 # Local view is a square of side OBS_RANGE*2+1 (see localViewSize below)
SIZE_X = 35# Environment width
SIZE_Y = 20# Environment height
EPISODE_STEP_LIMIT = 300
PLAYER_COUNT = 8
TRASH_COUNT = 30
AGENT_SPAWN = "4Room Center" # "Random", "Center", "Corridor", "Each Room", "Custom", "4Room Center","4Room Center 8Agent Static"
TASK_SPAWN = "4Room Each Room" # "Random", "Not Center", "Along Walls", "Not Corridor", "Not Each Room", "Custom", "4Room Each Room", "4Room 1Room Doubled"
WALL_ARRANGEMENT = "Four Room Block Wall" # "Six Rooms", "Custom", "Four Room Block Wall"

# Key settings (the number that represents each thing in the DQN matrix)
PLAYER_N = 1  # Player key
TRASH_N = 1  # Trash key
OTHERS_N = 1 # Other players key
BLIND_N = -1 # Key for areas that can't be seen due to a wall

# Reward scheme
MOVE_PENALTY = 0 # Use a negative value to penalize
WALL_COLLISION_PENALTY = 0 # Use a negative value to penalize
AGENT_COLLISION_PENALTY = 0 # Use a negative value to penalize
TRASH_REWARD = 1

# DQN input shape settings
CHANNEL_COUNT = 4 # Depending on the view type, certain channels are removed before DQN input
PLAYER_CHANNEL = 0
OTHERS_CHANNEL = 1
TRASH_CHANNEL = 2
BLIND_CHANNEL = 3
OBSERVATION_SPACE_VALUES = (1, 1, 1)# Placeholder; overwritten by the VIEW_TYPE branches below
OBSERVATION_SPACE_VALUES_TWO = (1, 1, 1)# Placeholder; only overwritten for the "Merge" view

# Determines the DQN input shape
localViewSize = OBS_RANGE*2+1
relativeViewSizeX = SIZE_X
relativeViewSizeY = SIZE_Y
if VIEW_TYPE == "Local":
    # Local view: square window, minus the player channel when it is removed
    if REMOVE_PLAYER_FROM_LOCAL_VIEW:
        OBSERVATION_SPACE_VALUES = (localViewSize, localViewSize, CHANNEL_COUNT-1)
    else:
        OBSERVATION_SPACE_VALUES = (localViewSize, localViewSize, CHANNEL_COUNT)
elif VIEW_TYPE == "Relative":
    # Relative view: full-environment-sized grid with every channel
    OBSERVATION_SPACE_VALUES = (relativeViewSizeX, relativeViewSizeY, CHANNEL_COUNT)
elif VIEW_TYPE == "Merge":
    # Merge view: two inputs. The first is the local window with one channel fewer
    # than the "Local" case; the second is a full-size grid with 2 channels.
    if REMOVE_PLAYER_FROM_LOCAL_VIEW:
        OBSERVATION_SPACE_VALUES = (localViewSize, localViewSize, CHANNEL_COUNT-2)
    else:
        OBSERVATION_SPACE_VALUES = (localViewSize, localViewSize, CHANNEL_COUNT-1)
    OBSERVATION_SPACE_VALUES_TWO = (relativeViewSizeX, relativeViewSizeY, 2)

# Scott's shared memory model variables
if USE_SCOTT_MODEL:
    GLOBAL_REPLAY_MEMORY = deque(maxlen=REPLAY_MEMORY_SIZE)
    ACTION_SPACE_SIZE += 1 # One extra action on top of the four moves

Using TensorFlow backend.


In [3]:
class Roomba:
    """An agent occupying one cell of the SIZE_X by SIZE_Y grid.

    Besides its current and previous position, the agent carries per-episode
    counters (steps, collisions, completed tasks), its reward, view type and
    the last event string.
    """

    def __init__(self, obsRange=3, viewType = "none", ID = -1):
        self.ID = ID
        # Random starting cell; x is drawn before y
        self.x = np.random.randint(0, SIZE_X)
        self.y = np.random.randint(0, SIZE_Y)
        # -1 marks "no previous position yet"
        self.xBefore = -1
        self.yBefore = -1
        self.obsRange = obsRange
        self.viewType = viewType
        # Bookkeeping counters
        self.stepsTaken = 0
        self.agentCollisionCount = 0
        self.wallCollisionCount = 0
        self.tasksCompleted = 0
        self.reward = 0
        self.event = "nothing"

    def __str__(self):
        return f"Roomba ({self.x}, {self.y})"

    def __sub__(self, other):
        # Offset from `other` to this agent as an (dx, dy) tuple
        dx = self.x - other.x
        dy = self.y - other.y
        return (dx, dy)

    def __eq__(self, other):
        # Equal means "standing on the same cell"
        sameColumn = self.x == other.x
        return sameColumn and self.y == other.y

    def action(self, choice):
        '''
        Applies one of the four moves: 0=up, 1=right, 2=down, 3=left.
        Any other choice leaves the position untouched.
        The step counter is incremented in every case.
        '''
        deltas = ((0, 1), (1, 0), (0, -1), (-1, 0))
        for code, (dx, dy) in enumerate(deltas):
            if choice == code:
                self.move(x=dx, y=dy)
                break
        self.stepsTaken += 1

    def move(self, x=False, y=False):
        # Remember where we were so undo() can restore it
        self.xBefore, self.yBefore = self.x, self.y

        # A falsy offset (False, 0, None) means "no change" on that axis
        self.x += x if x else 0
        self.y += y if y else 0

    # Puts the agent back on the cell it occupied before its last move
    # (used when the move ended in a collision)
    def undo(self):
        self.x, self.y = self.xBefore, self.yBefore

    # True when the current cell equals the cell stored before the last move
    def hasNotMoved(self):
        return self.x == self.xBefore and self.y == self.yBefore
    
class Trash:
    """A task item placed on a random cell of the SIZE_X by SIZE_Y grid."""

    def __init__(self):
        # x is drawn before y
        self.x = np.random.randint(0, SIZE_X)
        self.y = np.random.randint(0, SIZE_Y)
        self.count = 0

    def __str__(self):
        return f"Trash ({self.x}, {self.y})"

    def __sub__(self, other):
        # Offset from `other` to this item as an (dx, dy) tuple
        dx = self.x - other.x
        dy = self.y - other.y
        return (dx, dy)

    def __eq__(self, other):
        # Equal means "on the same cell"
        sameColumn = self.x == other.x
        return sameColumn and self.y == other.y

In [4]:
# Class to manage matrix for DQN input and rendering 
class MapManager:
    """Grid of shape (SIZE_X, SIZE_Y, CHANNEL_COUNT) used for DQN input and rendering.

    The stored matrix marks every player on OTHERS_CHANNEL and every task on
    TRASH_CHANNEL. getView() works on a copy of it, re-labels the requesting
    agent on PLAYER_CHANNEL, applies blind spots and crops/pads the result
    according to the requested view type.
    """

    def __init__(self):
        self.sizeX = SIZE_X
        self.sizeY = SIZE_Y
        self.matrix = np.zeros((SIZE_X, SIZE_Y, CHANNEL_COUNT), dtype=np.int8)
        self.playerList = []
        self.trashList = []
        self.blockWallList = []

    # Rebuilds the map matrix from the given players and tasks
    def update(self, playerList, trashList, blockWallList):
        self.clear()
        for eachPlayer in playerList:
            self.addOtherPlayer(eachPlayer.x, eachPlayer.y)
        for eachTrash in trashList:
            self.addTask(eachTrash.x, eachTrash.y)
        # Block walls are remembered but not drawn into the matrix
        # (drawing them would be a call to addBlockWall per coordinate).
        self.playerList = playerList
        self.trashList = trashList
        self.blockWallList = blockWallList

    # Marks the player that owns this observation
    # Needs specific location
    def addPlayer(self, playerX, playerY):
        self.matrix[playerX][playerY][PLAYER_CHANNEL] = PLAYER_N

    # Marks a player that is not the observing player
    # Needs specific location
    def addOtherPlayer(self, otherX, otherY):
        self.matrix[otherX][otherY][OTHERS_CHANNEL] = OTHERS_N

    # Marks a task item at the specified location
    def addTask(self, taskX, taskY):
        self.matrix[taskX][taskY][TRASH_CHANNEL] = TRASH_N

    # Marks a block wall cell on the blind channel
    def addBlockWall(self, blockWallX, blockWallY):
        self.matrix[blockWallX][blockWallY][BLIND_CHANNEL] = BLIND_N

    # Marks an area the agent cannot see; coordinates outside the map are ignored
    def addBlindSpot(self, blindX, blindY):
        self._markBlind(self.matrix, blindX, blindY)

    # Removes everything from the observation
    def clear(self):
        self.matrix = np.zeros((self.sizeX, self.sizeY, CHANNEL_COUNT), dtype=np.int8)

    # Returns an independent copy of the map, so callers can modify it
    # without touching the stored matrix
    def getEnvironment(self):
        return self.matrix.copy()

    def getView(self, obsRange, viewType, agentID = -1, removePlayerPos = False, blindSpotList = None):
        """Returns what the agent with the given ID sees, as numpy array(s).

        obsRange        -- number of cells visible in each direction
        viewType        -- "Local", "Relative" or "Merge"
        agentID         -- ID of the observing player; must be in the player
                           list given to the last update() call
        removePlayerPos -- drop PLAYER_CHANNEL from the local view
                           ("Local" and "Merge" only)
        blindSpotList   -- optional iterable of (x, y) cells hidden by walls;
                           entries outside the map are ignored

        "Local" returns a (2*obsRange+1)-sided window centred on the agent,
        with -1 in every channel for cells outside the map. "Relative" returns
        a map-sized array that is zero outside the agent's window. "Merge"
        returns [local, relative] where local has BLIND_CHANNEL (and optionally
        PLAYER_CHANNEL) removed and relative has TRASH_CHANNEL and
        OTHERS_CHANNEL removed.

        Raises ValueError when agentID is unknown or viewType is not recognised.
        """
        agent = None
        for eachPlayer in self.playerList:
            if agentID == eachPlayer.ID:
                agent = eachPlayer  # no break: the last matching entry is used
        if agent is None:
            raise ValueError(f"getView: no player with ID {agentID} in the current player list")
        x, y = agent.x, agent.y

        fullEnv = self.getEnvironment()

        # Re-label the observing agent: it is "the player", not one of the others
        fullEnv[x][y][PLAYER_CHANNEL] = PLAYER_N
        fullEnv[x][y][OTHERS_CHANNEL] = 0

        if blindSpotList is not None:
            for eachSpot in blindSpotList:
                self._markBlind(fullEnv, eachSpot[0], eachSpot[1])

        if viewType == "Local":
            viewMatrix = self._localView(fullEnv, x, y, obsRange)
            if removePlayerPos:
                viewMatrix = np.delete(viewMatrix, PLAYER_CHANNEL, axis=2)
            return viewMatrix

        if viewType == "Relative":
            return self._relativeView(fullEnv, x, y, obsRange)

        if viewType == "Merge":
            # The blind channel travels with the relative part, so drop it here.
            # Deleting by index list makes the result independent of deletion order.
            localDrop = [BLIND_CHANNEL]
            if removePlayerPos:
                localDrop.append(PLAYER_CHANNEL)
            localPart = np.delete(self._localView(fullEnv, x, y, obsRange), localDrop, axis=2)

            relativePart = np.delete(
                self._relativeView(fullEnv, x, y, obsRange),
                [TRASH_CHANNEL, OTHERS_CHANNEL],
                axis=2,
            )
            return [localPart, relativePart]

        raise ValueError(f"getView: unknown viewType {viewType!r} (expected 'Local', 'Relative' or 'Merge')")

    # True when (x, y) is a valid cell of the map
    def _inBounds(self, x, y):
        return 0 <= x < self.sizeX and 0 <= y < self.sizeY

    # Hides one cell of `grid`: clears player/others/trash and sets the blind key
    def _markBlind(self, grid, blindX, blindY):
        if self._inBounds(blindX, blindY):
            grid[blindX][blindY][PLAYER_CHANNEL] = 0
            grid[blindX][blindY][OTHERS_CHANNEL] = 0
            grid[blindX][blindY][TRASH_CHANNEL] = 0
            grid[blindX][blindY][BLIND_CHANNEL] = BLIND_N

    # Window of side 2*obsRange+1 centred on (x, y); cells outside the map are -1
    def _localView(self, fullEnv, x, y, obsRange):
        canvas = np.full(
            (2*obsRange+self.sizeX+1, 2*obsRange+self.sizeY+1, CHANNEL_COUNT),
            -1,
            dtype=np.int8,
        )
        # Paste the environment so that map cell (0, 0) sits at (obsRange, obsRange)
        canvas[obsRange:obsRange+self.sizeX, obsRange:obsRange+self.sizeY, :] = fullEnv
        return canvas[x:x+2*obsRange+1, y:y+2*obsRange+1, :]

    # Map-sized array that is zero everywhere except the agent's (clipped) window
    def _relativeView(self, fullEnv, x, y, obsRange):
        viewMatrix = np.zeros((self.sizeX, self.sizeY, CHANNEL_COUNT), dtype=np.int8)
        # Clip the window so it never leaves the environment
        left = max(x-obsRange, 0)
        right = min(x+obsRange+1, self.sizeX)
        up = max(y-obsRange, 0)
        down = min(y+obsRange+1, self.sizeY)
        viewMatrix[left:right, up:down, :] = fullEnv[left:right, up:down, :]
        return viewMatrix

    # Prints a view with its axes rotated: (x, y, channel) arrays are printed as (channel, y, x).
    # A list/tuple of views (as returned for "Merge") is printed one view after another.
    def printView(self, viewMatrix):
        if isinstance(viewMatrix, (list, tuple)):
            for eachView in viewMatrix:
                self.printView(eachView)
            return
        # swapaxes returns a new view object, so the caller's array is not modified
        viewCopy = np.asarray(viewMatrix)
        if viewCopy.ndim >= 3:
            viewCopy = np.swapaxes(viewCopy,1,2)
        if viewCopy.ndim >= 2:
            viewCopy = np.swapaxes(viewCopy,0,1)
        print(viewCopy)

    # Prints the stored map matrix in the same layout as printView
    def printEnv(self):
        self.printView(self.matrix)
        
class WallManager:
    """Stores thin wall segments and solid block cells.

    A wall segment is a pair ([x1, y1], [x2, y2]) naming the two neighbouring
    cells it separates. A block is a cell [x, y] fenced in by four segments.
    """

    def __init__(self):
        self.blockWallList = []
        self.wallList = []

    def getWallList(self):
        return self.wallList

    # Segments whose two cells both lie inside the square of half-width obsRange
    # centred on the agent
    def getWallsWithinView(self, agentX, agentY, obsRange):
        lowX, highX = agentX - obsRange, agentX + obsRange
        lowY, highY = agentY - obsRange, agentY + obsRange
        return [
            segment
            for segment in self.wallList
            if max(segment[0][0], segment[1][0]) <= highX
            and min(segment[0][0], segment[1][0]) >= lowX
            and max(segment[0][1], segment[1][1]) <= highY
            and min(segment[0][1], segment[1][1]) >= lowY
        ]

    # True when a segment separates cells (x1, y1) and (x2, y2), in either order
    def wallExists(self, x1, y1, x2, y2):
        for segment in self.wallList:
            headX, headY = segment[0][0], segment[0][1]
            tailX, tailY = segment[1][0], segment[1][1]
            sameDirection = x1 == headX and y1 == headY and x2 == tailX and y2 == tailY
            flipped = x1 == tailX and y1 == tailY and x2 == headX and y2 == headY
            if sameDirection or flipped:
                return True
        return False

    # Fences cell (x, y) with four unit segments and records it as a block
    def addBlock(self, x = -1, y = -1):
        sides = (
            (x-1, y+1, "Horizontal"),
            (x-1, y+1, "Vertical"),
            (x-1, y, "Horizontal"),
            (x, y+1, "Vertical"),
        )
        for cornerX, cornerY, direction in sides:
            self.addWall(cornerX, cornerY, direction, 1)
        self.blockWallList.append([x,y])

    # Looks for a recorded block:
    #   x and y given  -> a block at exactly (x, y)
    #   only y given   -> any block in that row
    #   only x given   -> any block in that column
    #   neither given  -> any block at all
    def blockExists(self, x=None, y=None):
        return any(
            (x is None or block[0] == x) and (y is None or block[1] == y)
            for block in self.blockWallList
        )

    # Appends `length` unit segments starting from the corner (xTL, yTL).
    # "Vertical" walks downwards in y, separating columns xTL and xTL+1.
    # "Horizontal" walks rightwards in x, separating rows yTL and yTL-1.
    # Any other shape adds nothing.
    def addWall(self, xTL=-1, yTL=-1, shape = "-1", length = -1):
        if shape == "Vertical":
            self.wallList.extend(
                ([xTL, row], [xTL+1, row])
                for row in range(yTL-1, yTL-length-1, -1)
            )
        if shape == "Horizontal":
            self.wallList.extend(
                ([col, yTL], [col, yTL-1])
                for col in range(xTL+1, xTL+length+1)
            )
    
class SpawnManager:
    """Per-cell spawn probabilities for players and trash.

    trashSpawnMap / playerSpawnMap hold a percent chance (0 to 100) per cell.
    playerIDspawnMap holds, per cell, the ID of the player bound to that cell
    (-1 when no player is bound); it is only consulted when AGENT_SPAWN
    contains "Static".
    """

    def __init__(self):
        self.trashSpawnMap = np.zeros((SIZE_X, SIZE_Y))
        self.playerSpawnMap = np.zeros((SIZE_X, SIZE_Y))
        self.playerIDspawnMap = np.full((SIZE_X, SIZE_Y), -1.0)

    # Makes 1 attempt at spawning a certain thing ("Player" or "Trash") at a certain location
    # returns True if spawn was successful, False otherwise
    def trySpawn(self, x=-1, y=-1, thing="-1", playerID = -1):
        # random.randint includes BOTH endpoints, so the roll must be 1..100:
        # chance 100 always succeeds, chance 0 never does, chance p succeeds p% of the time.
        # The roll is drawn before the static check so every call consumes one random number.
        roll = random.randint(1, 100)
        if playerID != self.playerIDspawnMap[x][y] and "Static" in AGENT_SPAWN:
            # Static spawning: only the player bound to this cell may spawn here
            return False
        if thing == "Player" and roll <= self.playerSpawnMap[x][y]:
            return True
        if thing == "Trash" and roll <= self.trashSpawnMap[x][y]:
            return True
        return False

    # Binds the cell (x, y) to a player ID for "Static" agent spawning
    def updateSpawnPlayerID(self, x, y, playerID):
        self.playerIDspawnMap[x][y] = playerID

    # Changes the probability of a certain thing ("Player", "Trash") to spawn at a certain location
    # chance can range from 0 to 100, describing the percent probability of something spawning there
    def updateSpawn(self, x, y, thing, chance):
        if thing == "Player":
            self.playerSpawnMap[x][y] = chance
        if thing == "Trash":
            self.trashSpawnMap[x][y] = chance

class BlindSpotManager:
    """Computes which grid cells an agent cannot see because a wall is in the way.

    Walls are pairs ([x1, y1], [x2, y2]) as produced by WallManager. For every
    wall inside the agent's view square, two sight lines are drawn from the
    agent's cell centre to the two ends of the wall; cells of the view square
    lying between those lines and on the far side of the wall are reported.
    """

    # Returns list of coordinates that the agent cannot see due to walls blocking view
    def getBlindSpots(self, agentX, agentY, obsRange, wallManager):
        visibleWallList = wallManager.getWallsWithinView(agentX, agentY, obsRange)
        totalBlindSpotList = []
        for eachVisibleWall in visibleWallList:
            # Get list of blind spots made by each wall
            blindSpotList = self.getBlindSpotsForOneWall(agentX, agentY, obsRange, eachVisibleWall)
            
            # Append all blind spots to 1 list
            totalBlindSpotList = totalBlindSpotList + blindSpotList
            
        # Remove duplicates while keeping first-seen order (entries are hashable (x, y) tuples)
        totalBlindSpotList = list(dict.fromkeys(totalBlindSpotList))

        return totalBlindSpotList
    
    def getBlindSpotsForOneWall(self, agentX, agentY, agentObs, wallCoord):
        """Returns the (x, y) cells within agentObs of the agent that one wall hides.

        wallCoord is a pair ([x1, y1], [x2, y2]). The result is a list of
        tuples and may include coordinates outside the environment, because
        the scan covers the whole square around the agent without clipping.
        """
        # Grid coordinates are multiplied by this factor before the line maths;
        # cell centres are taken at index*scale - scale/2 and wall ends at multiples of scale
        scale = 10
        
        obsRange = agentObs
        x1 = wallCoord[0][0]
        y1 = wallCoord[0][1]
        x2 = wallCoord[1][0]
        y2 = wallCoord[1][1]
        # NOTE(review): if the two cells share neither x nor y, orientation is never
        # assigned and the code further down fails with an unbound-variable error.
        if x1 == x2:
            orientation = "Horizontal"
        elif y1==y2:
            orientation = "Vertical"
        
        
        verticalAlignment = self.wallLocationVertical(agentY, wallCoord)
        horizontalAlignment = self.wallLocationHorizontal(agentX, wallCoord)

        # Find the center of the grid space for the agent
        agentCenterX = agentX*scale-(scale/2)
        agentCenterY = agentY*scale-(scale/2)
        
        # Find the edges of the wall (its two end points, in scaled coordinates)
        if orientation == "Horizontal":
            edgeX1 = x1*scale
            edgeY1 = min(y1, y2)*scale
            edgeX2 = x1*scale-scale
            edgeY2 = min(y1, y2)*scale
        if orientation == "Vertical":
            edgeX1 = min(x1,x2)*scale
            edgeY1 = y1*scale
            edgeX2 = min(x1,x2)*scale
            edgeY2 = y1*scale-scale
        
        # Slopes of the two sight lines from the agent's centre to the wall ends.
        # For integer coordinates the denominators are never zero: wall ends sit on
        # multiples of scale while the agent centre is offset by scale/2.
        slope1 = (edgeY1-agentCenterY)/(edgeX1-agentCenterX)
        slope2 = (edgeY2-agentCenterY)/(edgeX2-agentCenterX)
        
        #The 'b' in y=mx+b
        b1 = self.findYintercept(edgeX1, edgeY1, slope1)
        b2 = self.findYintercept(edgeX2, edgeY2, slope2)
        
        # ccwSlope is the larger of the two slopes, cwSlope the smaller
        ccwSlope = max(slope1, slope2)
        cwSlope = min(slope1, slope2)
        
        #Assign 'b' values in y=mx+b to match the ccw and cw slopes
        if ccwSlope == slope1:
            ccwB = b1
            cwB = b2
        else:
            ccwB = b2
            cwB = b1
            
        blindCoords = []
        #Iterate over every xy coordinate with agent's view range
        for y in range(agentY-obsRange, agentY+obsRange+1):
            for x in range(agentX-obsRange, agentX+obsRange+1):
                # Centre of the candidate cell, in scaled coordinates
                centerX = x*scale-scale/2
                centerY = y*scale-scale/2
                cwStatus = self.aboveOrBelowLine(centerX,centerY,cwSlope, cwB)
                ccwStatus = self.aboveOrBelowLine(centerX,centerY,ccwSlope, ccwB)
                
                # Wall to the left of the agent: hidden cells are below the cw line,
                # above the ccw line, and not to the right of the wall
                if horizontalAlignment == "left":
                    if (cwStatus == "below" or cwStatus == "neither") and (ccwStatus == "above" or ccwStatus == "neither"):            
                        if x <= min(x1, x2):
                            if verticalAlignment == "above":
                                if y >= max(y1,y2):
                                    blindCoords.append((x,y))
                            elif verticalAlignment == "below":
                                if y <= min(y1,y2):
                                    blindCoords.append((x,y))
                            else:
                                blindCoords.append((x,y))
                    
                # Wall to the right of the agent: mirror image of the "left" case
                if horizontalAlignment == "right":
                    if (cwStatus == "above" or cwStatus == "neither") and (ccwStatus == "below" or ccwStatus == "neither"):
                        if x >= max(x1, x2):
                            if verticalAlignment == "above":
                                if y >= max(y1,y2):
                                    blindCoords.append((x,y))
                            elif verticalAlignment == "below":
                                if y <= min(y1,y2):
                                    blindCoords.append((x,y))
                            else:
                                blindCoords.append((x,y))
                
                # Wall in the agent's own column: hidden cells are on the same side of
                # both sight lines (above both or below both) and beyond the wall in y
                if horizontalAlignment == "neutral":
                    if verticalAlignment == "above":
                        if (cwStatus == "above" or cwStatus == "neither") and (ccwStatus == "above" or ccwStatus == "neither"):
                            if y >= max(y1, y2):
                                blindCoords.append((x,y))
                    if verticalAlignment == "below":
                        if (cwStatus == "below" or cwStatus == "neither") and (ccwStatus == "below" or ccwStatus == "neither"):
                            if y <= min(y1, y2):
                                blindCoords.append((x,y))
        return blindCoords
    
    # Solves y = slope*x + b for b, given one point (x, y) on the line
    def findYintercept(self, x, y, slope):
        return y - slope*x
    
    # Checks if the given x,y coordinates are above or below a line
    # line is defined as y=mx+b where m=slope and b=b in the arguments
    # Returns "neither" when the point is within lineThickness of the line,
    # otherwise "above" or "below" (None if neither comparison holds)
    def aboveOrBelowLine(self, x, y, slope, b):
        lineThickness = 0# the higher the number, the more likely "neither" is returned for coordinates close to the line
        
        lineY = (slope*x)+b#y=mx+b
        
        if abs(y-lineY)<=lineThickness:
            return "neither"
        if y > lineY:
            return "above"
        if y < lineY:
            return "below"
        
    # Classifies the wall as "left", "right" or "neutral" relative to agentX.
    # When both wall cells share the same x, that x is compared with agentX.
    # Otherwise the wall is "right" if its larger x exceeds agentX, else "left"
    # if its smaller x is below agentX (None if neither holds).
    def wallLocationHorizontal(self, agentX, wallCoord):
        x1 = wallCoord[0][0]
        x2 = wallCoord[1][0]
        
        if x1==x2:
            if x1 < agentX:
                return "left"
            if x1 > agentX:
                return "right"
            if x1 == agentX:
                return "neutral"
        else:
            if max(x1,x2) > agentX:
                return "right"
            elif min(x1,x2) < agentX:
                return "left"
    
    # Classifies the wall as "below", "above" or "neutral" relative to agentY.
    # When both wall cells share the same y, that y is compared with agentY.
    # Otherwise the wall is "above" if its larger y exceeds agentY, else "below"
    # if its smaller y is below agentY (None if neither holds).
    def wallLocationVertical(self, agentY, wallCoord):
        y1 = wallCoord[0][1]
        y2 = wallCoord[1][1]
        
        if y1==y2:
            if y1 < agentY:
                return "below"
            if y1 > agentY:
                return "above"
            if y1 == agentY:
                return "neutral"
        else:
            if max(y1,y2) > agentY:
                return "above"
            elif min(y1,y2) < agentY:
                return "below"
            
class CSVmanager:
    """Appends rows to the four CSV logs of a run.

    The files live under recordings/{ID}_{MODEL_NAME}/{ID}_{MODEL_NAME}_csvData/
    and are addressed either by index (0 agentList, 1 blindList, 2 wallList,
    3 taskList) or by file name. Construction creates the folder if needed
    and writes one header row to every enabled file.
    """

    def __init__(self, noBlindList = True):
        self.blindListDisabled = noBlindList
        self.filePath = f"recordings/{ID}_{MODEL_NAME}/{ID}_{MODEL_NAME}_csvData/"
        self.fileNameList = ["agentList.csv", "blindList.csv", "wallList.csv", "taskList.csv"]
        self.fileNameAndPathList = [self.filePath+eachFileName for eachFileName in self.fileNameList]

        if not os.path.isdir(self.filePath):
            os.makedirs(self.filePath)

        self.agentListHeader = ("Episode","Step","AgentID","X","Y","Event")
        self.blindListHeader = ("Episode","Step","AgentID","X","Y")
        self.wallListHeader = ("X1","Y1","X2","Y2")
        self.taskListHeader = ("Episode", "Step", "X", "Y", "TaskExists")

        # Header rows go out in file-index order
        allHeaders = (
            self.agentListHeader,
            self.blindListHeader,
            self.wallListHeader,
            self.taskListHeader,
        )
        for fileIndex, header in enumerate(allHeaders):
            self.write(header, fileIndex)

    #stringID: "agentList.csv", "blindList.csv", "wallList.csv", "taskList.csv"
    #intID: 0, 1, 2, 3
    # A stringID that matches a file name overrides intID. Nothing is written
    # when no file is selected, or when the blind list is selected but disabled.
    def write(self, data, intID = -1, stringID = "-1"):
        if stringID != "-1":
            for position, fileName in enumerate(self.fileNameList):
                if fileName == stringID:
                    intID = position

        if intID == -1 or (self.blindListDisabled and intID == 1):
            return

        # One comma-separated line per call, terminated by a newline
        row = ",".join(f"{eachItem}" for eachItem in data) + "\n"
        with open(self.fileNameAndPathList[intID], 'a', encoding='utf8') as csvFile:
            csvFile.write(row)

In [5]:
class RoombaEnv: 
    """Multi-agent grid world in which Roomba agents collect trash.

    The constructor builds the wall layout selected by WALL_ARRANGEMENT and the
    spawn table selected by AGENT_SPAWN / TASK_SPAWN. reset() places new agents
    and trash, step() applies one action per agent and returns every agent's new
    observation and reward.
    """

    #Cells used as agent spawns by "Each Room" and excluded from trash spawns by "Not Each Room"
    _ROOM_COORDS = (
        (5, 5),
        (18, 5),
        (29, 5),
        (5, 15),
        (18, 15),
        (29, 15),
        (0, 10),
        (34, 9),
    )

    def __init__(self):
        """Creates the managers, then the walls, the wall CSV rows and the spawn table (in that order)."""
        self.episode = 0
        self.playerList=[]
        self.trashList=[]
        self.blockWallList = []
        self.envMap = MapManager()
        self.wallMap = WallManager()
        self.spawnMap = SpawnManager()
        self.blindMap = BlindSpotManager()
        self.csv = CSVmanager()

        self._buildWalls()
        if PRINT_CSV:
            self._recordWalls()
        self._buildSpawnMap()

    def _buildWalls(self):
        """Adds the walls of the layout named by WALL_ARRANGEMENT to self.wallMap."""
        #Define map walls here
        if WALL_ARRANGEMENT == "Six Rooms":
            self.wallMap.addWall(10,20,"Vertical",9)
            self.wallMap.addWall(23,20,"Vertical",9)
            self.wallMap.addWall(10,9,"Vertical",9)
            self.wallMap.addWall(23,9,"Vertical",9)

            self.wallMap.addWall(-1,9,"Horizontal",5)
            self.wallMap.addWall(5,9,"Horizontal",11)
            self.wallMap.addWall(17,9,"Horizontal",11)
            self.wallMap.addWall(29,9,"Horizontal",5)

            self.wallMap.addWall(-1,11,"Horizontal",5)
            self.wallMap.addWall(5,11,"Horizontal",11)
            self.wallMap.addWall(17,11,"Horizontal",11)
            self.wallMap.addWall(29,11,"Horizontal",5)

        if WALL_ARRANGEMENT == "Four Room Block Wall":
            for y in range(0,9):
                self.wallMap.addBlock(17,y)
            for y in range(11,20):
                self.wallMap.addBlock(17,y)
            for x in range(0, 8):
                self.wallMap.addBlock(x,8)
                self.wallMap.addBlock(x,11)
            for x in range(9, 26):
                self.wallMap.addBlock(x,8)
                self.wallMap.addBlock(x,11)
            for x in range(27, 35):
                self.wallMap.addBlock(x,8)
                self.wallMap.addBlock(x,11)

        if WALL_ARRANGEMENT == "Custom":
            self.wallMap.addWall(SIZE_X//2-1,SIZE_Y//2+2,"Vertical",2)
            self.wallMap.addWall(SIZE_X//2,SIZE_Y//2+1,"Vertical",1)
            self.wallMap.addWall(SIZE_X//2,SIZE_Y//2+4,"Vertical",2)
            self.wallMap.addWall(SIZE_X//2-1,SIZE_Y//2,"Horizontal",1)
            self.wallMap.addWall(SIZE_X//2-1,SIZE_Y//2+2,"Horizontal",1)
            self.wallMap.addWall(SIZE_X//2,SIZE_Y//2+1,"Horizontal",1)
            self.wallMap.addWall(SIZE_X//2+1,SIZE_Y//2+3,"Vertical",2)
            self.wallMap.addWall(SIZE_X//2,SIZE_Y//2+4,"Horizontal",1)

            #Alternative custom layouts kept for reference:
            #small room with 1 exit in center
            #self.wallMap.addWall(SIZE_X//2-2,SIZE_Y//2+2,"Vertical",3)
            #self.wallMap.addWall(SIZE_X//2+1,SIZE_Y//2+2,"Vertical",1)
            #self.wallMap.addWall(SIZE_X//2+1,SIZE_Y//2,"Vertical",1)
            #self.wallMap.addWall(SIZE_X//2-2,SIZE_Y//2-1,"Horizontal",3)
            #self.wallMap.addWall(SIZE_X//2-2,SIZE_Y//2+2,"Horizontal",3)

            #2 horizontal lines in center
            #self.wallMap.addWall(SIZE_X//2-3,SIZE_Y//2+1,"Horizontal",5)
            #self.wallMap.addWall(SIZE_X//2-3,SIZE_Y//2,"Horizontal",5)

            #2 vertical lines in center
            #self.wallMap.addWall(SIZE_X//2-1,SIZE_Y//2+3,"Vertical",5)
            #self.wallMap.addWall(SIZE_X//2,SIZE_Y//2+3,"Vertical",5)

    def _recordWalls(self):
        """Writes one wallList.csv row (X1, Y1, X2, Y2) per coordinate pair returned by the wall manager."""
        for eachPair in self.wallMap.getWallList():
            x1Str = str(eachPair[0][0])
            y1Str = str(eachPair[0][1])
            x2Str = str(eachPair[1][0])
            y2Str = str(eachPair[1][1])
            self.csv.write((x1Str, y1Str, x2Str, y2Str),2)

    def _buildSpawnMap(self):
        """Fills self.spawnMap according to AGENT_SPAWN and TASK_SPAWN.

        Cells for which the wall manager reports a block are never given a spawn chance
        by the per-cell rules.
        """
        #Define spawn chances here
        if AGENT_SPAWN == "4Room Center 8Agent Static":
            self.spawnMap.updateSpawnPlayerID(15,10,0)
            self.spawnMap.updateSpawnPlayerID(15,9,1)
            self.spawnMap.updateSpawnPlayerID(16,10,2)
            self.spawnMap.updateSpawnPlayerID(16,9,3)
            self.spawnMap.updateSpawnPlayerID(18,10,4)
            self.spawnMap.updateSpawnPlayerID(18,9,5)
            self.spawnMap.updateSpawnPlayerID(19,10,6)
            self.spawnMap.updateSpawnPlayerID(19,9,7)

        for x in range(0,SIZE_X):
            for y in range(0,SIZE_Y):
                if self.wallMap.blockExists(x,y):
                    continue

                if AGENT_SPAWN == "4Room Center 8Agent Static":
                    #construction
                    if (x >= 15 and x <= 19) and (y >= 9 and y <= 10):
                        self.spawnMap.updateSpawn(x,y,"Player", 100)

                if AGENT_SPAWN == "4Room Center":
                    if (x >= 11 and x <= 23) and (y >= 9 and y <= 10):
                        self.spawnMap.updateSpawn(x,y,"Player", 100)
                if TASK_SPAWN == "4Room Each Room":
                    if y < 8 or y > 11:
                        self.spawnMap.updateSpawn(x,y,"Trash", 100)

                if TASK_SPAWN == "4Room 1Room Doubled":
                    if y < 8 or y > 11:
                        self.spawnMap.updateSpawn(x,y,"Trash", 50)
                    #Second call for the same cell: the x < 17, y < 8 cells get 100 instead of 50
                    if (y < 8) and (x < 17):
                        self.spawnMap.updateSpawn(x,y,"Trash", 100)

                if AGENT_SPAWN == "Center":
                    if (x >= SIZE_X/2-2 and x <= SIZE_X/2+2) and (y >= SIZE_Y/2-2 and y <= SIZE_Y/2+2):
                        self.spawnMap.updateSpawn(x,y,"Player", 100)
                if TASK_SPAWN == "Not Center":
                    if x < SIZE_X/2-2 or x > SIZE_X/2+2 or y < SIZE_Y/2-2 or y > SIZE_Y/2+2:
                        self.spawnMap.updateSpawn(x,y,"Trash", 100)

                if AGENT_SPAWN == "Corridor":
                    if y > SIZE_Y/2-1 and y < SIZE_Y/2+1:
                        self.spawnMap.updateSpawn(x,y,"Player", 100)
                if TASK_SPAWN == "Not Corridor":
                    if y < SIZE_Y/2-1 or y >= SIZE_Y/2+1:
                        self.spawnMap.updateSpawn(x,y,"Trash", 100)

                if TASK_SPAWN == "Not Each Room":
                    if (x, y) not in self._ROOM_COORDS:
                        self.spawnMap.updateSpawn(x,y,"Trash", 100)

                if AGENT_SPAWN == "Random":
                    self.spawnMap.updateSpawn(x,y,"Player", 100)
                if TASK_SPAWN == "Random":
                    self.spawnMap.updateSpawn(x,y,"Trash", 100)

        if AGENT_SPAWN == "Each Room":
            for roomX, roomY in self._ROOM_COORDS:
                self.spawnMap.updateSpawn(roomX,roomY,"Player", 100)

        if TASK_SPAWN == "Along Walls":
            #Spawns trash next to walls
            for eachPair in self.wallMap.wallList:
                pairX1 = eachPair[0][0]
                pairY1 = eachPair[0][1]
                pairX2 = eachPair[1][0]
                pairY2 = eachPair[1][1]
                self.spawnMap.updateSpawn(pairX1,pairY1,"Trash", 100)
                self.spawnMap.updateSpawn(pairX2,pairY2,"Trash", 100)
        if AGENT_SPAWN == "Custom":
            self.spawnMap.updateSpawn(SIZE_X//2,SIZE_Y//2,"Player", 100)
        #if TASK_SPAWN == "Custom":

    def _isOccupied(self, candidate):
        """Returns True when `candidate` compares equal to any placed trash or any placed player.

        Trash and players are checked independently, so players are still checked
        when no trash has been placed yet. Unfilled (None) slots are ignored.
        """
        for eachTrash in self.trashList:
            if eachTrash is not None and candidate == eachTrash:
                return True
        for eachPlayer in self.playerList:
            if eachPlayer is not None and candidate == eachPlayer:
                return True
        return False

    def reset(self):
        """Starts a new episode: places PLAYER_COUNT agents and TRASH_COUNT trash items.

        Returns the environment representation produced by the freshly updated MapManager.
        """
        #Creates players in spawnable locations
        self.playerList = [None] * PLAYER_COUNT
        playerIDlist = list(range(PLAYER_COUNT))
        random.shuffle(playerIDlist)
        for eachID in playerIDlist:
            retry = True
            while retry:
                retry = False
                player = Roomba(obsRange = OBS_RANGE, viewType = VIEW_TYPE, ID=eachID)

                # Checks for duplicates
                for eachPlayer in self.playerList:
                    if not eachPlayer is None:
                        if player == eachPlayer:
                            retry = True

                # Checks if spawn was successful given probability of spawning in given location
                if not self.spawnMap.trySpawn(player.x, player.y, "Player", eachID):
                    retry = True

            self.playerList[eachID] = copy.deepcopy(player)

        #Creates a list of trash with each trash having a unique location
        self.trashList=[None]*TRASH_COUNT
        trashIDlist = list(range(TRASH_COUNT))
        random.shuffle(trashIDlist)
        for eachID in trashIDlist:
            retry = True
            while retry:
                trash = Trash()

                # Checks for duplicates against placed trash AND players
                retry = self._isOccupied(trash)

                # Checks if spawn was successful given probability of spawning in given location
                # (always evaluated, also when the cell is already known to be occupied)
                if not self.spawnMap.trySpawn(trash.x, trash.y, "Trash"):
                    retry = True

            self.trashList[eachID] = copy.deepcopy(trash)
            if PRINT_CSV:
                csvData = (self.episode, self.playerList[0].stepsTaken, trash.x, trash.y, "T")
                self.csv.write(csvData,3)
        #Resets the step counter of the episode
        self.episode_step = 0

        self.envMap = MapManager()
        self.envMap.update(self.playerList, self.trashList, self.wallMap.blockWallList)
        return self.envMap.getEnvironment()

    #Action is a number: 0,1,2,3 that represents an agent's action
    def step(self, actionList):
        """Applies one action per agent and advances the episode by one step.

        actionList: indexed by agent ID; actionList[i] is the action of agent i.

        Returns (newObservationList, rewardList, done). Both lists are indexed by
        agent ID; done becomes True once EPISODE_STEP_LIMIT steps have been taken.
        """
        self.episode_step += 1

        #Initialize lists with None values because agent data will be added in random order
        rewardList = [None] * PLAYER_COUNT

        agentIDlist = list(range(PLAYER_COUNT))
        random.shuffle(agentIDlist)#randomizes action order of agents
        for eachAgentID in agentIDlist:
            i = eachAgentID
            mover = self.playerList[i]

            #One player tries to move to another space
            mover.action(actionList[i])

            #As a default, assumes agent moved and assigns reward accordingly
            reward = MOVE_PENALTY
            mover.event = "Move"

            #Handles agents reaching environment edges
            if mover.x < 0:
                mover.x = 0
                reward = WALL_COLLISION_PENALTY
                mover.event = "WallC"
                mover.wallCollisionCount+=1
            elif mover.x > SIZE_X-1:
                mover.x = SIZE_X-1
                reward = WALL_COLLISION_PENALTY
                mover.event = "WallC"
                mover.wallCollisionCount+=1
            elif mover.y < 0:
                mover.y = 0
                reward = WALL_COLLISION_PENALTY
                mover.event = "WallC"
                mover.wallCollisionCount+=1
            elif mover.y > SIZE_Y-1:
                mover.y = SIZE_Y-1
                reward = WALL_COLLISION_PENALTY
                mover.event = "WallC"
                mover.wallCollisionCount+=1
            #Handles wall and agent collisions of the agent that just moved.
            #The wall test uses the mover's own coordinates and runs once, so other
            #agents' earlier moves cannot cause a penalty for this agent.
            elif self.wallMap.wallExists(mover.x, mover.y, mover.xBefore, mover.yBefore):
                reward = WALL_COLLISION_PENALTY
                mover.event = "WallC"
                mover.undo()
                mover.wallCollisionCount+=1
            else:
                for eachPlayer in self.playerList:
                    if (mover == eachPlayer and mover.ID != eachPlayer.ID):
                        reward = AGENT_COLLISION_PENALTY
                        mover.event = "AgentC"
                        mover.undo()
                        mover.agentCollisionCount+=1
                        break #one collision is enough; the move is already undone

            #Overwrites reward if reaching trash
            #Removes any trash on players position
            #Iterates over a snapshot: removing from the list being iterated would skip the next trash
            for eachTrash in list(self.trashList):
                if mover == eachTrash:
                    mover.tasksCompleted+=1
                    reward = TRASH_REWARD
                    mover.event = "Task"
                    self.trashList.remove(eachTrash)
                eachTrash.count += 1#Increment counter for all existing trash

            #Saves data from this step for current player (except newObservations)
            rewardList[i] = reward

        #Respawns any trash removed
        while TRASH_COUNT > len(self.trashList):
            newTrash = Trash()

            #Checks if trash to spawn is in an empty space (no trash and no player there)
            retry = self._isOccupied(newTrash)

            # Checks if spawn was successful given probability of spawning in given location
            if not self.spawnMap.trySpawn(newTrash.x, newTrash.y, "Trash"):
                retry = True

            if not retry:
                self.trashList.append(newTrash)
                if PRINT_CSV:
                    csvData = (self.episode, self.playerList[0].stepsTaken, newTrash.x, newTrash.y, "T")
                    self.csv.write(csvData,3)

        # Creates a map of the environment
        self.envMap = MapManager()
        self.envMap.update(self.playerList, self.trashList, self.wallMap.blockWallList)

        # Checks if new observations shouldn't have main player position
        removePlayer = False
        if (VIEW_TYPE == "Local" or VIEW_TYPE == "Merge") and REMOVE_PLAYER_FROM_LOCAL_VIEW:
            removePlayer = True

        #Each agent's view of the updated environment is stored at the index of its ID
        newObservationList = [None]*PLAYER_COUNT

        agentIDlist = list(range(PLAYER_COUNT))
        random.shuffle(agentIDlist)#randomizes the order in which observations are built
        #Populates newObservationList
        for eachAgentID in agentIDlist:
            eachPlayer = self.playerList[eachAgentID]
            if WALLS_BLOCK_VIEW:
                blindList = self.blindMap.getBlindSpots(eachPlayer.x, eachPlayer.y, eachPlayer.obsRange, self.wallMap)
            else:
                blindList = None

            if PRINT_CSV and WALLS_BLOCK_VIEW:
                for eachSpot in blindList:
                    currentEpisode = str(self.episode)
                    currentStep = str(self.episode_step)
                    agentID = str(eachPlayer.ID)
                    blindX = str(eachSpot[0])
                    blindY = str(eachSpot[1])
                    csvData = (currentEpisode, currentStep, agentID, blindX, blindY)
                    self.csv.write(csvData,1)

            #Create a new observation for each agent and store it under the agent's ID
            newObservationList[eachAgentID] = copy.deepcopy(self.envMap.getView(OBS_RANGE, VIEW_TYPE, eachPlayer.ID, removePlayerPos = removePlayer, blindSpotList = blindList))

        #Checks if this episode is complete
        done = False
        if self.episode_step >= EPISODE_STEP_LIMIT:
            done = True

        # Returns new agent observations, the rewards, and whether this episode finished
        return newObservationList, rewardList, done

In [6]:
class ScottDQNAgent:
    """DQN agent whose main network is supplied from outside through setModel().

    Instead of copying its own weights into the target network, train() raises
    the timeToAverageWeights flag once enough episodes have ended.
    """

    def __init__(self):  
        # Counts finished episodes since the flag below was last raised
        self.target_update_counter = 0
        self.timeToAverageWeights = False

        # Holds the last REPLAY_MEMORY_SIZE transitions used for training
        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)

    def setModel(self, model):
        """Stores `model` as the main network and builds a target network with the same weights."""
        # Main model
        self.model = model

        # Target network
        self.target_model = get_single_model()
        self.target_model.set_weights(self.model.get_weights())

    def create_model(self):
        """Builds and compiles a Sequential network: two Conv2D/MaxPooling2D stages followed by dense layers."""
        #Flatten reference: https://stackoverflow.com/questions/43237124/what-is-the-role-of-flatten-in-keras
        network = Sequential([
            Conv2D(32, (2, 2), padding="same",input_shape=OBSERVATION_SPACE_VALUES),
            MaxPooling2D(pool_size=(2, 2), strides=2),
            Conv2D(64, (2, 2), padding="same"),
            MaxPooling2D(pool_size=(2, 2), strides=2),
            Flatten(input_shape=(5, 5, 16)),
            Dense(100),
            Activation('relu'),
            Dense(ACTION_SPACE_SIZE, activation='linear'),  # ACTION_SPACE_SIZE = how many choices (4)
        ])

        #Earlier optimizer settings, kept for reference:
        #optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
        network.compile(loss="mse", optimizer=optimizers.RMSprop(RMSPROP_LEARNING_RATE), metrics=['accuracy'])
        return network

    # Adds step's data to a memory replay array
    # (observation space, action, reward, new observation space, done)
    def update_replay_memory(self, transition):
        self.replay_memory.append(transition)

    # Trains main network every step during episode
    def train(self, terminal_state, step):
        """Fits the main network on one random minibatch of stored transitions.

        Does nothing until the replay memory holds MIN_REPLAY_MEMORY_SIZE transitions.
        terminal_state: truthy when the current episode just ended.
        step: not used by this method.
        """
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        batch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        # Q values of the stored states come from the main network
        statesNow = np.array([sample[0] for sample in batch])
        qNow = self.model.predict(statesNow)

        # Q values of the follow-up states come from the target network
        statesNext = np.array([sample[3] for sample in batch])
        qNext = self.target_model.predict(statesNext)

        inputs, targets = [], []
        for row, sample in enumerate(batch):
            state, action, reward, _nextState, done = sample

            # Terminal transitions have no future reward to add
            target = reward if done else reward + DISCOUNT * np.max(qNext[row])

            # Only the Q value of the action actually taken is replaced
            qRow = qNow[row]
            qRow[action] = target

            inputs.append(state)
            targets.append(qRow)

        self.model.fit(np.array(inputs), np.array(targets), batch_size=MINIBATCH_SIZE, verbose=0, shuffle=False)

        # The counter advances once per finished episode
        if terminal_state:
            self.target_update_counter += 1

        # Past the threshold: request weight averaging instead of copying the weights here
        if self.target_update_counter > UPDATE_TARGET_EVERY:
            #self.target_model.set_weights(self.model.get_weights())
            self.timeToAverageWeights = True
            self.target_update_counter = 0

    # Queries main network for Q values given current observation space (environment state)
    def get_qs(self, state):
        batchOfOne = np.array(state).reshape(-1, *state.shape)
        return self.model.predict(batchOfOne)[0]
    
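#input shape (presumably view range and number of channels) is taken from OBSERVATION_SPACE_VALUES
#number of agents is taken from PLAYER_COUNT
#creates a set of models (one per agent) that share the same Conv2D layers; no max-pooling layers are used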
def get_model():
    """Returns a list of PLAYER_COUNT compiled models.

    All models read the same Input and the same two Conv2D layers; each model
    has its own Dense(32) -> Dense(16) -> Dense(5) head.
    """
    inputs = Input(shape=OBSERVATION_SPACE_VALUES)
    trunk = Conv2D(32, kernel_size=3, activation='relu', data_format="channels_last")(inputs)
    trunk = Conv2D(64, kernel_size=2, activation='relu', data_format="channels_last")(trunk)
    trunk = Flatten()(trunk)

    def buildHead():
        # A new set of dense layers on top of the shared trunk
        hidden = Dense(32, activation='relu')(trunk)
        hidden = Dense(16, activation='relu')(hidden)
        qValues = Dense(5)(hidden)
        network = Model(inputs=inputs, outputs=qValues)
        network.compile(loss='mse', optimizer=keras.optimizers.Adam(lr=LEARNING_RATE))
        # network.summary()
        return network

    return [buildHead() for _ in range(PLAYER_COUNT)]

def get_single_model():
    """Builds and compiles one stand-alone model: two Conv2D layers, Flatten, then Dense 32 -> 16 -> 5."""
    inputs = Input(shape=OBSERVATION_SPACE_VALUES)

    features = Conv2D(32, kernel_size=3, activation='relu', data_format="channels_last")(inputs)
    features = Conv2D(64, kernel_size=2, activation='relu', data_format="channels_last")(features)
    features = Flatten()(features)

    hidden = Dense(32, activation='relu')(features)
    hidden = Dense(16, activation='relu')(hidden)
    qValues = Dense(5)(hidden)

    network = Model(inputs=inputs, outputs=qValues)
    network.compile(loss='mse', optimizer=keras.optimizers.Adam(lr=LEARNING_RATE))
    # network.summary()
    return network

#gets a list of DQNagents and updates target weights
def average_weights(agent_group):
    """Sets every agent's target network to the element-wise mean of all main networks.

    agent_group: sequence of agents exposing `model.get_weights()` and
                 `target_model.set_weights()`. An empty group is a no-op.

    Each agent's weights are fetched once (not once per layer), and every agent
    contributes with the same weight 1/len(agent_group).
    """
    if len(agent_group) == 0:
        return

    all_weights = [agent.model.get_weights() for agent in agent_group]
    n_layers = len(all_weights[0])
    weights = [1/len(agent_group) for j in range(len(agent_group))]

    avg_model_weights = list()
    for layer in range(n_layers):
        # Stack the same layer of every agent, then average over the agent axis
        layer_weights = np.array([agent_weights[layer] for agent_weights in all_weights])
        avg_layer_weights = np.average(layer_weights, axis=0, weights=weights)
        avg_model_weights.append(avg_layer_weights)

    for agent in agent_group:
        agent.target_model.set_weights(avg_model_weights)

In [7]:
# Builds the environment instance. The RoombaEnv constructor also creates a CSVmanager,
# which creates the recording folder and writes the CSV header rows.
env = RoombaEnv()
# Agent class
class DQNAgent:
    """Deep Q-Network agent with a main network, a target network and a replay memory.

    With VIEW_TYPE == "Merge" a state is a pair (local view, relative view) and the
    network has two inputs; otherwise a state is a single array.
    When LOAD_MODEL is set, both networks and the replay memory are restored from
    the files of that recording instead of being created from scratch.
    """

    def __init__(self, loadModelID = -1):
        """loadModelID: agent index appended to the file names read when LOAD_MODEL is set."""
        # Main model
        self.model = self.create_model(loadModelID)

        # Target network
        self.target_model = self.create_model(loadModelID = loadModelID, target = True)
        if LOAD_MODEL is None:
            # Fresh run: the target network starts as an exact copy of the main network.
            # On a continued run the target network keeps the weights loaded from its own
            # "_target.model" file instead of being overwritten here.
            self.target_model.set_weights(self.model.get_weights())

        # An array with last n steps for training
        if LOAD_MODEL is not None:
            # NOTE(review): allow_pickle=True unpickles the file content; only load recordings you trust.
            dequeData = np.load(f"recordings/{LOAD_MODEL}/models/{LOAD_MODEL}{loadModelID}.npy", allow_pickle = True)
            replayMem = deque(dequeData, maxlen =REPLAY_MEMORY_SIZE)
        else:
            replayMem = deque(maxlen=REPLAY_MEMORY_SIZE)
        self.replay_memory = replayMem

        # Used to count when to update target network with main network's weights
        self.target_update_counter = 0

    def printModelDiagram(self, fileName = 'model.png'):
        """Saves a diagram of the main network into this run's recordings folder."""
        plot_model(self.model, show_shapes = True, to_file=f"recordings/{ID}_{MODEL_NAME}/{fileName}")

    def create_model(self, loadModelID = -1, target= False):
        """Returns a compiled network, loaded from disk when LOAD_MODEL is set.

        loadModelID: agent index used in the file name of a loaded model.
        target:      when loading, selects the "_target.model" file instead of ".model".
        """
        if LOAD_MODEL is not None:
            if target:
                modelPath = f"recordings/{LOAD_MODEL}/models/{LOAD_MODEL}{loadModelID}_target.model"
            else:
                modelPath = f"recordings/{LOAD_MODEL}/models/{LOAD_MODEL}{loadModelID}.model"
            model = keras.models.load_model(modelPath)
            return model

        if VIEW_TYPE == "Merge":
            # Branch 1: convolution stack over the local view
            localInput = Input(shape=OBSERVATION_SPACE_VALUES)
            x = Conv2D(32, (2, 2), padding="same", data_format = "channels_last", input_shape=OBSERVATION_SPACE_VALUES)(localInput)
            x = MaxPooling2D(pool_size=(2, 2), strides=2)(x)
            x = Conv2D(32, (2, 2), padding="same")(x)
            x = MaxPooling2D(pool_size=(2, 2), strides=2)(x)
            localModelEnd = Flatten()(x)

            # Branch 2: convolution stack over the relative view
            relativeInput = Input(shape=OBSERVATION_SPACE_VALUES_TWO)
            x = Conv2D(32, (2, 2), padding="same", input_shape=OBSERVATION_SPACE_VALUES_TWO)(relativeInput)
            x = MaxPooling2D(pool_size=(2, 2), strides=2)(x)
            x = Conv2D(32, (2, 2), padding="same")(x)
            x = MaxPooling2D(pool_size=(2, 2), strides=2)(x)
            relativeModelEnd = Flatten()(x)

            # Both branches are concatenated and feed the shared dense layers
            x = keras.layers.concatenate([localModelEnd, relativeModelEnd])
            x = Dense(100, activation='relu')(x)
            output = Dense(ACTION_SPACE_SIZE, activation='linear')(x)

            model = Model(inputs=[localInput, relativeInput], outputs = [output])
            rmsProp = optimizers.RMSprop(LEARNING_RATE)
            model.compile(loss="mse", optimizer=rmsProp, metrics=['accuracy'])
            #plot_model(model, to_file='model.png')
        else:
            model = Sequential()
            model.add(Conv2D(32, (2, 2), padding="same",input_shape=OBSERVATION_SPACE_VALUES))
            model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
            #model.add(Dropout(0.2))

            model.add(Conv2D(64, (2, 2), padding="same"))
            model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
            #model.add(Dropout(0.2))

            #https://stackoverflow.com/questions/43237124/what-is-the-role-of-flatten-in-keras
            model.add(Flatten(input_shape=(5, 5, 16)))
            model.add(Dense(100))
            model.add(Activation('relu'))
            model.add(Dense(ACTION_SPACE_SIZE, activation='linear'))  # ACTION_SPACE_SIZE = how many choices (4)
            #Earlier optimizer settings, kept for reference (lr=0.001 was the default value):
            #optimizers.RMSprop(lr=LEARNING_RATE, rho=0.9, epsilon=None, decay=0.0)
            rmsProp = optimizers.RMSprop(LEARNING_RATE)
            model.compile(loss="mse", optimizer=rmsProp, metrics=['accuracy'])

        return copy.deepcopy(model)

    # Adds step's data to a memory replay array
    # (observation space, action, reward, new observation space, done)
    def update_replay_memory(self, transition):
        """Stores a deep copy of `transition`, so later changes by the caller do not alter the memory."""
        self.replay_memory.append(copy.deepcopy(transition))

    def _stackStates(self, minibatch, slot):
        """Stacks the states found at position `slot` of every transition into network input.

        Returns [local batch, relative batch] for VIEW_TYPE == "Merge", otherwise one array.
        """
        if VIEW_TYPE == "Merge":
            localBatch = np.array([transition[slot][0] for transition in minibatch])
            relativeBatch = np.array([transition[slot][1] for transition in minibatch])
            return [localBatch, relativeBatch]
        return np.array([transition[slot] for transition in minibatch])

    # Trains main network every step during episode
    def train(self, terminal_state, step):
        """Fits the main network on one random minibatch of stored transitions.

        Does nothing until the replay memory holds MIN_REPLAY_MEMORY_SIZE transitions.
        terminal_state: truthy when the current episode just ended; advances the
                        counter that decides when the target network is refreshed.
        step:           not used by this method.
        """
        # Start training only if certain number of samples is already saved
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        # Each transition is a sequence containing the following:
        # transition[0] current state
        # transition[1] action
        # transition[2] reward
        # transition[3] new state
        # transition[4] done
        # Get a minibatch of random samples from memory replay table
        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        # Get current states from minibatch, then query the main network for Q values
        current_states = self._stackStates(minibatch, 0)
        current_qs_list = self.model.predict(current_states)

        # Get future states from minibatch, then query the target network for Q values
        new_current_states = self._stackStates(minibatch, 3)
        future_qs_list = self.target_model.predict(new_current_states)

        # Turns the predicted Q values into training targets, one row per transition
        for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch):

            # If not a terminal state, add the discounted best future Q value to the reward
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward

            # Only the Q value of the action actually taken is replaced
            current_qs_list[index][action] = new_q

        # The arrays built for the prediction above are the training inputs
        self.model.fit(current_states, current_qs_list, batch_size=MINIBATCH_SIZE, verbose=0, shuffle=False)

        # Update target network counter every episode
        if terminal_state:
            self.target_update_counter += 1

        # Once the counter exceeds UPDATE_TARGET_EVERY, copy the main network's weights into the target network
        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0

    # Queries main network for Q values given current observation space (environment state)
    def get_qs(self, state):
        """Returns the main network's Q values for a single state (a batch of one is built internally)."""
        if VIEW_TYPE == "Merge":
            localViewState = state[0]
            relativeViewState = state[1]
            localViewInput = np.array(localViewState).reshape(-1, *localViewState.shape)
            relativeViewInput = np.array(relativeViewState).reshape(-1, *relativeViewState.shape)
            return self.model.predict([localViewInput, relativeViewInput])[0]
        else:
            return self.model.predict(np.array(state).reshape(-1, *state.shape))[0]

In [8]:
# For more reproducible results: when RANDOM_SEED is not -1, the same seed is handed to
# Python's random module, NumPy and TensorFlow (in that order)
if RANDOM_SEED != -1:
    # tf.set_random_seed is the spelling used here; the alternative kept from the original is
    #tf.random.set_seed(RANDOM_SEED)
    for seedGenerator in (random.seed, np.random.seed, tf.set_random_seed):
        seedGenerator(RANDOM_SEED)

In [None]:
# Builds one DQN agent per player; agentList[i] holds the agent for player ID i
agentList = [None]*PLAYER_COUNT
agentIDlist = list(range(PLAYER_COUNT))
random.shuffle(agentIDlist)  # agents are constructed in a shuffled order
if USE_SCOTT_MODEL:
    modelList = get_model()

for eachPlayerID in agentIDlist:
    loadModelPlayerID = eachPlayerID
    if USE_SCOTT_MODEL:
        # Scott variant: agent gets a pre-built model and the global replay memory
        newAgent = ScottDQNAgent()
        newAgent.setModel(modelList[loadModelPlayerID])
        newAgent.replay_memory = GLOBAL_REPLAY_MEMORY
        # NOTE(review): deepcopy presumably also copies the attached replay
        # memory, so agents may not actually share GLOBAL_REPLAY_MEMORY - confirm
        agentList[eachPlayerID] = copy.deepcopy(newAgent)
    else:
        # Default variant: each agent is constructed from its own player ID
        agentList[eachPlayerID] = copy.deepcopy(DQNAgent(loadModelPlayerID))
    
# Creates a text file with the parameters for this run.
# The file starts with the Keras summary of agent 0's model, followed by the
# parameter listing below.
if RECORD_PARAMETERS:
    paramPath = f'recordings/{ID}_{MODEL_NAME}/'
    fileName = str(MODEL_NAME) + "-" + str(ID) + ".txt"
    fileContents = f"""
        #Parameters
        #Misc Parameters
        ID = {ID}
        MODEL_NAME = {MODEL_NAME}
        RANDOM_SEED = {RANDOM_SEED}
        OBSERVATION_SPACE_VALUES = {OBSERVATION_SPACE_VALUES}
        ACTION_SPACE_SIZE = {ACTION_SPACE_SIZE}

        #Model Settings
        DISCOUNT = {DISCOUNT}
        REPLAY_MEMORY_SIZE = {REPLAY_MEMORY_SIZE}  # How many last steps to keep for model training
        MIN_REPLAY_MEMORY_SIZE = {MIN_REPLAY_MEMORY_SIZE}  # Minimum number of steps in a memory to start training
        MINIBATCH_SIZE = {MINIBATCH_SIZE}  # How many steps (samples) to use for training
        UPDATE_TARGET_EVERY = {UPDATE_TARGET_EVERY}  # Terminal states (end of episodes)
        LEARNING_RATE = {LEARNING_RATE}

        # Agent View Settings
        REMOVE_PLAYER_FROM_LOCAL_VIEW = {REMOVE_PLAYER_FROM_LOCAL_VIEW}
        WALLS_BLOCK_VIEW = {WALLS_BLOCK_VIEW}
        VIEW_TYPE = {VIEW_TYPE} # Relative or Local

        # Exploration settings
        EPSILON_DECAY = {EPSILON_DECAY}
        MIN_EPSILON = {MIN_EPSILON}

        # Environment settings
        START_EPISODE = {START_EPISODE}
        END_EPISODE = {END_EPISODE}
        OBS_RANGE = {OBS_RANGE}
        SIZE_X = {SIZE_X}#Environment width
        SIZE_Y = {SIZE_Y}#Environment height
        EPISODE_STEP_LIMIT = {EPISODE_STEP_LIMIT}
        PLAYER_COUNT = {PLAYER_COUNT}
        TRASH_COUNT = {TRASH_COUNT}
        AGENT_SPAWN = {AGENT_SPAWN} # "Random", "Center", "Custom"
        TASK_SPAWN = {TASK_SPAWN} # "Random", "Not Center", "Along Walls", "Custom"
        WALL_ARRANGEMENT = {WALL_ARRANGEMENT} # "Six Rooms", "Custom"

        #Key settings (the number to represent each thing in the DQN matrix)
        PLAYER_N = {PLAYER_N}  # player key 
        TRASH_N = {TRASH_N}  # trash key
        OTHERS_N = {OTHERS_N} # Other players key

        #Reward Scheme
        MOVE_PENALTY = {MOVE_PENALTY} #make negative
        WALL_COLLISION_PENALTY = {WALL_COLLISION_PENALTY} #make negative
        AGENT_COLLISION_PENALTY = {AGENT_COLLISION_PENALTY} #make negative
        TRASH_REWARD = {TRASH_REWARD}
        """
    # exist_ok avoids the separate isdir check and is a no-op when the
    # directory is already there
    os.makedirs(paramPath, exist_ok=True)
    # One context-managed handle for both writes, so the file is closed even
    # if summary() or write() raises
    with open(paramPath + fileName, "w") as f:
        # summary() prints to stdout; redirect it so it lands in the file
        with redirect_stdout(f):
            agentList[0].model.summary()
        f.write(fileContents)

    if not USE_SCOTT_MODEL:
        agentList[0].printModelDiagram()

# Iterate over episodes.
# Each episode: reset the environment, build every agent's initial view, then
# loop (choose actions -> step the environment -> store transitions and train)
# until env.step reports done. Afterwards epsilon is decayed and, every
# SAVE_MODEL_EVERY episodes, the models and replay memories are saved.
for episode in tqdm(range(START_EPISODE, END_EPISODE + 1), ascii=True, unit='episodes'):
    env.episode = episode
    # Restarting episode - reset the step number
    step = 1
    # Reset environment and get initial state
    env.reset()
    envMap = MapManager()
    envMap.update(env.playerList, env.trashList, env.wallMap.blockWallList)
    currentStateList = [None]*PLAYER_COUNT# List that contains the current state for each player
    playerIDlist = list(range(PLAYER_COUNT))
    random.shuffle(playerIDlist)
    for eachPlayerID in playerIDlist:
        #Checks if "player" channels should be removed
        removePlayer = False
        if (VIEW_TYPE == "Local" or VIEW_TYPE == "Merge") and REMOVE_PLAYER_FROM_LOCAL_VIEW:
            removePlayer = True

        #Gets blind spots for each player if ray casting is on (just for initial episode step)
        eachPlayer = env.playerList[eachPlayerID]
        if WALLS_BLOCK_VIEW:
            blindList = env.blindMap.getBlindSpots(eachPlayer.x, eachPlayer.y, eachPlayer.obsRange, env.wallMap)
        else:
            blindList = None

        #Gets the current state of each player (just for initial episode step)
        currentStateList[eachPlayerID] = envMap.getView(OBS_RANGE, VIEW_TYPE, eachPlayer.ID, removePlayerPos = removePlayer, blindSpotList = blindList)

    # Reset flag and start iterating until episode ends
    done = False
    while not done:
        actionList = [None]*PLAYER_COUNT
        playerIDlist = list(range(PLAYER_COUNT))
        random.shuffle(playerIDlist)

        #Determines which action each agent takes (epsilon-greedy)
        for eachPlayerID in playerIDlist:
            if np.random.random() > epsilon:
                # Greedy action: highest Q value predicted by this agent's main network
                action = np.argmax(agentList[eachPlayerID].get_qs(currentStateList[eachPlayerID]))
            else:
                # Get random action
                action = np.random.randint(0, ACTION_SPACE_SIZE)
            actionList[eachPlayerID] = copy.deepcopy(action)

        #Changes the environment according to agent actions
        new_stateList, rewardList, done = env.step(actionList)

        # Optional debug dump of the environment and agent 0's input on step 5
        if CHECK_DQN_INPUT and step == 5:
            # Temporarily lift numpy's print truncation so full arrays are shown
            np.set_printoptions(threshold=sys.maxsize)
            print("Environment")
            # NOTE(review): this also prints printEnv()'s return value and uses
            # env.envMap rather than the local envMap - confirm both are intended
            print(env.envMap.printEnv())
            print("")
            print("agent 0 state")
            if VIEW_TYPE == "Merge":
                print("Local view portion")
                envMap.printView(new_stateList[0][0])
                print("Relative view portion")
                envMap.printView(new_stateList[0][1])
            else:
                envMap.printView(new_stateList[0])
            np.set_printoptions(threshold=1000)

        #Updates CSV files
        if PRINT_CSV:
            episodeStr = str(episode)
            for i in range(0,len(env.playerList)):
                agentStepsTaken = str(env.playerList[i].stepsTaken)
                agentID = str(env.playerList[i].ID)
                agentX = str(env.playerList[i].x)
                agentY = str(env.playerList[i].y)
                agentEvent = env.playerList[i].event
                # One row per agent per step
                csvData = (episodeStr, agentStepsTaken, agentID, agentX, agentY, agentEvent)
                env.csv.write(csvData, 0)

                # "Task" events get an additional row in a second CSV
                if agentEvent == "Task":
                    csvData = (episodeStr, agentStepsTaken, agentX, agentY, "F")
                    env.csv.write(csvData, 3)

        timeToAverageWeightsChecker = []
        playerIDlist = list(range(PLAYER_COUNT))
        random.shuffle(playerIDlist)
        for eachPlayerID in playerIDlist:
            # Every step we update replay memory and train main network
            agentList[eachPlayerID].update_replay_memory((currentStateList[eachPlayerID], actionList[eachPlayerID], rewardList[eachPlayerID], new_stateList[eachPlayerID], done))
            agentList[eachPlayerID].train(done, step)
            if USE_SCOTT_MODEL:
                timeToAverageWeightsChecker.append(agentList[eachPlayerID].timeToAverageWeights)

        # Only the first trained agent's flag decides whether weights are averaged
        if USE_SCOTT_MODEL and timeToAverageWeightsChecker[0]:
            average_weights(agentList)
            for agent in agentList:
                agent.timeToAverageWeights = False

        currentStateList = copy.deepcopy(new_stateList)
        step += 1

    # Decay epsilon once per episode, never below MIN_EPSILON
    if epsilon > MIN_EPSILON:
        epsilon *= EPSILON_DECAY
        epsilon = max(MIN_EPSILON, epsilon)

    # Save model (every SAVE_MODEL_EVERY episodes; -1 disables saving)
    if SAVE_MODEL_EVERY != -1 and not episode % SAVE_MODEL_EVERY:
        playerIDlist = list(range(PLAYER_COUNT))
        basePath = f'recordings/{ID}_{MODEL_NAME}/models/'
        # Create the output directory once, instead of re-checking per player
        os.makedirs(basePath, exist_ok=True)
        for eachPlayerID in playerIDlist:
            agentList[eachPlayerID].model.save(f"{basePath}{ID}_{MODEL_NAME}{eachPlayerID}.model")
            agentList[eachPlayerID].target_model.save(f"{basePath}{ID}_{MODEL_NAME}{eachPlayerID}_target.model")

            # Original author's note: the loading side of this has not been tested
            replayMem = agentList[eachPlayerID].replay_memory
            np.save(f"{basePath}{ID}_{MODEL_NAME}{eachPlayerID}", replayMem, allow_pickle = True)

        # Record the next episode number and epsilon; the file is overwritten
        # on every save
        paramFileName = str(MODEL_NAME) + "-" + str(ID) + ".txt"
        paramFileContents = f"""
                            Next episode: {episode+1}
                            Next epsilon: {epsilon}
                            """
        # Context manager guarantees the handle is closed even if write() raises
        with open(basePath + paramFileName, "w+") as f:
            f.write(paramFileContents)
    

 77%|#######6  | 19134/25000 [144:12:50<45:34:16, 27.97s/episodes]