In [None]:
import numpy as np
from numpy import random
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap
import math
import pickle

# Single seeded generator shared by every stochastic call in the notebook,
# so a fresh "Restart & Run All" reproduces the same random stream.
rng = random.default_rng(42)

# Environment settings

In [None]:
class Environment:
    """Square grid world containing the recharge stations.

    ``settings`` is a dict that must provide:

    * ``discreteMap``: number of cells along each side of the grid.
    * ``sizeRechargeAreas``: offset from a station origin to its far edge; a
      station covers ``[origin, origin + size]`` on both axes, bounds included.
    * ``rechargeAreas``: list of ``[y, x]`` station origins.
    * ``rechargeValue``: energy handed out by each station, indexed like
      ``rechargeAreas``.

    Side effect: constructing an instance writes ``RechargeAreaPositions.txt``
    to the current directory (it is read back by the plotting code).
    """

    def __init__(self, settings):
        self.discreteMap = settings['discreteMap']
        self.sizeRechargeAreas = settings['sizeRechargeAreas']
        self.rechargeValue = settings['rechargeValue']
        self.rechargeAreas = settings['rechargeAreas']

        # Save the recharge station corners with a high, station-specific value
        # (100, 200, ...) so each one gets its own colour when plotted.
        # The grid follows discreteMap instead of a fixed 20x20 size.
        positions = np.zeros((self.discreteMap, self.discreteMap), dtype=int)
        size = self.sizeRechargeAreas
        for index, area in enumerate(self.rechargeAreas):
            marker = 100 * (index + 1)
            positions[area[0]][area[1]] = marker
            positions[area[0]][area[1] + size] = marker
            positions[area[0] + size][area[1]] = marker
            positions[area[0] + size][area[1] + size] = marker

        # One comma-separated text line per grid row.
        with open('RechargeAreaPositions.txt', 'wb') as f:
            np.savetxt(f, positions, fmt='%s', delimiter=",")

    def ifInRechargeArea(self, agentY, agentX, numVisitsRechargeArea):
        """Return the recharge value of the station containing the position.

        ``numVisitsRechargeArea`` is a per-station counter list that is
        incremented in place for the first station that contains
        ``(agentY, agentX)``. Returns 0 when the position is outside every
        station.
        """
        size = self.sizeRechargeAreas
        for index, area in enumerate(self.rechargeAreas):
            insideY = area[0] <= agentY <= area[0] + size
            insideX = area[1] <= agentX <= area[1] + size
            if insideY and insideX:
                numVisitsRechargeArea[index] += 1
                return self.rechargeValue[index]
        return 0

# Robot settings

In [None]:
class Agent:
    """Battery-driven robot moving on the grid of an ``Environment``.

    The state handed to the learner is the 8-element list
    ``[driveSurvive, minDist, up, down, left, right, agentY, agentX]``.

    Parameters
    ----------
    env : object exposing ``discreteMap``, ``rechargeAreas``,
        ``sizeRechargeAreas`` and ``ifInRechargeArea``.
    actions : sized container of the available actions (only its length is used
        here; ``step`` interprets 1=Up, 2=Down, 3=Left, 4=Right, anything else
        leaves the position unchanged).
    survive : dict with ``homeostasisSurvive``, ``maxEnergy``, ``minEnergy`` and
        ``discountEnergy``.
    distanceMeasure : ``'Euclidean'`` or ``'Manhattan'``.

    Relies on the module-level generator ``rng`` for random initial values.
    """

    def __init__(self, env, actions, survive, distanceMeasure):
        self.agentX = 0
        self.agentY = 0
        self.env = env
        self.totalActions = len(actions)
        self.totalFeatures = 8  # FEATURES: length of the state list
        self.actions = actions
        self.distanceMeasure = distanceMeasure

        side = self.env.discreteMap
        self.coordinates = []
        self.numVisitsMap = [[0] * side for _ in range(side)]
        # Never cleared by reset(), so it accumulates over every episode.
        self.numVisitsMap_accumulated = [[0] * side for _ in range(side)]

        self.numVisitsRechargeArea = [0] * len(env.rechargeAreas)

        # BATTERY
        self.homeostasisSurvive = survive['homeostasisSurvive']
        self.maxEnergy = survive['maxEnergy']
        self.minEnergy = survive['minEnergy']
        self.discountEnergy = survive['discountEnergy']
        self.energy = rng.integers(self.minEnergy, self.maxEnergy + 1)

        self.surviveDriveAll = []
        self.energyAll = []

        # Fixed initial positions to use in the test phase
        self.allY_test = [0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 10, 10, 11, 12, 12, 13, 13, 13, 14, 14, 15, 16, 16, 16, 17, 17, 18, 19, 19, 19, 19, 19]
        self.allX_test = [0, 4, 6, 19, 8, 12, 15, 3, 10, 1, 6, 12, 16, 18, 2, 5, 7, 4, 10, 13, 0, 15, 19, 7, 14, 5, 12, 16, 2, 10, 19, 1, 13, 5, 8, 16, 0, 3, 10, 1, 4, 17, 9, 13, 7, 0, 3, 11, 16, 19]
        self.cxy = 0  # index of the next fixed test position

    def reset(self, test):
        """Start a new episode and return its initial state list.

        With a truthy ``test`` the energy starts at the homeostasis level and
        the position comes from the fixed test list (cycling back to the first
        entry once the list is exhausted). Otherwise energy and position are
        drawn from ``rng``.
        """
        side = self.env.discreteMap
        self.numVisitsRechargeArea = [0] * len(self.env.rechargeAreas)
        self.energyAll = []
        self.surviveDriveAll = []
        self.coordinates = []
        self.numVisitsMap = [[0] * side for _ in range(side)]

        if test:
            self.energy = self.homeostasisSurvive
            # Wrap around so more test runs than fixed positions do not raise IndexError.
            slot = self.cxy % len(self.allX_test)
            self.agentX = self.allX_test[slot]
            self.agentY = self.allY_test[slot]
            self.cxy += 1
        else:
            # Draw order (energy, X, Y) is kept so the random stream is unchanged.
            self.energy = rng.integers(self.minEnergy, self.maxEnergy + 1)
            self.agentX = rng.integers(0, self.env.discreteMap)
            self.agentY = rng.integers(0, self.env.discreteMap)

        currentDriveSurvive = self.computeDriveSurvive(self.energy)
        minDist, up, down, left, right, station = self.closestRechargeArea(self.agentY, self.agentX)

        return [currentDriveSurvive, minDist, up, down, left, right, self.agentY, self.agentX]  # FEATURES

    # DRIVE SURVIVE
    def computeDriveSurvive(self, sensor):
        """Return ``sensor - homeostasisSurvive`` (negative below the set point)."""
        return -(self.homeostasisSurvive - sensor)

    # REWARD FUNCTIONS
    def rewardFunction(self, currentDriveSurvive):
        """Map a drive value to a reward.

        * drive in ``[0, 1)``: reward 1;
        * negative drive: the drive itself (a penalty);
        * any other positive drive: minus half the drive.
        """
        if ((abs(int(currentDriveSurvive)) == 0) and (currentDriveSurvive * (-1) <= 0)):
            return 1
        elif (currentDriveSurvive < 0):
            return currentDriveSurvive
        else:
            return -(currentDriveSurvive * 0.5)

    def computeDriveSurviveReward(self, currentDriveSurvive):
        """Return the survival reward for the given drive (delegates to rewardFunction)."""
        return self.rewardFunction(currentDriveSurvive)

    def step(self, action):
        """Apply ``action``, update the battery and return ``(new_state, reward, done)``.

        Moves are clamped to the grid. ``done`` is 1 when the energy has dropped
        to ``minEnergy`` or below, 0 otherwise.
        """
        lastCell = self.env.discreteMap - 1

        # MOVE
        if action == 1:    # Up (increasing Y)
            self.agentY = min(self.agentY + 1, lastCell)
        elif action == 2:  # Down (decreasing Y)
            self.agentY = max(self.agentY - 1, 0)
        elif action == 3:  # Left
            self.agentX = max(self.agentX - 1, 0)
        elif action == 4:  # Right
            self.agentX = min(self.agentX + 1, lastCell)

        self.coordinates.append([self.agentX, self.agentY])
        self.numVisitsMap[self.agentY][self.agentX] += 1
        self.numVisitsMap_accumulated[self.agentY][self.agentX] += 1

        # UPDATE BATTERY STUFF: pay the movement cost, then recharge (capped at maxEnergy).
        self.energy -= self.discountEnergy
        recharge = self.checkIfRecharge()
        self.energy = min(self.maxEnergy, self.energy + recharge)
        self.energyAll.append(self.energy)

        # Compute drives and reward
        currentDriveSurvive = self.computeDriveSurvive(self.energy)
        rewardSurvive = self.computeDriveSurviveReward(currentDriveSurvive)
        self.surviveDriveAll.append(currentDriveSurvive)

        totalReward = rewardSurvive

        done = self.death()

        minDist, up, down, left, right, station = self.closestRechargeArea(self.agentY, self.agentX)

        new_state = [currentDriveSurvive, minDist, up, down, left, right, self.agentY, self.agentX]

        return new_state, totalReward, done

    def death(self):
        """Return 1 when the energy is at or below ``minEnergy``, else 0."""
        if self.energy <= self.minEnergy:
            return 1
        return 0

    def checkIfRecharge(self):
        """Return the recharge obtained at the current position (0 outside stations)."""
        recharge = self.env.ifInRechargeArea(self.agentY, self.agentX, self.numVisitsRechargeArea)
        return recharge

    def manhattanDistance(self, agentY, agentX, middleY_RA, middleX_RA):
        """Return the Manhattan distance between the agent and a station centre."""
        return abs(agentX - middleX_RA) + abs(agentY - middleY_RA)

    def euclideanDistance(self, agentY, agentX, middleY_RA, middleX_RA):
        """Return the Euclidean distance between the agent and a station centre."""
        return math.sqrt(((agentX - middleX_RA) ** 2) + ((agentY - middleY_RA) ** 2))

    def closestRechargeArea(self, agentY, agentX):
        """Return ``(minDist, up, down, left, right, station)`` for a position.

        ``minDist`` is the distance to the centre of the closest station (0 when
        the position counts as inside it), the four flags indicate the
        direction towards that station, and ``station`` is its index. With no
        stations configured the result is ``(99999, 0, 0, 0, 0, 0)``.
        """
        minDist = 99999
        station = 0
        # Initialised up front so an empty station list cannot leave them unbound.
        up = down = left = right = 0
        halfSize = self.env.sizeRechargeAreas / 2

        for index, rA in enumerate(self.env.rechargeAreas):
            middleX_RA = rA[1] + halfSize
            middleY_RA = rA[0] + halfSize
            if (self.distanceMeasure == 'Euclidean'):
                dist = self.euclideanDistance(agentY, agentX, middleY_RA, middleX_RA)
            else:
                dist = self.manhattanDistance(agentY, agentX, middleY_RA, middleX_RA)

            # The distance is measured to the station centre, so a value within the
            # station size means the agent is already in the station (just not at
            # its centre): treat it as distance 0.
            if ((self.distanceMeasure == 'Manhattan' and dist <= self.env.sizeRechargeAreas) or (self.distanceMeasure == 'Euclidean' and dist < self.env.sizeRechargeAreas)):
                dist = 0  # already at the station

            if dist < minDist:
                minDist = dist
                up = 0
                down = 0
                left = 0
                right = 0
                station = index

                horizon = agentX - middleX_RA
                vertical = agentY - middleY_RA

                # An offset of exactly half the size is the station's far edge,
                # which is excluded from the "move towards" flags.
                # NOTE(review): this only matches the station extent for
                # sizeRechargeAreas == 1 - confirm before using other sizes.
                if (agentX < rA[1] and (horizon != halfSize)):
                    right = 1
                elif (agentX > rA[1] and (horizon != halfSize)):
                    left = 1

                if (agentY > rA[0] and (vertical != halfSize)):
                    down = 1
                elif (agentY < rA[0] and (vertical != halfSize)):
                    up = 1

        return minDist, up, down, left, right, station

    def save(self, sensor, fileName):
        """Write ``sensor`` (a flat list or a list of rows) to ``fileName`` as comma-separated text."""
        rows = np.atleast_2d(np.asarray(sensor))  # a flat list becomes a single row
        with open(fileName, 'wb') as f:
            np.savetxt(f, rows, fmt='%s', delimiter=",")

    def saveSensorsData(self, expIDX):
        """Dump the data recorded during the current episode, one file per series, suffixed with ``expIDX``."""
        suffix = str(expIDX) + '.txt'
        self.save(self.energyAll, 'Energy' + suffix)
        self.save(self.surviveDriveAll, 'SurviveDrive' + suffix)
        self.save(self.numVisitsRechargeArea, 'NumVisitsRechargeArea' + suffix)
        self.save(self.numVisitsMap, 'NumVisitsMap_Test' + suffix)
        self.save(self.coordinates, 'Coordinates' + suffix)

    def dataTrain_Visits(self):
        """Write the accumulated per-cell visit counts to ``NumVisitsMap_TrainAccumulated0.txt``."""
        self.save(self.numVisitsMap_accumulated, 'NumVisitsMap_TrainAccumulated0.txt')

# Q-Learning with Function Approximation

In [None]:
class ApproximateQAgent:
    """Q-learning with one linear weight vector per action.

    The feature vector is the robot's state with its last two entries (Y and
    X) replaced by one-hot blocks of ``discreteMap`` cells each.

    Parameters
    ----------
    robot : agent exposing ``totalFeatures``, ``totalActions``, ``env``,
        ``reset`` and ``step``.
    learning_parameters : dict with ``alpha`` (learning rate) and ``gamma``
        (discount factor).
    exploration_parameters : dict with ``epsilon``, ``epsilon_min`` and
        ``epsilon_decay``.
    glie : ``'linear'``, ``'exponential'`` or ``'constant'`` epsilon schedule.

    Relies on the module-level generator ``rng`` for random draws.
    """

    def __init__(self, robot, learning_parameters, exploration_parameters, glie):
        self.robot = robot

        # learning parameters (dict)
        self.alpha = learning_parameters['alpha']  # learning rate
        self.gamma = learning_parameters['gamma']  # discount factor

        # exploration parameters
        self.epsilon = exploration_parameters['epsilon']
        self.epsilon_min = exploration_parameters['epsilon_min']
        self.epsilon_decay = exploration_parameters['epsilon_decay']
        self.glie = glie

        # Feature vectors of the current state (S) and of the next state (SL).
        self.featuresS = 0
        self.featuresSL = 0

        # Weights, one row per action; filled by init_featuresWeight()/recoverWeights().
        self.featuresPerAction = 0

        self.independFeatures = self.robot.totalFeatures - 2  # All features, except X and Y

    def _featureCount(self):
        """Length of the expanded feature vector (plain features plus one-hot blocks)."""
        positional = self.robot.totalFeatures - self.independFeatures
        return self.independFeatures + positional * self.robot.env.discreteMap

    ################################## FILES #################################################

    def recoverWeights(self):
        """Load the weights from ``FeaturesPerAction_weightsLast.txt`` (blank lines are ignored)."""
        with open('FeaturesPerAction_weightsLast.txt', 'r') as f:
            rows = [[float(num) for num in line.split(',')] for line in f if line.strip()]
        self.featuresPerAction = np.array(rows)

    def saveWeights(self, fileName, data):
        """Write ``data`` (a list of rows or 2-D array) to ``fileName + '.txt'`` as comma-separated text."""
        rows = np.atleast_2d(np.asarray(data))
        with open(fileName + '.txt', 'wb') as f:
            np.savetxt(f, rows, fmt='%s', delimiter=",")

    def saveDataTraining(self, episode_rewards, episode_steps):
        """Write one ``"<reward> <steps>"`` line per episode to ``RewardsLearning.txt``."""
        with open('RewardsLearning.txt', 'w') as f:
            for pair in zip(episode_rewards, episode_steps):
                f.write(' '.join(str(x) for x in pair) + '\n')

    def saveBestReward(self, bestEpisode, episode_rewards):
        """Print and store the best episode index with its total reward (``BestEpisodeReward.txt``)."""
        best = [bestEpisode, episode_rewards[bestEpisode]]
        print(best)
        with open('BestEpisodeReward.txt', 'wb') as f:
            np.savetxt(f, best, fmt='%s', delimiter=",")

    ################################## Qlearning #################################################

    def init_featuresWeight(self):
        """Initialise every weight with a random value in ``[0, 0.01)``."""
        width = self._featureCount()
        # Scalar draws in row-major order keep the random stream unchanged.
        self.featuresPerAction = np.array(
            [[rng.random() * 0.01 for _ in range(width)] for _ in range(self.robot.totalActions)]
        )

    def setFeatures_binario(self, stateFeatures_, state_S_or_SL):
        """Expand a raw state list into the feature vector and store it.

        The first ``independFeatures`` entries (currentDriveSurvive, minDist,
        up, down, left, right) are copied as they are; each remaining entry
        (agentY, agentX) becomes a one-hot block of ``discreteMap`` cells. The
        result is stored as the current state when ``state_S_or_SL == 0`` and
        as the next state otherwise.
        """
        size = self.robot.env.discreteMap
        plain = self.independFeatures
        stateFeatures = np.zeros(self._featureCount())
        stateFeatures[:plain] = stateFeatures_[:plain]

        for block, cell in enumerate(stateFeatures_[plain:]):
            start = plain + block * size
            # Index inside the block view so out-of-range cells still raise.
            stateFeatures[start:start + size][cell] = 1

        if state_S_or_SL == 0:
            self.featuresS = stateFeatures
        else:
            self.featuresSL = stateFeatures

    def getFeatures(self, state_S_or_SL):
        """Return the stored features of the current state (0) or of the next state (anything else)."""
        if state_S_or_SL == 0:
            return self.featuresS
        return self.featuresSL

    def getQvalue(self, actionIndex, state_S_or_SL):
        """Return Q(state, action): the dot product of the features with that action's weights."""
        features = self.getFeatures(state_S_or_SL)
        return np.dot(features, self.featuresPerAction[actionIndex])

    def getMaxQValue(self):
        """Return the largest Q-value over all actions in the next state."""
        return max(self.getQvalue(action, 1) for action in range(self.robot.totalActions))

    def update(self, actionIdid, reward):
        """Apply one Q-learning update to the weights of ``actionIdid``.

        Afterwards the next-state features become the current-state features.
        """
        Q_sa = self.getQvalue(actionIdid, 0)
        Max_Qsl = self.getMaxQValue()
        # NOTE(review): the target bootstraps from the next state even when the
        # episode has just ended - confirm this is intended.
        TD_target = reward + self.gamma * Max_Qsl

        weights = np.asarray(self.featuresPerAction[actionIdid], dtype=float)
        self.featuresPerAction[actionIdid] = weights + self.alpha * ((TD_target - Q_sa) * self.featuresS)

        # Real copy: slicing an ndarray would only create a view.
        self.featuresS = np.array(self.featuresSL)

    def updateEpsilon(self, totalEpisodes):
        """Decay epsilon according to ``glie``, never going below ``epsilon_min``.

        ``'constant'`` (and any unrecognised schedule) leaves epsilon unchanged.
        """
        if self.glie == 'linear':
            self.epsilon = max(self.epsilon_min, self.epsilon - (1/totalEpisodes))
        elif self.glie == 'exponential':
            self.epsilon = max(self.epsilon_min, self.epsilon * (1 - self.epsilon_decay))

    def getAction(self):
        """Pick an action for the current state with an epsilon-greedy policy.

        Ties between greedy actions go to the lowest action index.
        """
        if rng.random() < self.epsilon:
            # exploration, random choice
            return rng.integers(0, self.robot.totalActions)
        # exploitation, max value for given state (max() keeps the first maximum)
        return max(range(self.robot.totalActions), key=lambda action: self.getQvalue(action, 0))

    ################################## TRAIN #################################################

    def learn(self, max_steps = 5000, total_episodes = 25000):
        """Train from freshly initialised weights.

        Runs ``total_episodes`` episodes of at most ``max_steps`` steps each.
        Per-episode totals are kept in ``self.episode_rewards`` and
        ``self.episode_steps``. Training data is saved every 1000 episodes and
        at the end; the weights are saved as ``FeaturesPerAction_weightsBEST``
        whenever an episode after the 500th matches or beats the best average
        reward per step, and as ``FeaturesPerAction_weightsLast`` at the end.
        """
        self.init_featuresWeight()
        self.episode_rewards = np.zeros(total_episodes)
        self.episode_steps = np.zeros(total_episodes)
        bestEpisode = 0
        for episode in range(total_episodes):
            state = self.robot.reset(0)
            self.setFeatures_binario(state[:], 0)
            # Defined before the loop so max_steps == 0 cannot leave them unbound.
            done = 0
            steps_taken = 0
            for step in range(max_steps):
                action = self.getAction()
                new_state, reward, done = self.robot.step(action)
                self.setFeatures_binario(new_state[:], 1)
                self.update(action, reward)
                self.episode_rewards[episode] += reward
                steps_taken = step + 1
                if done:
                    # Report through self.robot rather than a notebook global.
                    print("died episode ", episode ,  " " , np.sum(self.robot.numVisitsRechargeArea), "reward:", self.episode_rewards[episode])
                    break
            if (done == 0):
                print("episode ", episode ,  " " ,  np.sum(self.robot.numVisitsRechargeArea), "reward:", self.episode_rewards[episode])
            self.episode_steps[episode] = steps_taken
            self.updateEpsilon(total_episodes)
            if (episode > 500 and (self.episode_rewards[episode]/self.episode_steps[episode] >= self.episode_rewards[bestEpisode]/self.episode_steps[bestEpisode])):
                self.saveWeights('FeaturesPerAction_weightsBEST', self.featuresPerAction)
                bestEpisode = episode
            if (episode % 1000 == 0):
                self.saveDataTraining(self.episode_rewards, self.episode_steps)
                self.saveBestReward(bestEpisode, self.episode_rewards)
                self.robot.dataTrain_Visits()
        self.saveWeights('FeaturesPerAction_weightsLast', self.featuresPerAction)
        self.saveDataTraining(self.episode_rewards, self.episode_steps)
        self.saveBestReward(bestEpisode, self.episode_rewards)
        self.robot.dataTrain_Visits()

    ################################## TEST #################################################

    def evaluate(self, max_steps, expIDX):
        """Run one greedy test episode with the last saved weights and dump its sensor data.

        Note: sets ``self.epsilon`` to 0 and does not restore it.
        """
        self.epsilon = 0
        self.recoverWeights()
        state = self.robot.reset(1)
        self.setFeatures_binario(state[:], 0)
        for step in range(max_steps):
            action = self.getAction()
            new_state, reward, done = self.robot.step(action)
            self.setFeatures_binario(new_state[:], 0)
            if done:
                print("died EXPLORING " , step, " steps")
                break
        self.robot.saveSensorsData(expIDX)

    ################################## ONLY ANALYZE ACTION TO CHOSE #################################################

    def chooseActionPerPosition(self, energy):
        """Save the greedy action for every grid cell at the given energy level.

        Uses the last saved weights and writes the result to
        ``ActionsPerPosition_Energy<energy>.txt``. Sets ``self.epsilon`` to 0.
        """
        self.epsilon = 0
        self.recoverWeights()

        side = self.robot.env.discreteMap
        actions = [[-1] * side for _ in range(side)]

        energyDrive = self.robot.computeDriveSurvive(energy)

        for i in range(side):
            for j in range(side):
                minDist, up, down, left, right, station = self.robot.closestRechargeArea(i, j)
                state = [energyDrive, minDist, up, down, left, right, i, j]
                self.setFeatures_binario(state[:], 0)
                actions[i][j] = self.getAction()
        self.saveWeights('ActionsPerPosition_Energy' + str(energy), actions)

# Data Visualization

In [None]:
################################## PLOTS #################################################
class dataVisualization:
    """Plots built from the text files written during training and testing.

    ``settings`` is the same dict used for the environment and must provide
    ``discreteMap``, ``rechargeAreas``, ``sizeRechargeAreas`` and
    ``labelsRechargeArea``. Every plot is saved as both PDF and PNG in the
    current directory and then shown.
    """

    def __init__(self, settings):
        self.discreteMap = settings['discreteMap']
        self.rechargeAreas = settings['rechargeAreas']
        self.sizeRechargeAreas = settings['sizeRechargeAreas']
        self.labelsRechargeArea = settings['labelsRechargeArea']
        # Outline colour of each station in show_heatmap (cycled if there are more stations).
        self.colors_rechargeAreas = ['red', 'red', 'red',  'red']
        # Colours for plot_actionsPerPosition: background first, then one per station.
        self.cmap = ['#440154FF','tab:blue', 'tab:orange', 'tab:green',  'tab:red']

    def show_heatmap(self, fileName, numExps, annot=False):
        """Plot the per-cell visit counts averaged over ``numExps`` files.

        Reads ``fileName + str(i) + '.txt'`` for ``i`` in ``range(numExps)``
        (comma-separated integer rows) and outlines every recharge station.

        Raises ValueError when ``numExps`` is smaller than 1.
        """
        if numExps < 1:
            raise ValueError("numExps must be at least 1")

        total = np.zeros((self.discreteMap, self.discreteMap))
        for i in range(numExps):
            with open(fileName + str(i) + '.txt', 'r') as f:
                visits = [[int(num) for num in line.split(',')] for line in f if line.strip()]
            total = total + np.asarray(visits)
        average = total / numExps

        fig, ax = plt.subplots(figsize=(15, 15))
        # plt.get_cmap() returns the default colormap; matplotlib.cm.get_cmap
        # was removed in Matplotlib 3.9.
        sns.heatmap(average, annot=annot, linewidths=.3, cmap=plt.get_cmap(), fmt=".0f", linecolor="grey", ax=ax)

        # A station covers the cells origin..origin+size inclusive, and heatmap
        # cell k spans [k, k + 1], so the outline is size + 1 units wide.
        extent = self.sizeRechargeAreas + 1
        for i, area in enumerate(self.rechargeAreas):
            y0, x0 = area[0], area[1]
            xs = [x0, x0 + extent, x0 + extent, x0, x0]
            ys = [y0, y0, y0 + extent, y0 + extent, y0]
            ax.plot(xs, ys, color=self.colors_rechargeAreas[i % len(self.colors_rechargeAreas)])

        ax.xaxis.tick_top()
        ax.xaxis.set_label_position('top')
        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        ax.set_title(fileName, pad=20)
        fig.savefig(fileName + '.pdf', dpi=400, bbox_inches='tight')
        fig.savefig(fileName + '.png', dpi=400, bbox_inches='tight')
        plt.show()

    def plot_actionsPerPosition(self, value, annot=False,):
        """Plot the action saved for every cell on top of the station map.

        Reads ``RechargeAreaPositions.txt`` and
        ``ActionsPerPosition_Energy<value>.txt``.
        """
        # Get RechargeArea positions
        with open('RechargeAreaPositions.txt', 'r') as f:
            stationMap = np.asarray([[int(num) for num in line.split(',')] for line in f if line.strip()])

        # Plot RechargeArea positions
        fig, ax = plt.subplots(figsize=(10, 10))
        sns.heatmap(stationMap, annot=annot, linewidths=.3, cmap=self.cmap, fmt=".0f", cbar=False, linecolor="grey", ax=ax)

        # Get and plot the action chosen in each position
        with open('ActionsPerPosition_Energy' + str(value) + '.txt', 'r') as f:
            actions = [[int(num) for num in line.split(',')] for line in f if line.strip()]

        # action id -> (marker, colour). The heatmap's Y axis grows downwards,
        # so action 1 (Y + 1) is drawn with a downward-pointing marker.
        styles = {
            0: ('o', '#000000'),
            1: ('v', '#FEE100'),
            2: ('^', '#FFFFFF'),
            3: ('<', '#00E9E7'),
            4: ('>', '#E2C1F3'),
        }
        side = self.discreteMap  # size of the environment
        for action, (marker, color) in styles.items():
            cells = [(j + 0.5, i + 0.5) for i in range(side) for j in range(side) if actions[i][j] == action]
            if cells:
                xs, ys = zip(*cells)
                ax.scatter(xs, ys, marker=marker, s=100, color=color)

        ax.xaxis.tick_top()
        ax.xaxis.set_label_position('top')
        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        # Only y-axis labels need their rotation set, x-axis labels already have a rotation of 0
        plt.setp(ax.get_yticklabels(), rotation=0)

        ax.set_title('Actions Per Position_Energy' + str(value))
        fig.savefig('ActionsPerPosition_Energy' + str(value) + '.pdf', dpi=400, bbox_inches='tight')
        fig.savefig('ActionsPerPosition_Energy' + str(value) + '.png', dpi=400, bbox_inches='tight')
        plt.show()

    def plotTotalRewardsPerEisode(self, timeWindow):
        """Plot reward per step and episode length, averaged over windows of ``timeWindow`` episodes.

        Reads ``RewardsLearning.txt`` (one ``"<reward> <steps>"`` line per
        episode). Episodes beyond the last complete window are ignored.
        """
        rewards = []
        actions = []
        with open('RewardsLearning.txt') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines
                rewards.append(float(fields[0]))
                actions.append(int(float(fields[1])))

        RewardAverage_TimeWindow = []
        ActionsAverage_TimeWindow = []
        stdActions = []
        steps = []
        for i in range(len(rewards) // timeWindow):
            window = slice(i * timeWindow, (i + 1) * timeWindow)
            RewardAverage_TimeWindow.append(np.mean(rewards[window], dtype=np.float64))
            ActionsAverage_TimeWindow.append(np.mean(actions[window], dtype=np.float64))
            stdActions.append(np.std(actions[window], dtype=np.float64))
            steps.append(i)

        # Average reward per step in each window.
        rM = [r / a for r, a in zip(RewardAverage_TimeWindow, ActionsAverage_TimeWindow)]

        fig, ax = plt.subplots(nrows=2, sharex=True)
        plt.xlabel('Episodes')
        ax[0].set_ylabel('Average Reward')
        ax[1].set_ylabel('Steps')

        ax[0].plot(steps, rM)
        ax[1].plot(steps, ActionsAverage_TimeWindow)
        upper = [mean + std for mean, std in zip(ActionsAverage_TimeWindow, stdActions)]
        lower = [mean - std for mean, std in zip(ActionsAverage_TimeWindow, stdActions)]
        ax[1].fill_between(steps, upper, lower, alpha=0.2)

        fig.savefig('TotalRewards_AverageAndAction.pdf', dpi=400, bbox_inches='tight')
        fig.savefig('TotalRewards_AverageAndAction.png', dpi=400, bbox_inches='tight')
        plt.show()

    def plotSensorsData(self, sensorFile, labelX, numExps):
        """Plot the mean (with a one-standard-deviation band) of a sensor series per experiment.

        Reads ``sensorFile + str(i) + '.txt'`` for ``i`` in ``range(numExps)``;
        the last non-blank line of each file is taken as the comma-separated
        series.

        Raises ValueError when a file holds no data.
        """
        allSensorsData_Average = []
        allSensorsData_std = []
        fig, ax = plt.subplots()

        for i in range(numExps):
            path = sensorFile + str(i) + '.txt'
            with open(path) as f:
                lines = [line for line in f if line.strip()]
            if not lines:
                # Previously an empty file silently reused the previous experiment's data.
                raise ValueError("no sensor data in " + path)
            sensorData = [float(k) for k in lines[-1].replace('\n', '').split(",")]
            allSensorsData_Average.append(np.mean(sensorData, dtype=np.float64))
            allSensorsData_std.append(np.std(sensorData, dtype=np.float64))

        steps = list(range(1, numExps + 1))
        ax.set_title('Average ' + labelX + ' per Experiment')
        ax.set_xlabel("Experiments")

        ax.plot(steps, allSensorsData_Average)
        upper = [mean + std for mean, std in zip(allSensorsData_Average, allSensorsData_std)]
        lower = [mean - std for mean, std in zip(allSensorsData_Average, allSensorsData_std)]
        ax.fill_between(steps, upper, lower, alpha=0.2)

        fig.savefig(labelX + '_.pdf', dpi=400, bbox_inches='tight')
        fig.savefig(labelX + '_.png', dpi=400, bbox_inches='tight')
        plt.show()

# Experiments

## Start the Environment

In [None]:
# World configuration shared by the environment and the plots.
settings = dict(
    discreteMap=20,                       # cells per side of the square grid
    sizeRechargeAreas=1,                  # offset from a station origin to its far edge
    labelsRechargeArea=['A', 'B', 'C', 'D'],
    rechargeAreas=[[2, 4], [4, 13], [16, 2], [14, 15]],  # [initial Y, initial X]
    rechargeValue=[3, 3, 3, 3],           # alternative values noted by the author: 1, 4, 3, 2
)

env = Environment(settings)

In [None]:
# Need: Energy, Drive: Survive
survive = dict(
    homeostasisSurvive=30,  # energy level at which the survive drive is zero
    maxEnergy=50,
    minEnergy=0,
    discountEnergy=0.1,     # energy spent on every step
)

# Action id -> label; the ids are the values interpreted by Agent.step().
actions = dict(enumerate(['Stop', 'Up', 'Down', 'Left', 'Right']))

distanceMeasure = ['Euclidean', 'Manhattan']

robot = Agent(env, actions, survive, distanceMeasure[0])

In [None]:
# q-learning parameters
learning_parameters = dict(
    alpha=0.0001,  # learning rate
    gamma=0.9,     # discount factor
)
# exploration-exploitation parameters
exploration_parameters = dict(
    epsilon=1.0,           # exploration probability at start
    epsilon_min=0.01,      # minimum exploration probability
    epsilon_decay=0.0003,  # exponential decay rate for exploration prob
)

# Available epsilon schedules; the first one (linear) is used here.
glie = ['linear', 'exponential', 'constant']
qApp_Agent_1 = ApproximateQAgent(robot, learning_parameters, exploration_parameters, glie[0])

In [None]:
# Training budget: episodes to run and the step cap of each episode.
total_episodes = 25000
max_steps = 5000
qApp_Agent_1.learn(max_steps=max_steps, total_episodes=total_episodes)

In [None]:
# Test phase: one greedy episode per fixed start position.
numExps = 50
max_steps = 8000
for i in range(numExps):
    qApp_Agent_1.evaluate(max_steps, i)
    print('End of the experiment ', i)

In [None]:
# Plot configuration (repeated here so this cell does not depend on the test cell).
max_steps = 8000
numExps = 50
timeWindow = 100

p = dataVisualization(settings)

# Learning curve and visit heatmaps (training has a single accumulated file).
p.plotTotalRewardsPerEisode(timeWindow)
p.show_heatmap('NumVisitsMap_TrainAccumulated', 1)
p.show_heatmap('NumVisitsMap_Test', numExps)

# Per-experiment sensor averages from the test phase.
p.plotSensorsData('Energy', 'Energy', numExps)
p.plotSensorsData('SurviveDrive', 'Drive Survive', numExps)

# Greedy policy map at a low, set-point and high energy level.
energy = [15, 30, 45]
for level in energy:
    qApp_Agent_1.chooseActionPerPosition(level)
    p.plot_actionsPerPosition(level)