## MO444 Project 3 - Reinforcement Learning

* Rodrigo Araújo Marinho Franco - RA: 233569
* Felipe Marinho Tavares - RA: 265680

### Contributions:
* Rodrigo
 * ...
* Felipe
 * ...

# PART I - Evolutionary Model

# PART II - Reinforcement Learning

In [1]:
import time, os
import traceback
import sys
from util import *

import pacman as pm
import layout as l
import textDisplay
import pacmanAgents, ghostAgents

try:
    import boinc
    _BOINC_ENABLED = True
except:
    _BOINC_ENABLED = False

In [2]:
class Environment:
    def __init__(self, layout="mediumClassic"):
        self.layout = l.getLayout(layout)
        self.beQuiet=True
        self.catchExceptions = False
        self.rules = pm.ClassicGameRules(timeout=30)
        self.reset()

    def reset(self):
        self.display = textDisplay.NullGraphics()
        self.pacman = pacmanAgents.GreedyAgent()
        self.ghosts = [ghostAgents.RandomGhost(i+1) for i in range(self.layout.getNumGhosts())]
        self.agents = [pacmanAgents.GreedyAgent()] + [ghostAgents.RandomGhost(i+1) for i in range(self.layout.getNumGhosts())]
        self.game = self.rules.newGame(self.layout, self.pacman, self.ghosts,
                                       self.display, self.beQuiet, self.catchExceptions)
        ####################################
        ## From function run() of game.py ##
        ####################################
        # inform learning agents of the game start
        for i in range(len(self.agents)):
            agent = self.game.agents[i]
            if not agent:
                self.game.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                print("Agent %d failed to load" % i, file=sys.stderr)
                self.game.unmute()
                self.game._agentCrash(i, quiet=True)
                return
            if ("registerInitialState" in dir(agent)):
                self.game.mute(i)
                if self.game.catchExceptions:
                    try:
                        timed_func = TimeoutFunction(agent.registerInitialState, int(self.game.rules.getMaxStartupTime(i)))
                        try:
                            self.start_time = time.time()
                            timed_func(self.game.state.deepCopy())
                            time_taken = time.time() - self.start_time
                            self.game.totalAgentTimes[i] += time_taken
                        except TimeoutFunctionException:
                            print("Agent %d ran out of time on startup!" % i, file=sys.stderr)
                            self.game.unmute()
                            self.game.agentTimeout = True
                            self.game._agentCrash(i, quiet=True)
                            return
                    except Exception as data:
                        self.game._agentCrash(i, quiet=False)
                        self.game.unmute()
                        return
                else:
                    agent.registerInitialState(self.game.state.deepCopy())
                ## TODO: could this exceed the total time
                self.game.unmute()

        self.agentIndex = self.game.startingIndex
        self.numAgents = len( self.game.agents )

    def step(self):
        # Fetch the next agent
        agent = self.game.agents[self.agentIndex]
        move_time = 0
        skip_action = False
        # Generate an observation of the state
        if 'observationFunction' in dir( agent ):
            self.game.mute(self.agentIndex)
            if self.game.catchExceptions:
                try:
                    timed_func = TimeoutFunction(agent.observationFunction, int(self.game.rules.getMoveTimeout(self.agentIndex)))
                    try:
                        self.start_time = time.time()
                        observation = timed_func(self.game.state.deepCopy())
                    except TimeoutFunctionException:
                        skip_action = True
                    move_time += time.time() - self.start_time
                    self.game.unmute()
                except Exception as data:
                    self.game._agentCrash(self.agentIndex, quiet=False)
                    self.game.unmute()
                    return
            else:
                observation = agent.observationFunction(self.game.state.deepCopy())
            self.game.unmute()
        else:
            observation = self.game.state.deepCopy()

        # Solicit an action
        action = None
        self.game.mute(self.agentIndex)
        if self.game.catchExceptions:
            try:
                timed_func = TimeoutFunction(agent.getAction, int(self.game.rules.getMoveTimeout(self.agentIndex)) - int(move_time))
                try:
                    self.start_time = time.time()
                    if skip_action:
                        raise TimeoutFunctionException()
                    action = timed_func( observation )
                except TimeoutFunctionException:
                    print("Agent %d timed out on a single move!" % self.agentIndex, file=sys.stderr)
                    self.game.agentTimeout = True
                    self.game._agentCrash(self.agentIndex, quiet=True)
                    self.game.unmute()
                    return

                move_time += time.time() - self.start_time

                if move_time > self.game.rules.getMoveWarningTime(self.agentIndex):
                    self.game.totalAgentTimeWarnings[self.agentIndex] += 1
                    print("Agent %d took too long to make a move! This is warning %d" % (self.agentIndex, self.game.totalAgentTimeWarnings[self.agentIndex]), file=sys.stderr)
                    if self.game.totalAgentTimeWarnings[self.agentIndex] > self.game.rules.getMaxTimeWarnings(self.agentIndex):
                        print("Agent %d exceeded the maximum number of warnings: %d" % (self.agentIndex, self.game.totalAgentTimeWarnings[self.agentIndex]), file=sys.stderr)
                        self.game.agentTimeout = True
                        self.game._agentCrash(self.agentIndex, quiet=True)
                        self.game.unmute()
                        return

                self.game.totalAgentTimes[self.agentIndex] += move_time
                #print("Agent: %d, time: %f, total: %f" % (self.agentIndex, move_time, self.game.totalAgentTimes[self.agentIndex]))
                if self.game.totalAgentTimes[self.agentIndex] > self.game.rules.getMaxTotalTime(self.agentIndex):
                    print("Agent %d ran out of time! (time: %1.2f)" % (self.agentIndex, self.game.totalAgentTimes[self.agentIndex]), file=sys.stderr)
                    self.game.agentTimeout = True
                    self.game._agentCrash(self.agentIndex, quiet=True)
                    self.game.unmute()
                    return
                self.game.unmute()
            except Exception as data:
                self.game._agentCrash(self.agentIndex)
                self.game.unmute()
                return
        else:
            action = agent.getAction(observation)
        self.game.unmute()

        # Execute the action
        self.game.moveHistory.append( (self.agentIndex, action) )
        if self.game.catchExceptions:
            try:
                self.game.state = self.game.state.generateSuccessor( self.agentIndex, action )
            except Exception as data:
                self.game.mute(self.agentIndex)
                self.game._agentCrash(self.agentIndex)
                self.game.unmute()
                return
        else:
            self.game.state = self.game.state.generateSuccessor( self.agentIndex, action )

        # Change the display
        self.game.display.update( self.game.state.data )
        ###idx = self.agentIndex - self.agentIndex % 2 + 1
        ###self.game.display.update( self.game.state.makeObservation(idx).data )

        # Allow for game specific conditions (winning, losing, etc.)
        self.game.rules.process(self.game.state, self.game)
        # Track progress
        if self.agentIndex == self.numAgents + 1: self.game.numMoves += 1
        # Next agent
        self.agentIndex = ( self.agentIndex + 1 ) % self.numAgents

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.game.getProgress())

    def done(self):
        if not self.game.gameOver:
            return False
        else:
            # inform a learning agent of the game result
            for agentIndex, agent in enumerate(self.game.agents):
                if "final" in dir( agent ) :
                    try:
                        self.game.mute(agentIndex)
                        agent.final( self.game.state )
                        self.game.unmute()
                    except Exception as data:
                        if not self.game.catchExceptions: raise data
                        self.game._agentCrash(agentIndex)
                        self.game.unmute()
                        return
            self.game.display.finish()

            return True

In [3]:
env = Environment()

In [4]:
n_episodes = 1000
for i_episode in range(1, n_episodes + 1):
    print("Episode: ", i_episode)

    env.reset()

    while not env.done():
        env.step()

    scores = [env.game.state.getScore()]
    wins = [env.game.state.isWin()]
    winRate = wins.count(True)/ float(len(wins))
    print('Average Score:', sum(scores) / float(len(scores)))
    print('Scores:       ', ', '.join([str(score) for score in scores]))
    print('Win Rate:      %d/%d (%.2f)' % (wins.count(True), len(wins), winRate))
    print('Record:       ', ', '.join([ ['Loss', 'Win'][int(w)] for w in wins]))

    print()

Episode:  1
Average Score: -371.0
Scores:        -371.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  2
Average Score: 204.0
Scores:        204.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  3
Average Score: 29.0
Scores:        29.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  4
Average Score: -343.0
Scores:        -343.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  5
Average Score: -86.0
Scores:        -86.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  6
Average Score: -47.0
Scores:        -47.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  7
Average Score: 219.0
Scores:        219.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  8
Average Score: 26.0
Scores:        26.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  9
Average Score: -127.0
Scores:        -127.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  10
Average Score: -254.0
Scores:        -254.0
Win Rate:      0/1 (0.00)
Record