## MO444 Project 3 - Reinforcement Learning

* Rodrigo Araújo Marinho Franco - RA: 233569
* Felipe Marinho Tavares - RA: 265680

### Contributions:
* Rodrigo
 * ...
* Felipe
 * ...

# PART I - Evolutionary Model

# PART II - Reinforcement Learning

In [1]:
from util import *

import pacman as pm
import layout as l
import textDisplay
import pacmanAgents, ghostAgents

try:
    import boinc
    _BOINC_ENABLED = True
except:
    _BOINC_ENABLED = False

In [2]:
class Environment:
    def __init__(self, layout="mediumClassic"):
        self.layout = l.getLayout(layout)
        self.beQuiet=True
        self.catchExceptions = False
        self.rules = pm.ClassicGameRules(timeout=30)
        self.reset()
        self.reward = 0 # Score by doing last action
        self.total_reward = 0 # Total reward

    def reset(self):
        self.display = textDisplay.NullGraphics()
        self.pacman = pacmanAgents.GreedyAgent()
        self.ghosts = [ghostAgents.RandomGhost(i+1) for i in range(self.layout.getNumGhosts())]
        self.agents = [self.pacman] + self.ghosts
        self.game = self.rules.newGame(self.layout, self.pacman, self.ghosts,
                                       self.display, self.beQuiet, self.catchExceptions)

        # inform learning agents of the game start
        for i in range(len(self.agents)):
            agent = self.game.agents[i]
            if not agent:
                self.game.mute(i)
                # this is a null agent, meaning it failed to load
                # the other team wins
                print("Agent %d failed to load" % i, file=sys.stderr)
                self.game.unmute()
                self.game._agentCrash(i, quiet=True)
                return

        self.agentIndex = self.game.startingIndex
        self.numAgents = len(self.game.agents)

    def get_current_state(self):
        return self.game.state.deepCopy()

    def get_reward(self):
        return env.game.state.getScore()

    def update_game_state(self, action):
        # Execute the action
        self.game.state = self.game.state.generateSuccessor( self.agentIndex, action )
        # Change the display
        self.game.display.update( self.game.state.data )
        # Allow for game specific conditions (winning, losing, etc.)
        self.game.rules.process(self.game.state, self.game)
        # Track progress
        if self.agentIndex == self.numAgents + 1: self.game.numMoves += 1
        # Next agent
        self.agentIndex = ( self.agentIndex + 1 ) % self.numAgents

        if _BOINC_ENABLED:
            boinc.set_fraction_done(self.game.getProgress())

    def step(self):
        initial_reward = self.get_reward()

        for agentIndex, agent in enumerate(self.game.agents):
            if not self.done():
                observation = self.get_current_state()
                action_ghost = agent.getAction(observation)
                if agentIndex == 0:
                    current_reward = self.get_reward()
                    reward_by_action = current_reward - initial_reward
                    #agent.step(reward_by_action)
                self.update_game_state(action_ghost)

    def done(self, fast_check=False):
        if not self.game.gameOver:
            return False
        else:
            if fast_check: self.game.display.finish()
            return True

In [3]:
env = Environment()

In [4]:
n_episodes = 10
for i_episode in range(1, n_episodes + 1):
    print("Episode: ", i_episode)

    env.reset()

    while not env.done():
        env.step()

    scores = [env.game.state.getScore()]
    wins = [env.game.state.isWin()]
    winRate = wins.count(True)/ float(len(wins))
    print('Average Score:', sum(scores) / float(len(scores)))
    print('Scores:       ', ', '.join([str(score) for score in scores]))
    print('Win Rate:      %d/%d (%.2f)' % (wins.count(True), len(wins), winRate))
    print('Record:       ', ', '.join([ ['Loss', 'Win'][int(w)] for w in wins]))

    print()

Episode:  1
Average Score: 69.0
Scores:        69.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  2
Average Score: 690.0
Scores:        690.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  3
Average Score: 336.0
Scores:        336.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  4
Average Score: 162.0
Scores:        162.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  5
Average Score: 112.0
Scores:        112.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  6
Average Score: 167.0
Scores:        167.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  7
Average Score: -72.0
Scores:        -72.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  8
Average Score: -99.0
Scores:        -99.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  9
Average Score: 227.0
Scores:        227.0
Win Rate:      0/1 (0.00)
Record:        Loss

Episode:  10
Average Score: -358.0
Scores:        -358.0
Win Rate:      0/1 (0.00)
Record:   