An implementation of the 'soccer'-game proposed in (Littman, 94)
----------------------------------------------------------------------------------------------

[Markov games as a framework for multi-agent reinforcement learning (Littman, 94)](https://www2.cs.duke.edu/courses/spring07/cps296.3/littman94markov.pdf)

Rules of the game:
* played by two players A & B on a 4x5 grid
* A & B occupy distinct squares of the grid and can choose from 5 actions on each turn: N, S, E, W and stand. Once both players have selected their actions, the two moves are executed in random order.
* One player has the ball. If he steps with it into the appropriate goal, that player scores a goal and the board is reset to the original configuration. Possession of the ball goes to one or the other player at random.
* When a player takes an action that would take him to the square occupied by the other player (remember: random order of execution AFTER action choice!), possession of the ball goes to the stationary player and the move doesn't take place. Goals are worth 1 point and the discount factor is set to 0.9, which makes scoring sooner somewhat better than scoring later.

In [4]:
import random

In [25]:
class Soccer:
    """Board state and movement rules for a two-player grid soccer game.

    The board is a dict mapping ``(x, y)`` to the name of the player
    standing there, or ``None`` for an empty square, with ``x`` in
    ``range(5)`` and ``y`` in ``range(4)``.  ``has_ball`` names the player
    currently holding the ball and ``winner`` is ``None`` until a player
    scores.
    """

    # Unit step (dx, dy) for each direction; y grows downwards when shown.
    directions = {'N': (0, -1), 'E': (1, 0), 'S': (0, 1), 'W': (-1, 0)}

    def __init__(self, **kwargs):
        """Create a game in its starting configuration (``kwargs`` unused)."""
        self.players = ('A', 'B')
        self.initialize()

    def _render(self):
        """Return the board as text; the ball holder is marked with 'o'."""
        rule = '----------------\n'
        parts = [rule]
        for y in range(4):
            cells = []
            for x in range(5):
                occupant = self.board[(x, y)]
                if occupant is None:
                    cells.append('  ')
                elif self.has_ball == occupant:
                    cells.append(occupant + 'o')
                else:
                    cells.append(occupant + ' ')
            parts.append('|' + '|'.join(cells) + '|\n')
            parts.append(rule)
        return ''.join(parts)

    def show(self):
        """Print the board to standard output."""
        print(self._render())

    def initialize(self):
        """Reset the board, hand the ball to a random player, clear the winner."""
        self.board = {(x, y): None for x in range(5) for y in range(4)}
        self.board[(3, 1)] = self.players[0]
        self.board[(1, 2)] = self.players[1]
        self.has_ball = random.choice(self.players)
        self.winner = None

    def find_player(self, player):
        """Return the ``(x, y)`` square occupied by ``player``.

        Raises:
            KeyError: if ``player`` is not on the board.
        """
        for y in range(4):
            for x in range(5):
                if self.board[(x, y)] == player:
                    return (x, y)
        raise KeyError('Player {} not found'.format(player))

    def other_player(self, player):
        """Return 'B' when ``player`` is 'A', and 'A' otherwise."""
        if player == 'A':
            return 'B'
        return 'A'

    def move(self, player, direction):
        """Move ``player`` one square in ``direction`` ('N', 'E', 'S' or 'W').

        A move off the grid leaves the player in place.  A move onto the
        other player's square is cancelled, and if the mover held the ball
        it passes to the other player.  ``winner`` is set when 'A' holding
        the ball moves west from (0, 1) or (0, 2), or 'B' holding the ball
        moves east from (4, 1) or (4, 2).

        Raises:
            KeyError: if ``player`` is not on the board or ``direction`` is
                not valid.  The board is left unchanged in either case.
        """
        old_x, old_y = self.find_player(player)

        # Validate before touching the board, so a bad direction cannot
        # leave the player erased from it.
        if direction not in self.directions:
            raise KeyError('Direction {} not valid'.format(direction))

        # check if this leads to a win
        if player == 'A' and (old_x, old_y) in [(0, 1), (0, 2)] and self.has_ball == 'A' and direction == 'W':
            self.winner = 'A'
        if player == 'B' and (old_x, old_y) in [(4, 1), (4, 2)] and self.has_ball == 'B' and direction == 'E':
            self.winner = 'B'

        dx, dy = self.directions[direction]
        # Clamp to the grid: stepping off an edge keeps the player in place.
        target = (min(4, max(0, old_x + dx)), min(3, max(0, old_y + dy)))

        occupant = self.board[target]
        if occupant is None or occupant == player:
            self.board[(old_x, old_y)] = None
            self.board[target] = player
        elif self.has_ball == player:
            # Walked into the other player's square: the move is cancelled
            # and the ball goes to the stationary player.
            self.has_ball = self.other_player(player)

In [32]:
# Start a fresh game and draw the initial board.
soccer = Soccer()
soccer.show()

----------------
|  |  |  |  |  |
----------------
|  |  |  |A |  |
----------------
|  |Bo|  |  |  |
----------------
|  |  |  |  |  |
----------------



In [36]:
# Move player B one step east, redraw the board, and print the winner
# (None until a player has scored).
soccer.move('B', 'E')
soccer.show()
print(soccer.winner)

----------------
|  |  |  |  |  |
----------------
|  |  |  |A |  |
----------------
|  |  |  |  |Bo|
----------------
|  |  |  |  |  |
----------------

B


In [63]:
# Play one game to completion: A samples its action via choose_actions,
# B moves uniformly at random.  The cells defining choose_actions and
# policy must have been executed before this one.
soccer = Soccer()
moves = ['N', 'E', 'S', 'W']

i = 0  # number of completed rounds (one move by A followed by one by B)
while True:
    # The sampler defined in this notebook is `choose_actions`; the
    # previous call to `choose_action` referred to an undefined name.
    soccer.move('A', choose_actions(moves, soccer))
    if soccer.winner: break
    soccer.move('B', random.choice(moves))
    if soccer.winner: break
    i += 1
soccer.show()
print('# moves = {}, winner = {}'.format(i, soccer.winner))

----------------
|  |A |  |  |  |
----------------
|  |  |  |  |Bo|
----------------
|  |  |  |  |  |
----------------
|  |  |  |  |  |
----------------

# moves = 93, winner = B


In [86]:
from numpy.random import multinomial
def choose_actions(actions, player):
    """Sample one action from ``actions`` according to ``policy``.

    Args:
        actions: sequence of candidate actions.
        player: forwarded unchanged as the second argument of ``policy``
            (callers in this notebook pass the game object or ``None``).

    Returns:
        The element of ``actions`` selected by a single multinomial draw
        over the probabilities returned by ``policy``.
    """
    pvals = policy(actions, player)
    draw = multinomial(1, pvals)
    # A single draw yields a one-hot vector; argmax is the index of the 1.
    return actions[int(draw.argmax())]

In [118]:
def policy(moves, soccer):
    """Return a uniform probability distribution over ``moves``.

    Args:
        moves: sequence of available actions.
        soccer: game state; not used by this uniform policy.

    Returns:
        A list with one probability per move, each equal to
        ``1 / len(moves)``; an empty list when ``moves`` is empty.
    """
    n = len(moves)
    if n == 0:
        return []
    # 1.0 forces true division even where `/` on ints truncates.
    return [1.0 / n] * n

In [128]:
# Draw a single action from the policy's distribution over `moves`.
choose_actions(moves, None)

'W'