In [None]:
%matplotlib notebook
import numpy as np
from copy import deepcopy
import matplotlib.pyplot as plt
from matplotlib.animation import ArtistAnimation
import random
import functools

# For debugging purposes
from IPython.core.debugger import set_trace

plt.rcParams["animation.html"] = "jshtml"

In [None]:
class InvalidMove(Exception):
    """
    Signals that an agent attempted a move the rules do not allow

    Typical causes are an agent that:

    returns None
    returns an already occupied cell
    returns a cell outside of the playing field
    """

    def __init__(self, agent, board, move):
        """
        Constructor

        params:
            - agent: the agent causing the exception (stored as `culprit`)
            - board: the board state without the executed move (stored as `state`)
            - move: the move causing the exception (stored as `move`)
        """
        self.culprit, self.state, self.move = agent, board, move

    def __str__(self):
        """
        Human readable description naming the culprit and the rejected move
        """
        details = (self.culprit, self.move)
        return "Illegal Move: %s wants to do %s" % details

    def __repr__(self):
        """
        Same text as __str__
        """
        return self.__str__()


In [None]:
class Board:
    """
    Representation of the game state

    The playing field is a square integer array. A value of 0 marks a free
    cell, every other value is the ID of the agent owning that cell.
    A "line" is one complete row, column or diagonal of the field.
    """
    WON = 1
    LOST = 2
    DRAW = 3
    READY = 0
    ONGOING = 4

    def __init__(self, size, agent_ids):
        """
        Constructor

        params:
            - size: Size of the field (single dimension)
            - agent_ids: IDs of the agents playing the game
        """
        self.agent_ids = agent_ids
        self.field = np.zeros((size, size), dtype=np.intp)
        self.lastMove = None
        self.size = size

        # Construction of the index templates used for counting lines.
        # Every template is a (size, 2) array of cell coordinates, so the
        # templates follow the board size instead of assuming a 5x5 field.
        steps = np.arange(size, dtype=np.intp)
        cross_lines = [np.stack([steps, steps], axis=1),
                       np.stack([steps, size - 1 - steps], axis=1)]
        x_lines = [np.stack([steps, np.full(size, i, dtype=np.intp)], axis=1)
                   for i in range(size)]
        y_lines = [np.stack([np.full(size, i, dtype=np.intp), steps], axis=1)
                   for i in range(size)]
        self.lines = []
        self.lines.extend(cross_lines)
        self.lines.extend(x_lines)
        self.lines.extend(y_lines)

    def execute(self, move):
        """
        Execute a move on the board

        params:
            - move: 2 tuple containing agentID and move (the target cell)
        returns:
            - Reference to the modified board
        raises:
            - InvalidMove: if the move or its cell is None, the cell lies
              outside of the playing field or the cell is already occupied
        """
        agent_id, cell = (None, None) if move is None else (move[0], move[1])
        if cell is None:
            raise InvalidMove(agent_id, self, cell)
        row, col = cell[0], cell[1]
        # Explicit range check: numpy would silently wrap negative indices
        # around and raise IndexError (not InvalidMove) for too large ones.
        if not (0 <= row < self.size and 0 <= col < self.size):
            raise InvalidMove(agent_id, self, cell)
        if self.field[row, col] != 0:
            raise InvalidMove(agent_id, self, cell)
        self.field[row, col] = agent_id
        self.lastMove = cell
        return self

    def __do_it(self, agent_id):
        """
        Helper function to count lines

        A line containing a cell of another agent counts with length 0,
        otherwise its length is the number of cells owned by the agent.

        params:
            - agent_id: ID of the agent lines should be counted for
        returns:
            - float array with the number of lines per length, ordered by
              descending line length (size, size - 1, ..., 1)
        """
        lengths = []
        for line in self.lines:
            cells = self.field[line[:, 0], line[:, 1]]
            own = cells == agent_id
            blocked = np.any(~own & (cells != 0))
            lengths.append(0 if blocked else int(np.count_nonzero(own)))
        per_length = np.bincount(lengths, minlength=self.size + 1)
        # Drop the bucket for length 0 and reverse to descending order
        return per_length[:0:-1].astype(float)

    def __other_id(self, agent_id):
        """
        Helper function returning the first agent ID differing from agent_id
        """
        return [i for i in self.agent_ids if i != agent_id][0]

    def get_lines(self, agent):
        """
        Extracts the current feature vector from the board

        params:
            - agent: the agent defining the perspective of the features
        returns:
            - numpy array containing the number of lines of the agent in descending order of length
              concatenated with the number of lines of the opponent agent multiplied by -1
        """
        agent_list = self.__do_it(agent.i)
        other_list = self.__do_it(self.__other_id(agent.i))
        return np.concatenate([agent_list, -1 * other_list])

    def get_free_list(self):
        """
        Returns the list of still possible moves

        returns:
            - Array of shape (n, 2) holding the coordinates of all free cells
        """
        return np.array(np.where(self.field == 0), dtype=np.intp).T

    def is_finished(self):
        """
        Checks if game has ended

        returns:
            - True if no cell is free or one of the first two agents
              completed a line, False otherwise
        """
        if len(self.get_free_list()) == 0:
            return True
        return any(self.__do_it(agent_id)[0] for agent_id in self.agent_ids[:2])

    def get_game_state(self, agent):
        """
        Returns the current game state

        params:
            - agent: the agent defining the perspective of the result
        returns:
            - Board.WON, Board.LOST, Board.DRAW if game ended,
            Board.READY if game not yet started, Board.ONGOING otherwise
        """
        if self.__do_it(agent.i)[0]:
            return Board.WON
        # The opponent has to be checked before the draw condition: a line
        # completed with the very last free cell is a loss, not a draw.
        if self.__do_it(self.__other_id(agent.i))[0]:
            return Board.LOST
        free_cells = len(self.get_free_list())
        if free_cells == 0:
            return Board.DRAW
        if free_cells == self.size ** 2:
            return Board.READY
        return Board.ONGOING

    def render(self, ax):
        """
        Renders the board to an image

        To be called by Game.render! Not for direct use!
        Cells of the first agent are drawn red, cells of the second blue.

        params:
            - ax: a matplotlib axis object
        returns:
            - a list of drawn patches
        """
        output = []
        for agent_id, color in zip(self.agent_ids[:2], ("red", "blue")):
            for cell in np.transpose(np.where(self.field == agent_id)):
                output.append(Board.token(ax, cell, color))
        return output

    @staticmethod
    def token(ax, pos, color):
        """
        Renders a token to the board image

        params:
            - ax: a matplotlib axis object
            - pos: the position to render (cell coordinates)
            - color: the color of the token
        returns:
            - the patch created by the token
        """
        # asarray keeps the shift to the cell center an element-wise
        # addition even if pos is handed in as a plain tuple or list
        center = np.asarray(pos) + 0.5
        patch = plt.Circle(center, radius=0.4, color=color)
        ax.add_patch(patch)
        return patch


In [None]:
class Game:
    """
    Tic-Tac-Toe Game Representation

    The class handles the game logic and runs the games.
    It also allows for visualization of a games result using the render method.
    """

    def __init__(self, size, agent0, agent1):
        """
        Constructor

        params:
            - size: Size of the board (one dimension)
            - agent0: Reference to the first agent
            - agent1: Reference to the second agent
        """
        self.board = Board(size, (agent0.i, agent1.i))
        self.agents = (agent0, agent1)

    def run(self):
        """
        Run the game with supplied agents

        The agents move alternately, starting with the first agent. Each
        agent only receives a copy of the board. An invalid move ends the
        game immediately and counts as a loss for the agent causing it.

        returns:
            - Tuple containing result, boards and agents
            - result: One of Board.WON, Board.LOST, Board.DRAW
              (from the perspective of the first agent)
            - boards: List of intermediate Board states during the game
            - agents: Tuple of references to the playing agents
        """
        boards = []
        while not self.board.is_finished():
            for agent in self.agents:
                move = agent.next_move(deepcopy(self.board))
                try:
                    self.board.execute(move)
                except InvalidMove as error:
                    print("Invalid move: %s wants to %s" % (agent, error.move))
                    # The result is reported for the first agent, so it wins
                    # if the second agent is the one breaking the rules.
                    result = Board.WON if agent.i == self.agents[1].i else Board.LOST
                    # Same 3-tuple as the regular exit, callers unpack three values
                    return result, boards, self.agents
                boards.append(deepcopy(self.board))
                if self.board.is_finished():
                    break
        return self.board.get_game_state(self.agents[0]), boards, self.agents

    def render(self, boards):
        """
        Render multiple board states into a single video

        params:
            - boards: sequence of board states, one animation frame each

        returns:
            - the matplotlib ArtistAnimation object; keep a reference to it
              as long as the animation should stay alive
        """
        fig, ax = plt.subplots()

        x = self.board.field.shape[0]
        y = self.board.field.shape[1]

        ax.set_xlim(0, x)
        ax.set_ylim(0, y)

        # Unlabeled ticks and grey lines form the grid of the board
        ax.set_xticklabels([])
        ax.set_xticks(np.arange(1, x - 0.5))
        ax.set_yticklabels([])
        ax.set_yticks(np.arange(1, y - 0.5))
        ax.vlines(range(0, x), 0, y, "grey")
        ax.hlines(range(0, y), 0, x, "grey")

        # One frame (list of patches) per board state
        images = [board.render(ax) for board in boards]

        # The animation has to stay referenced, otherwise it can be garbage
        # collected before it is displayed; hence it is handed to the caller.
        animation = ArtistAnimation(fig, images, interval=1000)
        plt.show()
        return animation


In [None]:
class Agent:
    """
    Generic Agent

    Common base class of all agents; it only stores the agent ID.
    """

    def __init__(self, i):
        """
        Constructor:

        params:
            - i: ID of the agent (needs to be unique)
        """
        self.i = i

    def __str__(self):
        """
        Describe the agent by the name of its class followed by its ID
        """
        kind = type(self).__name__
        return "%s %i" % (kind, self.i)

    def __repr__(self):
        """
        Same text as __str__
        """
        return self.__str__()


class RandomAgent(Agent):
    """
    Random Tic-Tac-Toe Agent

    Picks one of the still free cells at random, ignoring the game situation
    """

    def __init__(self, i):
        """
        Constructor

        params:
            - i: Agent ID
        """
        super().__init__(i)

    def next_move(self, board):
        """
        Compute next move

        draws a random entry from the free list of the board

        params:
            - board: current board state

        returns:
            - 2 tuple of the agent ID and the selected cell
        """
        candidates = board.get_free_list()
        pick = np.random.randint(0, len(candidates))
        return self.i, candidates[pick]


class GoodAgent(Agent):
    """
    Good Tic-Tac-Toe Agent

    Tries to enhance its own lines according to length and blocks opponents lines
    """

    def __init__(self, i):
        """
        Constructor

        params:
            - i: Agent ID
        """
        super().__init__(i)
        # Magical weight vector containing the agents logic: one weight per
        # entry of the feature vector delivered by board.get_lines
        self.w = np.array([100, 8, 4, 2, 1, 32, 16, 8, 4, 2])

    def evaluate(self, board):
        """
        Evaluate a boards quality, see V-function

        params:
            - board: the board state to rate

        returns:
            A scalar number: the weighted sum of the boards line features
        """
        features = board.get_lines(self)
        return np.dot(self.w, features)

    def next_move(self, board):
        """
        Compute next move

        tries every free cell on a copy of the board and selects the cell
        whose resulting board gets the highest rating by the evaluate function

        params:
            - board: current board state

        returns:
            - 2 tuple of the agent ID and the selected cell
        """
        candidates = board.get_free_list()
        ratings = []
        for cell in candidates:
            successor = deepcopy(board).execute((self.i, cell))
            ratings.append(self.evaluate(successor))
        best = np.argmax(ratings)
        return self.i, candidates[best]


class GreedyAgent(GoodAgent):
    """
    Greedy Tic-Tac-Toe Agent

    Tries to extend its own lines without caring for the opponent
    """

    def __init__(self, i):
        """
        Constructor

        params:
            - i: Agent ID
        """
        super().__init__(i)
        # Magical weight vector containing the agents logic: the first half
        # carries all the weight, the second half of the features is ignored
        first_half = [10000, 1000, 100, 10, 1]
        second_half = [0] * 5
        self.w = np.array(first_half + second_half)


class LearningAgent(GoodAgent):
    """
    Adaptive Tic-Tac-Toe Agent

    Learn a strategy based on the features provided by the board
    """

    def __init__(self, i, eta):
        """
        Constructor

        params:
            - i: Agent ID
            - eta: stored as self.eta (presumably the learning rate to be
              used by learn, which does not use it yet)
        """
        super().__init__(i)
        self.eta = eta
        # Start without any knowledge: ten weights, all zero
        self.w = np.zeros(10)

    def learn(self, boards):
        """
        Learn the weight vector based on the boards observed in a game

        TODO: to be implemented by the student

        params:
            - boards: list of boards observed in a game
        returns:
            - learning error (constantly 0 until implemented)
        """
        return 0

    def __str__(self):
        """
        Describe the agent by eta, ID and its weights with two decimals
        """
        weights = ", ".join(f'{w:.2f}' for w in self.w)
        return f"Learning Agent(eta: {self.eta}) ID: {self.i}: {weights}"


In [None]:
def compete(agent1, agent2, n):
    """
    Let two agents play a series of games on a 5x5 board without learning

    params:
        - agent1: reference to first agent
        - agent2: reference to second agent
        - n: number of games
    returns:
        - Dictionary counting the results (Board.DRAW, Board.WON, Board.LOST)
          as reported by Game.run
    """
    results = dict.fromkeys((Board.DRAW, Board.WON, Board.LOST), 0)
    for _ in range(n):
        outcome, _boards, _agents = Game(5, agent1, agent2).run()
        results[outcome] += 1
    print(f"Results:\n\tWon: {results[Board.WON]}\n\tLost: {results[Board.LOST]}\n\tDraw: {results[Board.DRAW]}")
    return results


def train(agent1, agent2, n):
    """
    Competition between two agents with learning

    The agents play epochs of n games each on a 5x5 board. After every game
    each LearningAgent learns from the observed boards. Training stops when
    a learning error is 0, when a learning error changed by less than 1
    compared to the previous epoch or when the result counts of two
    consecutive epochs differ by at most 1 in total.

    params:
        - agent1: reference to first agent
        - agent2: reference to second agent
        - n: number of games per epoch
    returns:
        - List with one dictionary of result counts
          (Board.DRAW, Board.WON, Board.LOST) per epoch
    """
    agents = [agent1, agent2]
    # Positions (within agents) of the agents that are able to learn
    l_agents = [idx for idx, agent in enumerate(agents) if isinstance(agent, LearningAgent)]
    e = []
    results = []
    while True:
        e.append(np.zeros(len(l_agents)))
        results.append({Board.DRAW: 0, Board.WON: 0, Board.LOST: 0})
        for _ in range(n):
            result, boards, _ = Game(5, agent1, agent2).run()
            # The error array has one slot per learning agent, so it is
            # addressed by slot and not by the agents position in agents
            # (which is out of range if only the second agent learns).
            for slot, idx in enumerate(l_agents):
                # Only the error of the last game of an epoch is kept
                e[-1][slot] = agents[idx].learn(boards)
            results[-1][result] += 1
        for slot, idx in enumerate(l_agents):
            print(f"{agents[idx]} - Error: {e[-1][slot]:.2f}")
        print(f"Results: Won: {results[-1][Board.WON]} Lost: {results[-1][Board.LOST]} Draw: {results[-1][Board.DRAW]}")
        if (e[-1] == 0).any():
            break
        if len(e) >= 2:
            if (np.abs(e[-2] - e[-1]) < 1).any():
                break
            d_r = sum(abs(results[-2][key] - results[-1][key]) for key in results[-2])
            if d_r <= 1:
                break
    return results


In [None]:
# Demo: a single game of the good agent against the greedy one, rendered as animation
good = GoodAgent(3)
greed = GreedyAgent(4)
game = Game(5, good, greed)
result, boards, agents = game.run()
game.render(boards)

In [None]:
# Two learning agents differing in ID and eta; printing shows their initial weights
AgentSmith = LearningAgent(1, 0.1)
AgentSmith2 = LearningAgent(2, 0.5)
print(AgentSmith, AgentSmith2, sep="\n")