Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: Policy Search

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The player **taking the last object wins**.

* Task3.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task3.2: An agent using evolved rules
* Task3.3: An agent using minmax
* Task3.4: An agent using reinforcement learning

## Instructions

* Create the directory `lab3` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.

**Deadline**

T.b.d.


In [542]:
import logging
import random
import numpy as np
import functools
from typing import Callable
from itertools import accumulate
from copy import deepcopy
from operator import xor
from collections import namedtuple

random.seed(42)

## NIM Game 

In [543]:

Nimply = namedtuple("Nimply", "row, num_objects")

In [544]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i*2 + 1 for i in range(num_rows)]
        self._k = k
    
    def __str__(self):
        return f"{self._rows}"

    def nimming(self, row: int, num_objects: int) -> None:
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        assert num_objects > 0, f"You have to pick at least one"
        self._rows[row] -= num_objects
        if sum(self._rows) == 0:
            logging.debug("Yeuch")
    
    def nimming2(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    @property
    def k(self) -> int:
        return self._k

In [545]:
def nim_sum(rows: list) -> int:
    # List XOR
    # Using reduce() + lambda + "^" operator
    res = functools.reduce(lambda x, y: x ^ y, rows)
    return res


## Task3.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)

### Creating the NIM Table 

In [546]:
N_ROWS = 5
GAMEOVER = [0 for _ in range(N_ROWS)]
K = None

Table = Nim(N_ROWS, K)


In [547]:
def cook_status(state: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = [
        (r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1) if state.k is None or o <= state.k
    ]
    cooked["active_rows_number"] = sum(o > 0 for o in state.rows)
    cooked["shortest_row"] = min((x for x in enumerate(state.rows) if x[1] > 0), key=lambda y: y[1])[0]
    cooked["longest_row"] = max((x for x in enumerate(state.rows)), key=lambda y: y[1])[0]
    cooked["nim_sum"] = nim_sum(state.rows)

    brute_force = list()
    for m in cooked["possible_moves"]:
        tmp = deepcopy(state)
        tmp.nimming2(m)
        brute_force.append((m, nim_sum(tmp.rows)))
    cooked["brute_force"] = brute_force

    return cooked

### Player:

#### Winning strategy (Expert Agent)

To understand the algorithm of the winning strategy, look at [*Nim*](https://en.wikipedia.org/wiki/Nim)!

In [548]:
class Player:
    def __init__(self, strategy = 'best') -> None:
        # Two parts for the best strategy:
        # 0 -> before all rows have one element
        # 1 -> after all rows have one element
        self._best_strategy = 0

        assert strategy in ['best', 'best_prof', 'pure_random'], f"Strategy non-available"
        self._strategy = strategy

    def moves(self, Nim):
        if self._strategy == 'best':
            return self.best_strategy(Nim)
        elif self._strategy == 'best_prof':
            return self.best_strategy_by_prof(Nim)
        elif self._strategy == 'pure_random':
            return self.pure_random(Nim)
        else: 
            assert f"Can't use a strategy"

        return

    def pure_random(self, Nim):

        # The opponent choose randomly a non-empty row 
        nonzeroind = np.nonzero(Nim.rows)[0]
        random_row = random.choice(nonzeroind)

        # The opponen choose to remove a random number of elements
        if Nim._k == None:
            random_elements = random.randint(1,Nim.rows[random_row])
        else:
            random_elements = random.randint(1,min(Nim._k,Nim.rows[random_row]))

        logging.debug(f"Opponent:   <Row: {random_row}- Elements: {random_elements}>")
        # Update table
        Nim.nimming(random_row, random_elements)

        return
        
    def best_strategy(self, Nim):

        # If all the elements are equal or less then k, we can play the 'normal' nim game
        if Nim._k != None and all(v <= Nim._k for v in Nim._rows):
            Nim._k = None

        if Nim._k != None:
            
            # Try brute force:
            for ind, row in enumerate(Nim._rows):

                for elements in range(1, min(row + 1, Nim._k + 1)):
                    # Reset temp_rows
                    temp_rows = Nim._rows.copy()
                    
                    # See if nim_sum == 0
                    temp_rows[ind] -= elements
                    if nim_sum(temp_rows) == 0:

                        # Update table
                        Nim.nimming(ind, elements)
                        return
                        
            
            equal_grater_than_k_ind = [i for i,v in enumerate(Nim._rows) if v >= Nim._k + 1]
            
            random_row = random.choice(equal_grater_than_k_ind)
            elements = Nim._rows[random_row]%(Nim._k+1) 
            

            if elements == 0:
                elements = 1
            # Update table
            Nim.nimming(random_row, elements)
            return

        # If there is only one element greater to one, the agent picks a number of object to make
        # all the rows of the table equal to 1.
        # He can choose to remove all the objects or all the objects but one from the rows with n>1
        if sum(x >= 2 for x in Nim._rows) == 1:
            # Row with more than one element
            equal_grater_than_two_ind = [i for i,v in enumerate(Nim._rows) if v >= 2][0]

            # Change of strategy
            self._best_strategy = 1

            
            # To win, the remaing number of objects has to be even 
            if (sum(x for x in Nim._rows) - Nim._rows[equal_grater_than_two_ind]) % 2 == 0 :
                logging.debug(f"Agent:   <Row: {equal_grater_than_two_ind}- Elements: {Nim._rows[equal_grater_than_two_ind]}>")
                # Update table
                Nim.nimming(equal_grater_than_two_ind, Nim._rows[equal_grater_than_two_ind])
                
            else:

                logging.debug(f"Agent:   <Row: {equal_grater_than_two_ind}- Elements: {Nim._rows[equal_grater_than_two_ind] - 1}>")
                # Update table
                Nim.nimming(equal_grater_than_two_ind, Nim._rows[equal_grater_than_two_ind]-1)
            
            return
        
        # Strategy before all rows have one element
        if self._best_strategy == 0:    
        
            res = nim_sum(Nim._rows)

            for ind, row in enumerate(Nim._rows):

                if row == 0:
                    continue

                if row ^ res < row:
                    logging.debug(f"Agent:  rows = {Nim._rows}")
                    elements = row - (row ^ res)
                    logging.debug(f"Agent:   xor = {row ^ res}")
                    logging.debug(f"Agent:   <Row: {ind}- Elements: {elements}>")
                    # Update table
                    Nim.nimming(ind, elements)

                    return
        
        # Strategy after all rows have one element
        else:

            nonzeroind = np.nonzero(Nim._rows)[0]
            random_row = random.choice(nonzeroind)


            logging.debug(f"Agent:   <Row:{random_row}- Elements: {1}>")
            # Update table
            Nim.nimming(random_row, 1) 
            return
                 

        # Default move -> Random
        nonzeroind = np.nonzero(Nim._rows)[0]
        random_row = random.choice(nonzeroind)

        if Nim._k == None:
            random_elements = random.randrange(1,Nim._rows[random_row] + 1)
        else:
            random_elements = random.randrange(1,min(Nim._k,Nim._rows[random_row])+1)

        logging.debug(f"Agent:   <Row:{random_row}- Elements: {random_elements}>")
        # Update table
        Nim.nimming(random_row, random_elements)    
        return

    def best_strategy_by_prof(self, state: Nim) -> Nimply:
        data = cook_status(state)
        state.nimming2(next((bf for bf in data["brute_force"] if bf[1] == 0), random.choice(data["brute_force"]))[0])
        return 


### Single Match

In [549]:
def single_match(agent_strategy = 'best', opponent_strategy = 'pure_random'):
    
    agent = Player(agent_strategy)
    opponent = Player(opponent_strategy)

    # 0 -> Agent's turn
    # 1 -> Opponent's turn
    turn = 1

    # Game
    while Table._rows != GAMEOVER:
        
        if turn == 0:
            agent.moves(Table)
        else:
            opponent.moves(Table)
        
        turn = 1 - turn
        

    # Game Over

    if turn == 1:
        print(f"Agent WON the match")
    else:
        print(f"Opponent WON the match")
    
    return



### Multiple Games

In [550]:
NUM_MATCHES = 1000


def evaluate(agent_strategy = 'best', opponent_strategy = 'pure_random') -> float:
    
    
    won = 0
    start = 0
    for m in range(NUM_MATCHES):
        agent = Player(agent_strategy)
        opponent = Player(opponent_strategy)

        
        nim = Nim(N_ROWS, 7)
        
        
        # 0 -> Agent's turn
        # 1 -> Opponent's turn
        turn = start

        # the first move is equally distributed within matches
        start = 1 - start 
        
        # turn = random.randint(0,1)

        # Game
        while nim._rows != GAMEOVER:
            logging.debug(f"Actual turn: {turn}")

            logging.debug(f"Table: {nim} and Nim_sum: {nim_sum(nim._rows)}")
            if turn == 0:
                agent.moves(nim)
                
            else:
                opponent.moves(nim)

            logging.debug(f"Table: {nim} and Nim_sum: {nim_sum(nim._rows)}\n")

            
            turn = 1 - turn
        
        # Game Over

        if turn == 1:
            won +=1
        else:
            print(f"Game Lost by the agent is the n°{m}")
            
        
    return won / NUM_MATCHES


In [551]:
logging.getLogger().setLevel(logging.INFO)

# print(f"Agent Won: {evaluate()*100}% of the games")
# print(f"Agent Won: {evaluate(agent_strategy='best_prof')*100}% of the games")
print(f"Agent Won: {evaluate(agent_strategy='best', opponent_strategy='best_prof')*100}% of the games")

Game Lost by the agent is the n°7
Game Lost by the agent is the n°27
Game Lost by the agent is the n°35
Game Lost by the agent is the n°41
Game Lost by the agent is the n°49
Game Lost by the agent is the n°89
Game Lost by the agent is the n°99
Game Lost by the agent is the n°105
Game Lost by the agent is the n°115
Game Lost by the agent is the n°117
Game Lost by the agent is the n°119
Game Lost by the agent is the n°123
Game Lost by the agent is the n°149
Game Lost by the agent is the n°151
Game Lost by the agent is the n°153
Game Lost by the agent is the n°161
Game Lost by the agent is the n°171
Game Lost by the agent is the n°173
Game Lost by the agent is the n°189
Game Lost by the agent is the n°197
Game Lost by the agent is the n°201
Game Lost by the agent is the n°231
Game Lost by the agent is the n°241
Game Lost by the agent is the n°243
Game Lost by the agent is the n°245
Game Lost by the agent is the n°247
Game Lost by the agent is the n°263
Game Lost by the agent is the n°267
