Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB10

Use reinforcement learning to devise a tic-tac-toe player.

### Deadlines:

* Submission: [Dies Natalis Solis Invicti](https://en.wikipedia.org/wiki/Sol_Invictus)
* Reviews: [Befana](https://en.wikipedia.org/wiki/Befana)

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (i.e., it is better to commit an empty work than not to commit at all)

In [1]:
from random import choice
from itertools import combinations
from collections import namedtuple, defaultdict
from copy import deepcopy
import logging

# Notebook-wide logger named "Log". basicConfig() installs the default root
# handler; the logger's own threshold lets only WARNING and above through.
logging.basicConfig()
logger = logging.getLogger("Log")
logger.setLevel(logging.WARNING)

In [2]:
# 3x3 magic square listed row by row: every row, column and diagonal sums
# to 15, so board squares are addressed by these values rather than by index.
MAGIC = [
    2, 7, 6,
    9, 5, 1,
    4, 3, 8,
]
# Board position: the squares held by X and the squares held by O.
State = namedtuple("State", "X O")

In [3]:
class Player:
    """Tic-tac-toe player that moves at random, greedily from learned rewards, or by keyboard.

    After each of its moves the player stores a snapshot of the board state,
    so that :meth:`train` can reward or penalise every position it went
    through during the match.

    Attributes:
        name: display name built from the player number and ``name``.
        moves: squares played by this player in the current match.
        states: board snapshots taken right after each of this player's moves.
        rewardedStates: maps ``(frozenset(X), frozenset(O))`` to the reward
            accumulated for that position.
        isWinner: set by the game when this player completes a winning line.
        isHuman / isSmart: select how :meth:`make_a_move` picks a move.
        isPlayer1: True when the player's moves are written to ``state.X``.
    """

    def __init__(self, numberPlayer, name="CPU", isHuman=False, isSmart=False):
        """Create a player.

        Args:
            numberPlayer: 1 for the player that writes to ``state.X``; any
                other value makes the player write to ``state.O``.
            name: label appended to the generated display name.
            isHuman: when true, moves are read from standard input.
            isSmart: when true (and not human), moves follow the learned rewards.
        """
        self.name = f"Player n.{numberPlayer}:{name}"
        self.moves = []
        self.states = []
        self.rewardedStates = defaultdict(int)
        self.isWinner = False
        self.isHuman = isHuman
        self.isSmart = isSmart
        self.isPlayer1 = numberPlayer == 1

    @staticmethod
    def _state_key(state):
        """Return the hashable form of ``state`` used as key in ``rewardedStates``."""
        return (frozenset(state.X), frozenset(state.O))

    def _reward_after(self, candidate, state):
        """Return the learned reward of the position reached by playing ``candidate``."""
        if self.isPlayer1:
            key = (frozenset(state.X | {candidate}), frozenset(state.O))
        else:
            key = (frozenset(state.X), frozenset(state.O | {candidate}))
        # .get() keeps probing from inserting zero entries into the defaultdict.
        return self.rewardedStates.get(key, 0)

    @staticmethod
    def _ask_move():
        """Read one move from standard input; return None when it is not an integer."""
        try:
            return int(input())
        except ValueError:
            return None

    def move(self, move, possibleMoves, state):
        """Play ``move`` and record it.

        The move is appended to ``self.moves``, removed from ``possibleMoves``
        (which raises if the move is not available), added to this player's
        side of ``state`` and a copy of the resulting state is stored.

        Args:
            move: square to play.
            possibleMoves: mutable collection of free squares; updated in place.
            state: shared board state with ``X`` and ``O`` sets; updated in place.
        """
        self.moves.append(move)
        possibleMoves.remove(move)
        own_squares = state.X if self.isPlayer1 else state.O
        own_squares.add(move)
        # Snapshot, because ``state`` keeps being mutated for the rest of the match.
        self.states.append(deepcopy(state))
        # Lazy %-style arguments: logging formats them only if DEBUG is enabled.
        logger.debug("Mossa di %s: %s", self.name, move)

    def randomMove(self, possibleMoves, state):
        """Play a uniformly random move among ``possibleMoves``."""
        self.move(choice(list(possibleMoves)), possibleMoves, state)

    def smartMove(self, possibleMoves, state):
        """Play the move leading to the position with the highest learned reward.

        Positions never seen during training count as reward 0.  Ties are
        resolved in favour of the first candidate met while iterating
        ``possibleMoves``.

        Raises:
            ValueError: if ``possibleMoves`` is empty.
        """
        if not possibleMoves:
            raise ValueError("smartMove needs at least one possible move")
        best_move = max(
            possibleMoves,
            key=lambda candidate: self._reward_after(candidate, state),
        )
        self.move(best_move, possibleMoves, state)

    def human_move(self, possibleMoves, state):
        """Ask the user for a move until an available square is entered."""
        print("Choose a move: ", possibleMoves)
        move = self._ask_move()
        # Every answer is parsed as an integer; anything else is asked again.
        while move not in possibleMoves:
            print("Move not allowed. Moves: ", possibleMoves)
            move = self._ask_move()
        self.move(move, possibleMoves, state)

    def make_a_move(self, possibleMoves, state):
        """Dispatch to the human, smart or random strategy, in that priority order."""
        if self.isHuman:
            self.human_move(possibleMoves, state)
        elif self.isSmart:
            self.smartMove(possibleMoves, state)
        else:
            self.randomMove(possibleMoves, state)

    def train(self):
        """Update the rewards of every position visited in the finished match.

        Each visited position gets +1 if this player won and -1 otherwise (a
        draw therefore counts as -1); the last position gets the same amount
        once more.  Nothing happens if no position was recorded.
        """
        if not self.states:
            return
        delta = 1 if self.isWinner else -1
        for state in self.states:
            self.rewardedStates[self._state_key(state)] += delta
        # Extra reward/penalty for the final position of the match.
        self.rewardedStates[self._state_key(self.states[-1])] += delta

    def new_match(self):
        """Forget the per-match data; the learned rewards are kept."""
        self.moves = []
        self.states = []
        self.isWinner = False

class Game:
    """A tic-tac-toe table for two players; runs matches and triggers training.

    Squares are addressed by the values of a 3x3 magic square, so a player
    holds a complete line exactly when three of their moves sum to 15.
    """

    def __init__(self, player1, player2):
        """Bind the two players (player1 always moves first) to an empty board."""
        self.player1 = player1
        self.player2 = player2
        # Magic-square value of each board cell, listed row by row.
        self.magic = [2, 7, 6, 9, 5, 1, 4, 3, 8]
        self.possibleMoves = set(range(1, 9+1))
        self.state = State(set(), set())
        self.states = list()

    def print_board(self):
        """Return the board as three newline-terminated rows of 'X', 'O' and '.'."""
        rows = []
        for r in range(3):
            cells = []
            for value in self.magic[r * 3:r * 3 + 3]:
                if value in self.player1.moves:
                    cells.append('X')
                elif value in self.player2.moves:
                    cells.append('O')
                else:
                    cells.append('.')
            rows.append("".join(cells) + '\n')
        return "".join(rows)

    def win(self, player):
        """Return True and flag ``player`` as winner if three of its moves sum to 15.

        Returns False (leaving ``player.isWinner`` untouched) otherwise.
        """
        if any(sum(c) == 15 for c in combinations(player.moves, 3)):
            logger.debug(f" {player.name} wins with these moves: {player.moves}")
            player.isWinner = True
            return True
        return False

    def trainPlayers(self):
        """Run the training step of both players."""
        self.player1.train()
        self.player2.train()

    def exchange_knowledge(self):
        """Merge the reward tables of the two players.

        player2's table is copied into player1's with ``dict.update`` and the
        merged table is then copied back into player2's, so that both players
        know the positions learned while moving first and while moving second.
        """
        self.player1.rewardedStates.update(self.player2.rewardedStates)
        self.player2.rewardedStates.update(self.player1.rewardedStates)

    def controlled_game(self, p1moves, p2moves, isTraining=False):
        """Replay a scripted match (debugging aid).

        Moves are taken pairwise from ``p1moves`` and ``p2moves``; the replay
        stops at the first win, when the board is full after player1's move,
        or when the shorter list is exhausted.  The board is not reset first.

        Args:
            p1moves: squares played by player1, in order.
            p2moves: squares played by player2, in order.
            isTraining: when true, both players are trained after a win.
        """
        for m1, m2 in zip(p1moves, p2moves):
            # Player.move takes (move, possibleMoves, state).
            self.player1.move(m1, self.possibleMoves, self.state)
            logger.debug(self.print_board())
            if self.win(self.player1):
                if isTraining:
                    self.trainPlayers()
                break
            if not self.possibleMoves:
                logger.debug(" It's a draw")
                break
            self.player2.move(m2, self.possibleMoves, self.state)
            logger.debug(self.print_board())
            if self.win(self.player2):
                print ("P2 ha vinto")
                if isTraining:
                    self.trainPlayers()
                break

    def new_match(self, isTraining=False):
        """Reset the board and both players, then play one full match.

        Players alternate, player1 first, until one of them wins or no square
        is left.  With ``isTraining`` true, both players are trained once the
        match ends (win or draw).
        """
        # Reset the per-match data.
        self.possibleMoves = set(range(1, 9+1))
        self.state = State(set(), set())
        self.states = list()
        self.player1.new_match()
        self.player2.new_match()
        # Play the match.
        while self.possibleMoves:
            self.player1.make_a_move(self.possibleMoves, self.state)
            logger.debug(self.print_board())
            if self.win(self.player1):
                if isTraining:
                    self.trainPlayers()
                break
            # Player1 makes the ninth move, so a full board is detected here.
            if not self.possibleMoves:
                logger.debug("It's a draw")
                if isTraining:
                    self.trainPlayers()
                break
            self.player2.make_a_move(self.possibleMoves, self.state)
            logger.debug(self.print_board())
            if self.win(self.player2):
                if isTraining:
                    self.trainPlayers()
                break
        logger.debug(f" OK! {self.player1.name} is X and {self.player2.name} is O")


# Two CPU players (random movers by default) sharing one game table.
player1 = Player(1)
player2 = Player(2)
game = Game(player1, player2)

# Training phase: 50,000 matches; after each one the two players merge what
# they learned, since player1 always moves first and player2 always second.
for episode in range(50_000):
    game.new_match(True)
    game.exchange_knowledge()

# Ten highest-rewarded positions of player1 and ten lowest-rewarded of player2.
logger.info(sorted(game.player1.rewardedStates.items(), key=lambda e: e[1], reverse=True)[:10])
logger.info(sorted(game.player2.rewardedStates.items(), key=lambda e: e[1])[:10])

# Evaluation phase: 50,000 matches, smart player against random player.
ngames = 50_000
smartWins = randomWins = draws = 0
for match_no in range(ngames):
    # Randomly decide which seat (first or second mover) the smart player takes.
    p1smart = choice([True, False])
    game.player1.isSmart = p1smart
    game.player2.isSmart = not p1smart
    if p1smart:
        smart_player, random_player = game.player1, game.player2
    else:
        smart_player, random_player = game.player2, game.player1
    # Play the match and update the counters.
    game.new_match()
    if smart_player.isWinner:
        smartWins += 1
    elif random_player.isWinner:
        randomWins += 1
    else:
        draws += 1

# Report the outcome percentages.
print(f"% smart player wins: {smartWins/ngames*100}")
print(f"% random player wins: {randomWins/ngames*100}")
print(f"% draws: {draws/ngames*100}")

% smart player wins: 83.348
% random player wins: 8.98
% draws: 7.672
