# Exercises 1 - Representing Adversarial Game States

Note that this `GameState` supports any board size and any number of players (as long as both are positive).

In [552]:
import random

class GameState:
    """Generalized Tic-Tac-Toe for any board size and number of players.

    The board is a ``size`` x ``size`` list of lists. ``0`` marks an empty
    square and ``k`` marks a square taken by player ``k``. Players move in
    numeric order starting with player 1, wrapping back to player 1 after
    player ``num_players``. A player wins by owning every square of a row,
    a column, or one of the two main diagonals.
    """

    def __init__(self, num_players = 2, size = 3, print_each_move = True):
        """Create an empty board with player 1 to move.

        Args:
            num_players: Number of players taking turns; must be positive.
            size: Side length of the square board; must be positive.
            print_each_move: If true, ``play_game`` prints the state after
                every move.

        Raises:
            ValueError: If ``num_players`` or ``size`` is not positive.
        """
        # Fail early: an empty board would otherwise only blow up later,
        # inside random.choice() on an empty move list.
        if num_players < 1:
            raise ValueError(f'num_players must be positive, got {num_players}')
        if size < 1:
            raise ValueError(f'size must be positive, got {size}')

        # Each row is its own list so that squares can be set independently.
        self.board = [[0] * size for _ in range(size)]
        # Player 1 goes first.
        self.player_to_move = 1
        self.num_players = num_players
        self.size = size
        self.print_each_move = print_each_move
        # Cached result of get_status(); refreshed by make_move().
        self.winner = 0
        self.done = False

    def make_move(self, row, col):
        """Place the current player's piece at (row, col) and advance the turn.

        The move is assumed to be valid; an occupied square trips the
        assertion. After the move, ``self.done`` and ``self.winner`` are
        refreshed from ``get_status()``.
        """
        assert self.board[row][col] == 0
        self.board[row][col] = self.player_to_move
        # Hand the turn to the next player, wrapping around to player 1.
        self.player_to_move += 1
        if self.player_to_move > self.num_players:
            self.player_to_move = 1
        self.done, self.winner = self.get_status()

    def get_valid_moves(self):
        """Return every empty square as a (row, col) tuple, in row-major order."""
        return [
            (row, col)
            for row in range(self.size)
            for col in range(self.size)
            if self.board[row][col] == 0
        ]

    def is_game_over(self):
        """Return the cached "game over" flag set by the last make_move()."""
        return self.done

    def get_status(self):
        """Inspect the board and return ``(done, winner)``.

        Returns:
            ``(True, k)`` if player ``k`` owns a full row, column or main
            diagonal (the lowest-numbered such player if there are several),
            ``(False, 0)`` if nobody has won and an empty square remains,
            ``(True, 0)`` for a full board with no winner (cat's game).
        """
        n = self.size
        board = self.board
        rows = [[board[r][c] for c in range(n)] for r in range(n)]
        cols = [[board[r][c] for r in range(n)] for c in range(n)]
        diagonals = [
            [board[i][i] for i in range(n)],
            [board[i][n - 1 - i] for i in range(n)],
        ]
        lines = rows + cols + diagonals

        # Players are checked in numeric order, one player at a time.
        for player in range(1, self.num_players + 1):
            if any(all(cell == player for cell in line) for line in lines):
                return True, player

        if any(cell == 0 for row in rows for cell in row):
            return False, 0  # Game not over yet

        return True, 0  # Cat's game

    def print(self):
        """Print '<player to move>/<number of players>' followed by the board rows."""
        print(f'{self.player_to_move}/{self.num_players}')
        for row in range(self.size):
            print(self.board[row])

    def get_winner(self):
        """Return the cached winner (0 while undecided or for a cat's game)."""
        return self.winner

    def pick_move(self):
        """Choose a uniformly random valid move, announce it, and return it."""
        vm = self.get_valid_moves()
        mv = random.choice(vm)
        print(f'Player {self.player_to_move} chose move {mv} from valid moves {vm}: ')
        return mv

    def play_game(self):
        """Play moves chosen by pick_move() until the game ends, then announce the result."""
        print('Starting Game!')
        while not self.done:
            mv = self.pick_move()
            self.make_move(*mv)
            if self.print_each_move:
                self.print()

        if self.winner == 0:
            print("Cat's Game!")
        else:
            print('Game over. Winner: ', self.winner)
            

# Play one complete two-player game on a 3x3 board with randomly chosen
# moves, printing the board after every move.
gs = GameState(num_players=2, size=3, print_each_move=True)
gs.play_game()

        

Starting Game!
Player 1 chose move (2, 2) from valid moves [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)]: 
2/2
[0, 0, 0]
[0, 0, 0]
[0, 0, 1]
Player 2 chose move (1, 2) from valid moves [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1)]: 
1/2
[0, 0, 0]
[0, 0, 2]
[0, 0, 1]
Player 1 chose move (1, 1) from valid moves [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (2, 0), (2, 1)]: 
2/2
[0, 0, 0]
[0, 1, 2]
[0, 0, 1]
Player 2 chose move (0, 2) from valid moves [(0, 0), (0, 1), (0, 2), (1, 0), (2, 0), (2, 1)]: 
1/2
[0, 0, 2]
[0, 1, 2]
[0, 0, 1]
Player 1 chose move (0, 1) from valid moves [(0, 0), (0, 1), (1, 0), (2, 0), (2, 1)]: 
2/2
[0, 1, 2]
[0, 1, 2]
[0, 0, 1]
Player 2 chose move (2, 0) from valid moves [(0, 0), (1, 0), (2, 0), (2, 1)]: 
1/2
[0, 1, 2]
[0, 1, 2]
[2, 0, 1]
Player 1 chose move (2, 1) from valid moves [(0, 0), (1, 0), (2, 1)]: 
2/2
[0, 1, 2]
[0, 1, 2]
[2, 1, 1]
Game over. Winner:  1


# Exercises 2 - Minimax and Adversarial Game States

## Ex 1 - `copy()`, `__str__()`

In [554]:


# Test copying: compare plain assignment, shallow copy and deep copy on a
# nested list, printing all lists after each mutation.
a = [[1,2],[3,4]]
print(a)
# Plain assignment only creates a second name for the same list object.
b = a
print(b)
# Mutating through b is therefore visible through a as well.
b[0][0]=5
print(a, b)
import copy as cp
# A shallow copy creates a new outer list, but its elements are still the
# same inner lists that a and b hold.
c = cp.copy(b)
# Writing into an inner list through c also changes a and b.
c[0][1] = 7
print(a,b,c)
# A deep copy duplicates the inner lists too, so d is fully independent.
d = cp.deepcopy(c)
# Only d changes here; a, b and c keep their previous contents.
d[1][0] = 9
print(a,b,c,d)


# Compare str() and repr() of a GameState.
# NOTE(review): the class body defines no __str__ and the patch is only
# assigned further down in this cell, so on a fresh kernel str(g1) falls back
# to the default object representation; the recorded output presumably comes
# from re-running the cell after the patch was applied -- confirm.
g1 = GameState()
print(str(g1), repr(g1))

# For comparison: str() of a nested list.
print(str(a))


# Update print() to use a toString method
def game_state_str(self):
    """Render a game state as text.

    The first line is '<player to move>/<number of players>'; it is followed
    by one line per board row. Every line, including the last, ends with a
    newline.
    """
    header = f'{self.player_to_move}/{self.num_players}'
    rows = [str(self.board[i]) for i in range(self.size)]
    return ''.join(line + '\n' for line in [header, *rows])

# augment or update methods of the GameState class
# GameState.evaluate = evaluate
# GameState.minimax = minimax
# cp.deepcopy is a plain function, so as a class attribute it binds like a
# method: g.copy() calls cp.deepcopy(g) and returns an independent clone.
GameState.copy = cp.deepcopy
# str(game) and print(game) now use the textual board rendering.
GameState.__str__ = game_state_str
# Replace print() so that it goes through __str__ as well.
GameState.print = lambda self: print(str(self))

# Verify that after copy, the boards are independent
g2 = GameState(print_each_move=False)
print(str(g2))
# Clone the untouched starting position before any move is made.
g3 = g2.copy()
# Playing g2 to the end must leave the clone g3 unchanged ...
g2.play_game()
print(g2, g3)
# ... and playing g3 afterwards must leave the finished g2 unchanged.
g3.play_game()
print(g2,g3)



[[1, 2], [3, 4]]
[[1, 2], [3, 4]]
[[5, 2], [3, 4]] [[5, 2], [3, 4]]
[[5, 7], [3, 4]] [[5, 7], [3, 4]] [[5, 7], [3, 4]]
[[5, 7], [3, 4]] [[5, 7], [3, 4]] [[5, 7], [3, 4]] [[5, 7], [9, 4]]
1/2
[0, 0, 0]
[0, 0, 0]
[0, 0, 0]
 <__main__.GameState object at 0x116c4a5d0>
[[5, 7], [3, 4]]
1/2
[0, 0, 0]
[0, 0, 0]
[0, 0, 0]

Starting Game!
Player 1 chose move (0, 0) from valid moves [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)]: 
Player 2 chose move (1, 2) from valid moves [(0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)]: 
Player 1 chose move (2, 1) from valid moves [(0, 1), (0, 2), (1, 0), (1, 1), (2, 0), (2, 1), (2, 2)]: 
Player 2 chose move (1, 0) from valid moves [(0, 1), (0, 2), (1, 0), (1, 1), (2, 0), (2, 2)]: 
Player 1 chose move (0, 1) from valid moves [(0, 1), (0, 2), (1, 1), (2, 0), (2, 2)]: 
Player 2 chose move (2, 2) from valid moves [(0, 2), (1, 1), (2, 0), (2, 2)]: 
Player 1 chose move (2, 0) from valid moves [(0, 2), (1, 1), (2, 0)]: 
Pla

## Ex 1b - Adversarial Minimax (no pruning)

In [1]:
def indent(depth):
    """Return a newline followed by ``depth`` tab characters."""
    return '\n' + '\t' * depth

class MinimaxGameState(GameState):
    """Two-player GameState whose moves are chosen by depth-limited minimax.

    Player 1 maximizes the score and player 2 minimizes it. The search does
    no pruning and visits moves in the order returned by get_valid_moves(),
    so ties are always resolved in favour of the first best move found.
    """

    def __init__(self, size = 3, print_each_move = False, max_depth = 2, print_each_minimax = False):
        """Create an empty two-player board.

        Args:
            size: Side length of the square board.
            print_each_move: If true, play_game() prints the state after
                every move.
            max_depth: How many moves ahead minimax() looks; must be > 0.
            print_each_minimax: If true, minimax() traces every state it
                visits.

        Raises:
            ValueError: If ``max_depth`` is not positive.
        """
        if max_depth <= 0:
            raise ValueError('max depth has to be > 0')
        super().__init__(2, size, print_each_move)
        self.max_depth = max_depth
        # Distance from the state where the current search started; only
        # the copies made inside minimax() ever increase it.
        self.depth = 0
        self.print_each_minimax = print_each_minimax

    def copy(self):
        """Return an independent deep copy of this state."""
        # Defined here so the search does not depend on a copy() method
        # being attached to GameState from outside the class.
        import copy
        return copy.deepcopy(self)

    # override
    def pick_move(self):
        """Choose the current player's move with minimax(), announce it, and return it."""
        _, mv = self.minimax()
        print(f'Player {self.player_to_move} chose move {mv}: ')
        return mv

    def evaluate(self):
        """Score this state: 1 if player 1 has won, -1 if player 2 has won, else 0."""
        winner = self.get_winner()
        if winner == 2:
            return -1
        if winner == 1:
            return 1
        return 0

    def minimax(self):
        """Search the game tree below this state, without pruning.

        Returns:
            ``(best_score, best_move)`` for the player to move. ``best_move``
            is ``None`` when the state is terminal or the depth limit has
            been reached; in that case ``best_score`` is evaluate().

        Raises:
            ValueError: If the game has more than two players.
        """
        if self.num_players > 2:
            raise ValueError(f"Can't do minimax with more than 2 players ({self.num_players})")

        if self.print_each_minimax:
            print('minimax state: ', self.game_state_str())

        # If we've reached the maximum depth or the game is over, return the score.
        if self.depth == self.max_depth or self.is_game_over():
            val = self.evaluate()
            if self.print_each_minimax or val != 0:
                print(f"{self.indent()} Eval: {val}")
            return val, None

        # Player 1 wants to maximize the score, player 2 wants to minimize it.
        # Multiplying by `sign` lets both share one "greater than" comparison.
        sign = 1 if self.player_to_move == 1 else -1
        best_score = -float('inf') * sign  # -inf for player 1, +inf for player 2
        best_move = None

        # Try all possible moves.
        for move in self.get_valid_moves():
            new_state = self.copy()
            new_state.make_move(*move)
            new_state.depth += 1
            score, _ = new_state.minimax()
            if self.print_each_minimax or score != 0:
                print(new_state.game_state_str())
                print(f"{self.indent()} player {self.player_to_move}: move {move} returned score {score}")

            # Strict comparison: among equally good moves the first one wins.
            # TODO - randomly pick a move out of moves that are all equal?
            if sign * score > sign * best_score:
                if self.print_each_minimax:
                    print(f"{self.indent()} player {self.player_to_move} updates best move to: {move}")
                best_score = score
                best_move = move

        return best_score, best_move

    def indent(self):
        """Return a newline plus one tab per level of search depth."""
        return indent(self.depth)

    def game_state_str(self):
        """Render the state, with every line indented to the current search depth.

        The first line shows the player to move, the number of players, the
        game-over flag and the winner; the board rows follow.
        """
        parts = [
            self.indent(),
            f'{self.player_to_move}/{self.num_players}\t Game Over: {self.done}\tWinner:{self.winner}',
        ]
        for row in range(self.size):
            parts.append(self.indent())
            parts.append(str(self.board[row]))
        return ''.join(parts)

    def print(self):
        """Print the depth-indented rendering of this state."""
        print(self.game_state_str())
    
# Let minimax (looking up to 5 moves ahead) choose the moves for both players
# of a 3x3 game, printing the board after every move.
mg = MinimaxGameState(print_each_move=True, max_depth=5)
mg.play_game()

# x = indent(4)
# print(x, 'hi')

IndentationError: unexpected indent (1590562969.py, line 68)

### Analysis
First, it's a bit slow, because there's no pruning.
Second... shouldn't we be able to get a win in just 5 moves?

```
[1, 2, 0]
[1, 2, 0]
[1, 0, 0]
```

Shouldn't the algorithm be able to find and detect that?

Also... There's no randomization of visitation order, so you always get the same result with this particular algorithm. 

### Update
I had broken my `is_game_over()` algorithm: it treated any board that still had an open spot as not "done" -- even if one player had already won.
So now my minimax actually stops at a won position instead of analyzing the entire board.

### Results
Again, there's no pruning yet.

#### Depths 1-3:
In a depth-first search, this is the very first winning state.
But with limited depth to search, player 2 isn't able to detect this possibility, so player 1 wins.
```
[1, 2, 1]
[2, 1, 2]
[1, 0, 0]
```

#### Depth 4:
Apparently, player 2 was able to detect a win for player 1, so it blocked that line and went on to win itself!
```
[1, 2, 1]
[2, 2, 1]
[1, 2, 0]
```

#### Depth 5:
Player 1 wins (left column).
```
[1, 2, 2]
[1, 1, 2]
[1, 0, 0]
```

#### Depth 6:
Back to the traditional win for player 1.
```
[1, 2, 1]
[2, 1, 2]
[1, 0, 0]
```

#### Depth 7?
I'm gonna go ahead and implement pruning now.
Actually, player 1 won again
```
[1, 2, 1]
[2, 1, 2]
[1, 0, 0]
```

But *now* I'm gonna do pruning.

## Ex 2 - Alpha/Beta Pruning

## Ex 3 - Adversarial *WITH* Pruning

# Exercises 3 - Minimax with Alpha-Beta Pruning

# Exercises 4 - Monte Carlo Tree Search in Adversarial Game States