# Zadanie 3 (7 punktów)

Celem ćwiczenia jest imlementacja metody [Minimax z obcinaniem alpha-beta](https://en.wikipedia.org/wiki/Alpha%E2%80%93beta_pruning) do gry  Czwórki (ang. Connect Four).

W trakcie ćwiczenia można skorzystać z reposytorium z implementacją gry [Connect Four udostępnionym przez Jakuba Łyskawę](https://github.com/lychanl/two-player-games). Ewentualnie, można zaimplementować samemu grę Connect Four.

Należy zaimplementować co najmniej dwie heurystyki do ewaluacji planszy.  

Implementację algorytmu Minimax (klasa `MiniMaxSolver`) należy przetestować używając różną głębokość przeszukiwania symulując grę "komputer vs komputer". W eksperymentach należy również zademonstrować różnice pomiędzy heurystykami.  

W ramach zadania można zaimplementować dowolną liczbę dodatkowych metod w klasie `MiniMaxSolver`.

Punktacja:

- Działająca metoda Minimax oraz heurystyki do ewaluacji planszy. - **2 pkt**
- Działająca metoda Minimax z obcinaniem alpha-beta. - **1.5 pkt**
- Analiza jakości solvera w zależności od głębokości przeszukiwania - wykresy. - **2pkt**
    - należy zaimplementować w tym celu prostą wizualizację rozgrywki dwóch agentów
- Jakość kodu. - **1.5pkt**

Aby importowanie elementów z poniższej komórki działało należy umieścić tego notebooka w tym samym folderze co paczkę `two_player_games`:
```
├── LICENSE
├── README.md
├── minimax.ipynb # <<< HERE
├── test
│   ├── __init__.py
│   ├── test_connect_four.py
│   ├── test_dots_and_boxes.py
│   └── test_pick.py
└── two_player_games
    ├── __init__.py
    ├── games
    │   ├── connect_four.py
    │   └── dots_and_boxes.py
    ├── move.py
    ├── player.py
    └── state.py
```

In [50]:
from typing import Tuple, List

from setuptools.command.easy_install import current_umask

from two_player_games.player import Player
from two_player_games.games.connect_four import ConnectFour, ConnectFourMove, ConnectFourState
import numpy as np
from random import choice
from copy import copy
import time

Wielkość planszy

In [51]:
ROW_COUNT = 6
COLUMN_COUNT = 7

In [52]:
class MinMaxSolver:
    def __init__(self, game: ConnectFour, row_count: int, column_count: int, max_player: Player, min_player: Player):
        self._game = game
        self._row_count = row_count
        self._column_count = column_count
        self._max_player = max_player
        self._min_player = min_player
        
        # weights for the amount of player tokens in a segment in the future iteration
        self._heuristic_prizes = {
            "players_one_in_segment": 1,
            "players_two_in_segment": 5,
            "players_three_in_segment": 20,
            "players_four_in_segment": 30,
            "opponent_four_in_segment": 90
        }
    
    def _simulate_move(self, state, move_column):
        state_copy = copy(state)
        state_copy = state_copy.make_move(ConnectFourMove(move_column))
        return state_copy
    
    def get_heuristic(self, current_state: ConnectFourState) -> float:
        heuristics_value = 0

        # Vertical
        for column in range(self._column_count):
            for row in range(self._row_count - 3):
                segment = [current_state.fields[column][row + i] for i in range(4)]
                heuristics_value += self._get_segment_heuristic(segment)

        # Horizontal
        for column in range(self._column_count - 3):
            for row in range(self._row_count):
                segment = [current_state.fields[column + i][row] for i in range(4)]
                heuristics_value += self._get_segment_heuristic(segment)

        # Diagonal
        for column in range(self._column_count - 3):
            for row in range(self._row_count - 3):
                segment = [current_state.fields[column + i][row + i] for i in range(4)]
                heuristics_value += self._get_segment_heuristic(segment)

        # Antydiagonal
        for column in range(self._column_count - 3):
            for row in range(3, self._row_count):
                segment = [current_state.fields[column + i][row - i] for i in range(4)]
                heuristics_value += self._get_segment_heuristic(segment)

        return heuristics_value

    def _get_segment_heuristic(self, segment: List):
        heuristics_value = 0
        
        if segment.count(self._max_player) == 4:
            heuristics_value += self._heuristic_prizes["players_four_in_segment"]
        elif segment.count(self._max_player) == 3 and segment.count(None) == 1:
            heuristics_value += self._heuristic_prizes["players_three_in_segment"]
        elif segment.count(self._max_player) == 2 and segment.count(None) == 2:
            heuristics_value += self._heuristic_prizes["players_two_in_segment"]
        elif segment.count(self._max_player) == 1 and segment.count(None) == 3:
            heuristics_value += self._heuristic_prizes["players_one_in_segment"]
        elif segment.count(self._min_player) == 3 and segment.count(None) == 1:
            heuristics_value -= self._heuristic_prizes["opponent_four_in_segment"]
        elif segment.count(self._min_player) == 1 and segment.count(None) == 3:
            heuristics_value -= self._heuristic_prizes["players_one_in_segment"]

        return heuristics_value

    def is_valid_move(self, current_state, col_index:int) -> bool:
        if current_state.fields[col_index][-1] is None:
            return True
        else:
            return False

    def get_valid_moves(self, current_state):
        return [valid_column for valid_column in range(self._column_count) if
                self.is_valid_move(current_state, valid_column)]
    
    def get_best_move(self, depth: int, solver=1) -> int|None:
        move = None
        # print("get_best_move start")
        # print(self._game)
        # print(self._game.state)
        if solver == 1:
            move, value = self.minimax_alpha_beta(self._game.state, depth)
        elif solver == 2:
            move, value = self.minimax(self._game.state, depth)
        # print(f"get_best_move end, move: {move}, value: {value}")
        # print(self._game)
        # print(self._game.state)
        return move
        
    def minimax_alpha_beta(self, current_state: ConnectFourState, depth: int, alpha=-np.inf, beta=np.inf, 
                           is_maximizing_player=True) -> Tuple[int|None, float]:
        
        # print(f"minimax_alpha_beta, depth: {depth}, alpha: {alpha}, beta: {beta}")

        if current_state.is_finished():
            if current_state.get_winner() == self._max_player:
                return None, 1e6
            elif current_state.get_winner() == self._min_player:
                return None, -1e6
            else:
                return None, 0
        if depth == 0:
            # print(f"heuristic: {self.get_heuristic(current_state)}")
            return None, self.get_heuristic(current_state)

        valid_moves = self.get_valid_moves(current_state)
        # valid_moves = current_state.get_moves() # czy tutaj wziąć z obecnego stanu
        # print(f"valid_moves: {valid_moves}")
        chosen_move = choice(valid_moves)
        
        if is_maximizing_player:
            value = -np.inf
            for valid_move in valid_moves:
                # print(f"valid_move: {valid_move}")
                _, heuristic = self.minimax_alpha_beta(self._simulate_move(current_state, valid_move), depth - 1, alpha, beta, False)

                if heuristic > value:
                    value = heuristic
                    chosen_move = valid_move

                alpha = max(alpha, value)

                if alpha >= beta:
                    break
                    
        else:
            value = np.inf
            for valid_move in valid_moves:
                # print(f"valid_move: {valid_move}")
                _, heuristic = self.minimax_alpha_beta(self._simulate_move(current_state, valid_move), depth - 1, alpha, beta, True)

                if heuristic < value:
                    value = heuristic
                    chosen_move = valid_move

                beta = min(beta, value)

                if alpha >= beta:
                    break

        return chosen_move, value
    
    def minimax(self, current_state: ConnectFourState, depth: int, is_maximizing_player=True) -> Tuple[int|None, float]:
        # print(10*"----")
        # print(f"minimax, depth: {depth}, is_maximizing_player: {is_maximizing_player}")
        # print(current_state)
        # time.sleep(0.25)

        if current_state.is_finished():
            if current_state.get_winner() == self._max_player:
                return None, 1e6
            elif current_state.get_winner() == self._min_player:
                return None, -1e6
            else:
                return None, 0
        if depth == 0:
            return None, self.get_heuristic(current_state)
        
        valid_moves = self.get_valid_moves(current_state)
        chosen_moves = []
        if is_maximizing_player:
            value = -np.inf
            for valid_move in valid_moves:
                # print(f"valid_move: {valid_move}")
                _, heuristic = self.minimax(self._simulate_move(current_state, valid_move), depth - 1, False)
                
                # print(f"heuristic: {heuristic}")
                
                if heuristic > value:
                    value = heuristic
                    chosen_moves.append((valid_move, heuristic))
            
            return max(chosen_moves, key=lambda x: x[1])
        
        else:
            value = np.inf
            for valid_move in valid_moves:
                # print(f"valid_move: {valid_move}")
                _, heuristic = self.minimax(self._simulate_move(current_state, valid_move), depth - 1, True)
                
                # print(f"heuristic: {heuristic}")
                
                if heuristic < value:
                    value = heuristic
                    chosen_moves.append((valid_move, heuristic))
            
            return min(chosen_moves, key=lambda x: x[1])

Rozgrywka

In [53]:
def simulate(row_count: int, column_count: int, max_player_type, min_player_type,
             max_player_depth, min_player_depth, games_number: int, verbose=0):
    max_player_win_number = 0
    min_player_win_number = 0
    max_player = Player("O")
    min_player = Player("X")
    
    game = ConnectFour(size=(column_count, row_count), first_player=max_player, second_player=min_player)
    solver1 = MinMaxSolver(game, row_count, column_count, max_player, min_player)
    solver2 = MinMaxSolver(game, row_count, column_count, min_player, max_player)
    
    moves_sum = 0
    
    for game_index in range(games_number):
        game = ConnectFour(size=(column_count, row_count), first_player=max_player, second_player=min_player)
        solver1._game = game
        solver2._game = game
        
        print(10*"----")
        print("początek gry")
        
        i = 0
        while not game.is_finished():
            if max_player_type == "alpha-beta":
                game.make_move(ConnectFourMove(solver1.get_best_move(max_player_depth, 1)))
            elif max_player_type == "min-max":
                game.make_move(ConnectFourMove(solver1.get_best_move(max_player_depth, 2)))
            
            # print(20*"----")
            # print(f"Move number: {i} by max player done")
            # print(game)
                
            if game.state.is_finished():
                break
            
            if min_player_type == "alpha-beta":
                game.make_move(ConnectFourMove(solver2.get_best_move(min_player_depth, 1)))
            elif min_player_type == "min-max":
                game.make_move(ConnectFourMove(solver2.get_best_move(min_player_depth, 2)))
            
            # print(20*"----")
            # print(f"Move number: {i} by min player done")
            # print(game)
            
            i += 2
        
        moves_sum += i
        
        if verbose != 0:
            print(f"Game number: {game_index}/{games_number}")
            print(f"Moves: {i}")
        if verbose == 2:
            print(game)
        
        if game.get_winner() is not None:
            if game.get_winner().char == max_player.char:
                print(f"Won: {game.get_winner().char}")
                max_player_win_number += 1
            elif game.get_winner().char == min_player.char:
                print(f"Won: {game.get_winner().char}")
                min_player_win_number += 1
    
    draws_number = games_number - max_player_win_number - min_player_win_number
    average_moves_number = moves_sum/games_number
    
    print(20*"----")
    print(f"Games number: {games_number}")
    print(f"Average moves number: {average_moves_number}")
    print(f"Draws: {draws_number} ({(draws_number/games_number)*100}%)")
    print(f"Max player won: {max_player_win_number} ({(max_player_win_number/games_number)*100}%)")
    print(f"Min player won: {min_player_win_number} ({(min_player_win_number/games_number)*100}%)")
    print(20*"----")

In [54]:
simulate(ROW_COUNT, COLUMN_COUNT, "alpha-beta", "alpha-beta", 2, 2, 10, 2)

----------------------------------------
początek gry
Game number: 0/10
Moves: 34
Current player: X
[X][X][ ][O][O][ ][X]
[X][O][ ][X][O][ ][O]
[O][O][O][O][X][ ][X]
[X][X][X][O][O][ ][O]
[X][O][O][X][O][ ][O]
[X][O][X][O][X][X][X]
Won: O
----------------------------------------
początek gry
Game number: 1/10
Moves: 34
Current player: X
[X][X][ ][O][O][ ][X]
[X][O][ ][X][O][ ][O]
[O][O][O][O][X][ ][X]
[X][X][X][O][O][ ][O]
[X][O][O][X][O][ ][O]
[X][O][X][O][X][X][X]
Won: O
----------------------------------------
początek gry
Game number: 2/10
Moves: 34
Current player: X
[X][X][ ][O][O][ ][X]
[X][O][ ][X][O][ ][O]
[O][O][O][O][X][ ][X]
[X][X][X][O][O][ ][O]
[X][O][O][X][O][ ][O]
[X][O][X][O][X][X][X]
Won: O
----------------------------------------
początek gry
Game number: 3/10
Moves: 34
Current player: X
[X][X][ ][O][O][ ][X]
[X][O][ ][X][O][ ][O]
[O][O][O][O][X][ ][X]
[X][X][X][O][O][ ][O]
[X][O][O][X][O][ ][O]
[X][O][X][O][X][X][X]
Won: O
----------------------------------------
poc