In [7]:
import os
from pathlib import Path
import sys

pwd = Path(os.getcwd())
sys.path.append(str(pwd.parent.parent / "gym-checkers-for-thai"))

In [27]:
from checkers.agents.baselines import play_a_game, RandomPlayer
from checkers.game import Checkers
from checkers.agents import Player
from checkers.agents.alpha_beta import MinimaxPlayer, first_order_adv, material_value_adv
from functools import partial
from tqdm import tqdm
import time
import pygad
import numpy as np

In [23]:
def ga_material_value(king_value, man_value, pieces):
    """
    Zero-th order heuristics
    Heuristic based on advantage in material value
    1. Men are worth `man_value` per man
    1. Kings are worth `king_value` per king
    """
    return man_value * len(pieces["men"]) + king_value * len(pieces["kings"])

def ga_board_value(color, advance_bonus, safety_bonus, pieces):
    value = 0
    for piece_type in Checkers.all_piece_types:
        for sq in pieces[piece_type]:
            # XXX Assuming the standard board (empty corner at upper left)
            row = sq // 4
            col = (sq % 4) * 2 + (row + 1) % 2
            # Advanced men
            if piece_type == "men":
                if color == "black":
                    # Greater row is better
                    advancement = row + 1
                else:
                    # White. Smaller row is better
                    advancement = 8 - row
                value += advancement / 8 * advance_bonus
            # Safe pieces
            if row == 0 or row == 7 or col == 0 or col == 7:
                value += safety_bonus
    return value


def ga_tuned_adv(color,
                    king_value,
                    man_value,
                    advance_bonus,
                    safety_bonus,
                    board,
                    turn,
                    last_moved_piece,):
    black_adv = ga_material_value(king_value, man_value, board["black"])
    black_adv += ga_board_value("black", advance_bonus, safety_bonus, board["black"])
    black_adv -= ga_material_value(king_value, man_value, board["white"])
    black_adv -= ga_board_value("white", advance_bonus, safety_bonus, board["white"])
    return black_adv if color == "black" else -black_adv


In [41]:
def run_experiment(king_value, man_value, advance_bonus, safety_bonus, n_matches=10, max_game_len=100, time_limit=10):
    n_wins, n_draws, n_losses = 0, 0, 0
    stime = time.time()
    looper = tqdm(range(n_matches), unit="matches")
    for i in looper:
        ch = Checkers()

        # GA tuned value function
        black_player = MinimaxPlayer(
            'black',
            value_func=partial(ga_tuned_adv, 'black', king_value, man_value, advance_bonus, safety_bonus),
            rollout_order_gen=lambda x: sorted(x),
            search_depth=4,
            seed=i)

        # original value function
        white_player = MinimaxPlayer(
            'white',
            value_func=partial(first_order_adv, 'white', 200, 100, 20, 0),
            rollout_order_gen=lambda x: sorted(x),
            search_depth=4,
            seed=i)

        #modify this function to put our RL model as white
        winner = play_a_game(ch, black_player.next_move, white_player.next_move, max_game_len, is_show_detail=False)

        # Keep scores
        n_wins += 1 if winner == 'black' else 0
        n_draws += 1 if winner is None else 0
        n_losses += 1 if winner == 'white' else 0

        looper.set_postfix(
            wins=n_wins,
            draws=n_draws,
            losses=n_losses,
        )
        
        if time.time() - stime > time_limit*60:
            break
    
    return n_wins, n_draws, n_losses

In [45]:
def fitness_func(ga_instance, solution, solution_idx):
    # print(solution)
    king_value, man_value, advance_bonus, safety_bonus = solution
    n_wins, n_draws, n_losses = run_experiment(king_value, man_value, advance_bonus, safety_bonus, n_matches=1, max_game_len=100, time_limit=2)

    return n_wins-n_losses

In [46]:
ga_instance = pygad.GA(num_generations=50,
                       num_parents_mating=4,
                       fitness_func=fitness_func,
                       sol_per_pop=8,
                       num_genes=4,
                       init_range_low=0,
                       init_range_high=100,
                       parent_selection_type="sss",
                       keep_parents=1,
                       crossover_type="single_point",
                       mutation_type="random",
                       mutation_percent_genes=10)

If you do not want to mutate any gene, please set mutation_type=None.


In [47]:
ga_instance.run()

100%|██████████| 1/1 [00:03<00:00,  3.86s/matches, draws=0, losses=1, wins=0]
100%|██████████| 1/1 [00:16<00:00, 16.07s/matches, draws=1, losses=0, wins=0]
100%|██████████| 1/1 [00:05<00:00,  5.11s/matches, draws=0, losses=1, wins=0]
100%|██████████| 1/1 [00:06<00:00,  6.48s/matches, draws=0, losses=1, wins=0]
100%|██████████| 1/1 [00:07<00:00,  7.03s/matches, draws=0, losses=1, wins=0]
100%|██████████| 1/1 [00:07<00:00,  7.10s/matches, draws=0, losses=1, wins=0]
100%|██████████| 1/1 [00:22<00:00, 22.24s/matches, draws=1, losses=0, wins=0]
100%|██████████| 1/1 [00:05<00:00,  5.32s/matches, draws=0, losses=1, wins=0]
100%|██████████| 1/1 [00:29<00:00, 29.61s/matches, draws=1, losses=0, wins=0]
100%|██████████| 1/1 [00:05<00:00,  5.76s/matches, draws=0, losses=1, wins=0]
100%|██████████| 1/1 [00:08<00:00,  8.52s/matches, draws=0, losses=1, wins=0]
100%|██████████| 1/1 [00:04<00:00,  4.78s/matches, draws=0, losses=0, wins=1]
100%|██████████| 1/1 [00:29<00:00, 29.78s/matches, draws=1, loss

KeyboardInterrupt: 

In [48]:
solution, solution_fitness, solution_idx = ga_instance.best_solution()

print("Parameters of the best solution : {solution}".format(solution=solution))
print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness))

100%|██████████| 1/1 [00:04<00:00,  4.95s/matches, draws=0, losses=0, wins=1]
100%|██████████| 1/1 [00:05<00:00,  5.02s/matches, draws=0, losses=0, wins=1]
  0%|          | 0/1 [00:05<?, ?matches/s]


KeyboardInterrupt: 