In [1]:
import os
from pathlib import Path
import sys
import time
from functools import partial
from tqdm import tqdm
import copy
import  random

from pprint import pprint

# Working directory of the notebook; later cells build the weights path from it.
pwd = Path(os.getcwd())

# Make the "gym-checkers-for-thai" checkout, located two directories above the
# working directory, importable (presumably it provides the `checkers` package
# imported in the next cell). The membership check keeps sys.path free of
# duplicate entries when this cell is re-run in the same kernel.
_checkers_repo = str(pwd.parent.parent / "gym-checkers-for-thai")
if _checkers_repo not in sys.path:
    sys.path.append(_checkers_repo)

In [2]:
from checkers.agents.baselines import play_a_game, RandomPlayer
from checkers.game import Checkers
from checkers.agents import Player
from checkers.agents.alpha_beta import MinimaxPlayer, first_order_adv

from player import DeepLearningPlayer
# from model.small_model import GDQL as GDQL
from model.medium_model import GDQL_m as GDQL

import mlflow
import mlflow.pytorch

import matplotlib.pyplot as plt
import seaborn as sns

import torch

import numpy as np

In [3]:
# --- Opponent and checkpoint location ---
# Search depth handed to MinimaxPlayer; the training cell uses RandomPlayer
# instead when this is 0.
MINIMAX_SEARCH_DEPTH = 2
# Checkpoints live in a per-depth sub-folder of <working dir>/weights.
WEIGHT_FOLDER = pwd / "weights" / f"vs_depth_{MINIMAX_SEARCH_DEPTH}"

# --- Training schedule ---
# Number of outer training iterations.
N_EPISODES = 100
# Number of games played inside each episode.
N_MATCHES_PER_EPS = 50

# Multiplier applied to the best next-state value when the target Q is formed.
REWARD_DISCOUNT_FACTOR = 0.95

# --- Exploration ---
# Passed to DeepLearningPlayer as epsilon / epsilon_decay / epsilon_min.
# Presumably epsilon-greedy exploration settings; the player implementation is
# not part of this notebook -- TODO confirm.
EPSILON = 0.9
EPSILON_DECAY_FACTOR = 0.999
EPSILON_MIN = 0.33

# --- Rewards ---
# Forwarded to DeepLearningPlayer as win_reward / lose_reward / draw_reward.
WIN_REWARD = 100
LOSE_REWARD = -40
DRAW_REWARD = -20

# --- Optimisation ---
# Number of transitions sampled from DeepLearningPlayer.experience per gradient
# step; no update is made until the buffer holds more than this many.
BATCH_SIZE = 256

TARGET_UPDATE = 4 # update target network every TARGET_UPDATE episodes
# Passed to the GDQL constructor as `lr`.
LEARNING_RATE = 1e-3

In [4]:
# Close any MLflow run left active by an earlier execution of this notebook so
# that the next cell can start a fresh one.
try:
    mlflow.end_run()
except Exception as exc:
    # Best-effort cleanup: report the failure instead of hiding it, and keep
    # going. Unlike a bare `except:`, this lets KeyboardInterrupt and
    # SystemExit propagate.
    print(f"Could not end the previous MLflow run: {exc!r}")

In [5]:
# NOTE(review): the experiment name is kept byte-for-byte (including the
# "gredient" spelling) because it is the key MLflow uses to find the
# experiment; correcting it here would start a new, separate experiment.
mlflow.set_experiment("DQL with gredient descent (medium model)")
mlflow.start_run()

# Record every hyperparameter of this run in a single batched call. Ending the
# cell on this call also stops the notebook from echoing the last logged value.
mlflow.log_params(
    {
        "MINIMAX_SEARCH_DEPTH": MINIMAX_SEARCH_DEPTH,
        "N_EPISODES": N_EPISODES,
        "N_MATCHES_PER_EPS": N_MATCHES_PER_EPS,
        "REWARD_DISCOUNT_FACTOR": REWARD_DISCOUNT_FACTOR,
        "EPSILON": EPSILON,
        "EPSILON_DECAY_FACTOR": EPSILON_DECAY_FACTOR,
        "EPSILON_MIN": EPSILON_MIN,
        "BATCH_SIZE": BATCH_SIZE,
        "TARGET_UPDATE": TARGET_UPDATE,
        "LEARNING_RATE": LEARNING_RATE,
        "WIN_REWARD": WIN_REWARD,
        "LOSE_REWARD": LOSE_REWARD,
        "DRAW_REWARD": DRAW_REWARD,
    }
)

-20

In [6]:
# Make sure the checkpoint directory exists, creating missing parent
# directories as needed; an already-existing directory is left untouched.
os.makedirs(WEIGHT_FOLDER, exist_ok=True)

In [7]:
def _td_target(game, model, reward, next_state):
    """Return the regression target for one sampled transition.

    The target is ``reward`` for a terminal transition (``next_state`` is None)
    and ``reward + REWARD_DISCOUNT_FACTOR * max_a model(next_state, a)``
    otherwise, where the maximum runs over the moves that are legal after
    ``next_state`` has been restored into ``game``.

    Args:
        game: Checkers instance used as scratch space; its state is overwritten
            by ``restore_state(next_state)``.
        model: network that scores candidate moves (the target network).
        reward: reward stored with the transition.
        next_state: saved game state following the transition, or None.

    Returns:
        ``reward`` itself, or the value produced by adding the discounted best
        model output to ``reward``.
    """
    if next_state is None:
        return reward

    game.restore_state(next_state)
    legal_moves = game.legal_moves()
    if len(legal_moves) == 0:
        # Nothing to bootstrap from: treat the transition as terminal instead
        # of letting the -inf starting value leak into the loss.
        return reward

    best_next_value = -np.inf
    # The target is a fixed label for the online network, so the target
    # network's forward passes do not need an autograd graph.
    with torch.no_grad():
        for move in legal_moves:
            model_input = model.board2input(next_state[0], 'black', move)
            best_next_value = max(best_next_value, model(model_input))
    return reward + best_next_value * REWARD_DISCOUNT_FACTOR


# Two networks of the same architecture: `online_model` is the one being
# optimised, `target_model` supplies the bootstrapped targets and is refreshed
# from the online weights every TARGET_UPDATE episodes.
online_model = GDQL(lr=LEARNING_RATE)
target_model = GDQL(lr=LEARNING_RATE)
try:
    online_model.load_state_dict(torch.load(WEIGHT_FOLDER / "online_model.pth"))
    target_model.load_state_dict(torch.load(WEIGHT_FOLDER / "target_model.pth"))
except FileNotFoundError:
    print("No weights found, starting from scratch")
except RuntimeError:
    # Raised, for example, when the checkpoint does not match this model's
    # parameters (such as weights saved from a different architecture).
    print("Weights are corrupted, starting from scratch")

# Best per-episode win rate so far; a checkpoint is written only when it is beaten.
max_win_rate = 0

for episode in range(N_EPISODES):
    stime = time.time()
    n_wins, n_losses, n_draws = 0, 0, 0
    mean_loss = 0  # running sum of the batch losses of this episode
    n_updates = 0  # number of gradient steps actually taken this episode
    # The replay buffer is a class attribute shared by all players; start each
    # episode with an empty one.
    DeepLearningPlayer.experience.clear()

    looper = tqdm(range(N_MATCHES_PER_EPS), unit="matches", leave=True, desc=f"Episode {episode+1}")
    for i in looper:
        ch = Checkers()

        black_player = DeepLearningPlayer('black',
                                model=online_model,
                                epsilon=EPSILON,
                                epsilon_decay=EPSILON_DECAY_FACTOR,
                                epsilon_min=EPSILON_MIN,
                                win_reward=WIN_REWARD,
                                lose_reward=LOSE_REWARD,
                                draw_reward=DRAW_REWARD,)

        if MINIMAX_SEARCH_DEPTH == 0:
            # Random opponent, seeded with the match index.
            white_player = RandomPlayer('white', seed=i)
        else:
            # Minimax opponent using the first-order heuristic.
            white_player = MinimaxPlayer('white',
                                        partial(first_order_adv, 'white', 86, 54.5, 87, 26),
                                        search_depth=MINIMAX_SEARCH_DEPTH)

        # Play one full game, then tell the learner how it ended.
        winner = play_a_game(ch, black_player.next_move, white_player.next_move, 100, is_show_detail=False)
        if winner == 'black':
            n_wins += 1
            black_player.set_win()
        elif winner == 'white':
            n_losses += 1
            black_player.set_lose()
        else:
            # NOTE(review): a draw is only counted here; nothing signals it to
            # the player even though draw_reward is configured -- confirm
            # whether DeepLearningPlayer needs an explicit call for draws.
            n_draws += 1

        # One gradient step per match once the buffer holds more than a batch.
        if len(DeepLearningPlayer.experience) > BATCH_SIZE:
            batch_states = random.sample(DeepLearningPlayer.experience, BATCH_SIZE)

            # The modes do not change inside the batch, so set them once.
            online_model.train()
            target_model.eval()

            # Accumulate the squared TD error over the sampled transitions.
            loss = 0
            for state, action, reward, next_state in batch_states:
                # `ch` has finished its game and is reused as scratch space.
                target_q = _td_target(ch, target_model, reward, next_state)

                model_input = online_model.board2input(state[0], 'black', action)
                online_q = online_model(model_input)

                loss += (online_q - target_q) ** 2
            loss /= BATCH_SIZE

            loss_value = loss.item()
            mean_loss += loss_value
            n_updates += 1
            looper.set_postfix(loss=loss_value,
                               win_rate=n_wins / (i+1),)

            # Back-propagate and update the online network only.
            online_model.optimizer.zero_grad()
            loss.backward()
            online_model.optimizer.step()

    if episode % TARGET_UPDATE == 0:
        target_model.load_state_dict(online_model.state_dict())
        print("\tTarget model updated")
    print(f"\tWins: {n_wins}, Losses: {n_losses}, Draws: {n_draws}")

    # NOTE(review): the metric key "runl time" looks like a typo for
    # "run time"; it is kept unchanged so existing MLflow charts stay continuous.
    mlflow.log_metric("runl time", time.time() - stime, step=episode)
    mlflow.log_metric("win rate", n_wins / N_MATCHES_PER_EPS, step=episode)
    mlflow.log_metric("draw rate", n_draws / N_MATCHES_PER_EPS, step=episode)
    # Average over the updates that really happened; early matches of an
    # episode are skipped while the buffer is still filling.
    mlflow.log_metric("mean of mse loss", mean_loss / max(n_updates, 1), step=episode)
    if n_wins / N_MATCHES_PER_EPS > max_win_rate:
        max_win_rate = n_wins / N_MATCHES_PER_EPS
        torch.save(online_model.state_dict(), WEIGHT_FOLDER / "online_model.pth")
        torch.save(target_model.state_dict(), WEIGHT_FOLDER / "target_model.pth")
        print(f"\tNew max win rate: {max_win_rate}")
        mlflow.pytorch.log_model(online_model, "models")
        mlflow.log_artifact(WEIGHT_FOLDER / "online_model.pth")
        mlflow.log_artifact(WEIGHT_FOLDER / "target_model.pth")

Weights are corrupted, starting from scratch


Episode 1: 100%|██████████| 50/50 [05:47<00:00,  6.95s/matches, loss=44.6, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 2: 100%|██████████| 50/50 [05:51<00:00,  7.04s/matches, loss=13.4, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 3: 100%|██████████| 50/50 [06:09<00:00,  7.40s/matches, loss=24, win_rate=0]  


	Wins: 0, Losses: 50, Draws: 0


Episode 4: 100%|██████████| 50/50 [06:26<00:00,  7.74s/matches, loss=17, win_rate=0]  


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 5: 100%|██████████| 50/50 [05:59<00:00,  7.18s/matches, loss=13.3, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 6: 100%|██████████| 50/50 [06:04<00:00,  7.29s/matches, loss=14.2, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 7: 100%|██████████| 50/50 [05:44<00:00,  6.90s/matches, loss=8.61, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 8: 100%|██████████| 50/50 [06:08<00:00,  7.37s/matches, loss=10.5, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 9: 100%|██████████| 50/50 [05:54<00:00,  7.10s/matches, loss=15, win_rate=0]  


	Wins: 0, Losses: 50, Draws: 0


Episode 10: 100%|██████████| 50/50 [06:11<00:00,  7.42s/matches, loss=10.7, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 11: 100%|██████████| 50/50 [06:05<00:00,  7.30s/matches, loss=9.37, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 12: 100%|██████████| 50/50 [06:06<00:00,  7.33s/matches, loss=11.8, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 13: 100%|██████████| 50/50 [06:11<00:00,  7.43s/matches, loss=9.9, win_rate=0] 


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 14: 100%|██████████| 50/50 [06:07<00:00,  7.35s/matches, loss=9.89, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 15: 100%|██████████| 50/50 [06:19<00:00,  7.59s/matches, loss=13.1, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 16: 100%|██████████| 50/50 [06:01<00:00,  7.22s/matches, loss=17.7, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 17: 100%|██████████| 50/50 [06:11<00:00,  7.43s/matches, loss=13.4, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 18: 100%|██████████| 50/50 [05:53<00:00,  7.07s/matches, loss=9.9, win_rate=0] 


	Wins: 0, Losses: 50, Draws: 0


Episode 19: 100%|██████████| 50/50 [06:42<00:00,  8.06s/matches, loss=19.1, win_rate=0]


	Target model updated
	Wins: 0, Losses: 48, Draws: 2


Episode 20: 100%|██████████| 50/50 [06:10<00:00,  7.42s/matches, loss=10.9, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 21: 100%|██████████| 50/50 [06:08<00:00,  7.37s/matches, loss=13.1, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 22: 100%|██████████| 50/50 [06:14<00:00,  7.50s/matches, loss=12, win_rate=0]  


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 23: 100%|██████████| 50/50 [06:01<00:00,  7.23s/matches, loss=14.9, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 24: 100%|██████████| 50/50 [06:00<00:00,  7.21s/matches, loss=14.4, win_rate=0]


	Wins: 0, Losses: 49, Draws: 1


Episode 25: 100%|██████████| 50/50 [05:47<00:00,  6.95s/matches, loss=13.3, win_rate=0]


	Target model updated
	Wins: 0, Losses: 49, Draws: 1


Episode 26: 100%|██████████| 50/50 [06:17<00:00,  7.54s/matches, loss=9.25, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 27: 100%|██████████| 50/50 [05:43<00:00,  6.87s/matches, loss=9.28, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 28: 100%|██████████| 50/50 [06:14<00:00,  7.49s/matches, loss=9.12, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 29: 100%|██████████| 50/50 [05:59<00:00,  7.18s/matches, loss=11, win_rate=0]  


	Wins: 0, Losses: 50, Draws: 0


Episode 30: 100%|██████████| 50/50 [05:50<00:00,  7.00s/matches, loss=8.78, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 31: 100%|██████████| 50/50 [06:20<00:00,  7.61s/matches, loss=11, win_rate=0]  


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 32: 100%|██████████| 50/50 [06:19<00:00,  7.59s/matches, loss=13.6, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 33: 100%|██████████| 50/50 [05:46<00:00,  6.94s/matches, loss=9.71, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 34: 100%|██████████| 50/50 [05:47<00:00,  6.95s/matches, loss=14.1, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 35: 100%|██████████| 50/50 [06:15<00:00,  7.52s/matches, loss=11, win_rate=0]  


	Wins: 0, Losses: 50, Draws: 0


Episode 36: 100%|██████████| 50/50 [05:47<00:00,  6.94s/matches, loss=13.3, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 37: 100%|██████████| 50/50 [06:21<00:00,  7.62s/matches, loss=12.6, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 38: 100%|██████████| 50/50 [05:56<00:00,  7.13s/matches, loss=14, win_rate=0]  


	Wins: 0, Losses: 50, Draws: 0


Episode 39: 100%|██████████| 50/50 [06:00<00:00,  7.22s/matches, loss=10.8, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 40: 100%|██████████| 50/50 [06:00<00:00,  7.22s/matches, loss=11, win_rate=0]  


	Target model updated
	Wins: 0, Losses: 49, Draws: 1


Episode 41: 100%|██████████| 50/50 [05:42<00:00,  6.85s/matches, loss=16.6, win_rate=0.02]  


	Wins: 1, Losses: 49, Draws: 0
	New max win rate: 0.02


Episode 42: 100%|██████████| 50/50 [05:48<00:00,  6.96s/matches, loss=12.7, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 43: 100%|██████████| 50/50 [05:56<00:00,  7.14s/matches, loss=14.2, win_rate=0.02]  


	Target model updated
	Wins: 1, Losses: 49, Draws: 0


Episode 44: 100%|██████████| 50/50 [05:51<00:00,  7.04s/matches, loss=11.1, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 45: 100%|██████████| 50/50 [06:29<00:00,  7.79s/matches, loss=15, win_rate=0]  


	Wins: 0, Losses: 50, Draws: 0


Episode 46: 100%|██████████| 50/50 [05:37<00:00,  6.75s/matches, loss=67.3, win_rate=0.02]


	Target model updated
	Wins: 1, Losses: 49, Draws: 0


Episode 47: 100%|██████████| 50/50 [05:53<00:00,  7.07s/matches, loss=13.9, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 48: 100%|██████████| 50/50 [05:42<00:00,  6.85s/matches, loss=16.2, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 49: 100%|██████████| 50/50 [05:56<00:00,  7.13s/matches, loss=15.8, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 50: 100%|██████████| 50/50 [05:52<00:00,  7.06s/matches, loss=11.6, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 51: 100%|██████████| 50/50 [05:55<00:00,  7.11s/matches, loss=11.6, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 52: 100%|██████████| 50/50 [05:41<00:00,  6.83s/matches, loss=12.9, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 53: 100%|██████████| 50/50 [05:54<00:00,  7.09s/matches, loss=13.6, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 54: 100%|██████████| 50/50 [05:58<00:00,  7.17s/matches, loss=16.7, win_rate=0.02]  


	Wins: 1, Losses: 49, Draws: 0


Episode 55: 100%|██████████| 50/50 [05:44<00:00,  6.89s/matches, loss=10, win_rate=0]  


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 56: 100%|██████████| 50/50 [05:46<00:00,  6.93s/matches, loss=15.4, win_rate=0.02]  


	Wins: 1, Losses: 49, Draws: 0


Episode 57: 100%|██████████| 50/50 [06:08<00:00,  7.37s/matches, loss=12, win_rate=0]  


	Wins: 0, Losses: 49, Draws: 1


Episode 58: 100%|██████████| 50/50 [05:52<00:00,  7.04s/matches, loss=16.9, win_rate=0]


	Target model updated
	Wins: 0, Losses: 49, Draws: 1


Episode 59: 100%|██████████| 50/50 [05:40<00:00,  6.81s/matches, loss=6.11, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 60: 100%|██████████| 50/50 [05:45<00:00,  6.91s/matches, loss=5.75, win_rate=0.02]  


	Wins: 1, Losses: 49, Draws: 0


Episode 61: 100%|██████████| 50/50 [05:34<00:00,  6.70s/matches, loss=10.2, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 62: 100%|██████████| 50/50 [05:53<00:00,  7.08s/matches, loss=7.06, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 63: 100%|██████████| 50/50 [06:02<00:00,  7.25s/matches, loss=6.53, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 64: 100%|██████████| 50/50 [06:32<00:00,  7.84s/matches, loss=9.73, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 65: 100%|██████████| 50/50 [05:58<00:00,  7.16s/matches, loss=5.85, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 66: 100%|██████████| 50/50 [05:42<00:00,  6.86s/matches, loss=11, win_rate=0]  


	Wins: 0, Losses: 50, Draws: 0


Episode 67: 100%|██████████| 50/50 [05:51<00:00,  7.03s/matches, loss=8.68, win_rate=0]


	Target model updated
	Wins: 0, Losses: 49, Draws: 1


Episode 68: 100%|██████████| 50/50 [05:44<00:00,  6.89s/matches, loss=14.2, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 69: 100%|██████████| 50/50 [06:11<00:00,  7.42s/matches, loss=9.67, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 70: 100%|██████████| 50/50 [06:26<00:00,  7.73s/matches, loss=11.2, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 71: 100%|██████████| 50/50 [06:05<00:00,  7.32s/matches, loss=8.97, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 72: 100%|██████████| 50/50 [05:46<00:00,  6.94s/matches, loss=7.75, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 73: 100%|██████████| 50/50 [06:02<00:00,  7.25s/matches, loss=8.18, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 74: 100%|██████████| 50/50 [06:08<00:00,  7.38s/matches, loss=8.51, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 75: 100%|██████████| 50/50 [05:47<00:00,  6.94s/matches, loss=10.2, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 76: 100%|██████████| 50/50 [06:01<00:00,  7.23s/matches, loss=5.81, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 77: 100%|██████████| 50/50 [05:47<00:00,  6.96s/matches, loss=8.41, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 78: 100%|██████████| 50/50 [05:46<00:00,  6.93s/matches, loss=12.1, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 79: 100%|██████████| 50/50 [06:04<00:00,  7.28s/matches, loss=9.58, win_rate=0]


	Target model updated
	Wins: 0, Losses: 49, Draws: 1


Episode 80: 100%|██████████| 50/50 [05:51<00:00,  7.02s/matches, loss=9.65, win_rate=0.02]  


	Wins: 1, Losses: 49, Draws: 0


Episode 81: 100%|██████████| 50/50 [05:51<00:00,  7.04s/matches, loss=6.11, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 82: 100%|██████████| 50/50 [05:36<00:00,  6.73s/matches, loss=11.9, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 83: 100%|██████████| 50/50 [06:13<00:00,  7.48s/matches, loss=14, win_rate=0]  


	Wins: 0, Losses: 49, Draws: 1


Episode 84: 100%|██████████| 50/50 [05:45<00:00,  6.90s/matches, loss=12.6, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 85: 100%|██████████| 50/50 [06:21<00:00,  7.63s/matches, loss=8.86, win_rate=0]


	Target model updated
	Wins: 0, Losses: 49, Draws: 1


Episode 86: 100%|██████████| 50/50 [05:53<00:00,  7.08s/matches, loss=10, win_rate=0]  


	Wins: 0, Losses: 50, Draws: 0


Episode 87: 100%|██████████| 50/50 [05:46<00:00,  6.94s/matches, loss=9.51, win_rate=0.02]  


	Wins: 1, Losses: 49, Draws: 0


Episode 88: 100%|██████████| 50/50 [06:20<00:00,  7.61s/matches, loss=13.6, win_rate=0]


	Target model updated
	Wins: 0, Losses: 49, Draws: 1


Episode 89: 100%|██████████| 50/50 [05:22<00:00,  6.46s/matches, loss=10.9, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 90: 100%|██████████| 50/50 [05:17<00:00,  6.34s/matches, loss=9.72, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 91: 100%|██████████| 50/50 [05:23<00:00,  6.48s/matches, loss=14.5, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 92: 100%|██████████| 50/50 [05:16<00:00,  6.32s/matches, loss=10.2, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 93: 100%|██████████| 50/50 [05:32<00:00,  6.65s/matches, loss=12.1, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 94: 100%|██████████| 50/50 [05:06<00:00,  6.13s/matches, loss=8.75, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 95: 100%|██████████| 50/50 [05:16<00:00,  6.34s/matches, loss=8.93, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 96: 100%|██████████| 50/50 [05:21<00:00,  6.43s/matches, loss=11, win_rate=0]  


	Wins: 0, Losses: 50, Draws: 0


Episode 97: 100%|██████████| 50/50 [05:11<00:00,  6.23s/matches, loss=10.5, win_rate=0]


	Target model updated
	Wins: 0, Losses: 50, Draws: 0


Episode 98: 100%|██████████| 50/50 [05:15<00:00,  6.31s/matches, loss=6.57, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 99: 100%|██████████| 50/50 [05:17<00:00,  6.36s/matches, loss=8.29, win_rate=0]


	Wins: 0, Losses: 50, Draws: 0


Episode 100:  74%|███████▍  | 37/50 [04:51<01:42,  7.88s/matches, loss=8.77, win_rate=0]


KeyboardInterrupt: 

In [None]:
# Close the MLflow run opened before training so it is not left active.
mlflow.end_run()

In [None]:
# Play one verbose demonstration game: `online_model` as black against a
# random opponent as white.

# Fixed seed for the random opponent, so this cell does not depend on the loop
# variable `i` left behind by the training cell.
DEMO_SEED = 0

ch = Checkers()

# NOTE(review): the player is built with the training-time EPSILON; if
# DeepLearningPlayer explores with that probability, this game is not a greedy
# evaluation of the network -- confirm the intended setting.
black_player = DeepLearningPlayer('black',
                                model=online_model,
                                epsilon=EPSILON,
                                epsilon_decay=EPSILON_DECAY_FACTOR,
                                epsilon_min=EPSILON_MIN,)
# Random player function
white_player = RandomPlayer('white', seed=DEMO_SEED)

# push into environment
winner = play_a_game(ch, black_player.next_move, white_player.next_move, 100, is_show_detail=True)

_b_b_b_b
b_b_b_b_
_._._._.
._._._._
_._._._.
._._._._
_w_w_w_w
w_w_w_w_
0 turn: black last_moved_piece: None
7 legal moves [(4, 8), (5, 8), (5, 9), (6, 9), (6, 10), (7, 10), (7, 11)]
black moved 6, 10

_b_b_b_b
b_b_._b_
_._._b_.
._._._._
_._._._.
._._._._
_w_w_w_w
w_w_w_w_
1 turn: white last_moved_piece: None
7 legal moves [(24, 21), (24, 20), (25, 22), (25, 21), (26, 23), (26, 22), (27, 23)]
white moved 25, 22

_b_b_b_b
b_b_._b_
_._._b_.
._._._._
_._._._.
._._w_._
_w_._w_w
w_w_w_w_
2 turn: black last_moved_piece: None
8 legal moves [(1, 6), (2, 6), (4, 8), (5, 8), (5, 9), (7, 11), (10, 14), (10, 15)]
black moved 10, 15

_b_b_b_b
b_b_._b_
_._._._.
._._._b_
_._._._.
._._w_._
_w_._w_w
w_w_w_w_
3 turn: white last_moved_piece: None
8 legal moves [(22, 18), (22, 17), (24, 21), (24, 20), (26, 23), (27, 23), (29, 25), (30, 25)]
white moved 27, 23

_b_b_b_b
b_b_._b_
_._._._.
._._._b_
_._._._.
._._w_w_
_w_._w_.
w_w_w_w_
4 turn: black last_moved_piece: None
9 legal moves [(1, 6), (2, 6), (4, 8),