In [2]:
import numpy as np
import time
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, random_split
import torch.optim as optim
import matplotlib.pyplot as plt
import sys, os

project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
sys.path.append(project_root)

from Resources.Model import Model_v8
from Resources.Game import *


##### global parameters

In [9]:
with open('/Users/Philip/Desktop/Projects/RL Chess/MCTS/Game Saves v8_3/stats', 'rb') as f:
    stats = torch.load(f)
stats = stats.int()
global_white_wins = stats[1]
global_black_wins = stats[2]
global_draws = stats[3]

percentage_decisive = (global_white_wins + global_black_wins) / (global_white_wins + global_black_wins + global_draws)
print(percentage_decisive)

tensor(0.3985)


In [10]:
print(global_white_wins)
print(global_black_wins)
print(global_draws)

tensor(59947, dtype=torch.int32)
tensor(60253, dtype=torch.int32)
tensor(181404, dtype=torch.int32)


In [24]:
gamma = 0.98
value_diff_scale = 250
value_diff_scale_early = 5
# games saved in batches to reduce i/o stream
# each batch is a input file and a label file containing [batch_size] individual games
batch_target = 100
batch_size = int(batch_target // percentage_decisive.item()) # so that on average we have [batch_target] decisive games

In [25]:
print(batch_size)

250


##### local counters

In [26]:
white_wins = 0
black_wins = 0
draws = 0

game_count = 0          # counting decisive games
batch_count = 0         # number of batches locally done

In [27]:
first_load = True
initialize_batch = True

batch_white_wins = 0
batch_black_wins = 0
batch_draws = 0

while True:

    t0 = time.time()

    # load newest model initially and for every new batch
    if initialize_batch or first_load:
        model = Model_v8()
        model_saves = os.listdir('../Monte Carlo/Model Saves MC v8_3')
        if len(model_saves) > 0:
            newest_model = max(int(i[6:-8]) for i in model_saves)
            model.load_state_dict(torch.load('../Monte Carlo/Model Saves MC v8_3/model_{}_batches'.format(newest_model)))

        first_load = False

    meta_games = []
    meta_boards_white = []; meta_boards_black = []
    i = 0

    meta_active = []

    for game_ind in range(batch_size):
        meta_games.append( Game() )
        meta_boards_white.append([]); meta_boards_black.append([])
        meta_active.append(True)

    model.eval()

    while True in meta_active:

        if i % 10 == 0:
            print('i = {}, with {} active games '.format(i, meta_active.count(True)))

        i += 1
        full_board_batch = []
        meta_board_batch_sizes = [] # save batch sizes to split model output afterwards

        meta_moves = []

        # go through games, collect positions for value evaluations
        for g, game in enumerate(meta_games):

            if not meta_active[g]:
                meta_board_batch_sizes.append(0)
                meta_moves.append([])
                continue

            meta_moves.append(game.PossibleMoves())

            game_ini = game.copy()
            board_batch = [board_to_tensor(game.pieces)]

            mate = False

            for move in meta_moves[-1]:
                game.PlayMove(move)
                board_batch.append(board_to_tensor(game.pieces))
                game.FlipBoard()
                if game.is_over():
                    mate = True
                    break
                game = game_ini.copy()

            meta_games[g] = game.copy()

            if mate:
                meta_active[g] = False
                meta_board_batch_sizes.append(0)

                game.FlipBoard()

                if i % 2 == 1:
                    meta_boards_white[g].append(board_to_bool_tensor(game.pieces))
                if i % 2 == 0:
                    meta_boards_black[g].append(board_to_bool_tensor(game.pieces))

            if not mate:
                full_board_batch = full_board_batch + board_batch
                meta_board_batch_sizes.append(len(board_batch))

        if len(full_board_batch) == 0:
            break

        # get values of all positions
        full_board_batch = torch.stack(full_board_batch)
        out = model(full_board_batch).detach()
        meta_values = torch.split(out, meta_board_batch_sizes)

        # make moves for all games
        for g, game in enumerate(meta_games):

            if not meta_active[g]:
                continue

            values = meta_values[g]

            if i < 7:
                scale = value_diff_scale_early
            else:
                scale = value_diff_scale
            values_diff = [scale*(values[i] - values[0]) for i in range(1, len(values))]
            move_prob = torch.softmax(torch.Tensor(values_diff), dim=0).numpy()
            chosen_i = np.random.choice(range(len(meta_moves[g])), p=move_prob)
            chosen_move = meta_moves[g][chosen_i]

            game.PlayMove(chosen_move)

            if i % 2 == 1:
                meta_boards_white[g].append(board_to_bool_tensor(game.pieces))
            if i % 2 == 0:
                meta_boards_black[g].append(board_to_bool_tensor(game.pieces))

            game.FlipBoard()

    meta_inputs = []
    meta_labels = []

    for g, game in enumerate(meta_games):
        
        winner = game.get_winner()

        if winner == 'draw':
            draws += 1; batch_draws += 1
            continue
            reward_white = 0;   reward_black = 0

        elif winner == 'white':
            white_wins += 1; batch_white_wins += 1
            reward_white = 1;   reward_black = -1

        elif winner == 'black':
            black_wins += 1; batch_black_wins += 1
            reward_white = -1;  reward_black = 1

        labels_white = [reward_white * gamma**(len(meta_boards_white[g]) - 1 - i) for i in range(len(meta_boards_white[g]))]
        labels_black = [reward_black * gamma**(len(meta_boards_black[g]) - 1 - i) for i in range(len(meta_boards_black[g]))]

        meta_inputs = meta_inputs + meta_boards_white[g] + meta_boards_black[g]
        meta_labels = meta_labels + labels_white + labels_black

    if batch_white_wins + batch_black_wins == 0:
        print('skip')
        continue

    inputs_tens = torch.stack(meta_inputs)
    labels_tens = torch.Tensor(meta_labels)

    print('batch of {} had {} decisive games'.format(batch_size, batch_white_wins + batch_black_wins))

    batch_count += 1

    print('total time = ', time.time() - t0)
    print('with {} decisive games'.format(white_wins + black_wins))

    break

    continue

    with open('/Users/Philip/Desktop/Projects/RL Chess/MCTS/Game Saves v9/stats', 'rb') as f:
        stats = torch.load(f)
    stats = stats.int()
    stats[0] += 1 # batch index
    stats[1] += batch_white_wins
    stats[2] += batch_black_wins
    stats[3] += batch_draws
    torch.save(stats, '/Users/Philip/Desktop/Projects/RL Chess/MCTS/Game Saves v9/stats')

    # print('update index ', stats[0])

    new_batch_index = stats[0]

    print('time: ', time.time() - t0)

    # torch.save(inputs_tens, '/Users/Philip/Desktop/Projects/RL Chess/MCTS/Game Saves v9/inputs_{}'.format(new_batch_index))
    # torch.save(labels_tens, '/Users/Philip/Desktop/Projects/RL Chess/MCTS/Game Saves v9/labels_{}'.format(new_batch_index))

    print('local batches: {} --  w: {}, b: {}, d: {}'.format(batch_count, white_wins, black_wins, draws))

    if batch_count % 5 == 0:
        print(' -- global batches = {} --  w: {}, b: {}, d: {} (total: {})'.format(
            new_batch_index, stats[1], stats[2], stats[3], stats[1] + stats[2] + stats[3]))

    initialize_batch = True

    batch_white_wins = 0
    batch_black_wins = 0
    batch_draws = 0

i = 0, with 250 active games 


i = 10, with 250 active games 
i = 20, with 249 active games 
i = 30, with 243 active games 
i = 40, with 236 active games 
i = 50, with 227 active games 
i = 60, with 214 active games 
i = 70, with 199 active games 
i = 80, with 163 active games 
i = 90, with 133 active games 
i = 100, with 104 active games 
i = 110, with 85 active games 
i = 120, with 70 active games 
i = 130, with 50 active games 
i = 140, with 34 active games 
i = 150, with 24 active games 
i = 160, with 14 active games 
i = 170, with 8 active games 
i = 180, with 6 active games 
i = 190, with 5 active games 
i = 200, with 5 active games 
i = 210, with 2 active games 
i = 220, with 2 active games 
i = 230, with 2 active games 
i = 240, with 1 active games 
i = 250, with 1 active games 
batch of 250 had 100 decisive games
total time =  274.3464708328247
with 100 decisive games


In [28]:
# Test duration of game old way

t0 = time.time()

for i_game in range(batch_size):

    game = Game()
    i = 0
    boards_white = [];  boards_black = []

    model.eval()

    while not game.is_over():
        
        i += 1
        moves = game.PossibleMoves()

        game_ini = game.copy()
        board_batch = [board_to_tensor(game.pieces)]

        mate = False

        for move in moves:
            game.PlayMove(move)
            board_batch.append(board_to_tensor(game.pieces))
            game.FlipBoard()
            if game.is_over():
                mate = True
                chosen_move = move
                game = game_ini.copy()
                break
            game = game_ini.copy()

        if not mate:
            board_tensor = torch.stack(board_batch)
            values = model(board_tensor)
            if i < 7:
                scale = value_diff_scale_early
            else:
                scale = value_diff_scale
            values_diff = [scale*(values[i] - values[0]) for i in range(1, len(values))]
            move_prob = torch.softmax(torch.Tensor(values_diff), dim=0).numpy()
            chosen_i = np.random.choice(range(len(moves)), p=move_prob)
            chosen_move = moves[chosen_i]
            
        game.PlayMove(chosen_move)

        if i % 2 == 1:
            boards_white.append(board_to_bool_tensor(game.pieces))
        if i % 2 == 0:
            boards_black.append(board_to_bool_tensor(game.pieces))

        game.FlipBoard()
    
    winner = game.get_winner()

    if winner == 'draw':
        draws += 1; batch_draws += 1
        continue
        reward_white = 0;   reward_black = 0

    elif winner == 'white':
        white_wins += 1; batch_white_wins += 1
        reward_white = 1;   reward_black = -1

    elif winner == 'black':
        black_wins += 1; batch_black_wins += 1
        reward_white = -1;  reward_black = 1

    labels_white = [reward_white * gamma**(len(boards_white) - 1 - i) for i in range(len(boards_white))]
    labels_black = [reward_black * gamma**(len(boards_black) - 1 - i) for i in range(len(boards_black))]

    inputs_tens = torch.stack(boards_white + boards_black)
    labels_tens = torch.Tensor(labels_white + labels_black)

    if initialize_batch:
        batch_inputs = inputs_tens.clone()
        batch_labels = labels_tens.clone()
        initialize_batch = False
    else:
        batch_inputs = torch.cat((batch_inputs, inputs_tens))
        batch_labels = torch.cat((batch_labels, labels_tens))

    game_count += 1

print('total time = ', time.time() - t0)
print('with {} decisive games'.format(batch_white_wins + batch_black_wins))

total time =  444.49062490463257
with 200 decisive games


In [None]:
# Model_v9:
# 100 consecutive games: 337.908s (34 draws, 66 decisive)
# 100 parallel games: 151.934s 
# 400 parallel games: 545.693s batch of 400 had 132 decisive games


# Model_v8:
# parallel:
# batch of 250 had 100 decisive games
# total time =  274.3464708328247
# consequtive:
# total time =  444.49062490463257
# with 200 decisive games