In [1]:
import numpy as np
import time
import random
import matplotlib.pyplot as plt

from Game import *
from ModelGuided_TS_RollingMedian import *
from Model import Model_large, Model_small


from stockfish import Stockfish

##### Setup Model

In [2]:
# Build the small value network and restore the trained weights.
model = Model_small()

# map_location='cpu' lets a checkpoint that was saved from a GPU be restored on a
# CPU-only machine; the rest of the notebook calls .numpy() on the model output,
# so the model is expected to live on the CPU anyway.
state_dict = torch.load('Value Function Model Saves v3/model_3160_games', map_location='cpu')
model.load_state_dict(state_dict)

# Switch to inference mode (the network contains a Dropout layer).
model.eval()

Model_small(
  (conv1_2x2): Conv2d(12, 16, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
  (conv2_4x4): Conv2d(12, 16, kernel_size=(4, 4), stride=(1, 1), padding=(2, 2))
  (conv3_6x6): Conv2d(12, 16, kernel_size=(6, 6), stride=(1, 1), padding=(3, 3))
  (conv4_2x2): Conv2d(16, 16, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
  (conv5_4x4): Conv2d(16, 16, kernel_size=(4, 4), stride=(1, 1), padding=(2, 2))
  (conv6_6x6): Conv2d(16, 16, kernel_size=(6, 6), stride=(1, 1), padding=(3, 3))
  (conv7): Conv2d(48, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv8): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=256, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=1, bias=False)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
)

In [3]:
# --- Match configuration -----------------------------------------------------
color = 'black'         # side played by our tree-search (TS) engine: 'white' or 'black'
tmax_TS = 30            # [s] calculation time per move for our TS
tmax_stockfish = 100    # [ms] calculation time per move for the opponent
stockfish_elo = 100     # Elo rating of the test opponent

# NOTE(review): machine-specific absolute path -- adjust to the local Stockfish binary.
stockfish_path = '/Users/Philip/Desktop/Projects/RL Chess/stockfish/16/bin/stockfish'

# Parity of the ply counter `i` on which our engine is to move
# (the loop below lets our engine play whenever i % 2 == i_play).
if color == 'white':
    i_play = 0
elif color == 'black':
    i_play = 1
else:
    # Fail here instead of with a NameError on `i_play` later on.
    raise ValueError("color must be 'white' or 'black', got {!r}".format(color))

# --- Opponent ----------------------------------------------------------------
stockfish = Stockfish(stockfish_path)
stockfish.set_elo_rating(stockfish_elo)
stockfish.set_position()

# --- Per-game bookkeeping ----------------------------------------------------
stockfish_moves = []    # every move played, in Stockfish's coordinate notation

boards = []             # board arrays recorded before each TS move
board_tensors = []      # model input tensors recorded after each TS move

game = Game()
next_root = None        # search-tree root handed to the TS (None = start from scratch)
child_root = None       # child of the last search root that matches the chosen move
i = 0                   # ply counter

mat_diffs = []          # game.MaterialDiff() sampled on each TS turn

# Play one full game: our model-guided tree search (TS) against Stockfish.
# `game` always shows the position from the point of view of the side to move
# (game.FlipBoard() is called after every ply), while Stockfish keeps its own
# un-flipped position, so moves are converted between the two frames below.
while not game.is_over():

    print('board: ')
    # print(stockfish.get_board_visual())
    print(game.pieces)

    if i % 2 == i_play:
        # Model-guided TS plays a move

        # use prints options for more information or debugging
        # use the scaling factor options to define how we navigate the search tree
        # NOTE(review): next_root is not reassigned anywhere in this loop, so the
        # search is always started with the value it had before the loop.
        chosen_move, root = ModelGuided_TS(game, model, root=next_root, tmax=tmax_TS, prints=False, factor_wins=2, factor_mat=0.0, 
                factor_value_sum=1, factor_value_indi=1, factor_explore=1e-3)

        boards.append(game.pieces.copy())

        # child node reached by the chosen move (candidate warm start for a later search);
        # reset to None when absent so a stale node from an earlier ply is never reported
        child_root = next((child for child in root.children if child.move == chosen_move), None)

        # get move in stockfish notation
        if color == 'white':
            stockfish_move = index_to_standard(chosen_move[0]) + index_to_standard(chosen_move[1])
        else:
            flipped_moves = game.FlipPositions([chosen_move[0], chosen_move[1]])
            stockfish_move = index_to_standard(flipped_moves[0]) + index_to_standard(flipped_moves[1])
        # NOTE(review): no promotion suffix is appended here -- confirm how promotions
        # are meant to be communicated to Stockfish.

        visits = child_root.visits if child_root is not None else 'n/a'
        print('TS move with {} visits: {} / {}'.format(visits, chosen_move, stockfish_move))

        mat_diff = game.MaterialDiff()
        mat_diffs.append(mat_diff)

    else:
        # get best move given time constraint from stockfish
        stockfish_move = stockfish.get_best_move_time(tmax_stockfish)
        if stockfish_move is None:
            # Stockfish reports no move while game.is_over() is still False.
            raise RuntimeError('stockfish returned no move although the game is not over')

        move_from = standard_to_index(stockfish_move[0:2])
        move_to = standard_to_index(stockfish_move[2:4])

        # When our engine is white, Stockfish moves on odd plies, i.e. while the
        # internal board is flipped. Convert its squares into that flipped frame,
        # mirroring the conversion the TS branch applies when it plays black.
        # (assumes FlipPositions is its own inverse -- TODO confirm)
        if color == 'white':
            move_from, move_to = (tuple(pos) for pos in game.FlipPositions([move_from, move_to]))

        # NOTE(review): only the from/to squares are compared, so for a promotion the
        # first matching move type wins regardless of the piece Stockfish chose.
        for poss_move in game.PossibleMoves():
            if move_from == poss_move[0] and move_to == poss_move[1]:
                chosen_move = poss_move
                break
        else:
            # Without this guard the previous ply's chosen_move would be replayed silently.
            raise RuntimeError('stockfish move {} has no counterpart in game.PossibleMoves()'.format(stockfish_move))

        print('stockfish move: {} / {}'.format(chosen_move, stockfish_move))

    # update move in stockfish game instance 
    stockfish.make_moves_from_current_position([stockfish_move])

    stockfish_moves.append(stockfish_move)
    
    game.PlayMove(chosen_move)


    # record the position resulting from our own move as a model input
    if i % 2 == i_play:
        board_tensors.append(board_to_tensor(game.pieces.copy()))

    # hand the board over to the other side
    game.FlipBoard()

    i += 1

# Final position as seen by Stockfish, and the result of the game.
print('board: ')
print(stockfish.get_board_visual())

winner = game.get_winner()
print('winner: ', winner)

# Material balance, sampled once per TS turn.
plt.plot(mat_diffs)
plt.title('material difference during game (>0 for TS, <0 for stockfish)')
plt.xlabel('TS move number')
plt.ylabel('material difference')
plt.show()

# Value-network output for the position after each TS move.
# Pure inference: no autograd graph is needed.
with torch.no_grad():
    values = model(torch.stack(board_tensors)).numpy()
plt.plot(values)
plt.title('values')
plt.xlabel('TS move number')
plt.ylabel('model value')
plt.show()



board: 
[[14 12 13 15 16 13 12 14]
 [11 11 11 11 11 11 11 11]
 [ 0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0]
 [ 1  1  1  1  1  1  1  1]
 [ 4  2  3  5  6  3  2  4]]
stockfish move: ((6, 4), (5, 4), 'pawn') / e2e3
board: 
[[14. 12. 13. 16. 15. 13. 12. 14.]
 [11. 11. 11.  0. 11. 11. 11. 11.]
 [ 0.  0.  0. 11.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.]
 [ 4.  2.  3.  6.  5.  3.  2.  4.]]
TS move with 10046 visits: ((6, 0), (5, 0), 'pawn') / h7h6
board: 
[[14. 12. 13. 15. 16. 13. 12. 14.]
 [11. 11. 11. 11. 11. 11. 11.  0.]
 [ 0.  0.  0.  0.  0.  0.  0. 11.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.]
 [ 1.  1.  1.  1.  0.  1.  1.  1.]
 [ 4.  2.  3.  5.  6.  3.  2.  4.]]
stockfish move: ((7, 6), (5, 5), 'knight') / g1f3
board: 
[[14.  0. 13. 16. 15. 13. 12. 14

KeyboardInterrupt: 

In [4]:
# Board stored in the most recent search root (printing needs no defensive copy).
print(root.game.pieces)

[[14.  0.  0. 16. 15. 13. 12. 14.]
 [11. 11. 11. 13. 11.  0. 11.  0.]
 [ 0.  0. 12. 11.  0. 11.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  2.  0.]
 [ 0.  0.  0.  0.  0.  1. 11.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  1.  1.  1.  0.  0.  1.]
 [ 4.  2.  3.  6.  5.  3.  0.  4.]]


In [32]:
# Show a hand-built position and the same position after handing it to the other side.
# NOTE(review): `test_board` is not defined by any earlier cell in this notebook;
# it has to exist in the kernel before this cell is run.
print(test_board)

# Work on a dedicated Game instance so the match state held in `game` is left untouched.
test_game = Game()
test_game.pieces = test_board.copy()
test_game.FlipBoard()
test_board_2 = test_game.pieces.copy()
print(test_board_2)

[[ 0. 16.  0.  0.  0. 14.  0.  0.]
 [ 0. 11. 14.  0.  0. 11. 11.  0.]
 [ 0.  0.  0.  0.  0.  0.  0. 11.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.]
 [ 1. 11.  0.  0.  0.  0.  0.  0.]
 [ 0. 12.  0.  0.  0.  0. 12.  2.]
 [ 4.  1.  0.  1. 15.  0.  3.  0.]
 [ 0.  0.  3.  0.  6.  0.  0.  4.]]
[[14.  0.  0. 16.  0. 13.  0.  0.]
 [ 0. 13.  0.  5. 11.  0. 11. 14.]
 [12.  2.  0.  0.  0.  0.  2.  0.]
 [ 0.  0.  0.  0.  0.  0.  1. 11.]
 [ 0.  0.  0. 11.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  1.  0.  0.  4.  1.  0.]
 [ 0.  0.  4.  0.  0.  0.  6.  0.]]


In [33]:
# Evaluate the flipped test position: add a leading batch axis of size 1,
# then print the batch shape and the model's output for it.
board_tensor = board_to_tensor(test_board_2).unsqueeze(0)
print(board_tensor.shape)
print(model(board_tensor).detach().numpy())

torch.Size([1, 12, 8, 8])
[[-0.5117809]]


In [5]:
# Value stored on the search root, followed by move and value of each direct child.
print(root.value)
for child in root.children:
    print(child.move, child.value)

[-0.17429683]
((3, 6), (2, 4), 'knight') [0.20158273]
((6, 2), (4, 2), 'pawn double') [0.20725262]
((5, 0), (4, 0), 'pawn') [0.1594004]
((6, 7), (5, 7), 'pawn') [0.17765297]
((6, 3), (5, 3), 'pawn') [0.16457026]
((6, 1), (5, 1), 'pawn') [0.19432324]
((7, 5), (5, 7), 'bishop') [0.21010248]
((6, 2), (5, 2), 'pawn') [0.18175095]
((3, 6), (1, 5), 'knight') [0.18227968]
((6, 5), (4, 5), 'pawn double') [0.22972205]
((3, 6), (5, 5), 'knight') [0.16762033]
((6, 5), (5, 5), 'pawn') [0.2058292]
((7, 0), (6, 0), 'rook') [0.22084834]
((4, 6), (3, 7), 'pawn') [0.16519904]
((7, 1), (5, 2), 'knight') [0.19383875]
((6, 4), (5, 4), 'pawn') [0.16737323]
((6, 1), (4, 1), 'pawn double') [0.1979411]
((3, 6), (5, 7), 'knight') [0.16750029]
((3, 6), (1, 7), 'knight') [0.16713691]
((7, 5), (6, 6), 'bishop') [0.1672442]
((6, 4), (4, 4), 'pawn double') [0.19757417]
((6, 7), (4, 7), 'pawn double') [0.20734109]
((3, 6), (4, 4), 'knight') [0.09983734]
((6, 3), (4, 3), 'pawn double') [0.16863109]
((7, 7), (7, 6), '

In [6]:
# Most recently recorded entry of `boards` (shown as the cell's rich output).
boards[-1]

array([[14., 12., 13., 16., 15., 13., 12., 14.],
       [11., 11., 11.,  0., 11., 11., 11., 11.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0., 11.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [ 4.,  2.,  3.,  6.,  5.,  3.,  2.,  4.]])

In [5]:
# List the move attached to every direct child of the current search root.
for child in root.children:
    print(child.move)

((6, 7), (4, 7), 'pawn double')
((7, 6), (5, 7), 'knight')
((6, 2), (5, 2), 'pawn')
((7, 6), (5, 5), 'knight')
((7, 1), (5, 0), 'knight')
((6, 3), (4, 3), 'pawn double')
((6, 0), (4, 0), 'pawn double')
((6, 4), (4, 4), 'pawn double')
((6, 7), (5, 7), 'pawn')
((6, 4), (5, 4), 'pawn')
((7, 1), (5, 2), 'knight')
((6, 5), (4, 5), 'pawn double')
((6, 0), (5, 0), 'pawn')
((6, 6), (5, 6), 'pawn')
((6, 2), (4, 2), 'pawn double')
((6, 1), (5, 1), 'pawn')
((6, 5), (5, 5), 'pawn')
((6, 6), (4, 6), 'pawn double')
((6, 1), (4, 1), 'pawn double')
((6, 3), (5, 3), 'pawn')


In [3]:
# Start a fresh game, play a double pawn push, and hand the board to the other side.
game = Game()
opening_move = ((6, 4), (4, 4), 'pawn double')
game.PlayMove(opening_move)
game.FlipBoard()

# Run a 20 s model-guided search from scratch on the resulting position.
search_options = dict(
    root=None,
    tmax=20,
    batches=50,
    prints=False,
    factor_wins=2,
    factor_mat=0.0,
    factor_value_sum=1,
    factor_value_indi=1,
    factor_explore=1e-3,
)
chosen_move, root = ModelGuided_TS(game, model, **search_options)

In [13]:
# Sequence of moves to follow down the search tree, starting at the root.
line_to_follow = [
    ((6, 0), (4, 0), 'pawn double'),
    ((7, 3), (3, 7), 'queen'),
    ((7, 0), (4, 0), 'rook'),
]


def print_line(node, remaining_moves):
    """Descend from `node` along `remaining_moves`, printing each board on the way.

    For every child whose move equals the next move of the line, its board is
    printed and the descent continues below it. Once the line is exhausted,
    move and value of every child of the node reached are printed.
    """
    if not remaining_moves:
        for leaf in node.children:
            print(leaf.move, leaf.value)
        return

    wanted, rest = remaining_moves[0], remaining_moves[1:]
    for candidate in node.children:
        if candidate.move == wanted:
            print(candidate.game.pieces)
            print_line(candidate, rest)


print(root.value)
print(root.game.pieces)
print_line(root, line_to_follow)

[0.04328359]
[[14. 12. 13. 16. 15. 13. 12. 14.]
 [11. 11. 11.  0. 11. 11. 11. 11.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0. 11.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.]
 [ 4.  2.  3.  6.  5.  3.  2.  4.]]
[[14. 12. 13. 15. 16. 13. 12. 14.]
 [11. 11. 11. 11. 11. 11. 11.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0. 11.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  1.  1.  1.  0.  1.  1.  1.]
 [ 4.  2.  3.  5.  6.  3.  2.  4.]]
[[14. 12. 13. 16.  0. 13. 12. 14.]
 [11. 11. 11.  0. 11. 11. 11. 11.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0. 11.  0.  0.  0.  0.]
 [15.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  1.  1.  1.  1.  1.  1.]
 [ 4.  2.  3.  6.  5.  3.  2.  4.]]
[[14. 12. 13. 15. 16. 13. 12.  0.]
 [11. 11. 11. 11. 11. 11. 11.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0. 14.]
 [ 0

In [7]:
# Inspect the material statistics of one specific root move and of its replies.
move_of_interest = ((6, 2), (4, 2), 'pawn double')

print(root.value)
print(root.game.pieces)
for c, child in enumerate(root.children):
    # Skip every root move except the one under inspection.
    if child.move != move_of_interest:
        continue

    print(child.game.pieces)
    print(child.move, child.visits, root.values_mat[c])

    # Raw and per-visit material sums of the child and of the root
    # (visit counts are clamped to at least 1 to avoid dividing by zero).
    print('mat value blocks: ')
    print(child.matdiff_sum)
    print(child.matdiff_sum / max(child.visits, 1))
    print(root.matdiff_sum)
    print(root.matdiff_sum / max(root.visits, 1))

    # Visit count and material sum of every reply below the inspected move.
    for child_child in child.children:
        print(child_child.move, child_child.visits, child_child.matdiff_sum)

[0.04328359]
[[14. 12. 13. 16. 15. 13. 12. 14.]
 [11. 11. 11.  0. 11. 11. 11. 11.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0. 11.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.]
 [ 4.  2.  3.  6.  5.  3.  2.  4.]]
[[14. 12. 13. 15. 16. 13. 12. 14.]
 [11. 11. 11. 11. 11.  0. 11. 11.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0. 11.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  1.  1.  1.  0.  1.  1.  1.]
 [ 4.  2.  3.  5.  6.  3.  2.  4.]]
((6, 2), (4, 2), 'pawn double') 310 0.22204529527837974
mat value blocks: 
70
0.22580645161290322
-16962
-1.3156996587030716
((7, 3), (4, 6), 'queen') 2 0
((7, 4), (6, 4), 'king') 1 0
((6, 2), (4, 2), 'pawn double') 2 0
((7, 5), (4, 2), 'bishop') 1 0
((6, 1), (5, 1), 'pawn') 1 0
((6, 6), (4, 6), 'pawn double') 2 0
((6, 3), (4, 3), 'pawn double') 1 0
((7, 5), (2, 0), 'bishop') 66 -21
((6, 0), (4, 0), 'pawn double') 2 0
((4

In [22]:
# Replay a short hand-picked line and evaluate the final position with the model.
g_test = Game()
print(g_test.pieces)

# Each of these plies is followed by a board flip, and the board is printed after it.
plies_with_flip = [
    ((6, 5), (5, 5), 'pawn'),
    ((6, 3), (5, 3), 'pawn'),
    ((6, 6), (4, 6), 'pawn double'),
]
for ply in plies_with_flip:
    g_test.PlayMove(ply)
    g_test.FlipBoard()
    print(g_test.pieces)

# The last move is played without flipping, so the board stays in the mover's frame.
g_test.PlayMove(((7, 4), (3, 0), 'queen'))

print(g_test.pieces)

# Single-position batch for the value network.
board_tensor = torch.stack([board_to_tensor(g_test.pieces)])
print(model(board_tensor).detach().numpy())



[[14 12 13 15 16 13 12 14]
 [11 11 11 11 11 11 11 11]
 [ 0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0]
 [ 1  1  1  1  1  1  1  1]
 [ 4  2  3  5  6  3  2  4]]
[[14. 12. 13. 16. 15. 13. 12. 14.]
 [11. 11.  0. 11. 11. 11. 11. 11.]
 [ 0.  0. 11.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.]
 [ 4.  2.  3.  6.  5.  3.  2.  4.]]
[[14. 12. 13. 15. 16. 13. 12. 14.]
 [11. 11. 11. 11.  0. 11. 11. 11.]
 [ 0.  0.  0.  0. 11.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  1.  0.  0.]
 [ 1.  1.  1.  1.  1.  0.  1.  1.]
 [ 4.  2.  3.  5.  6.  3.  2.  4.]]
[[14. 12. 13. 16. 15. 13. 12. 14.]
 [11.  0.  0. 11. 11. 11. 11. 11.]
 [ 0.  0. 11.  0.  0.  0.  0.  0.]
 [ 0. 11.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.]
 [ 1.  1.  

In [64]:
# Hand-built position, using the same piece codes as the boards printed above.
test_board = np.array([
    [14.,  0., 13., 16., 15., 13., 12., 14.],
    [11., 11., 11.,  0., 11., 11., 11., 11.],
    [ 0.,  0., 12.,  0.,  0.,  0.,  0.,  0.],
    [ 0.,  0.,  0., 11.,  0.,  0.,  0.,  0.],
    [ 0.,  5.,  0.,  1.,  0.,  0.,  0.,  0.],
    [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
    [ 1.,  1.,  1.,  0.,  1.,  1.,  1.,  1.],
    [ 4.,  2.,  3.,  6.,  0.,  3.,  2.,  4.],
])

# Evaluate it as a batch of one position.
board_tensor = board_to_tensor(test_board).unsqueeze(0)
print(model(board_tensor).detach().numpy())

[[0.08739713]]
