In [1]:
from rchess import Board
import numpy as np

In [9]:
def as_str(value):
    if value < 0:
        return f"{value:.6f}"
    else:
        return f" {value:.6f}"

def as_percentage(value):
    str = f"{value*100:.2f}%"
    for _ in range(7-len(str)):
        str = " " + str
    return str

print("%-16s %-10s %-14s %-16s %-16s %-18s %-8s" %("Move", "Visits", "Policy", "Avg. value", "UCB", "Q+U", "Raw NN Value"))
print("-" * 110)
move = "e2e4"
action = 42123
visits = 578
policy = as_percentage(0.10008979)
avg_value = as_str(-0.08786678678)
ucb = as_str(1.014844)
raw_nn_value = as_str(-0.0652234345)
print("%-5s (%-6s)   %-10s %-14s %-16s %-16s %-18s %-13s" %(move, action, f"N: {visits}", f"(P: {policy})", f"(Q: {avg_value})", f"(U: {ucb})", f"(Q+U: {ucb})", f"(V: {raw_nn_value})"))


Move             Visits     Policy         Avg. value       UCB              Q+U                Raw NN Value
--------------------------------------------------------------------------------------------------------------
e2e4  (42123 )   N: 578     (P:  10.01%)   (Q: -0.087867)   (U:  1.014844)   (Q+U:  1.014844)   (V: -0.065223)


In [1]:
from model import load_as_trt_model
import numpy as np
trt_func, model = load_as_trt_model()

from rchess import Board
import timeit
from mcts import find_best_move, debug_search


2025-01-09 20:53:58.366389: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-01-09 20:53:59.858326: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6008 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2070 SUPER, pci bus id: 0000:26:00.0, compute capability: 7.5
2025-01-09 20:54:00.504874: E tensorflow/compiler/tf2tensorrt/utils/trt_logger.cc:87] DefaultLogger 3: [runtime.cpp::~Runtime::346] Error Code 3: API Usage Error (Parameter check failed at: runtime/rt/runtime.cpp::~Runtime::346, condition: mEngineCounter.use_count() == 1. Destroying a runtime before destroying deserialized engines created by the runtime leads to undefined behavior.
)


In [6]:
board = Board()
board.push_uci("e2e4")
board.push_uci("e7e5")
board.push_uci("f1c4")
board.push_uci("b8c6")
board.push_uci("d1h5")
#board.push_uci("g8f6")
root = None
move, root, child_visits = find_best_move(board, root, trt_func, 10000, True)
debug_search(board, root)


  +---+---+---+---+---+---+---+---+
  | r |   | b | q | k | b | n | r | 8
  +---+---+---+---+---+---+---+---+
  | p | p | p | p |   | p | p | p | 7
  +---+---+---+---+---+---+---+---+
  |   |   | n |   |   |   |   |   | 6
  +---+---+---+---+---+---+---+---+
  |   |   |   |   | p |   |   | Q | 5
  +---+---+---+---+---+---+---+---+
  |   |   | B |   | P |   |   |   | 4
  +---+---+---+---+---+---+---+---+
  |   |   |   |   |   |   |   |   | 3
  +---+---+---+---+---+---+---+---+
  | P | P | P | P |   | P | P | P | 2
  +---+---+---+---+---+---+---+---+
  | R | N | B |   | K |   | N | R | 1
  +---+---+---+---+---+---+---+---+
    a   b   c   d   e   f   g   h

Fen: r1bqkbnr/pppp1ppp/2n5/4p2Q/2B1P3/8/PPPP1PPP/RNB1K1NR b KQkq - 3 3
Key: 73837f494a5e1e15
Elapsed time: 1.11s
Visits: 10000
Eval:  0.471244
Move             Visits     Policy         Avg. value       UCB              Q+U                Raw NN Value
------------------------------------------------------------------------------------

In [10]:
num_sims = 800
#Average time 0.06498299805999977
def find_one():
    board = Board()
    root = None
    move, root, child_visits = find_best_move(board, root, trt_func, num_sims, False)

# Time with timeit
time = timeit.timeit(find_one, number=50)
print("Average time", time / 50)

Average time 0.054438061000000745


In [11]:
import pstats, cProfile
#import mcts
board = Board()
cProfile.runctx("find_best_move(board, None, trt_func, 15000, True)", globals(), locals(), "Profile.prof")

s = pstats.Stats("Profile.prof")
s.strip_dirs().sort_stats("time").print_stats()

Thu Jan  9 20:52:23 2025    Profile.prof

         3705837 function calls (3698318 primitive calls) in 2.034 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      939    0.329    0.000    0.329    0.000 {built-in method tensorflow.python._pywrap_tfe.TFE_Py_Execute}
    71558    0.261    0.000    0.324    0.000 c.pyx:225(select_child)
      2/1    0.211    0.106    1.984    1.984 c.pyx:64(find_best_move)
      939    0.170    0.000    0.171    0.000 constant_op.py:75(convert_to_eager_tensor)
    15000    0.139    0.000    0.349    0.000 c.pyx:261(evaluate_node)
      939    0.122    0.000    1.014    0.001 c.pyx:210(make_predictions)
   356428    0.112    0.000    0.209    0.000 c.pyx:44(add_child)
   356429    0.079    0.000    0.093    0.000 c.pyx:24(__cinit__)
  1564821    0.059    0.000    0.059    0.000 c.pyx:250(UCB)
        1    0.050    0.050    2.034    2.034 <string>:1(<module>)
    14999    0.027    0.000    0.045

<pstats.Stats at 0x712e22d56c80>

In [None]:
# OLD RESULTS FOR 800 SIMS
def find_one():
    board = Board()
    root = None
    move, root, child_visits = find_best_move(board, root, trt_func, num_sims)

# Time with timeit
time = timeit.timeit(find_one, number=50)
print("Average time", time / 50)

Average time 0.07329234867999958


In [23]:
# OLD RESULTS FOR 800 SIMS
import pstats, cProfile
#import mcts
board = Board()
cProfile.runctx("find_best_move(board, None, trt_func, num_sims)", globals(), locals(), "Profile.prof")

s = pstats.Stats("Profile.prof")
s.strip_dirs().sort_stats("time").print_stats()

Fri Jan  3 07:27:54 2025    Profile.prof

         238772 function calls (232671 primitive calls) in 0.162 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      801    0.027    0.000    0.037    0.000 c.pyx:231(evaluate_node)
     2044    0.019    0.000    0.026    0.000 c.pyx:190(select_child)
       51    0.018    0.000    0.018    0.000 {built-in method tensorflow.python._pywrap_tfe.TFE_Py_Execute}
       51    0.011    0.000    0.067    0.001 c.pyx:178(make_predictions)
      801    0.011    0.000    0.017    0.000 c.pyx:248(expand_node)
      2/1    0.010    0.005    0.163    0.163 c.pyx:58(find_best_move)
       51    0.009    0.000    0.009    0.000 constant_op.py:75(convert_to_eager_tensor)
     1607    0.004    0.000    0.004    0.000 {method 'reduce' of 'numpy.ufunc' objects}
    45584    0.003    0.000    0.003    0.000 c.pyx:18(__get__)
    17776    0.002    0.000    0.002    0.000 c.pyx:41(__setitem__)
      80

<pstats.Stats at 0x7cf5617413c0>

In [6]:
print(board.to_string())


  +---+---+---+---+---+---+---+---+
  | r |   | b | q | k | b |   | r | 8
  +---+---+---+---+---+---+---+---+
  | p | p | p | p | p | p | p | p | 7
  +---+---+---+---+---+---+---+---+
  |   |   |   |   |   |   |   |   | 6
  +---+---+---+---+---+---+---+---+
  |   |   |   |   |   |   |   |   | 5
  +---+---+---+---+---+---+---+---+
  |   |   |   |   | n |   |   |   | 4
  +---+---+---+---+---+---+---+---+
  | P |   | N |   |   | N |   |   | 3
  +---+---+---+---+---+---+---+---+
  |   | P | Q | P | P | P | P | P | 2
  +---+---+---+---+---+---+---+---+
  | R |   | B | K |   | B |   | R | 1
  +---+---+---+---+---+---+---+---+
    a   b   c   d   e   f   g   h

Fen: r1bqkb1r/pppppppp/8/8/4n3/P1N2N2/1PQPPPPP/R1BK1B1R w kq - 3 6
Key: ef69a98e93ec1388


In [2]:
from mcts import SelfplayGame
times = []
for i in range(10):
    game = SelfplayGame()
    for i in range(10):
        t1 = time.time()
        move, child_visits = game.simulate(trt_func, 800)
        t2 = time.time()
        times.append(t2 - t1)
        game.make_move(move)
        terminal, winner = game.board.terminal()
        if terminal:
            print("Game over, winner is", winner)
print("Average time", np.mean(times))

Average time 0.31473344564437866


In [27]:
t1 = time.time()
move, child_visits = game.simulate(trt_func, 800)
t2 = time.time()
print(t2-t1)
game.make_move(move)
terminal, winner = game.board.terminal()
if terminal:
    print("Game over, winner is", winner)

0.4044685363769531


In [16]:
game = SelfplayGame()
t1 = time.time()
while True:
    move, child_visits = game.simulate(trt_func, 800)
    game.make_move(move)
    terminal, winner = game.board.terminal()
    if terminal:
        break
if winner == 1:
    print("White wins")
elif winner == -1:
    print("Black wins")
else:
    print("Draw")
t2 = time.time()
print(game.board.to_string())
print(t2-t1)
print(move)


Draw

  +---+---+---+---+---+---+---+---+
  |   |   |   |   |   |   |   |   | 8
  +---+---+---+---+---+---+---+---+
  |   |   |   |   |   |   |   |   | 7
  +---+---+---+---+---+---+---+---+
  |   |   |   |   |   |   |   |   | 6
  +---+---+---+---+---+---+---+---+
  |   |   |   |   |   |   |   |   | 5
  +---+---+---+---+---+---+---+---+
  |   |   |   |   |   |   |   |   | 4
  +---+---+---+---+---+---+---+---+
  |   |   |   |   |   | Q |   |   | 3
  +---+---+---+---+---+---+---+---+
  |   |   | K |   |   |   |   |   | 2
  +---+---+---+---+---+---+---+---+
  |   |   |   |   | k |   |   |   | 1
  +---+---+---+---+---+---+---+---+
    a   b   c   d   e   f   g   h

Fen: 8/8/8/8/8/5Q2/2K5/4k3 b - - 66 199
Key: d554af9f066be6d5
134.74590635299683
b1c2


In [4]:
import numpy as np
import timeit
import time
from math import sqrt, log

class Node:
    @property
    def Q(self):
        return self.W / self.N if self.N > 0 else 0.0

    def __init__(self):
        self.children = {}
        self.vloss = 0
        self.to_play = None

        # Values for MCTS
        self.N = 0
        self.W = 0.0
        self.P = 0.0

    def __getitem__(self, move: str):
        return self.children[move]
    
    def __setitem__(self, move: str, node):
        self.children[move] = node

    def is_leaf(self):
        return not self.children
    
# Create a dummy node with 30 fake children
dummy_node = Node()
dummy_node.N = 100  # Set parent visit count

for i in range(30):
    child = Node()
    child.N = np.random.randint(1, 100)  # Random visit count for each child
    child.W = np.random.rand() * 100  # Random total value for each child
    child.P = np.random.rand()  # Random prior probability for each child
    dummy_node.children[f"move_{i}"] = child

In [7]:
def select_child(node: Node, pb_c_base: float, pb_c_init: float, pb_c_factor: float, fpu: float):
    bestucb = -np.inf
    bestmove = None
    bestchild = None

    parent_N_sqrt = sqrt(node.N + 1)
    pb_c = log((node.N + pb_c_base + 1) / pb_c_base) * pb_c_factor + pb_c_init

    for move, child in node.children.items():
        if child.N == 0:
            puct = child.Q + pb_c * child.P * parent_N_sqrt / (child.N + 1)
        else:
            puct = fpu
        if puct > bestucb:
            bestucb = puct
            bestmove = move
            bestchild = child

    return bestmove, bestchild

# Create a dummy node with 30 fake children
dummy_node = Node()
dummy_node.N = 100  # Set parent visit count

for i in range(30):
    child = Node()
    child.N = np.random.randint(1, 100)  # Random visit count for each child
    child.W = np.random.rand() * 100  # Random total value for each child
    child.P = np.random.rand()  # Random prior probability for each child
    dummy_node.children[f"move_{i}"] = child

# Benchmark the select_child function
pb_c_base = 19652
pb_c_init = 1.25
pb_c_factor = 1.0
fpu = 0.0#bestmove, bestchild = select_child(dummy_node, pb_c_base, pb_c_init, pb_c_factor, fpu)

num_runs = 1000000
# Benchmark the select_child function using timeit
benchmark_time = timeit.timeit(
    "select_child(dummy_node, pb_c_base, pb_c_init, pb_c_factor, fpu)",
    globals=globals(),
    number=num_runs
)

print(f"Average time taken over {num_runs} runs: {benchmark_time / num_runs} seconds")
print(f"Total time taken: {benchmark_time} seconds")

Average time taken over 1000000 runs: 1.9494292420004056e-06 seconds
Total time taken: 1.9494292420004058 seconds


In [6]:
from mcts.c import select_child

# Create a dummy node with 30 fake children
dummy_node = Node()
dummy_node.N = 100  # Set parent visit count

for i in range(30):
    child = Node()
    child.N = np.random.randint(1, 100)  # Random visit count for each child
    child.W = np.random.rand() * 100  # Random total value for each child
    child.P = np.random.rand()  # Random prior probability for each child
    dummy_node.children[f"move_{i}"] = child

# Benchmark the select_child function
pb_c_base = 19652
pb_c_init = 1.25
pb_c_factor = 1.0
fpu = 0.0#bestmove, bestchild = select_child(dummy_node, pb_c_base, pb_c_init, pb_c_factor, fpu)

num_runs = 1000000
# Benchmark the select_child function using timeit
benchmark_time = timeit.timeit(
    "select_child(dummy_node, pb_c_base, pb_c_init, pb_c_factor, fpu)",
    globals=globals(),
    number=num_runs
)

print(f"Average time taken over {num_runs} runs: {benchmark_time / num_runs} seconds")
print(f"Total time taken: {benchmark_time} seconds")

Average time taken over 1000000 runs: 5.751467379999667e-06 seconds
Total time taken: 5.751467379999667 seconds


In [4]:
# Update and remove vloss
def backup(search_path, value):
    for node in reversed(search_path):
        node.N = node.N - node.vloss + 1
        node.W = node.W + node.vloss + value
        node.vloss = 0
        value = flip_value(value)

# Create a fake search path with at least 30 elements
search_path = [Node() for _ in range(30)]
value = np.random.rand()

# Benchmark the backup function using timeit
backup_time = timeit.timeit(
    "backup(search_path, value)",
    globals=globals(),
    number=num_runs
)

print(f"Average time taken over {num_runs} runs: {backup_time / num_runs} seconds")
print(f"Total time taken: {backup_time} seconds")

Average time taken over 1000000 runs: 6.049426947000029e-06 seconds
Total time taken: 6.049426947000029 seconds


In [6]:
from mcts.c import backup

# Create a fake search path with at least 30 elements
search_path = [Node() for _ in range(30)]
value = np.random.rand()

# Benchmark the backup function using timeit
backup_time = timeit.timeit(
    "backup(search_path, value)",
    globals=globals(),
    number=num_runs
)

print(f"Average time taken over {num_runs} runs: {backup_time / num_runs} seconds")
print(f"Total time taken: {backup_time} seconds")

Average time taken over 1000000 runs: 4.521437020999656e-06 seconds
Total time taken: 4.521437020999656 seconds


In [7]:
import timeit

# Sample list
sample_list = list(range(1000))

# Method 1: Using reversed()
def traverse_reversed(lst):
    for item in reversed(lst):
        pass

# Method 2: Using slicing
def traverse_slicing(lst):
    for item in lst[::-1]:
        pass

# Method 3: Using a for loop with range
def traverse_range(lst):
    for i in range(len(lst)-1, -1, -1):
        item = lst[i]

# Number of runs for benchmarking
num_runs = 100000

# Benchmark each method
time_reversed = timeit.timeit('traverse_reversed(sample_list)', globals=globals(), number=num_runs)
time_slicing = timeit.timeit('traverse_slicing(sample_list)', globals=globals(), number=num_runs)
time_range = timeit.timeit('traverse_range(sample_list)', globals=globals(), number=num_runs)

print(f"Average time taken by reversed() over {num_runs} runs: {time_reversed / num_runs} seconds")
print(f"Average time taken by slicing over {num_runs} runs: {time_slicing / num_runs} seconds")
print(f"Average time taken by range over {num_runs} runs: {time_range / num_runs} seconds")

Average time taken by reversed() over 100000 runs: 6.639731540008142e-06 seconds
Average time taken by slicing over 100000 runs: 7.250656179994621e-06 seconds
Average time taken by range over 100000 runs: 2.4007662889998754e-05 seconds


In [3]:
from rchess import Cache
import time

In [5]:
cache = Cache()
num_items = 100

start_time = time.time()
for i in range(num_items):
    cache.set(i, i)
end_time = time.time()

print(f"Time taken to insert {num_items} items: {end_time - start_time} seconds")

Time taken to insert 100 items: 0.00013494491577148438 seconds


In [43]:
import multiprocessing
from multiprocessing.managers import BaseManager
import random
import time

def insert_numbers(cache, num_items):
    for _ in range(num_items):
        num = random.randint(1, 5)
        cache.set(num, num)

def read_numbers(cache, num_items, hit_counter):
    for _ in range(num_items):
        num = random.randint(1, 10)
        if cache.get(num) is not None:
            hit_counter.value += 1

if __name__ == "__main__":
    BaseManager.register('Cache', Cache)
    manager = BaseManager()
    manager.start()
    cache = manager.Cache()
    num_items = 100
    hit_counter1 = multiprocessing.Value('i', 0)
    hit_counter2 = multiprocessing.Value('i', 0)

    p1 = multiprocessing.Process(target=insert_numbers, args=(cache, num_items))
    p2 = multiprocessing.Process(target=read_numbers, args=(cache, num_items, hit_counter1))
    p3 = multiprocessing.Process(target=read_numbers, args=(cache, num_items, hit_counter2))

    start_time = time.time()

    p1.start()
    p2.start()
    p3.start()

    p1.join()
    p2.join()
    p3.join()

    end_time = time.time()

    print(f"Time taken: {end_time - start_time} seconds")
    print(f"Cache hits by process 2: {hit_counter1.value}")
    print(f"Cache hits by process 3: {hit_counter2.value}")

Time taken: 0.0209195613861084 seconds
Cache hits by process 2: 51
Cache hits by process 3: 51


In [1]:
%load_ext cython

In [4]:
%%cython
#cython: profile=True, language_level=3
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
import pstats, cProfile
from rchess import Board
import numpy as np
import timeit
cimport numpy as cnp

cdef class Node:
    cdef public float P, W
    cdef public unsigned int N, vloss
    cdef public bint to_play
    cdef public dict children

    def __cinit__(self):
        self.P = 0.0
        self.W = 0.0
        self.N = 0
        self.vloss = 0
        self.children = {}
        self.to_play = False

    def __getitem__(self, int move):
        return self.children[move]
    
    def __setitem__(self, int move, Node node):
        self.children[move] = node

    cdef inline Node get_child(self, int move):
        return self.children[move]

    cdef inline void add_child(self, int move, float p):
        self.children[move] = Node()

    cdef inline bint is_leaf(self):
        return not self.children

    cdef inline void apply_vloss(self):
        self.N += 1
        self.W -= 1
        self.vloss += 1

    cdef inline void remove_vloss(self):
        self.N -= self.vloss
        self.W += self.vloss
        self.vloss = 0
    
def expand_node(Node node, list legal_moves_int, bint to_play):
    cdef int move
    cdef child
    node.to_play = to_play
    for move in legal_moves_int:
        node.add_child(move, 0)
    #for move in legal_moves_int:
    #    child = node.get_child(move)

def expand_node2(Node node, list legal_moves_int, bint to_play):
    cdef int move
    cdef child
    node.to_play = to_play
    for move in legal_moves_int:
        node[move] = Node()
    #for move in legal_moves_int:
    #    child = node[move]

board = Board()
node = Node()
legal_moves_int = [i for i in range(30)]
to_play = board.to_play()

cProfile.runctx("expand_node(node, legal_moves_int, to_play)", globals(), locals(), "Profile.prof")

s = pstats.Stats("Profile.prof")
s.strip_dirs().sort_stats("time").print_stats()

# time it
num_runs = 25000
expand_time = timeit.timeit(
    "expand_node(node, legal_moves_int, to_play)",
    globals=globals(),
    number=num_runs
)
print("Total time", expand_time) # 0.3
print("Average time", expand_time / num_runs) #1.3 e-6



Content of stderr:
In file included from /home/tomaz/.local/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929,
                 from /home/tomaz/.local/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12,
                 from /home/tomaz/.local/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5,
                 from /home/tomaz/.cache/ipython/cython/_cython_magic_23d4a2a35b45cd7a2c68cc6d0e8e9833f03054c6.c:1252:
      |  ^~~~~~~Fri Jan  3 17:24:29 2025    Profile.prof

         64 function calls in 0.000 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 {built-in method builtins.exec}
       30    0.000    0.000    0.000    0.000 _cython_magic_23d4a2a35b45cd7a2c68cc6d0e8e9833f03054c6.pyx:32(add_child)
       30    0.000    0.000    0.000    0.000 _cython_magic_23d4a2a35b45cd7a2c68cc6d0e8e9833f03054c6.pyx:15(__cinit__

In [4]:
%load_ext cython

In [6]:
%%cython
import time


cdef double start_time, end_time

start_time = time.time()
a = 0
for i in range(10):
    time.sleep(0.1)
end_time = time.time()

print(start_time)
print(end_time)
print(f"Time taken for loop: {end_time - start_time} seconds")

1736449042.1141045
1736449043.1153278
Time taken for loop: 1.0012233257293701 seconds


In [6]:
%%cython
#cython: profile=True, language_level=3
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
import pstats, cProfile
from rchess import Board
import numpy as np
import timeit
cimport numpy as cnp
from actionspace import map_b, map_w
from libc.math cimport log, exp

cdef class Node:
    cdef public float P, W
    cdef public unsigned int N, vloss
    cdef public bint to_play
    cdef public dict children

    def __cinit__(self, float p):
        self.P = p
        self.W = 0.0
        self.N = 0
        self.vloss = 0
        self.children = {}
        self.to_play = False

    def __getitem__(self, int move):
        return self.children[move]
    
    def __setitem__(self, int move, Node node):
        self.children[move] = node

    cdef inline Node get_child(self, int move):
        return self.children[move]

    cdef inline void add_child(self, int move, float p):
        self.children[move] = Node(p)

    cdef inline bint is_leaf(self):
        return not self.children

    cdef inline void apply_vloss(self):
        self.N += 1
        self.W -= 1
        self.vloss += 1

    cdef inline void remove_vloss(self):
        self.N -= self.vloss
        self.W += self.vloss
        self.vloss = 0

def evaluate_node(Node node, float[:] policy_logits, list legal_moves):
    cdef dict map = map_w if node.to_play else map_b
    cdef object move
    cdef Py_ssize_t moves_count = len(legal_moves)
    cdef Py_ssize_t i, p_idx
    cdef cnp.ndarray[cnp.float32_t, ndim=1] policy = np.zeros(moves_count, dtype=np.float32)
    cdef float[:] policy_mw = policy
    cdef float p, logsumexp
    cdef float _max = -99.9
    cdef float expsum = 0.0
    cdef Node child
    
    i = 0
    for i in range(moves_count):
        move = legal_moves[i]
        p_idx = map[move.uci()]
        p = policy_logits[p_idx]
        policy_mw[i] = p
        if p > _max:
            _max = p

    i = 0
    for i in range(moves_count):
        expsum += exp(policy_mw[i] - _max)

    i = 0
    logsumexp = log(expsum) + _max
    for i in range(moves_count):
        move = legal_moves[i]
        node.add_child(hash(move), exp(policy[i] - logsumexp))


board = Board()
node = Node(0.0)
node.to_play = board.to_play()
lm_uci = board.legal_moves_uci()
lm_num = board.legal_moves_num()
lm = board.legal_moves()
#for m in lm_num:
#    node[m] = Node()

policy_logits = np.random.rand(1858).astype(np.float32)

cProfile.runctx("evaluate_node(node, policy_logits, lm)", globals(), locals(), "Profile.prof")

s = pstats.Stats("Profile.prof")
s.strip_dirs().sort_stats("time").print_stats()

# time it
num_runs = 50000
eval_time = timeit.timeit(
    "evaluate_node(node, policy_logits, lm)",
    globals=globals(),
    number=num_runs
)
print()
print("Total time", eval_time) # 0.8
print("Average time", eval_time / num_runs) #1.3 e-6



In [2]:
%load_ext cython

In [14]:
%%cython
#cython: profile=True, language_level=3
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
import pstats, cProfile
from rchess import Board
import numpy as np
import timeit
cimport numpy as cnp
from actionspace import map_b, map_w
from libc.math cimport log, exp

def test1():
    cdef int batch_size = 14
    cdef list image = [i for i in range(109)]
    cdef list images = []
    cdef cnp.ndarray images_np
    for i in range(batch_size):
        images.append(image)
    images_np = np.array(images, dtype=np.int64)
    if images_np.shape[0] < batch_size:
        images_np = np.pad(images_np, ((0, batch_size - images_np.shape[0]), (0, 0)), mode='constant')

def test2():
    cdef int batch_size = 14
    cdef cnp.ndarray[cnp.int64_t, ndim=2] batch = np.zeros((16, 109), dtype=np.int64)
    cdef long[:, :] batch_mw = batch
    cdef list image = [i for i in range(109)]
    cdef Py_ssize_t i, j
    for i in range(batch_size):
        for j in range(109):
            batch_mw[i][j] = image[j]

# Use timeit to compare the two functions
num_runs = 10000
time1 = timeit.timeit("test1()", globals=globals(), number=num_runs)
time2 = timeit.timeit("test2()", globals=globals(), number=num_runs)

print()
print(f"Average time taken by test1 over {num_runs} runs: {time1 / num_runs} seconds")
print(f"Average time taken by test2 over {num_runs} runs: {time2 / num_runs} seconds")
    




Content of stderr:
In file included from /home/tomaz/.local/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929,
                 from /home/tomaz/.local/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12,
                 from /home/tomaz/.local/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5,
                 from /home/tomaz/.cache/ipython/cython/_cython_magic_c7a77ded7544762dce59dacf2aebe97aee323004.c:1261:
      |  ^~~~~~~
Average time taken by test1 over 10000 runs: 4.180069889998776e-05 seconds
Average time taken by test2 over 10000 runs: 4.937918499990701e-06 seconds


In [8]:
from rchess import Board

board = Board()
board.push_uci("e2e4")
board.push_uci("e7e5")
board.push_uci("g1f3")
board.push_uci("b8c6")
board.push_uci("f1c4")
board.push_uci("g8f6")

tmp_board = board.clone()
tmp_board.push_uci("d2d4")
tmp_board.push_uci("d7d5")
tmp_board.push_uci("c1f4")
tmp_board.push_uci("c8f5")
depth = 4


In [12]:
print(tmp_board.moves_history(tmp_board.ply() - depth))
print(tmp_board.moves_history(tmp_board.ply() - board.ply()))
print(tmp_board.moves_history(depth))

[346, 4013, 4827, 7395, 157, 3749]
[4827, 7395, 157, 3749]
[4827, 7395, 157, 3749]
