In [1]:
from ipywidgets import interactive, interact
import ipywidgets as widgets
from IPython.display import SVG, display
from matplotlib import colormaps

import json
import math
import chess

In [2]:
# Matplotlib colormap looked up by name; get_color() below maps values through it.
cmap = colormaps["PuRd"]

def hexify(f):
    """Render a fraction in [0, 1] as a two-digit lowercase hex byte.

    The fraction is scaled by 255 and truncated toward zero, so 1 maps to
    "ff" and 0.5 maps to "7f". A value outside [0, 1] trips the assertion.
    """
    assert 0 <= f <= 1
    return format(int(f * 255), "02x")

def get_color(val):
    """Turn a score into an ``#rrggbbaa`` colour string.

    Scores below 1e-3 get a fixed semi-transparent white. Any other score is
    passed through the module-level ``cmap`` and its red, green and blue
    channels are hex-encoded with ``hexify``; the alpha is always ``ff``.
    """
    if val < 1e-3:
        return "#ffffff77"
    # cmap yields four channels; its own alpha is discarded.
    r, g, b, _ = cmap(val)
    rgb_hex = "".join(hexify(channel) for channel in (r, g, b))
    return "#" + rgb_hex + "ff"

In [19]:
# Path of the JSON game trace opened by the loading cells further down.
# The commented-out assignments are alternative traces; uncomment one to switch.
filename = "../runs/15/trace2.json"
# filename = "../debug-traces/trace4.json"
# filename = "../trace2.json"
# filename = "../replay.json"

In [20]:
# Load the trace selected by `filename`; `outcome` and `steps` are read as globals below.
# NOTE: the name `f` used for the file handle is rebound by the `def f` that follows.
with open(filename) as f:
    trace = json.load(f)

outcome = trace["outcome"]
# The code below indexes each step as step[0] (UCI move string) and step[2]
# (a list of three-element per-move records).
steps = trace["steps"]
    
def f(step=widgets.IntSlider(min=0, max=len(steps), step=1, value=0)):
    """Show the position before ply ``step`` with its candidate moves colour-coded.

    Reads the global ``steps``. The first ``step`` recorded moves are played
    on a fresh board. When ``step == len(steps)`` there is no candidate list
    left, so the bare board is returned. Otherwise the result is a ``VBox``
    holding the board SVG and a row of move labels, ordered by descending
    share of the per-move counts and shaded through ``get_color``.
    """
    b = chess.Board()
    for played in steps[:step]:
        b.push(chess.Move.from_uci(played[0]))

    if step == len(steps):
        return b

    candidates = steps[step][2]
    # The small epsilon keeps the division defined when every count is zero.
    sum_num = sum(n for _, n, _ in candidates) + 1e-4
    distr = sorted(
        ((m, n / sum_num) for m, n, _ in candidates),
        key=lambda pair: pair[1],
        reverse=True,
    )

    labels = widgets.HBox([
        widgets.Label(value=f"{m}", style=dict(background=get_color(c)))
        for m, c in distr
    ])

    # Capture the SVG in an Output widget so it can be stacked above the labels.
    out = widgets.Output()
    with out:
        display(SVG(data=b._repr_svg_()))
    return widgets.VBox([out, labels])

# Build the slider UI around `f`; the trailing `;` suppresses the cell's return value.
interact(f);

interactive(children=(IntSlider(value=0, description='step', max=5), Output()), _dom_classes=('widget-interact…

In [18]:
# Show the game result stored under the trace's "outcome" key.
outcome

{'termination': 'Checkmate', 'winner': 'White'}

In [6]:
def replay(trace, step):
    """Rebuild the position before ply ``step`` and summarise its candidate moves.

    Args:
        trace: Parsed trace dict. ``trace["steps"]`` is a list whose entries
            carry the played move (UCI string) at index 0 and a list of
            ``(move, count, q_value)`` records at index 2.
        step: Number of recorded moves to play before inspecting the position.

    Returns:
        ``(board, candidates)`` where ``board`` is a ``chess.Board`` with the
        first ``step`` moves pushed and ``candidates`` is a list of
        ``(move, share, q_value, count)`` tuples in recorded order. ``share``
        is ``count`` divided by the total count (plus a small epsilon).
        ``candidates`` is empty when the step has no records or when
        ``step == len(steps)`` (all moves played).
    """
    steps = trace["steps"]
    b = chess.Board()
    for move in steps[:step]:
        b.push(chess.Move.from_uci(move[0]))

    # Past the last recorded step there is nothing to summarise; return the
    # final board with an empty list instead of raising IndexError.
    candidates = steps[step][2] if step < len(steps) else []

    # The epsilon keeps the division defined when every count is zero.
    sum_num = sum(n for _, n, _ in candidates) + 1e-4
    # Single pass: an empty candidate list simply produces an empty result.
    return b, [(m, n / sum_num, q, n) for m, n, q in candidates]

In [21]:
# Inspect step 3: replay() yields the board plus (move, share, q_value, count) tuples,
# which are then ordered by share, highest first.
brd, distr = replay(trace, 3)
distr = sorted(distr, key=lambda p: p[1], reverse=True)
distr

[('b8c6', 0.08823516436005241, 0.0689985454082489, 6),
 ('c7c6', 0.08823516436005241, 0.07015839219093323, 6),
 ('g5g4', 0.07352930363337701, 0.053960297256708145, 5),
 ('g8h6', 0.058823442906701606, 0.029189247637987137, 4),
 ('f8g7', 0.058823442906701606, 0.029189247637987137, 4),
 ('f7f6', 0.058823442906701606, 0.035698674619197845, 4),
 ('d7d6', 0.058823442906701606, 0.036903463304042816, 4),
 ('h7h5', 0.058823442906701606, 0.035698674619197845, 4),
 ('d7d5', 0.058823442906701606, 0.03630106896162033, 4),
 ('a7a5', 0.058823442906701606, 0.035698674619197845, 4),
 ('f8h6', 0.044117582180026205, 0.01602086052298546, 3),
 ('b8a6', 0.044117582180026205, 0.020511703565716743, 3),
 ('f7f5', 0.044117582180026205, 0.018641842529177666, 3),
 ('h7h6', 0.029411721453350803, -0.00022217445075511932, 2),
 ('e7e6', 0.029411721453350803, 0.00038021989166736603, 2),
 ('b7b6', 0.029411721453350803, -0.00022217445075511932, 2),
 ('e7e5', 0.029411721453350803, -0.00022217445075511932, 2),
 ('c7c5', 0

In [None]:
# Play one extra move on the replayed board and display the result.
# NOTE(review): push() mutates `brd` in place, so re-running this cell applies the move
# again; rebuild `brd` with replay() before re-running.
brd.push(chess.Move.from_uci("e2h5"))
brd

In [30]:
# List the legal moves in the position currently held by `brd`.
list(brd.legal_moves)

[Move.from_uci('e8d8')]

In [52]:
# Re-display the recorded game result.
outcome

{'termination': 'Checkmate', 'winner': 'White'}

In [4]:
import json
import chess
import libencoder
import torch
import numpy as np

# Load a different run's trace and encode its move sequence with libencoder.
# NOTE(review): this cell rebinds the globals `trace`, `outcome`, `steps` and `f`, which the
# replay widget cell also uses; the slider callback reads `steps` when it is called —
# confirm the overwrite is intended.
with open("../runs/178/trace1.json", "r") as f:
    trace = json.load(f)

outcome = trace["outcome"]
# Pair every played move (parsed from its UCI string) with an empty list.
steps = [(chess.Move.from_uci(step[0]), []) for step in trace["steps"]]

ds = libencoder.encode(steps)

In [18]:
# Entry 60 of the encoded dataset unpacks into four parts.
boards, meta, dist, moves = ds[60]
# The commented-out lines below are a disabled forward pass of the sample through `model`.
# inp = np.concatenate((boards, meta), axis=-1).astype(np.float32)
# inp = inp.transpose((2, 0, 1))
# print(np.linalg.norm(inp))
# with torch.no_grad():
#     ret_distr, ret_score = model(torch.from_numpy(inp).unsqueeze(dim=0))
# ret_distr = ret_distr.detach().cpu().numpy().squeeze()
# ret_distr = np.exp(ret_distr)
# ret_score = ret_score.detach().cpu().item()
# ret_distr, ret_score
# Show the first slice of `meta` along its leading axis.
meta[0,:,:]

array([[ 1, 31,  0,  0,  0,  0,  0],
       [ 1, 31,  0,  0,  0,  0,  0],
       [ 1, 31,  0,  0,  0,  0,  0],
       [ 1, 31,  0,  0,  0,  0,  0],
       [ 1, 31,  0,  0,  0,  0,  0],
       [ 1, 31,  0,  0,  0,  0,  0],
       [ 1, 31,  0,  0,  0,  0,  0],
       [ 1, 31,  0,  0,  0,  0,  0]], dtype=uint32)

In [8]:
# Re-import the local modules so on-disk edits are picked up without restarting the kernel.
from importlib import reload
import train
import libencoder
reload(train)
reload(libencoder)

<module 'libencoder' from '/home2/jiasen/workspace/smart-chess-rust/target/release/libencoder.so'>

In [22]:
# Load the network on CPU from a fixed checkpoint; infer() reads `model` as a global.
import torch
import nn
model = nn.load_model(device="cpu", checkpoint="../runs/14/tb_logs/chess/version_0/checkpoints/epoch=5-step=498.ckpt")

..loading checkpoint:  ../runs/14/tb_logs/chess/version_0/checkpoints/epoch=5-step=498.ckpt


In [23]:
def infer(fn, index):
    """Run the global ``model`` on one dataset entry and return its prior and value.

    Args:
        fn: Path handed to ``train.ChessDataset``.
        index: Position of the entry inside that dataset.

    Returns:
        ``(pi, value)``. ``pi`` is the model's first output, exponentiated,
        restricted to the indices in ``ds.steps[index][3]`` and renormalised
        to sum to 1. ``value`` is the model's second output, detached and
        squeezed.
    """
    dataset = train.ChessDataset(fn)
    encoded_board = dataset[index][0]
    # Presumably the policy-vector indices of this step's candidate moves — TODO confirm.
    candidate_idx = dataset.steps[index][3]

    # A batch dimension of 1 is added before the forward pass.
    log_pi, value = model(encoded_board.unsqueeze(0))

    # The first output is exponentiated, i.e. treated as log-probabilities.
    probs = log_pi.detach().exp().squeeze()
    candidate_probs = probs[candidate_idx]

    return candidate_probs / candidate_probs.sum(), value.detach().squeeze()

In [24]:
# Reload the trace selected by `filename` and gather, for one step, the network's
# prior/value and the statistics recorded in the trace.
with open(filename) as f:
    trace = json.load(f)

step = 3
prior, value = infer(filename, step)
# Sum of the per-move counts recorded at this step.
total_n = sum(n for _, n, _ in trace["steps"][step][2])

# replay() returns (board, candidates); unpack it so `distr` holds only the
# candidate list, matching how the earlier inspection cell uses replay().
board_at_step, distr = replay(trace, step)

skipping cudagraphs for unknown reason


In [25]:
# Show the summed per-move count, the renormalised prior returned by infer(), and its value output.
total_n, prior, value

(68,
 tensor([0.0450, 0.0493, 0.0460, 0.0523, 0.0492, 0.0468, 0.0468, 0.0474, 0.0476,
         0.0466, 0.0477, 0.0478, 0.0489, 0.0502, 0.0469, 0.0450, 0.0467, 0.0480,
         0.0475, 0.0484, 0.0458]),
 tensor(-0.0194))

In [26]:
def uct(prior, total_n, q_value, current_n, reverse, cpuct):
    """Compute a UCT-style score for one move and format its parts for display.

    Args:
        prior: Prior probability of the move.
        total_n: Summed count over all moves at the node.
        q_value: Value recorded for the move; it is divided by ``current_n``.
        current_n: Count recorded for the move.
        reverse: When true, the exploitation term is negated.
        cpuct: Multiplier applied to the exploration term.

    Returns:
        Three strings with three decimals each:
        ``(exploitation + exploration, exploitation, exploration)``.
    """
    sign = -1 if reverse else 1
    # The epsilon avoids dividing by zero for a move with a zero count.
    exploit = q_value / (current_n + 1e-5) * sign
    explore = math.sqrt(total_n) / (1 + current_n) * prior * cpuct
    return tuple(format(term, "0.3f") for term in (exploit + explore, exploit, explore))

In [27]:
# On odd steps `rotate` is true and is passed as uct()'s `reverse` flag, negating the q term.
rotate = step % 2 == 1
import pandas as pd
# One row per move record at this step; the `uct` column holds the three formatted
# strings returned by uct() with cpuct=0.1.
pd.DataFrame(
    [(m, n, q, prior[i].item(), uct(prior[i].item(), total_n, q, n, rotate, 0.1)) for i, (m, n, q) in enumerate(trace["steps"][step][2])],
    columns=("move", "nact", "q", "prior", "uct"),
)

Unnamed: 0,move,nact,q,prior,uct
0,g8h6,4,0.029189,0.045004,"(0.000, -0.007, 0.007)"
1,g8f6,1,-0.014807,0.049322,"(0.035, 0.015, 0.020)"
2,f8g7,4,0.029189,0.046023,"(0.000, -0.007, 0.008)"
3,f8h6,3,0.016021,0.052297,"(0.005, -0.005, 0.011)"
4,b8c6,6,0.068999,0.049162,"(-0.006, -0.011, 0.006)"
5,b8a6,3,0.020512,0.046841,"(0.003, -0.007, 0.010)"
6,h7h6,2,-0.000222,0.046814,"(0.013, 0.000, 0.013)"
7,f7f6,4,0.035699,0.047412,"(-0.001, -0.009, 0.008)"
8,e7e6,2,0.00038,0.047613,"(0.013, -0.000, 0.013)"
9,d7d6,4,0.036903,0.046608,"(-0.002, -0.009, 0.008)"
