In [239]:
from glob import glob
from itertools import product
import json
import os
from pathlib import Path
import random
import re

import chess
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

In [4]:
tqdm.pandas()

In [5]:
%cd ..

/Users/robertperrotta/github/reconchess-tools


In [36]:
from reconchess.utilities import add_pawn_queen_promotion
from reconchess_tools.utilities import simulate_sense, simulate_move, possible_requested_moves, without_opponent_pieces
from reconchess_tools.strategy import non_dominated_sense_by_own_pieces

In [7]:
import dask
import dask.bag as db
import dask.dataframe as dd
from dask.distributed import Client, progress

In [76]:
client = Client(n_workers=6, threads_per_worker=1)
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 54692 instead


0,1
Client  Scheduler: tcp://127.0.0.1:54693  Dashboard: http://127.0.0.1:54692/status,Cluster  Workers: 6  Cores: 6  Memory: 17.18 GB


In [16]:
df = pd.read_pickle("notebooks/histories.pkl")
df

Unnamed: 0_level_0,white,black,winner,timeout,actions
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
228132,attacker,StrangeFish2,False,False,a8 b1c3 f2 e7e5 b1 c3e4 e3 f8b4 a4 e4f6 d5 d8f...
86166,StrangeFish,DynamicEntropy,False,False,00 e2e4 g2 e7e5 g6 g1f3 c2 b8c6 e7 f3e5 d4 c6e...
116936,Zugzwang,StrangeFish2,True,False,e4 g2g3 d3 c7c5 d4 h2h3 e2 0000 e4 g1f3 d5 b8a...
57896,guest-22108,random,False,True,a1 h2h3 f8 e7e5 g5 h1h2 f1 f8e7 c7 h2h1 g4 a7a...
203262,Oracle,Autobot,True,False,e2 e2e4 b3 a7a6 c7 g1f3 f2 e7e6 g7 d2d4 f2 f8b...
...,...,...,...,...,...
112777,StrangeFish,Zugzwang,False,False,00 e2e4 e3 g7g6 g5 c2c3 f6 f8g7 f7 d2d4 e6 d7d...
54112,TOEFL,random,False,False,h5 g2g4 d2 a7a6 f3 d2d3 e4 h7h6 b1 g4f5 f5 d7e...
231042,StrangeFish,Oracle,True,True,00 c2c4 b3 e7e5 c7 d2d4 b3 b8c6 e7 d4e5 g2 c6e...
168582,StrangeFish,StrangeFish2,False,True,00 e2e4 e3 e7e5 g7 g1f3 e2 d7d6 e7 b1c3 c4 f8e...


In [17]:
df["num_actions"] = df["actions"].str.split().apply(len)

In [18]:
df.sort_values("num_actions").tail(20)

Unnamed: 0_level_0,white,black,winner,timeout,actions,num_actions
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
207222,attacker,trout,False,True,h5 e2e4 f1 e7e5 b5 f1c4 d1 g8f6 g6 d1h5 h2 d7d...,9954
113108,trout,attacker,True,True,d7 e2e4 c1 e7e5 h8 g1f3 a8 f8c5 g6 d2d4 h8 d8h...,9956
127858,attacker,trout,False,True,e4 e2e4 h1 e7e5 d8 f1c4 a6 d7d5 c1 d1h5 c1 b8c...,9958
154174,attacker,trout,False,True,c5 e2e4 c4 e7e5 h4 f1c4 b2 d7d5 g1 d1h5 f2 f8d...,9962
230685,attacker,trout,False,True,d1 e2e4 b2 e7e5 h8 f1c4 h6 d7d5 a8 d1h5 c1 b8c...,9962
165037,attacker,trout,False,True,g3 e2e4 b6 e7e5 f1 f1c4 b2 d7d5 h2 d1h5 b6 f8d...,9966
109663,trout,attacker,True,True,b3 e2e4 g6 e7e5 a3 d2d4 g2 f8c5 h4 g1f3 f3 d8h...,9972
140923,trout,attacker,True,True,b4 e2e4 d7 e7e5 c3 d2d4 a5 f8c5 h3 g1f3 b3 d8h...,9972
120058,trout,attacker,True,True,f4 e2e4 g2 e7e5 h3 d2d4 c5 f8c5 h6 g1f3 e3 d8h...,9988
240354,attacker,trout,False,True,e3 e2e4 g4 e7e5 f2 f1c4 a6 d7d5 d3 d1h5 b6 f8d...,9990


Todo:
- drop low-performing bots
- drop game-ending code errors
- trim histories with insufficient material and mark as draws

In [220]:
# Bots whose games are kept; a game is used only when BOTH players are in this set.
top_bots = {
    "StrangeFish2",
    "penumbra",
    "LaQ-Bot",
    "StrangeFish",
    "Stockenstein",
    "LaSalle Bot",
    "Oracle",
    "genetic",
#     "attacker",
}
# Keep a game when:
#   - white and black are both listed in top_bots,
#   - it contains at least one recorded action, and
#   - it is NOT a timeout that happened within the first 20 actions.
# The result is sorted by game id so later processing order is deterministic.
filtered = df.loc[
    df["white"].isin(top_bots) &
    df["black"].isin(top_bots) &
    (df["num_actions"] > 0) &
    ~(df["timeout"] & (df["num_actions"] < 20))
].sort_index()
filtered

Unnamed: 0_level_0,white,black,winner,timeout,actions,num_actions
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
15141,genetic,penumbra,False,False,a1 d2d4 h1 e7e5 a1 0000 c1 g8f6 a1 0000 g3 d7d...,44
15142,penumbra,genetic,True,False,c3 e2e4 a1 0000 a5 d2d4 a1 0000 g6 b1c3 a1 000...,34
15623,genetic,penumbra,False,False,a1 d2d4 c1 g8f6 a1 0000 b5 e7e5 a1 0000 d2 d7d...,48
15624,penumbra,genetic,True,False,e4 e2e4 a1 0000 h5 d2d4 a1 0000 a8 g1f3 a1 000...,54
15833,StrangeFish,Oracle,True,True,f2 c2c4 b2 e7e5 b6 d2d4 b3 b8c6 e6 b1c3 e2 f8b...,310
...,...,...,...,...,...,...
246905,Oracle,StrangeFish2,True,False,b5 e2e4 c2 c7c5 b5 g1f3 d4 b8c6 b6 f1b5 c2 e7e...,214
246916,StrangeFish,Oracle,True,False,00 e2e4 g3 d7d5 g5 d2d4 b4 e7e5 b5 b1c3 e2 d5e...,26
246917,StrangeFish2,StrangeFish,False,False,00 d2d4 b4 c7c5 e7 e2e4 e2 c5d4 d7 c2c3 e2 b8c...,196
246939,StrangeFish2,Oracle,False,False,00 e2e4 f2 e7e6 e7 c2c4 e2 d7d5 e7 c4d5 b2 e6d...,160


In [221]:
filtered.sort_values("num_actions").tail(20)

Unnamed: 0_level_0,white,black,winner,timeout,actions,num_actions
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
197222,StrangeFish,Stockenstein,False,True,00 e2e4 f2 e7e5 g5 g1f3 e2 g8f6 e7 f3e5 00 d7d...,1978
222904,StrangeFish2,StrangeFish,True,True,00 e2e4 g2 e7e5 c7 b1c3 e2 g8f6 e6 g1f3 e2 b8c...,1992
217796,StrangeFish2,Oracle,True,True,00 c2c4 b2 e7e5 d6 e2e3 b3 g8f6 c5 b1c3 e2 d7d...,1994
156889,StrangeFish2,Oracle,False,False,00 e2e3 g3 e7e5 g6 d2d4 c2 e5d4 b7 e3d4 f2 g8f...,2040
166360,Oracle,StrangeFish2,True,True,b3 e2e4 f2 e7e5 b7 b1c3 e2 g8f6 f7 g1f3 c2 b8c...,2056
41716,genetic,penumbra,False,True,c5 e2e3 d3 d7d5 g7 d1h5 h4 g8f6 c7 h5f3 h5 e7e...,2093
48876,StrangeFish2,Oracle,False,True,00 e2e4 g3 e7e5 e7 b1c3 c2 d7d5 e7 c3d5 f2 g8f...,2142
208156,Oracle,StrangeFish,True,True,f2 e2e4 g4 e7e5 g5 g1f3 e2 b8c6 c7 d2d4 f2 g8f...,2252
175757,StrangeFish2,StrangeFish,False,True,00 d2d4 e3 d7d5 d7 c2c4 d2 g8f6 d7 c4d5 00 c7c...,2274
119706,genetic,Oracle,False,True,g7 d2d4 g4 e7e5 d7 c1g5 c2 e5d4 g7 g5d8 d3 e8d...,2390


In [222]:
filtered.sort_values("num_actions").head(20)

Unnamed: 0_level_0,white,black,winner,timeout,actions,num_actions
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
121061,genetic,Oracle,True,False,g7 e2e4 g4 e7e5 d7 f1b5 c2 d7d5 e7 b5e8,10
120412,genetic,Oracle,True,False,g7 e2e4 g4 e7e5 d7 f1b5 c2 d7d5 e7 b5e8,10
44556,genetic,LaSalle Bot,True,False,d3 e2e3 e2 d7d5 f7 f1b5 d2 e7e5 c7 b5e8,10
43249,LaSalle Bot,StrangeFish,False,False,e7 e2e4 f3 c7c5 e7 d2d4 e2 d8a5 e7 g1f3 e2 a5e1,12
41848,LaSalle Bot,genetic,False,False,e7 g1f3 b2 e7e6 e7 d2d4 f2 f8b4 e7 d4c5 c2 b4e1,12
123914,Oracle,penumbra,False,False,d6 e2e4 e3 e7e5 g5 g1f3 e2 f8b4 c7 d2d4 e2 b4e1,12
42022,genetic,LaSalle Bot,True,False,f7 e2e3 e2 g8f6 g5 d1f3 e2 e7e5 d5 f1b5 e2 d7d...,14
42309,genetic,penumbra,True,False,e5 e2e3 a6 e7e5 f7 d1g4 g1 d7d5 e7 f1b5 e4 g8f...,14
41674,genetic,LaSalle Bot,True,False,e4 e2e3 e2 d7d5 g7 d1h5 e2 c8g4 c7 f1b5 h5 g4h...,14
18405,Oracle,genetic,True,False,f6 e2e4 d2 f7f5 f7 e4f5 h5 0000 c7 d1h5 d1 a7a...,14


In [223]:
def has_underpromotion(game):
    """Return True if any requested move in the game promotes to a non-queen.

    ``game["actions"]`` is a space-separated string in which every second
    token (odd positions) is a requested move in UCI form. A five-character
    UCI whose last character is not ``"q"`` is an underpromotion.
    """
    requested_moves = game["actions"].split()[1::2]
    return any(len(uci) == 5 and not uci.endswith("q") for uci in requested_moves)

def underpromotions(game):
    """Return the game's underpromotion moves as one space-separated string.

    Only the move tokens (every second entry of ``game["actions"]``) are
    inspected; the result is empty when the game has no underpromotion.
    """
    found = []
    for uci in game["actions"].split()[1::2]:
        if len(uci) == 5 and not uci.endswith("q"):
            found.append(uci)
    return " ".join(found)

x = filtered.loc[filtered.apply(has_underpromotion, axis=1)].copy()
x["under"] = x.apply(underpromotions, axis=1)
x

Unnamed: 0_level_0,white,black,winner,timeout,actions,num_actions,under
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
40007,StrangeFish,Oracle,True,True,00 c2c4 b2 e7e5 f6 e2e3 b3 g8f6 e7 g1f3 e2 e5e...,652,g7g8n g7f8n g7f8b g7g8b g7f8n g7f8n g7f8b
40164,Oracle,genetic,True,False,g7 e2e4 b3 e7e6 g7 g1f3 f2 d8f6 c7 b1c3 f2 f6g...,158,b2b1n
40262,Oracle,genetic,True,False,f6 e2e4 g4 e7e6 b6 d2d4 c2 d8f6 c5 b1c3 e3 f6g...,70,e7e8r
40430,Oracle,genetic,True,False,b5 e2e4 f2 e7e6 b7 b1c3 e2 b8c6 f7 g1f3 b4 d8f...,62,g2f1n
40553,genetic,Oracle,False,False,g5 e2e3 b3 d7d5 g7 d1h5 c4 e7e5 c7 h5f3 f2 g8f...,84,f2g1r
...,...,...,...,...,...,...,...
232547,StrangeFish2,Oracle,True,True,00 e2e4 b4 d7d5 e7 e4d5 f3 d8d5 00 g1e2 b4 b8c...,182,f2f1r
234008,Oracle,StrangeFish,False,True,e5 e2e4 d3 e7e6 b6 b1c3 e2 d7d5 f7 d2d4 b4 d5d...,112,b7a8n
235176,Oracle,StrangeFish,False,True,f6 e2e4 g3 e7e5 g6 d2d4 e2 g8f6 b5 g1f3 e2 e5d...,93,g7f8r
238621,Oracle,StrangeFish,True,False,e2 e2e4 g4 e7e5 g5 d2d4 e2 e5d4 d7 g1f3 e2 d8e...,174,e7e8r


The next step is to expand each game into state transitions. The vanilla approach would be to simply keep track of decisions and observations for each player. We might also want to store MHT boards, but that would take a lot of computation! We could store the positions of our own pieces as an intermediate approach. For now, we will store all of this in a general way, such as UCIs and FENs, rather than bitboards, matrices, or other architecture-specific formats.

In [137]:
def expand_game(game):
    """Expand one recorded game into a table of per-player state transitions.

    Parameters
    ----------
    game : mapping / pandas row
        Must provide ``game["actions"]`` (space-separated, alternating sense
        square and requested move in UCI, starting with white) and
        ``game["winner"]`` (truthy when white won). ``game.name`` is only used
        in the failure report.

    Returns
    -------
    pandas.DataFrame
        One row per decision with columns ``step`` ("sense" or "move"),
        ``before`` / ``after`` (comma-joined observation history of the acting
        player), ``actions`` (space-separated available actions), ``action``
        (the one taken), ``eventual_payout`` (+1 / -1 from the acting player's
        perspective, 0 for games trimmed to a draw) and ``is_terminal``
        (True on each player's final recorded decision).

    Notes
    -----
    * White's very first sense is forced to "00" and produces no transition.
    * Once 30 captures have happened, every further move is counted; after
      more than 4 such moves the history is cut and every payout is set to 0.
    * Any unexpected error prints the game and board state, then re-raises.
    """
    columns = ["step", "before", "actions", "action", "after", "eventual_payout"]
    transitions = []

    board = chess.Board()
    actions = iter(game["actions"].split())

    white_history = ["w"]
    black_history = ["b"]

    active_history = white_history
    waiting_history = black_history
    active_payout = 1 if game["winner"] else -1

    is_first = True
    num_captures = 0
    moves_with_kings_only = 0

    # Pre-bind everything the failure report prints so the report itself can
    # never raise NameError and hide the original exception.
    sense = sense_result = move = taken_move = capture_square = None

    try:
        while True:

            active_state_pre_sense = ",".join(active_history)

            sense_str = next(actions)

            # Override initial sense decisions since they have no impact
            if is_first:
                sense_str = "00"

            sense = None if sense_str == "00" else chess.SQUARE_NAMES.index(sense_str)
            active_history.append(sense_str)
            sense_result = "".join(
                "_" if piece is None else str(piece)
                for _, piece in simulate_sense(board, sense)
            )
            active_history.append(sense_result)
            active_state_post_sense = ",".join(active_history)

            if is_first:
                is_first = False
            else:
                transitions.append({
                    "step": "sense",
                    "before": active_state_pre_sense,
                    "actions": " ".join(chess.SQUARE_NAMES[sq] for sq in non_dominated_sense_by_own_pieces(board)),
                    "action": sense_str,
                    "after": active_state_post_sense,
                    "eventual_payout": active_payout,
                })

            # A missing move token (odd-length history) ends the game here
            # via StopIteration, leaving a sense as the last transition.
            move = chess.Move.from_uci(next(actions))
            move = add_pawn_queen_promotion(board, move)
            active_history.append(str(move))
            taken_move, capture_square = simulate_move(board, move)

            active_history.append(str(taken_move or chess.Move.null()))
            # Keep the raw capture square (int or None) separate from its
            # textual form: the capture counter below needs the None check.
            capture_name = "00" if capture_square is None else chess.SQUARE_NAMES[capture_square]
            active_history.append(capture_name)
            waiting_history.append(capture_name)
            active_state_post_move = ",".join(active_history)

            transitions.append({
                "step": "move",
                "before": active_state_post_sense,
                "actions": " ".join(str(candidate) for candidate in possible_requested_moves(board)),
                "action": str(move),
                "after": active_state_post_move,
                "eventual_payout": active_payout,
            })

            if num_captures == 30:
                moves_with_kings_only += 1

            if moves_with_kings_only > 4:
                # override result and declare it a draw
                for transition in transitions:
                    transition["eventual_payout"] = 0
                break

            # Count real captures only (the textual form is never None).
            if capture_square is not None:
                num_captures += 1

            board.push(taken_move)
            active_history, waiting_history = waiting_history, active_history
            active_payout *= -1

    except StopIteration:
        pass

    except Exception:
        print(game.name)
        print(game["actions"])
        print(" ".join(actions))
        print(board)
        print(board.fen())
        print(sense, sense_result)
        print(move, taken_move, capture_square)
        print(",".join(active_history))
        print(",".join(waiting_history))
        raise

    transitions = pd.DataFrame(transitions, columns=columns)
    transitions["is_terminal"] = False
    if len(transitions) == 0:
        return transitions

    # Single-step positional assignment: chained `df[col].iloc[i] = ...` is a
    # silent no-op under pandas copy-on-write and warns otherwise.
    terminal_col = transitions.columns.get_loc("is_terminal")
    last = len(transitions) - 1
    transitions.iloc[last, terminal_col] = True

    # The other player's final decision is the closest earlier "move" row:
    # directly before a trailing sense, or two rows before a trailing move.
    if last >= 1:
        other = last - 1 if transitions["step"].iloc[last - 1] == "move" else last - 2
        if other >= 0:
            transitions.iloc[other, terminal_col] = True
    return transitions

def save_game(game):
    """Write the expanded transitions of ``game`` to a CSV named after its id.

    Games whose CSV already exists are skipped, so the export can be resumed.
    """
    filename = f"notebooks/transitions/{game.name}.csv"
    if os.path.exists(filename):
        return
    expand_game(game).to_csv(filename, index=False)

os.makedirs("notebooks/transitions", exist_ok=True)

expand_game(filtered.loc[121061])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0,step,before,actions,action,after,eventual_payout,is_terminal
0,move,"w,00,",g1h3 g1f3 b1c3 b1a3 h2h3 g2g3 f2f3 e2e3 d2d3 c...,e2e4,"w,00,,e2e4,e2e4,00",1,False
1,sense,"b,00",b2 c2 d2 e2 f2 g2 b3 c3 d3 e3 f3 g3 b4 c4 d4 e...,g4,"b,00,g4,_________",-1,False
2,move,"b,00,g4,_________",g8h6 g8f6 b8c6 b8a6 h7h6 g7g6 f7f6 e7e6 d7d6 c...,e7e5,"b,00,g4,_________,e7e5,e7e5,00",-1,False
3,sense,"w,00,,e2e4,e2e4,00,00",d3 e3 f3 b4 c4 d4 e4 f4 g4 b5 c5 d5 e5 f5 g5 b...,d7,"w,00,,e2e4,e2e4,00,00,d7,___pp_bqk",1,False
4,move,"w,00,,e2e4,e2e4,00,00,d7,___pp_bqk",g1h3 g1f3 g1e2 f1a6 f1b5 f1c4 f1d3 f1e2 e1e2 d...,f1b5,"w,00,,e2e4,e2e4,00,00,d7,___pp_bqk,f1b5,f1b5,00",1,False
5,sense,"b,00,g4,_________,e7e5,e7e5,00,00",b2 c2 d2 e2 f2 g2 b3 c3 d3 e3 f3 g3 b4 c4 d4 e...,c2,"b,00,g4,_________,e7e5,e7e5,00,00,c2,NBQPPP___",-1,False
6,move,"b,00,g4,_________,e7e5,e7e5,00,00,c2,NBQPPP___",g8e7 g8h6 g8f6 f8e7 f8d6 f8c5 f8b4 f8a3 e8e7 d...,d7d5,"b,00,g4,_________,e7e5,e7e5,00,00,c2,NBQPPP___...",-1,True
7,sense,"w,00,,e2e4,e2e4,00,00,d7,___pp_bqk,f1b5,f1b5,0...",e2 f2 g2 d3 e3 f3 b4 c4 d4 e4 f4 g4 b5 c5 d5 e...,e7,"w,00,,e2e4,e2e4,00,00,d7,___pp_bqk,f1b5,f1b5,0...",1,False
8,move,"w,00,,e2e4,e2e4,00,00,d7,___pp_bqk,f1b5,f1b5,0...",b5e8 b5d7 b5c6 b5a6 b5c4 b5a4 b5d3 b5e2 b5f1 g...,b5e8,"w,00,,e2e4,e2e4,00,00,d7,___pp_bqk,f1b5,f1b5,0...",1,True


In [138]:
#!rm -rf notebooks/transitions/*.csv

In [139]:
filtered.progress_apply(save_game, axis=1);

  0%|          | 0/18021 [00:00<?, ?it/s]

In [140]:
transitions = dd.read_csv("notebooks/transitions/*.csv")
transitions

Unnamed: 0_level_0,step,before,actions,action,after,eventual_payout,is_terminal
npartitions=18021,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
,object,object,object,object,object,int64,bool
,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...
,...,...,...,...,...,...,...


In [141]:
transitions.loc[transitions["is_terminal"]].head(5)

Unnamed: 0,step,before,actions,action,after,eventual_payout,is_terminal
84,move,"w,00,,e2e4,e2e4,00,00,f7,____ppkbn,g1f3,g1f3,0...",f3a8 f3b7 f3c6 f3h5 f3d5 f3g4 f3e4 f3e2 f3d1 c...,f3d1,"w,00,,e2e4,e2e4,00,00,f7,____ppkbn,g1f3,g1f3,0...",-1,True
86,move,"b,00,e2,QKBP_P___,e7e5,e7e5,00,00,e2,QKBP_P__N...",f8g8 f8g7 f8e7 e8d8 e8c8 e8b8 e8a8 e8e7 e8e6 e...,e8e1,"b,00,e2,QKBP_P___,e7e5,e7e5,00,00,e2,QKBP_P__N...",1,True


In [142]:
len(transitions)

2551197

In [10]:
file = glob("notebooks/transitions/*.csv")[0]
file

'notebooks/transitions/163039.csv'

My plan is to take the naive approach for now and encode every state (before and after) as a sequence of frames and a mask of available actions. Every transition will be characterized by a pair of states, the action that connects them, a flag marking terminal states, and the payout of the state if terminal. Then we will follow ideas from TD3 and train an estimator of expected (discounted) future reward and a policy to maximize the expected future reward. The ragged collection of states will be stored independently then stacked and padded per-batch at training time.

And for lack of a more clever approach, I will train a network for each color separately. I'd like to flip the board so the two players could share a network, but the asymmetry of move order is significant in reconchess: white knows the true board state before making its first move, black does not.

In [35]:
chess.PIECE_NAMES

[None, 'pawn', 'knight', 'bishop', 'rook', 'queen', 'king']

In [110]:
num_channels = 19
[
    # My pieces are indexed as piece_type - 1
    MY_PAWNS,
    MY_KNIGHTS,
    MY_BISHOPS,
    MY_ROOKS,
    MY_QUEENS,
    MY_KING,
    # My castling rights are flags at the rook locations
    MY_CASTLING_RIGHTS,
    # Sense shows squares seen and opponent pieces by type as piece_type + 7
    WAS_SENSED,
    OP_PAWNS_SEEN,
    OP_KNIGHTS_SEEN,
    OP_BISHOPS_SEEN,
    OP_ROOKS_SEEN,
    OP_QUEENS_SEEN,
    OP_KING_SEEN,
    # Move info includes from-square, requested to-square, resulting to-square, capture-square
    REQUESTED_FROM_SQUARE,
    REQUESTED_TO_SQUARE,
    TAKEN_TO_SQUARE,
    CAPTURE_SQUARE,
    # Opponent capture square is recorded, too
    OP_CAPTURE_SQUARE,
] = list(range(num_channels))


def empty_frame():
    """Return an all-False boolean array of shape (8, 8, num_channels)."""
    shape = (8, 8, num_channels)
    return np.zeros(shape, dtype=bool)

In [113]:
def set_square(frame, square, channel):
    """Set ``frame[rank, file, channel]`` to True for the given chess square.

    The array is modified in place; axis 0 is the square's rank and axis 1
    its file.
    """
    rank = chess.square_rank(square)
    file = chess.square_file(square)
    frame[rank, file, channel] = True

In [178]:
def set_my_pieces(frame, board: chess.Board):
    """Mark the side-to-move's pieces and castling rooks on ``frame`` in place.

    Each piece type is written to channel ``piece_type - 1``. Squares that are
    both in ``board.castling_rights`` and occupied by the side to move are
    flagged on the MY_CASTLING_RIGHTS channel.
    """
    mover = board.turn
    for piece_type in chess.PIECE_TYPES:
        channel = piece_type - 1
        for square in board.pieces(piece_type, mover):
            set_square(frame, square, channel)

    castling_squares = chess.SquareSet(board.castling_rights & board.occupied_co[mover])
    for square in castling_squares:
        set_square(frame, square, MY_CASTLING_RIGHTS)


board = chess.Board()
frame = empty_frame()
set_my_pieces(frame, board)
print("MY_PAWNS")
print(frame[::-1, :, MY_PAWNS].astype(int))
print("MY_KNIGHTS")
print(frame[::-1, :, MY_KNIGHTS].astype(int))
print("MY_BISHOPS")
print(frame[::-1, :, MY_BISHOPS].astype(int))
print("MY_ROOKS")
print(frame[::-1, :, MY_ROOKS].astype(int))
print("MY_QUEENS")
print(frame[::-1, :, MY_QUEENS].astype(int))
print("MY_KING")
print(frame[::-1, :, MY_KING].astype(int))
print("MY_CASTLING_RIGHTS")
print(frame[::-1, :, MY_CASTLING_RIGHTS].astype(int))

MY_PAWNS
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [1 1 1 1 1 1 1 1]
 [0 0 0 0 0 0 0 0]]
MY_KNIGHTS
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 1 0]]
MY_BISHOPS
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 1 0 0 1 0 0]]
MY_ROOKS
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 1]]
MY_QUEENS
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]]
MY_KING
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0]]
MY_CASTLING_RIGHTS
[[0 0

In [179]:
def set_sense(frame, board, square):
    """Record on ``frame`` (in place) what sensing around ``square`` reveals.

    Every on-board square of the 3x3 window centred on ``square`` is flagged
    on WAS_SENSED. Pieces in the window that do not belong to the side to move
    are written to channel ``piece_type + 7``.
    """
    center_rank = chess.square_rank(square)
    center_file = chess.square_file(square)
    # Clamp the 3x3 window to the board instead of testing each offset.
    ranks = range(max(center_rank - 1, 0), min(center_rank + 1, 7) + 1)
    files = range(max(center_file - 1, 0), min(center_file + 1, 7) + 1)

    for r in ranks:
        for f in files:
            frame[r, f, WAS_SENSED] = True
            piece = board.piece_at(chess.square(f, r))
            if piece is None or piece.color == board.turn:
                continue
            frame[r, f, piece.piece_type + 7] = True
                        

board = chess.Board()
frame = empty_frame()
set_sense(frame, board, chess.D7)
print("WAS_SENSED")
print(frame[::-1, :, WAS_SENSED].astype(int))
print("OP_PAWNS_SEEN")
print(frame[::-1, :, OP_PAWNS_SEEN].astype(int))
print("OP_KNIGHTS_SEEN")
print(frame[::-1, :, OP_KNIGHTS_SEEN].astype(int))
print("OP_BISHOPS_SEEN")
print(frame[::-1, :, OP_BISHOPS_SEEN].astype(int))
print("OP_ROOKS_SEEN")
print(frame[::-1, :, OP_ROOKS_SEEN].astype(int))
print("OP_QUEENS_SEEN")
print(frame[::-1, :, OP_QUEENS_SEEN].astype(int))
print("OP_KING_SEEN")
print(frame[::-1, :, OP_KING_SEEN].astype(int))

WAS_SENSED
[[0 0 1 1 1 0 0 0]
 [0 0 1 1 1 0 0 0]
 [0 0 1 1 1 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]
OP_PAWNS_SEEN
[[0 0 0 0 0 0 0 0]
 [0 0 1 1 1 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]
OP_KNIGHTS_SEEN
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]
OP_BISHOPS_SEEN
[[0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]
OP_ROOKS_SEEN
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]
OP_QUEENS_SEEN
[[0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 

In [180]:
def set_move(frame, requested_move, taken_move, capture_square, op_capture_square):
    """Write the outcome of one move onto ``frame`` in place.

    Marks the requested from/to squares, the square the piece ended on (the
    from-square when ``taken_move`` is None), and, when given, this player's
    capture square and the opponent's capture square.
    """
    origin = requested_move.from_square
    set_square(frame, origin, REQUESTED_FROM_SQUARE)
    set_square(frame, requested_move.to_square, REQUESTED_TO_SQUARE)

    landing = origin if taken_move is None else taken_move.to_square
    set_square(frame, landing, TAKEN_TO_SQUARE)

    optional_marks = (
        (capture_square, CAPTURE_SQUARE),
        (op_capture_square, OP_CAPTURE_SQUARE),
    )
    for marked_square, channel in optional_marks:
        if marked_square is not None:
            set_square(frame, marked_square, channel)

            
board = chess.Board()
for move in "e2e4 f7f5".split():
    board.push(chess.Move.from_uci(move))
frame = empty_frame()
set_move(frame, chess.Move.from_uci("e4f5"), chess.Move.from_uci("e4f5"), chess.F5, None)
print("REQUESTED_FROM_SQUARE")
print(frame[::-1, :, REQUESTED_FROM_SQUARE].astype(int))
print("REQUESTED_TO_SQUARE")
print(frame[::-1, :, REQUESTED_TO_SQUARE].astype(int))
print("TAKEN_TO_SQUARE")
print(frame[::-1, :, TAKEN_TO_SQUARE].astype(int))
print("CAPTURE_SQUARE")
print(frame[::-1, :, CAPTURE_SQUARE].astype(int))
print("OP_CAPTURE_SQUARE")
print(frame[::-1, :, OP_CAPTURE_SQUARE].astype(int))

REQUESTED_FROM_SQUARE
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]
REQUESTED_TO_SQUARE
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]
TAKEN_TO_SQUARE
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]
CAPTURE_SQUARE
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]
OP_CAPTURE_SQUARE
[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]


In [206]:
# Sanity check: a null move has from_square == to_square, so its offset is 0
# and it maps to the "pass" entry of DIFFS.
# NOTE(review): DIFFS is defined in a later cell, so this cell fails on a
# fresh kernel unless that cell is run first.
move = chess.Move.null()
move.from_square, DIFFS.index(move.to_square - move.from_square)

(0, 0)

In [253]:
# Move-type table: each entry is a square-index offset (to_square - from_square).
# A move is encoded as (from_square, DIFFS.index(offset)).
# NOTE: some offsets occur more than once: 7 and -7 are both "7 steps of +-1"
# and "1 step of +-7", and the knight offsets 6 / -6 equal "6 steps of +-1".
# list.index returns the first occurrence, so the later duplicates are never
# selected; the (from_square, offset) pair still identifies the move.
DIFFS = [
    # Pass
    0,
    # Sliding moves
    *[
        k * i
        for i in range(1, 8)
        for k in [-1, -7, -8, -9, 1, 9, 8, 7]
    ],
    # Knight moves
    -10, -17, -15, -6, 10, 17, 15, 6,
]
# 1 pass + 8 directions * 7 distances + 8 knight offsets = 65 entries.
num_move_types = len(DIFFS)

def empty_mask():
    """Return an all-False boolean action mask.

    The last axis has one plane per entry of DIFFS plus one extra final plane,
    which the sense helpers address as index -1.
    """
    num_planes = num_move_types + 1
    return np.zeros((8, 8, num_planes), dtype=bool)

def set_sense_mask(mask, board):
    """Flag every allowed sense square on the last plane of ``mask`` in place.

    Allowed squares are those returned by non_dominated_sense_by_own_pieces.
    TODO: revise game histories to use non-dominated sense actions.
    """
    sense_plane = -1
    allowed_squares = non_dominated_sense_by_own_pieces(board)
    for square in allowed_squares:
        set_square(mask, square, sense_plane)
        
def set_move_mask(mask, board):
    """Flag every requestable move on ``mask`` in place.

    Each move is written at its from-square, on the plane given by the
    position of ``to_square - from_square`` in DIFFS.
    """
    for candidate in possible_requested_moves(board):
        origin = candidate.from_square
        offset = candidate.to_square - origin
        set_square(mask, origin, DIFFS.index(offset))

In [254]:
def stack_frames(transitions, payout):
    """Turn one player's per-step records into training transitions.

    Parameters
    ----------
    transitions : pandas.DataFrame
        One row per decision with object columns ``mask``, ``action`` and
        ``result`` (each cell a numpy array). Modified in place.
    payout : int
        Final payout for this player, copied to every row.

    Returns
    -------
    pandas.DataFrame
        The same frame with ``result`` replaced by:

        * ``stack``: frames 0..i stacked along a new leading axis,
        * ``mask after``: the mask of the following step, all-False on the
          last row,
        * ``is_terminal``: True only on the last row,
        * ``future payout``: ``payout``.

        An input with no rows yields an empty frame with the output columns.
    """
    output_columns = ["mask", "action", "stack", "mask after", "is_terminal", "future payout"]
    num_steps = len(transitions)
    if num_steps == 0:
        # A player may have no recorded decisions; a DataFrame built from an
        # empty list has no columns to index.
        return pd.DataFrame(columns=output_columns)

    frames = list(transitions["result"])
    masks = list(transitions["mask"])

    # Fill explicit 1-D object arrays so pandas stores one ndarray per cell
    # and never tries to interpret the arrays as extra dimensions.
    stacks = np.empty(num_steps, dtype=object)
    masks_after = np.empty(num_steps, dtype=object)
    for i in range(num_steps):
        stacks[i] = np.stack(frames[:i + 1])
        if i + 1 < num_steps:
            masks_after[i] = masks[i + 1]
        else:
            # Fresh zero array: must not alias (and so clobber) any real mask.
            masks_after[i] = np.zeros_like(masks[0])

    transitions["stack"] = stacks
    transitions["mask after"] = masks_after
    transitions["is_terminal"] = False
    # Address the column by name rather than assuming it is the last one.
    transitions.iloc[-1, transitions.columns.get_loc("is_terminal")] = True
    transitions["future payout"] = payout
    transitions.drop(columns=["result"], inplace=True)
    return transitions

def expand_game(game):
    """Expand one recorded game into per-colour tables of encoded transitions.

    ``game["actions"]`` is a space-separated string alternating sense square
    and requested move (UCI), starting with white; ``game["winner"]`` is
    truthy when white won.

    Returns ``(white_transitions, black_transitions)``, each produced by
    ``stack_frames``, or ``None`` when a move that was actually taken is a
    non-queen promotion (the game is discarded).

    Each decision contributes one record with a ``mask`` of allowed actions,
    a one-hot ``action`` and a ``result`` frame. White's first sense is
    skipped. Any unexpected error prints the game and board, then re-raises.
    """
    white_transitions = []
    black_transitions = []

    board = chess.Board()
    actions = iter(game["actions"].split())

    white_history = []
    black_history = []

    # The two lists are swapped after every move so that `active_transitions`
    # always belongs to the player about to act.
    active_transitions = white_transitions
    waiting_transitions = black_transitions
    white_payout = 1 if game["winner"] else -1

    is_first = True
    num_captures = 0
    moves_with_kings_only = 0

    try:
        while True:

            sense = next(actions)

            # Skip initial sense decisions since they have no impact
            if is_first:
                is_first = False
            else:
                frame = empty_frame()
                set_my_pieces(frame, board)

                mask = empty_mask()
                set_sense_mask(mask, board)

                action = empty_mask()
                if sense != "00":
                    square = chess.SQUARE_NAMES.index(sense)
                    r = chess.square_rank(square)
                    f = chess.square_file(square)
                    # If the recorded sense is not an allowed square, replace
                    # it with the allowed square at the smallest Manhattan
                    # distance (first one found on ties).
                    if not mask[r, f, -1]:
                        rs, fs = np.where(mask[:, :, -1])
                        dist = abs(rs - r) + abs(fs - f)
                        i = np.argmin(dist)
                        square = chess.square(fs[i], rs[i])
                else:
                    # Use first valid square in place of skipped sense
                    # (argmax on the 2-D plane yields the flat index of the
                    # first True entry, which is used directly as the square)
                    square = np.argmax(mask[:, :, -1])

                set_square(action, square, -1)
                set_sense(frame, board, square)
                assert np.any(mask * action), f"Sense {chess.SQUARE_NAMES[square]} ({square}) not in mask!\n{board}\n{chess.COLOR_NAMES[board.turn]} to sense\n{mask[::-1, :, -1].astype(int)}"

                active_transitions.append({
                    "mask": mask,
                    "action": action,
                    "result": frame,
                })

            # A missing move token ends the loop via StopIteration below.
            move = chess.Move.from_uci(next(actions))
            move = add_pawn_queen_promotion(board, move)
            result, capture_square = simulate_move(board, move)

            # Discard games in which underpromotion occurred
            # NOTE(review): this dereferences `result` unconditionally, while
            # set_move handles taken_move being None; confirm simulate_move
            # never returns None here.
            if result.promotion is not None and result.promotion != 5:
                print("Discarding game containing underpromotion", move, result)
                return

            # Correct requested underpromotions to queen promotions
            if move.promotion is not None and move.promotion != 5:
                move.promotion = 5

            frame = empty_frame()
            set_my_pieces(frame, board)
            # Assume no opponent capture square and correct later as needed
            set_move(frame, move, result, capture_square, None)

            action = empty_mask()
            set_square(action, move.from_square, DIFFS.index(move.to_square - move.from_square))

            # Retroactively tell the opponent where they lost a piece by
            # marking it on their most recent frame.
            if capture_square is not None:
                set_square(waiting_transitions[-1]["result"], capture_square, OP_CAPTURE_SQUARE)

            mask = empty_mask()
            set_move_mask(mask, board)

            assert np.any(mask * action), f"Move {move} not in allowed moves on board {board}!"

            active_transitions.append({
                "mask": mask,
                "action": action,
                "result": frame,
            })

            # Once 30 captures have been recorded, count every further move
            # (presumably only the two kings remain at that point).
            if num_captures == 30:
                moves_with_kings_only += 1

            if moves_with_kings_only > 4:
                # override result and declare it a draw
                white_payout = 0
                break

            if capture_square is not None:
                num_captures += 1

            board.push(result)
            active_transitions, waiting_transitions = waiting_transitions, active_transitions

    except StopIteration:
        # Normal end of the recorded action list.
        pass

    except:
        # Print context for debugging, then re-raise the original error.
        print(game.name)
        print(game["white"])
        print(game["black"])
        print(game["actions"])
        print(" ".join(actions))
        print(board)
        print(board.fen())
        # print(sense, result)
        # print(move, result, capture_square)
        raise

    # Black's payout is the negation of white's (0 stays 0 for a draw).
    white_transitions = stack_frames(pd.DataFrame(white_transitions), white_payout)
    black_transitions = stack_frames(pd.DataFrame(black_transitions), -white_payout)

    return white_transitions, black_transitions

def save_game(game):
    """Expand ``game`` and pickle each colour's transitions under its game id.

    Nothing is written when ``expand_game`` returns None (discarded game).
    """
    expanded = expand_game(game)
    if expanded is None:
        return
    white_frame, black_frame = expanded
    white_frame.to_pickle(f"notebooks/white_transitions/{game.name}.pkl", protocol=4)
    black_frame.to_pickle(f"notebooks/black_transitions/{game.name}.pkl", protocol=4)

os.makedirs("notebooks/white_transitions", exist_ok=True)
os.makedirs("notebooks/black_transitions", exist_ok=True)

white_transitions, black_transitions = expand_game(filtered.loc[40007])

# white_transitions["mask"] = white_transitions["mask"].apply(lambda x: x.shape)
# white_transitions["mask after"] = white_transitions["mask after"].apply(lambda x: x.shape)
# white_transitions["action"] = white_transitions["action"].apply(lambda x: x.shape)
# white_transitions["result"] = white_transitions["result"].apply(lambda x: x.shape)
# white_transitions["stack"] = white_transitions["stack"].apply(lambda x: x.shape)

white_transitions

Unnamed: 0,mask,action,stack,mask after,is_terminal,future payout
0,"[[[True, False, False, False, False, False, Fa...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,1
1,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, False, Fa...",False,1
2,"[[[True, False, False, False, False, False, Fa...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,1
3,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, False, Fa...",False,1
4,"[[[True, False, False, False, False, False, Fa...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,1
...,...,...,...,...,...,...
320,"[[[True, False, False, False, False, False, Fa...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,1
321,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, False, Fa...",False,1
322,"[[[True, False, False, False, False, False, Fa...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,1
323,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, False, Fa...",False,1


In [252]:
# !rm -rf notebooks/white_transitions/*.pkl
# !rm -rf notebooks/black_transitions/*.pkl

In [255]:
filtered.progress_apply(save_game, axis=1);

  0%|          | 0/18021 [00:00<?, ?it/s]

Discarding game containing underpromotion b2b1n b2b1n
Discarding game containing underpromotion e7e8r e7e8r
Discarding game containing underpromotion g2f1n g2f1n
Discarding game containing underpromotion f2g1r f2g1r
Discarding game containing underpromotion e2e1n e2e1n
Discarding game containing underpromotion b2b1n b2b1n
Discarding game containing underpromotion e7e8r e7e8r
Discarding game containing underpromotion d7d8r d7d8r
Discarding game containing underpromotion f2g1r f2g1r
Discarding game containing underpromotion b7a8r b7a8r
Discarding game containing underpromotion e7e8b e7e8b
Discarding game containing underpromotion h2g1r h2g1r
Discarding game containing underpromotion e2e1n e2e1n
Discarding game containing underpromotion f2e1r f2e1r
Discarding game containing underpromotion d7d8n d7d8n
Discarding game containing underpromotion c7c8r c7c8r
Discarding game containing underpromotion h7h8r h7h8r
Discarding game containing underpromotion a2a1r a2a1r
Discarding game containing u

Discarding game containing underpromotion e7f8r e7f8r
Discarding game containing underpromotion f2f1r f2f1r
Discarding game containing underpromotion c2d1r c2d1r
Discarding game containing underpromotion d2e1r d2e1r
Discarding game containing underpromotion g7g8r g7g8r
Discarding game containing underpromotion c2c1r c2c1r
Discarding game containing underpromotion c2c1r c2c1r
Discarding game containing underpromotion d7c8r d7c8r
Discarding game containing underpromotion g7f8r g7f8r
Discarding game containing underpromotion e2e1r e2e1r
Discarding game containing underpromotion b7b8r b7b8r
Discarding game containing underpromotion b2b1r b2b1r
Discarding game containing underpromotion b7a8r b7a8r
Discarding game containing underpromotion f7f8r f7f8r
Discarding game containing underpromotion b7b8b b7b8b
Discarding game containing underpromotion g2g1n g2g1n
Discarding game containing underpromotion c7c8b c7c8b
Discarding game containing underpromotion b7b8r b7b8r
Discarding game containing u