In [1]:
!pip install chess

Collecting chess
  Downloading chess-1.11.2.tar.gz (6.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.1/6.1 MB[0m [31m69.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: chess
  Building wheel for chess (setup.py) ... [?25l[?25hdone
  Created wheel for chess: filename=chess-1.11.2-py3-none-any.whl size=147776 sha256=38612bc5a4d2ba45335a25817dc948aaeb5010eb637c6c6d22a45d518b674a65
  Stored in directory: /root/.cache/pip/wheels/fb/5d/5c/59a62d8a695285e59ec9c1f66add6f8a9ac4152499a2be0113
Successfully built chess
Installing collected packages: chess
Successfully installed chess-1.11.2


In [2]:

import pickle
import numpy as np
import torch
import chess
from torch.utils.data import Dataset, DataLoader, random_split

# Piece type -> plane offset (0..5) inside one colour's group of six planes.
p2i = {
    piece_type: plane
    for plane, piece_type in enumerate(
        (chess.PAWN, chess.KNIGHT, chess.BISHOP,
         chess.ROOK, chess.QUEEN, chess.KING)
    )
}

# Move string -> integer id; convert_game_to_tensor adds entries on demand.
move_encoding = dict()

def convert_game_to_tensor(game):
    """Convert one processed game record into three float32 NumPy arrays.

    Args:
        game: Mapping read with the keys 'white_material', 'black_material',
            'material_balance', 'white_can_castle', 'black_can_castle',
            'white_center_control', 'black_center_control',
            'legal_moves_count' (a list), 'eco' and 'moves' (a sequence of
            move strings accepted by ``Board.push_san`` or ``Board.push_uci``).

    Returns:
        A tuple ``(fv, bs, imgs)``:
          * ``fv``   -- shape (38,): 7 scalar features, 15 legal-move counts
            (zero padded), the ECO id, and 15 move ids (zero padded).
          * ``bs``   -- shape (15, 6): per-step rows
            ``[w, b, w - b, lm_w, lm_b, step]`` where ``w``/``b`` are linearly
            interpolated from 39 towards the game's material values (they are
            not read from the replayed board).
          * ``imgs`` -- shape (15, 12, 8, 8): one-hot piece planes after each
            of the first 15 moves, zero padded for shorter games.

    Raises:
        ValueError: if a move can be parsed neither as SAN nor as UCI.

    Side effects:
        Unknown moves are added to the module-level ``move_encoding`` dict.
        NOTE(review): when this runs inside DataLoader worker processes, each
        worker mutates its own copy of that dict, so ids assigned lazily here
        are only consistent if the vocabulary was populated beforehand.
    """
    # ---- flat feature vector -------------------------------------------
    fv = [
        game['white_material'], game['black_material'],
        game['material_balance'],
        int(game['white_can_castle']), int(game['black_can_castle']),
        game['white_center_control'], game['black_center_control'],
    ]
    lm = game['legal_moves_count'][:15]
    fv.extend(lm + [0] * (15 - len(lm)))
    # eco_encoding / default_eco are module-level names that are only read,
    # so no `global` declaration is required.
    fv.append(eco_encoding.get(game['eco'], default_eco))
    mi = []
    for mv in game['moves'][:15]:
        if mv not in move_encoding:
            # Ids start at 1: 0 is reserved for the padding added below, so a
            # real move can never be confused with "no move".
            move_encoding[mv] = len(move_encoding) + 1
        mi.append(move_encoding[mv])
    mi += [0] * (15 - len(mi))
    fv.extend(mi)
    fv = np.array(fv, dtype=np.float32)

    # ---- per-step summary rows -----------------------------------------
    bs = []
    start_mat = 39  # material value the interpolation starts from
    total_plys = len(game['legal_moves_count'])
    for i in range(15):
        prog = (i + 1) / 15
        w = int(start_mat * (1 - prog) + game['white_material'] * prog)
        b = int(start_mat * (1 - prog) + game['black_material'] * prog)
        bal = w - b
        # Entries 2*i and 2*i+1 of legal_moves_count are paired per step;
        # indices past the end of the list contribute 0.
        idx_w, idx_b = 2 * i, 2 * i + 1
        lm_w = game['legal_moves_count'][idx_w] if idx_w < total_plys else 0
        lm_b = game['legal_moves_count'][idx_b] if idx_b < total_plys else 0
        bs.append([w, b, bal, lm_w, lm_b, i])
    bs = np.array(bs, dtype=np.float32)

    # ---- board planes after each move ----------------------------------
    board = chess.Board()
    imgs = []
    for mv in game['moves'][:15]:
        try:
            board.push_san(mv)
        except ValueError:
            # Not valid SAN in this position: fall back to UCI. Only parse
            # failures are caught, so KeyboardInterrupt and unrelated errors
            # still propagate.
            board.push_uci(mv)
        one_hot = np.zeros((12, 8, 8), dtype=np.float32)
        for sq, piece in board.piece_map().items():
            # Row 0 holds the highest rank; column is the file index.
            r, f = 7 - (sq // 8), sq % 8
            # Truthy colour (chess.WHITE) uses planes 0-5, the other 6-11.
            idx = (0 if piece.color else 6) + p2i[piece.piece_type]
            one_hot[idx, r, f] = 1
        imgs.append(one_hot)
    while len(imgs) < 15:
        imgs.append(np.zeros((12, 8, 8), dtype=np.float32))
    imgs = np.stack(imgs, axis=0)

    return fv, bs, imgs

class ChessSequenceDataset(Dataset):
    """Map-style dataset that featurises game records on access.

    Each item is ``(fv, bs, img, label)``: the three arrays returned by
    ``convert_game_to_tensor`` wrapped as tensors, plus an int64 label looked
    up from ``result_map`` using the record's ``'result'`` value.
    """

    def __init__(self, games, result_map):
        """Keep references to the game records and the result -> label map."""
        self.games, self.result_map = games, result_map

    def __len__(self):
        """Number of game records."""
        return len(self.games)

    def __getitem__(self, ix):
        """Featurise record ``ix`` and return it with its class label."""
        record = self.games[ix]
        features, board_stats, planes = convert_game_to_tensor(record)
        target = self.result_map[record['result']]
        tensors = [torch.tensor(arr) for arr in (features, board_stats, planes)]
        tensors.append(torch.tensor(target, dtype=torch.long))
        return tuple(tensors)

dataset_path = "/kaggle/input/chess-dataset/lichess_processed_1000000_games_first_15_moves.pkl"
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only point this at a dataset file you trust.
with open(dataset_path, 'rb') as f:
    raw_games = pickle.load(f)

# ECO codes get ids starting at 1; default_eco is the fallback id that
# convert_game_to_tensor uses for a code missing from eco_encoding.
eco_codes = sorted({g['eco'] for g in raw_games})
eco_encoding = {eco: idx for idx, eco in enumerate(eco_codes, start=1)}
default_eco = -1

# Keep decisive games only so the task is binary.
binary_games = [g for g in raw_games if g['result'] in ('1-0','0-1')]

# Build the move vocabulary up front. convert_game_to_tensor would otherwise
# assign ids lazily inside __getitem__, and with num_workers > 0 every
# DataLoader worker process would grow its own, mutually inconsistent copy of
# move_encoding. Ids start at 1 so 0 stays free for padding; sorting makes the
# mapping deterministic, and clear() keeps the cell idempotent on re-run.
move_encoding.clear()
move_encoding.update(
    (mv, idx)
    for idx, mv in enumerate(
        sorted({mv for g in binary_games for mv in g['moves'][:15]}),
        start=1,
    )
)

ds = ChessSequenceDataset(binary_games, result_map={'1-0':0,'0-1':1})
n_train = int(0.8*len(ds))
# Seed the split so the train/test partition is reproducible across runs.
train_ds, test_ds = random_split(
    ds, [n_train, len(ds)-n_train],
    generator=torch.Generator().manual_seed(42),
)

train_loader = DataLoader(train_ds, batch_size=128, shuffle=True,  num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=128, shuffle=False, num_workers=4, pin_memory=True)





In [3]:


import numpy as np
from scipy.stats import pearsonr

# Correlate material balance with "white won" on the held-out split.
#
# The two values are read straight from the raw game records behind the
# Subset instead of iterating `test_ds`: iterating would run the full
# featurisation (including replaying every game on a board) for each sample
# only to read back one scalar, and would mutate move_encoding on the way.
test_games = [test_ds.dataset.games[i] for i in test_ds.indices]

# float32 matches the dtype the feature vector stores this value in.
balances = np.array(
    [g['material_balance'] for g in test_games], dtype=np.float32
)
# 1 when the dataset label is 0 (the class the result map assigns to '1-0').
labels = np.array(
    [1 if test_ds.dataset.result_map[g['result']] == 0 else 0 for g in test_games]
)

r, pval = pearsonr(balances, labels)
print(f"Pearson r = {r:.3f}, p-value = {pval:.2e}")


Pearson r = 0.243, p-value = 0.00e+00


In [4]:

from sklearn.linear_model   import LogisticRegression
from sklearn.metrics        import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split

# Baseline: predict the label from material balance alone.
# 20% of the (balance, label) pairs are held out, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    balances.reshape(-1, 1),
    labels,
    test_size=0.2,
    random_state=42,
)

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
# Second column of predict_proba = probability of the second class in clf.classes_.
y_proba = clf.predict_proba(X_test)[:, 1]

print("Material‐only Logistic Regression:")
print(f"  Accuracy = {accuracy_score(y_test, y_pred):.3f}")
print(f"  AUC      = {roc_auc_score(y_test, y_proba):.3f}")

Material‐only Logistic Regression:
  Accuracy = 0.588
  AUC      = 0.630
