In [1]:
# How good are Small LLMs at chess gameplay?
# Can they explain their reasoning?
# Does finetuning make them better?
# Do reasoning (e.g. chain-of-thought) techniques improve their performance?
# Can they be finetuned via RL selfplay?

#### Part 1: Eval off-the-shelf LLMs on chess gameplay

Use huggingface LLM api and python-chess and stockfish api to evaluate LLMs on chess gameplay.

In [2]:
# install python-chess and stockfish if not already installed

In [3]:
try:
  from stockfish import Stockfish
except ImportError:
  !pip install stockfish
  from stockfish import Stockfish

try:
  import chess
except ImportError:
  !pip install python-chess
  import chess

In [4]:
import os

In [5]:
STOCKFISH_REMOTE = "https://github.com/official-stockfish/Stockfish/releases/latest/download/stockfish-ubuntu-x86-64-avx2.tar"
STOCKFISH_LOCAL = "./stockfish/stockfish-ubuntu-x86-64-avx2"

In [6]:

if not os.path.exists(STOCKFISH_LOCAL):
  !wget -O stockfish.tar $STOCKFISH_REMOTE
  !tar -xf stockfish.tar
  !rm stockfish.tar

In [7]:
os.path.exists(STOCKFISH_LOCAL)

True

In [8]:
from stockfish import Stockfish

stockfish_params = {
    # "Debug Log File": "",
    "Contempt": 0,
    "Min Split Depth": 0,
    "Threads": 1, # More threads will make the engine stronger, but should be kept at less than the number of logical processors on your computer.
    "Ponder": "false",
    "Hash": 256, # Default size is 16 MB. It's recommended that you increase this value, but keep it as some power of 2. E.g., if you're fine using 2 GB of RAM, set Hash to 2048 (11th power of 2).
    "MultiPV": 1,
    "Skill Level": 20,
    "Move Overhead": 10,
    "Minimum Thinking Time": 20,
    "Slow Mover": 100,
    "UCI_Chess960": "false",
    "UCI_LimitStrength": "false",
    "UCI_Elo": 1350
}

stockfish = Stockfish(STOCKFISH_LOCAL, parameters=stockfish_params)

In [9]:
print(stockfish.get_board_visual())
stockfish.get_best_move()

+---+---+---+---+---+---+---+---+
| r | n | b | q | k | b | n | r | 8
+---+---+---+---+---+---+---+---+
| p | p | p | p | p | p | p | p | 7
+---+---+---+---+---+---+---+---+
|   |   |   |   |   |   |   |   | 6
+---+---+---+---+---+---+---+---+
|   |   |   |   |   |   |   |   | 5
+---+---+---+---+---+---+---+---+
|   |   |   |   |   |   |   |   | 4
+---+---+---+---+---+---+---+---+
|   |   |   |   |   |   |   |   | 3
+---+---+---+---+---+---+---+---+
| P | P | P | P | P | P | P | P | 2
+---+---+---+---+---+---+---+---+
| R | N | B | Q | K | B | N | R | 1
+---+---+---+---+---+---+---+---+
  a   b   c   d   e   f   g   h



'e2e4'

In [10]:
stockfish.get_fen_position()

'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1'

In [20]:
import chess

board = chess.Board()

print(board)

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R


In [21]:
stockfish.set_fen_position(board.fen(), send_ucinewgame_token=True)
best_move = stockfish.get_best_move()
board.push_uci(best_move)
print(board)

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . P . . .
. . . . . . . .
P P P P . P P P
R N B Q K B N R


In [22]:
best_move

'e2e4'

In [17]:
board.outcome()

In [11]:
# huggingface AutoModelForCausalLM
try:
  from transformers import AutoModelForCausalLM, AutoTokenizer
except ImportError:
  !pip install transformers
  from transformers import AutoModelForCausalLM, AutoTokenizer

Collecting transformers
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m828.7 kB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hCollecting filelock (from transformers)
  Downloading filelock-3.17.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.26.0 (from transformers)
  Downloading huggingface_hub-0.29.1-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.5/40.5 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.5.2-cp38-abi3-

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
MODEL_NAME = "allenai/OLMo-7B-0724-hf"  # AllenAI's OLMo-7B model

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)


In [24]:
BASE_PROMPT = """You are playing chess. You are {llm_color}, playing against {opponent_color}. It is your turn. The board is as follows:
{board}
The sequence of moves so far is: [{history}]
Your next move (in UCI notation) is:"""
print(BASE_PROMPT.format(llm_color="white", opponent_color="black", board=board, moves=""))

You are playing chess. You are white, playing against black. It is your turn. The board is as follows:
r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . P . . .
. . . . . . . .
P P P P . P P P
R N B Q K B N R
The sequence of moves so far is: []
Your move (in UCI notation) is:


In [33]:
# ERROR_MOVE_PROMPT="Illegal move: {move}! Try again.\nYour move (in UCI notation) is:"
ERROR_MOVE_PROMPT="""Illegal move: {move}! Some legal moves are: [{legal_moves}]. Try again.
Your next move (in UCI notation) is:"""

SHOW_MAX_LEGAL_MOVES = 10
print(
  ERROR_MOVE_PROMPT.format(move="e2e4", board=board, 
  legal_moves=', '.join([m.uci() for i, m in enumerate(board.legal_moves) if i >= (SHOW_MAX_LEGAL_MOVES-1)]))
  )

Illegal move: e2e4! Some legal moves are: [c7c6, b7b6, a7a6, h7h5, g7g5, f7f5, e7e5, d7d5, c7c5, b7b5, a7a5]. Try again.
Your move (in UCI notation) is:


In [None]:
class LLMGame:
  def __init__(self, model, tokenizer, stockfish, base_prompt, color='white', stockfish_time_limit=None, 
               show_legal_moves_limit=10,
               llm_illegal_move_retries=3, llm_illegal_move_prompt=ERROR_MOVE_PROMPT):
    self.model = model
    self.tokenizer = tokenizer
    self.stockfish = stockfish
    self.board = chess.Board()
    self.base_prompt = base_prompt
    self.color = color
    self.stockfish_time_limit = stockfish_time_limit
    self.show_legal_moves_limit = show_legal_moves_limit
    self.llm_illegal_move_retries = llm_illegal_move_retries
    self.llm_illegal_move_prompt = llm_illegal_move_prompt
    self.history = []
    self.outcome = None
    self.error_outcome = None

  def get_prompt(self, error_move=None):
    legal_moves = [m.uci() for i, m in enumerate(self.board.legal_moves) if i >= (self.show_legal_moves_limit-1)]
    prompt = self.base_prompt.format(
        llm_color=self.color,
        opponent_color='black' if self.color == 'white' else 'white',
        board=self.board,
        history=', '.join(self.history),
        legal_moves=', '.join(legal_moves)
    )
    if error_move:
      prompt += ('\n' + self.llm_illegal_move_prompt.format(
        move=error_move, 
        board=self.board,
        legal_moves=', '.join(legal_moves)
      ))
    return prompt
  
  def make_engine_move(self):
    if self.board.is_game_over():
      return
    self.stockfish.set_fen_position(self.board.fen(), send_ucinewgame_token=True)
    if self.stockfish_time_limit:
      move = self.stockfish.get_best_move_time(self.stockfish_time_limit)
    else:
      move = self.stockfish.get_best_move()
    self.history.append(move)
    self.board.push_uci(move)
  
  def make_llm_move(self, retries=None):
    if self.board.is_game_over():
      return
    prompt = self.get_prompt()
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids
    output = model.generate(input_ids, max_length=100)
    resp = tokenizer.decode(output[0])
    print(resp)
    move = resp.split('\n')[-1].strip()

    if retries is None:
      retries = self.llm_illegal_move_retries

    if move not in self.board.legal_moves:
      if retries > 0:
        prompt = self.get_prompt(error_move=move)
        self.llm_move(retries=retries-1)
      raise ValueError("Illegal move: {move}")
    
    self.history.append(move)
    self.board.push_uci(move)

  def play(self):
    self.board = chess.Board()
    self.history = []
    self.outcome = None
    self.error_outcome = None

    if self.color == 'black':
      if not self.board.is_game_over():
        try:
          self.make_engine_move()
          self.outcome = self.board.outcome()
        except ValueError as e:
          print(e)
          self.error_outcome = ('Engine', e)
          self.outcome = self.board.outcome()  
          return

    while not self.board.is_game_over():
      if not self.board.is_game_over():
        try:
          self.make_llm_move()
          self.outcome = self.board.outcome()
        except ValueError as e:
          print(e)
          self.error_outcome = ('LLM', e)
          self.outcome = self.board.outcome()
          return
      if not self.board.is_game_over():
        try:
          self.make_engine_move()
          self.outcome = self.board.outcome()
        except ValueError as e:
          print(e)
          self.error_outcome = ('Engine', e)
          self.outcome = self.board.outcome()  
          return