In [1]:
import chess
from stockfish import Stockfish
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

  from .autonotebook import tqdm as notebook_tqdm


# Helper function to calculate the size of the model

In [2]:
# ################ monkey patch for quanto
def named_module_tensors(module, recurse=False):
    for named_parameter in module.named_parameters(recurse=recurse):
      name, val = named_parameter
      flag = True
      if hasattr(val,"_data") or hasattr(val,"_scale"):
        if hasattr(val,"_data"):
          yield name + "._data", val._data
        if hasattr(val,"_scale"):
          yield name + "._scale", val._scale
      else:
        yield named_parameter

    for named_buffer in module.named_buffers(recurse=recurse):
      yield named_buffer

def dtype_byte_size(dtype):
    """
    Returns the size (in bytes) occupied by one parameter of type `dtype`.
    """
    import re
    if dtype == torch.bool:
        return 1 / 8
    bit_search = re.search(r"[^\d](\d+)$", str(dtype))
    if bit_search is None:
        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
    bit_size = int(bit_search.groups()[0])
    return bit_size // 8

def compute_module_sizes(model):
    """
    Compute the size of each submodule of a given model.
    """
    from collections import defaultdict
    module_sizes = defaultdict(int)
    for name, tensor in named_module_tensors(model, recurse=True):
      size = tensor.numel() * dtype_byte_size(tensor.dtype)
      name_parts = name.split(".")
      for idx in range(len(name_parts) + 1):
        module_sizes[".".join(name_parts[:idx])] += size

    return module_sizes

print("Execution Completed")

Execution Completed


In [3]:
board = chess.Board()

stockfish = Stockfish()
stockfish.set_depth(20)
stockfish.set_skill_level(20)

In [4]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

In [5]:
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"  # Replace with the actual model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", low_cpu_mem_usage=True)
# model = model.to('mps')

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    config=bnb_config,
    low_cpu_mem_usage=True,
    offload_folder="./offload",  # Folder to offload parameters if needed
    torch_dtype=torch.float16,  # Use half-precision (float16) to save memory
    use_safetensors=True
)

Loading checkpoint shards: 100%|██████████| 8/8 [00:30<00:00,  3.80s/it]
Some parameters are on the meta device because they were offloaded to the disk.


In [6]:
def print_param_dtype(model):
    for name, param in model.named_parameters():
        print(f"{name} is loaded in {param.dtype}")

print(print_param_dtype(model))

model.embed_tokens.weight is loaded in torch.float16
model.layers.0.self_attn.q_proj.weight is loaded in torch.float16
model.layers.0.self_attn.q_proj.bias is loaded in torch.float16
model.layers.0.self_attn.k_proj.weight is loaded in torch.float16
model.layers.0.self_attn.k_proj.bias is loaded in torch.float16
model.layers.0.self_attn.v_proj.weight is loaded in torch.float16
model.layers.0.self_attn.v_proj.bias is loaded in torch.float16
model.layers.0.self_attn.o_proj.weight is loaded in torch.float16
model.layers.0.mlp.gate_proj.weight is loaded in torch.float16
model.layers.0.mlp.up_proj.weight is loaded in torch.float16
model.layers.0.mlp.down_proj.weight is loaded in torch.float16
model.layers.0.input_layernorm.weight is loaded in torch.float16
model.layers.0.post_attention_layernorm.weight is loaded in torch.float16
model.layers.1.self_attn.q_proj.weight is loaded in torch.float16
model.layers.1.self_attn.q_proj.bias is loaded in torch.float16
model.layers.1.self_attn.k_proj.wei

In [15]:
model2 = model.half()

In [18]:
model2 = model2.half()
print(print_param_dtype(model2))

model.embed_tokens.weight is loaded in torch.float16
model.layers.0.self_attn.q_proj.weight is loaded in torch.float16
model.layers.0.self_attn.q_proj.bias is loaded in torch.float16
model.layers.0.self_attn.k_proj.weight is loaded in torch.float16
model.layers.0.self_attn.k_proj.bias is loaded in torch.float16
model.layers.0.self_attn.v_proj.weight is loaded in torch.float16
model.layers.0.self_attn.v_proj.bias is loaded in torch.float16
model.layers.0.self_attn.o_proj.weight is loaded in torch.float16
model.layers.0.mlp.gate_proj.weight is loaded in torch.float16
model.layers.0.mlp.up_proj.weight is loaded in torch.float16
model.layers.0.mlp.down_proj.weight is loaded in torch.float16
model.layers.0.input_layernorm.weight is loaded in torch.float16
model.layers.0.post_attention_layernorm.weight is loaded in torch.float16
model.layers.1.self_attn.q_proj.weight is loaded in torch.float16
model.layers.1.self_attn.q_proj.bias is loaded in torch.float16
model.layers.1.self_attn.k_proj.wei

In [19]:
print(model.hf_device_map)

{'model.embed_tokens': 'mps', 'model.layers.0': 'mps', 'model.layers.1': 'mps', 'model.layers.2': 'mps', 'model.layers.3': 'mps', 'model.layers.4': 'mps', 'model.layers.5': 'mps', 'model.layers.6': 'mps', 'model.layers.7': 'mps', 'model.layers.8': 'mps', 'model.layers.9': 'mps', 'model.layers.10': 'mps', 'model.layers.11': 'mps', 'model.layers.12': 'mps', 'model.layers.13': 'mps', 'model.layers.14': 'mps', 'model.layers.15': 'disk', 'model.layers.16': 'disk', 'model.layers.17': 'disk', 'model.layers.18': 'disk', 'model.layers.19': 'disk', 'model.layers.20': 'disk', 'model.layers.21': 'disk', 'model.layers.22': 'disk', 'model.layers.23': 'disk', 'model.layers.24': 'disk', 'model.layers.25': 'disk', 'model.layers.26': 'disk', 'model.layers.27': 'disk', 'model.layers.28': 'disk', 'model.layers.29': 'disk', 'model.layers.30': 'disk', 'model.layers.31': 'disk', 'model.layers.32': 'disk', 'model.layers.33': 'disk', 'model.layers.34': 'disk', 'model.layers.35': 'disk', 'model.layers.36': 'dis

In [7]:
print("Initial Board:")
print(board)

Initial Board:
r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R


In [5]:
while not board.is_game_over():
    prompt = f"Chess Position: {board.fen()}\nBest Move:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=10)
    move_str = tokenizer.decode(output[0], skip_special_tokens=True).split("Best Move:")[-1].strip()
    print(move_str)
    
    try:
        move = chess.Move.from_uci(move_str)
        if move in board.legal_moves:
            board.push(move)
            print(f"Deepseek Move: {move}")
        else:
            print("Deepseek generated an invalid move.")
            break
    except:
        print("Deepseek generated an invalid move.")
        break
    
    if board.is_game_over():
        break
    
    stockfish.set_fen_position(board.fen())
    stockfish_move = stockfish.get_best_move()
    board.push(chess.Move.from_uci(stockfish_move))
    print(f"Stockfish Move: {stockfish_move}")
    
    display(SVG(chess.svg.board(board=board)))

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


e4

I'm not sure what to
Deepseek generated an invalid move.


In [5]:
while not board.is_game_over():
    # Deepseek's turn
    fen_position = board.fen()
    
    # Create prompt for Deepseek
    prompt = f"""<|system|>
    You are a chess engine. Analyze the current position and provide the best move.
    The current position in FEN is: {fen_position}
    <|user|>
    What is the best move in UCI format?
    <|assistant|>
    """
    
    # Generate move with Deepseek
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=50)
    deepseek_move = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # print(deepseek_move)
    
    # Extract UCI move from response
    legal_moves = [move.uci() for move in board.legal_moves]
    selected_move = None
    for move in legal_moves:
        if move in deepseek_move:
            selected_move = move
            break
    
    # Fallback to first legal move if no valid move found
    if not selected_move:
        selected_move = legal_moves[0]
    
    # Make the move on the board
    move_obj = chess.Move.from_uci(selected_move)
    board.push(move_obj)
    print(f"Deepseek plays: {selected_move}")
    print(board)
    print("-" * 50)
    
    if board.is_game_over():
        break
    
    # Stockfish's turn
    stockfish.set_fen_position(board.fen())
    best_move = stockfish.get_best_move_time(100)
    move_obj = chess.Move.from_uci(best_move)
    board.push(move_obj)
    print(f"Stockfish plays: {best_move}")
    print(board)
    print("-" * 50)

You shouldn't move a model that is dispatched using accelerate hooks.


RuntimeError: You can't move a model that has some modules offloaded to cpu or disk.

In [8]:
print("Game Over.")
print("Result:", board.result())

Game Over.
Result: 0-1


In [8]:
board2 = chess.Board()
fen_position2 = board2.fen()

p = "Hello. I am Nafis."

system_prompt = f"""<|system|>
    You are a grandmaster-level chess engine specializing in aggressive play. 

    Guidelines:
    1. Maximize your advantage or mitigate any risks.
    2. Develop your pieces and strengthen your position.
    3. Look for check opportunities.
    4. Prioritize material gains when safe.
    5. Look to control the center of the board.
    
    Respond ONLY with the UCI move in this format:
    Best move: [your_move_here]
    </s>
    """
user_prompt = f"<|user|>\nThe current board position is: {board2.fen()}\n What is the best move in this position?\n<|assistant|>\n"
    
full_prompt = system_prompt + user_prompt

# device = torch.device("mps")  # Ensure using MPS
# model.to(device)              # Move model to MPS
# input_ids = input_ids.to(device)  # Move input_ids to MPS


inputs = tokenizer(p, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=100)
deepseek_move = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(deepseek_move)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


KeyboardInterrupt: 