In [8]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# ---------------------------------------------------------------------------
# Model / generation configuration
# ---------------------------------------------------------------------------

# Alternative configuration kept for reference:
# model_name = "meta-llama/Llama-3.2-3B-Instruct"
# max_tokens = 2048     # Maximum tokens to generate in response
# temperature = 0.6    # Sampling temperature
# top_p = 0.9
# top_k = 50
# repetition_penalty = 1.0

model_name = "Qwen/Qwen2.5-7B-Instruct"
# model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
max_tokens = 2048     # Maximum tokens to generate in response
temperature = 1.0    # Sampling temperature

# Pick the device first so the load precision below can depend on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# On GPU, load the weights in the dtype recorded by the checkpoint instead of
# the float32 default; for a half-precision checkpoint this roughly halves the
# memory needed. On CPU the original float32 behaviour is kept.
load_dtype = "auto" if device.type == "cuda" else torch.float32

# Load the tokenizer and model from Hugging Face
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=load_dtype)

# Move model to the selected device (GPU if available)
model.to(device)

# A short status line replaces the bare print() that only hid the model repr.
print(f"Loaded {model_name} on {device} (dtype={model.dtype})")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]




In [9]:

#### system prompts

# Instructions describing the expected <think>/<answer> output format.
system_prompt = """\
You are a helpful assistant who plays chess professionally.
The assistant first thinks through the reasoning process internally and then provides the user with the best move.
The reasoning process and the answer must be enclosed within <think> </think> and <answer> </answer> tags, respectively.
The reasoning process should describe how you analyze the position and decide on the best move.
The answer must be in SAN notation, strictly using the moving piece and the destination square (e.g., Nf3, Rxf2, c5). 
Now, the user provides a FEN string and a list of legal moves for the given board.
After analyzing the position, clearly state the best move in SAN notation within <answer> </answer> tags. i.e., <answer> Nf3 </answer>
"""

#### user prompts

# One position: move history, FEN and the legal-move list. The "\n" escapes
# are real newlines because this is a regular (non-raw) string.
user_prompt = """\
Previous moves: 1. e4 c6 2. d4 d5 3. Nc3 dxe4 4. Nxe4 Nf6 5. Nxf6+ exf6 6. Nf3 Bd6 7. Bd3 O-O 8. O-O Bg4 9. c3 Nd7 10. h3 Bh5 11. Re1 Qc7 12. Be2 Rfe8 13. Nd2 Bxe2 14. Rxe2 f5 15. Nc4 Nf6 16. Bg5 Bh2+ 17. Kh1 Ne4 18. Bd2 Nxd2 19. Qxd2 Bf4 20. Rae1 Kf8 21. Qd3 g6 22. g3 Bg5 23. h4 Bf6 24. Kg2 Rxe2 25. Rxe2 Rd8 26. Qe3 h5 27. Ne5 Kg7 28. Nd3 b6 29. Qf4 Qd7 30. Ne5 Qd5+ 31. Qf3 Bxe5 32. Qxd5 Rxd5 33. Rxe5 Rxe5 34. dxe5 f6 35. f4 fxe5\nCurrent FEN string: 8/p5k1/1pp3p1/4pp1p/5P1P/2P3P1/PP4K1/8 w - - 0 36\nLegal moves: Kh3 Kf3 Kh2 Kf2 Kh1 Kg1 Kf1 fxe5 g4 c4 b3 a3 b4 a4\n
"""

# Text appended after the generation prompt so the model continues from an
# already-opened <think> tag.
assistant_prompt = """\
Let's think step by step. <think>
"""

# Define a sample conversation (list of messages)
conversation = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
]

# Tokenize the prompt. return_dict=False is spelled out so that `inputs` is a
# plain tensor of token ids, which the torch.cat below requires.
inputs = tokenizer.apply_chat_template(
    conversation,
    tokenize=True,
    add_generation_prompt=True,
    return_dict=False,
    return_tensors="pt",
).to(device)

# Tokenize the assistant prefix on its own. add_special_tokens=False stops
# tokenizers that prepend a BOS token from inserting one in the middle of the
# sequence when the two pieces are joined.
assistant_ids = tokenizer(
    assistant_prompt,
    add_special_tokens=False,
    return_tensors="pt",
)["input_ids"].to(device)

# concat assistant related prompts
inputs = torch.cat([inputs, assistant_ids], dim=-1)

# Display the exact text the model will be conditioned on.
tokenizer.decode(inputs[0])


"<|im_start|>system\nYou are a helpful assistant who plays chess professionally.\nThe assistant first thinks through the reasoning process internally and then provides the user with the best move.\nThe reasoning process and the answer must be enclosed within <think> </think> and <answer> </answer> tags, respectively.\nThe reasoning process should describe how you analyze the position and decide on the best move.\nThe answer must be in SAN notation, strictly using the moving piece and the destination square (e.g., Nf3, Rxf2, c5). \nNow, the user provides a FEN string and a list of legal moves for the given board.\nAfter analyzing the position, clearly state the best move in SAN notation within <answer> </answer> tags. i.e., <answer> Nf3 </answer>\n<|im_end|>\n<|im_start|>user\nPrevious moves: 1. e4 c6 2. d4 d5 3. Nc3 dxe4 4. Nxe4 Nf6 5. Nxf6+ exf6 6. Nf3 Bd6 7. Bd3 O-O 8. O-O Bg4 9. c3 Nd7 10. h3 Bh5 11. Re1 Qc7 12. Be2 Rfe8 13. Nd2 Bxe2 14. Rxe2 f5 15. Nc4 Nf6 16. Bg5 Bh2+ 17. Kh1 Ne4 

In [10]:
# Generate the response
# `inputs` is a single, unpadded prompt, so every position is a real token.
# The all-ones mask is passed explicitly instead of leaving generate() to
# infer it from the pad/eos token ids.
attention_mask = torch.ones_like(inputs)

outputs = model.generate(
    inputs,
    attention_mask=attention_mask,
    max_new_tokens=max_tokens,
    temperature=temperature,
    do_sample=True,
)

# Decode the generated tokens to text. outputs[0] contains the prompt followed
# by the completion, so the printed text includes both.
full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(full_output)


system
You are a helpful assistant who plays chess professionally.
The assistant first thinks through the reasoning process internally and then provides the user with the best move.
The reasoning process and the answer must be enclosed within <think> </think> and <answer> </answer> tags, respectively.
The reasoning process should describe how you analyze the position and decide on the best move.
The answer must be in SAN notation, strictly using the moving piece and the destination square (e.g., Nf3, Rxf2, c5). 
Now, the user provides a FEN string and a list of legal moves for the given board.
After analyzing the position, clearly state the best move in SAN notation within <answer> </answer> tags. i.e., <answer> Nf3 </answer>

user
Previous moves: 1. e4 c6 2. d4 d5 3. Nc3 dxe4 4. Nxe4 Nf6 5. Nxf6+ exf6 6. Nf3 Bd6 7. Bd3 O-O 8. O-O Bg4 9. c3 Nd7 10. h3 Bh5 11. Re1 Qc7 12. Be2 Rfe8 13. Nd2 Bxe2 14. Rxe2 f5 15. Nc4 Nf6 16. Bg5 Bh2+ 17. Kh1 Ne4 18. Bd2 Nxd2 19. Qxd2 Bf4 20. Rae1 Kf8 21. Qd

In [None]:
import spacy
import spacy_fastlang
import fasttext
# Replace fastText's internal warning function with one that does nothing.
def _discard_eprint(*args, **kwargs):
    """Accept any positional/keyword arguments and return None without output."""
    return None


fasttext.FastText.eprint = _discard_eprint