In [1]:
import sys

sys.path.append('..')


import os

import torch

from transformers import AutoModelForCausalLM, AutoTokenizer
from reasonflow import ReasonFlowConfig, ReasonFlow

## Load model and tokenizer

In [None]:
# Model identifier handed to both `from_pretrained` calls below.
path = 'mkurman/llama-3.2-MEDIT-3B-o1'

# Cache location: $HF_HOME when the variable is set, otherwise ~/hf_cache.
hf_cache = os.getenv('HF_HOME', os.path.expanduser('~/hf_cache'))

# Load the causal LM weights in bfloat16.
model = AutoModelForCausalLM.from_pretrained(path, cache_dir=hf_cache, torch_dtype=torch.bfloat16)

# Load the matching tokenizer from the same identifier and cache directory.
tokenizer = AutoTokenizer.from_pretrained(path, cache_dir=hf_cache)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Wrap a single integer EOS id in a one-element list; any non-int value is left as-is.
if isinstance(model.config.eos_token_id, int):
    model.config.eos_token_id = [model.config.eos_token_id]

def has_eos(output, eos_token: list[int]):
    """Return True when ``output`` contains at least one EOS token id.

    Args:
        output: Container of token ids; anything that supports the ``in`` operator.
        eos_token: EOS token ids to look for. A single int is also accepted
            and treated as a one-element list.

    Returns:
        bool: True if any id from ``eos_token`` is found in ``output``,
        False otherwise (including when ``eos_token`` is empty).
    """
    if isinstance(eos_token, int):
        eos_token = [eos_token]
    # Test membership itself, not the truthiness of the id: token id 0 is a
    # valid id and must be reported when it is present in `output`.
    return any(token_id in output for token_id in eos_token)

def dtype_to_str(dtype):
    """Return the part of ``str(dtype)`` after its last '.', e.g. without a 'torch.' prefix."""
    _, _, short_name = str(dtype).rpartition('.')
    return short_name

## Create input prompt

In [4]:
# Example prompts in chat-message format; each entry is one single-turn conversation.
chat_like_texts = [
    [
        {
            "role": "user",
            "content": "What is the strength (B1) of a 13C 90° pulse of duration 1 μs?",
        },
    ],
    [
        {
            "role": "user",
            "content": 'What is the maximum value of the z-component of the spin angular momentum of 43Ca? [I = 7⁄2] [ "10.492 x 10^-34 J s", "2.549 x 10^-34 J s", "5.493 x 10^-34 J s", "4.185 x 10^-34 J s", "1.238 x 10^-34 J s", "8.370 x 10^-34 J s", "6.234 x 10^-34 J s", "9.854 x 10^-34 J s", "3.691 x 10^-34 J s", "7.123 x 10^-34 J s" ]',
        },
    ],
]

# Render the second conversation to plain text with the model's chat template.
prompt_text = tokenizer.apply_chat_template(
    chat_like_texts[1],
    tokenize=False,
    add_generation_prompt=True,
)

# Tokenize the rendered text. Chat templates normally emit the special tokens
# themselves, so `add_special_tokens=False` keeps the tokenizer from prepending
# a second BOS token to the prompt.
input_ids = tokenizer(
    prompt_text,
    padding=True,
    truncation=True,
    return_tensors="pt",
    add_special_tokens=False,
)

## Configure MCTS

In [None]:
# Maximum number of thoughts to validate.
NUM_OF_THOUGHTS = 4
# Maximum number of thinkers to reason with.
NUM_OF_THINKERS = 6
# Maximum number of thinkers to consider.
TOPK_THINKERS = 2
# Decision threshold for accepting a token whose probability is lower than
# that of the most probable token.
ACCEPTANCE_THRESHOLD = 0.5
# Temperature values used when sampling less probable tokens.
TEMPERATURES = [0.7, 1.3]

# Maximum number of tokens to generate in total.
MAX_TOTAL_TOKENS = 8192
# Whether to use tokens for sampling (using tokens for stability).
USE_TOKENS = True

# Whether to use dynamic temperature.
DYNAMIC_TEMPERATURE = True
# Weight for diversity in sampling.
DIVERSITY_WEIGHT = 0.3
# Threshold for uncertainty in sampling.
UNCERTAINTY_THRESHOLD = 0.5
# Lower bound of the acceptance threshold for sampling.
MIN_ACCEPTANCE_THRESHOLD = 0.3
# Upper bound of the acceptance threshold for sampling.
MAX_ACCEPTANCE_THRESHOLD = 0.7

## Run the ReasonFlow algorithm

In [None]:
# Position in `chat_like_texts` of the conversation to run.
index = 1

# Use CUDA when torch reports it as available, otherwise stay on the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# Move the model to the chosen device and switch it to evaluation mode.
model.to(device)
model.eval()

# Token ids of the prepared prompt, placed on the same device as the model.
input_tokens = input_ids['input_ids'].to(device)

# Collect the notebook-level settings into a ReasonFlow configuration object.
config = ReasonFlowConfig(
    num_of_thinkers=NUM_OF_THINKERS,
    num_of_thoughts=NUM_OF_THOUGHTS,
    topk_thinkers=TOPK_THINKERS,
    acceptance_threshold=ACCEPTANCE_THRESHOLD,
    temperatures=TEMPERATURES,
    dynamic_temperature=DYNAMIC_TEMPERATURE,
    diversity_weight=DIVERSITY_WEIGHT,
    uncertainty_threshold=UNCERTAINTY_THRESHOLD,
    min_acceptance_threshold=MIN_ACCEPTANCE_THRESHOLD,
    max_acceptance_threshold=MAX_ACCEPTANCE_THRESHOLD,
)

reason_flow = ReasonFlow(config, model, tokenizer)

# Generate for the selected conversation. Sampling is switched off
# (`do_sample=False`) and the sampling-related arguments are passed
# explicitly as None.
output = reason_flow.generate_thoughts(
    chat_like_texts[index],
    use_tokens=USE_TOKENS,
    max_new_tokens=4096,
    stream=True,
    device=device,
    torch_dtype=dtype_to_str(model.dtype),
    do_sample=False,
    temperature=None,
    top_p=None,
    top_k=None,
    repetition_penalty=None,
)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Hook registered for model.norm
<|begin_of_text|><|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 20 Feb 2025

<|eot_id|><|start_header_id|>user<|end_header_id|>

What is the maximum value of the z-component of the spin angular momentum of 43Ca? [I = 7⁄2] [ "10.492 x 10^-34 J s", "2.549 x 10^-34 J s", "5.493 x 10^-34 J s", "4.185 x 10^-34 J s", "1.238 x 10^-34 J s", "8.370 x 10^-34 J s", "6.234 x 10^-34 J s", "9.854 x 10^-34 J s", "3.691 x 10^-34 J s", "7.123 x 10^-34 J s" ]<|eot_id|><|start_header_id|>assistant<|end_header_id|>



We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class (https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)


<Thought>
Alright, I need to determine the maximum value of the z-component of the spin angular momentum of 43Ca, given the options provided. Let's break this down step by step to ensure accuracy.

First, I recall that the spin angular momentum (J) of an atom is related to its nuclear spin (I) by the equation:

\[ J = \sqrt{I(I + 1)} \hbar \]

where \( \hbar \) is the reduced Planck constant. For calcium-43 (Ca), the nuclear spin (I) is given as \( \frac{7}{2} \).

So, plugging in the values:

\[ J = \sqrt{\frac{7}{2} \left(\frac{7}{2} + 1\right)} \hbar \]
\[ J = \sqrt{\frac{7}{2} \times \frac{9}{2}} \hbar \]
\[ J = \sqrt{\frac{63}{4}} \hbar \]
\[ J = \frac{\sqrt{63}}{2} \hbar \]
\[ J = \frac{7.937}{2} \hbar \]
\[ J \approx 3.9685 \hbar \]

Now, the z-component of the spin angular momentum (Jz) is given by:

\[ J_z = m_J \hbar \]

where \( m_J \) is the magnetic quantum number, which ranges from -I to +I. For \( I = \frac{7}{2} \), the possible values of \( m_J \) are:

- \( m_J = -\fr