**PAL (program-aided language model)**

In [1]:
!pip -q install bitsandbytes accelerate transformers

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import re

In [4]:
# Mistral-7B-Instruct (pre-quantized 4-bit bitsandbytes checkpoint) is used as the code-writing model.
model_id = "unsloth/mistral-7b-instruct-v0.2-bnb-4bit"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# `trust_remote_code` is intentionally left at its default (False): Mistral is a
# built-in transformers architecture, so there is no reason to allow Python code
# shipped in the Hub repository to run on this machine.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
)

Loading weights:   0%|          | 0/291 [00:00<?, ?it/s]

In [13]:
# Wrap the loaded model and tokenizer in a text-generation pipeline; every call is capped at 200 newly generated tokens.
llm = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)

In [8]:
def create_prompt(question, observation=None):
    """Build the PAL prompt asking the model to answer `question` with Python code.

    Args:
        question: The natural-language problem to solve.
        observation: Outcome of the previous attempt (a value or an error
            message). When it is not None it is added to the prompt as a
            "# Previous Observation" line.

    Returns:
        The prompt string. It always ends with "Action:\\n", so everything the
        model writes after it is expected to be code.
    """
    prompt = f"""You are a helpful assistant that solves problems step by step using Python.
Question: {question}
Let's solve it step by step.
"""
    # Compare against None rather than truthiness: a falsy observation such as 0
    # is still a real outcome that must be shown to the model.
    if observation is not None:
        prompt += f"# Previous Observation: {observation}\n"
    prompt += "# Write a valid Python code, and store the final answer in a variable named 'result'.\n"
    prompt += "Thought:\n# Think about the problem.\nAction:\n"
    return prompt

In [9]:
def extract_code(text):
    """Extract the Python code that follows the first "Action:" marker in `text`.

    Handling, in order:
      * Everything after the first "Action:\\n" is taken as the candidate code.
      * If that part contains a Markdown code fence (```python ... ```), only the
        contents of the first fenced block are kept. A fence that was never
        closed (e.g. generation was cut off) runs to the end of the text.
      * Blank lines and lines that are only a comment are dropped.

    Args:
        text: Text produced by the model (may be empty or None).

    Returns:
        The extracted code joined with newlines, or "" when there is no
        "Action:" marker or nothing but blanks/comments after it.
    """
    if not text:
        return ""
    match = re.search(r"Action:\n([\s\S]+)", text)
    if not match:
        return ""
    code = match.group(1).strip()

    # Chat-tuned models frequently wrap code in Markdown fences; the fence lines
    # and any prose after the closing fence are not valid Python, so unwrap them.
    fenced = re.search(r"```[^\n`]*\n([\s\S]*?)(?:```|\Z)", code)
    if fenced:
        code = fenced.group(1)

    # ignore blank lines and full-line comments
    code_lines = [
        line for line in code.splitlines()
        if line.strip() and not line.strip().startswith("#")
    ]
    return "\n".join(code_lines)

In [10]:
def run_code(code):
    """Execute generated Python source and return the answer it computed.

    Args:
        code: Python source code to execute.

    Returns:
        * the value of the variable ``result`` if the code defines it;
        * otherwise the most recently created int/float variable (bools are
          not counted as numbers);
        * ``"Execution Error: <message>"`` if the code raises an Exception;
        * ``"No result variable found."`` if nothing usable was produced.
    """
    # NOTE(security): exec() runs model-generated code with the full privileges
    # of this process. Only use it in a throwaway/sandboxed environment.
    exec_env = {}
    try:
        # A single dict serves as both globals and locals. With separate dicts the
        # code runs like a class body, and functions it defines cannot see its own
        # imports or top-level variables (NameError at call time).
        exec(code, exec_env)
    except Exception as e:
        print("Execution error:", e)
        return f"Execution Error: {e}"
    if "result" in exec_env:
        return exec_env["result"]
    # Fallback for when the LLM did not store the final answer in the 'result'
    # variable: return the last number that was created.
    nums = [
        v for v in exec_env.values()
        if isinstance(v, (int, float)) and not isinstance(v, bool)
    ]
    if nums:
        return nums[-1]
    return "No result variable found."


In [11]:
def pal_chain(question, max_steps=3):
    """Run the PAL loop: ask the model for code, execute it, retry on failure.

    On every step the prompt is rebuilt (including the previous observation, if
    any), the model's output is turned into code, and the code is executed.
    The loop stops as soon as a step yields a real answer.

    Args:
        question: The natural-language problem to solve.
        max_steps: Maximum number of generate-and-execute attempts.

    Returns:
        The last observation from `run_code`, the string "No code generated."
        if no code could be extracted, or None if `max_steps` is less than 1.
    """
    observation = None
    # give the model up to `max_steps` chances to calculate the final result
    for step in range(1, max_steps + 1):
        print(f"\n--- Step {step} ---")
        prompt = create_prompt(question, observation)
        output = llm(prompt)[0]
        generated = output.get('generated_text', '') or output.get('text', '')
        code = extract_code(generated)
        if not code:
            print("No code extracted.")
            return "No code generated."
        print("Generated code:\n", code)
        observation = run_code(code)
        print("Observation:", observation)
        # run_code reports failures as strings too, so a plain `str` check would
        # treat an error as a final answer and the retry would never happen.
        failed = isinstance(observation, str) and (
            observation.startswith("Execution Error:")
            or observation == "No result variable found."
        )
        # if an actual answer was produced, stop trying!
        if isinstance(observation, (int, float, str)) and not failed:
            break
    return observation


In [14]:

# Sample word problem; the answer shown in this cell's output is 11.
question = """Peppa has 5 candies. She buys 2 more packs of candies.
 Each pack has 3 candies. How many candies does she have now?"""

# Run the PAL loop on the question and print whatever it returns.
final_answer = pal_chain(question)
print("\n✅ Final Answer:", final_answer)

Both `max_new_tokens` (=200) and `max_length`(=32768) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



--- Step 1 ---
Generated code:
 initial_candies = 5
new_candies = 2
candies_per_pack = 3
new_candies_count = new_candies * candies_per_pack
result = initial_candies + new_candies_count
print(f"Peppa has {result} candies now.")
result = initial_candies + new_candies_count
print(f"Peppa has {result} candies now.")
Peppa has 11 candies now.
Peppa has 11 candies now.
Observation: 11

✅ Final Answer: 11
