In [None]:
# Run the benchmark script as a shell command through IPython's `!` escape.
# The cell marker above shows no execution count, i.e. this cell was not run in the saved session.
!python src/benchmark.py

In [1]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import torch
import re
import numpy as np
from tqdm import tqdm
import os
# local
from src.utils import get_few_shot_messages
from src.prompts import few_shot_gsm8k

In [2]:
# Report the GPU, driver/CUDA versions and current memory usage before the model is loaded.
!nvidia-smi

Sun Nov 24 22:54:27 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.127.05             Driver Version: 550.127.05     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A6000               Off |   00000000:C3:00.0 Off |                  Off |
| 30%   47C    P8             31W /  300W |       2MiB /  49140MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
# Copy the Hugging Face token from HUGGING_FACE_TOKEN into HF_TOKEN.
# A missing HUGGING_FACE_TOKEN raises KeyError on this line.
# NOTE(review): presumably HF_TOKEN is the variable the Hugging Face libraries read — confirm.
os.environ["HF_TOKEN"] = hf_token = os.environ['HUGGING_FACE_TOKEN']

In [4]:
# Load only the test split of the GSM8K "main" configuration.
dataset = load_dataset("gsm8k", "main", split="test")

In [5]:
# Candidate checkpoints; `model_name` selects the one used by the rest of the notebook.
models = [
    "meta-llama/Llama-3.2-1B",           # index 0
    "meta-llama/Llama-3.2-1B-Instruct",  # index 1
    "Qwen/Qwen2.5-7B-Instruct",          # index 2
]
model_name = models[1]

In [6]:
# Fetch the configuration object for the selected checkpoint.
config = AutoConfig.from_pretrained(pretrained_model_name_or_path=model_name)

In [7]:
# Tokenizer for the selected checkpoint, configured to pad on the left.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.padding_side = "left"

# Causal LM loaded with the pre-fetched config, float16 weights and
# device_map="auto" (device placement is left to the library).
model = AutoModelForCausalLM.from_pretrained(
    model_name, config=config, torch_dtype=torch.float16, device_map="auto"
)

In [8]:
# Print the chat template attached to the loaded tokenizer.
# Original note: the output is None for Llama-3.2-3B, because it is not an instruct model.
# NOTE(review): Llama-3.2-3B is not one of the entries in `models` — confirm which
# checkpoint this note refers to.
print(tokenizer.chat_template)

{{- bos_token }}
{%- if custom_tools is defined %}
    {%- set tools = custom_tools %}
{%- endif %}
{%- if not tools_in_user_message is defined %}
    {%- set tools_in_user_message = true %}
{%- endif %}
{%- if not date_string is defined %}
    {%- if strftime_now is defined %}
        {%- set date_string = strftime_now("%d %b %Y") %}
    {%- else %}
        {%- set date_string = "26 Jul 2024" %}
    {%- endif %}
{%- endif %}
{%- if not tools is defined %}
    {%- set tools = none %}
{%- endif %}

{#- This block extracts the system message, so we can slot it into the right place. #}
{%- if messages[0]['role'] == 'system' %}
    {%- set system_message = messages[0]['content']|trim %}
    {%- set messages = messages[1:] %}
{%- else %}
    {%- set system_message = "" %}
{%- endif %}

{#- System message #}
{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
{%- if tools is not none %}
    {{- "Environment: ipython\n" }}
{%- endif %}
{{- "Cutting Knowledge Date: December 2023\n" }}
{{- 

In [9]:
# Build the few-shot chat messages from the GSM8K exemplars and display them.
# As the cell output shows, the result is a list of {'role', 'content'} dicts
# alternating between 'user' questions and 'assistant' worked answers.
messages = get_few_shot_messages(few_shot_gsm8k)
messages

[{'role': 'user',
  'content': 'There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?'},
 {'role': 'assistant',
  'content': 'There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6. So the answer is 6.'},
 {'role': 'user',
  'content': 'If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?'},
 {'role': 'assistant',
  'content': 'There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5. So the answer is 5.'},
 {'role': 'user',
  'content': 'Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?'},
 {'role': 'assistant',
  'content': 'Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74. After eating 35, they had 74 - 35 = 39. So the answer is 39.'},
 {'role': 'user