In [7]:
%%capture
%load_ext autoreload
%autoreload 2
%cd /home/ubuntu/projects/hyper-sloth

In [8]:
from speedy_utils.all import *
from llm_utils import *
from fastcore.all import *



#### Create a LLM model

In [10]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # Choose any GPU you want to use
from vllm.lora.request import LoRARequest
from vllm import LLM, SamplingParams
import torch
# setup logger
import logging
from os import pipe

if "llm" in locals():
    del llm  #
    torch.cuda.empty_cache()
    
llm = LLM(
    model="outputs/lora/Qwen2.5-1.5B-Instruct-LORA-MATH-merge",
    tensor_parallel_size=1,
    task="generate",
    enforce_eager=True,
    dtype=torch.bfloat16,
    max_model_len=16384,
    enable_lora=True,
    # quantization="bitsandbytes", load_format="bitsandbytes",gpu_memory_utilization=0.95
)

tokenizer = llm.get_tokenizer()

from datasets import load_dataset

# Load the GSM8K dataset
gsm8k = load_dataset("gsm8k", "main")
test = gsm8k["test"]


INFO 03-15 10:14:13 [llm_engine.py:241] Initializing a V0 LLM engine (v0.7.4.dev473+g9ed6ee92) with config: model='outputs/lora/Qwen2.5-1.5B-Instruct-LORA-MATH-merge', speculative_config=None, tokenizer='outputs/lora/Qwen2.5-1.5B-Instruct-LORA-MATH-merge', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=16384, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=outputs/lora/Qwen2.5-1.5B-Instruct-LORA-MATH-merge, num_schedul

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]


INFO 03-15 10:14:15 [loader.py:429] Loading weights took 0.68 seconds
INFO 03-15 10:14:15 [punica_selector.py:18] Using PunicaWrapperGPU.
INFO 03-15 10:14:15 [model_runner.py:1146] Model loading took 2.9144 GB and 0.929388 seconds
INFO 03-15 10:14:19 [worker.py:267] Memory profiling takes 2.58 seconds
INFO 03-15 10:14:19 [worker.py:267] the current vLLM instance can use total_gpu_memory (23.64GiB) x gpu_memory_utilization (0.90) = 21.28GiB
INFO 03-15 10:14:19 [worker.py:267] model weights take 2.91GiB; non_torch_memory takes 0.02GiB; PyTorch activation peak memory takes 1.43GiB; the rest of the memory reserved for KV Cache is 16.92GiB.
INFO 03-15 10:14:19 [executor_base.py:111] # cuda blocks: 39592, # CPU blocks: 9362
INFO 03-15 10:14:19 [executor_base.py:116] Maximum concurrency for 16384 tokens per request: 38.66x
INFO 03-15 10:14:24 [llm_engine.py:447] init engine (profile, create kv cache, warmup model) took 8.56 seconds


In [None]:
# Prepare prompts for GSM8K evaluation
all_questions = [item["question"] for item in test]
standardized_prompts = [
    tokenizer.apply_chat_template(
        [{"role": "user", "content": f"{question}\nSolve step by step and put your final numerical answer inside \\boxed{{}}"}],
        tokenize=False,
        add_generation_prompt=True,
    )
    for question in all_questions
]

# Set sampling parameters for deterministic generation
sampling_params = SamplingParams(temperature=0.0, top_p=0.95, top_k=64, max_tokens=5000)

# Generate responses for all questions
outputs = llm.generate(standardized_prompts, sampling_params)#, lora_request=LoRARequest("think_math", 1, 'outputs/lora/Qwen2.5-1.5B-Instruct-LORA-MATH/'))
all_outputs = [output.outputs[0].text for output in outputs]


Processed prompts:  87%|████████▋ | 1153/1319 [02:49<03:33,  1.28s/it, est. speed input: 705.51 toks/s, output: 2122.07 toks/s]

In [10]:
print(all_outputs[0])

<think>
Janet's ducks: 16 eggs/day. Breakfast: 3 eggs/day. Muffins: 4 eggs/day. Eggs left: 16 - 3 - 4 = 9. Eggs sold: 9 * $2 = $18. Answer: $18.
</think>


Janet's ducks lay 16 eggs per day. She eats 3 eggs for breakfast and bakes muffins for her friends with 4 eggs. The number of eggs left is calculated as follows:

1. Total eggs laid: 16
2. Eggs eaten for breakfast: 3
3. Eggs used for muffins: 4
4. Eggs left: 16 - 3 - 4 = 9

Janet sells the remaining eggs at $2 each. The total revenue from selling the eggs is:

5. Eggs sold: 9
6. Price per egg: $2
7. Revenue: 9 * $2 = $18

Thus, Janet makes \boxed{18} dollars every day at the farmers' market.


In [11]:
def get_final_output(response):
    try:
        return int(response.split("\\boxed{")[1].split("}")[0])
    except:
        return None

In [12]:
final_outputs = [get_final_output(response) for response in all_outputs]
accs = []
num_error = 0
for i, gt in enumerate(test):
    pred = final_outputs[i]
    try:
        num = gt['answer'].split('####')[1]
        # num = all_answers.append(int(num))
        # all_answers.append(num)
        num = int(num)
        pred = int(pred)
        accs.append(num == pred)
    except:
        num_error += 1
        accs.append(0)
        pass
        # all_answers.append(None)


In [14]:
np.mean(accs), num_error/len(final_outputs)

(0.5367702805155421, 0.17664897649734648)

In [31]:
from IPython.display import Markdown
Markdown(all_outputs[12].replace('<think>', '\n---\n').replace('</think>', '\n---\n'))


---

Lemon tree: $90, 7 lemons/year, $1.5 each, $3/year water/food.

Costs: planting, water/food, lemons.

Total cost: $90 + (7 * $3) + (7 * $1.5).

Calculate each cost.

Planting: $90.

Water/food: 7 * $3 = $21.

Lemons: 7 * $1.5 = $10.5.

Total cost: $90 + $21 + $10.5 = $121.5.

Earnings: 7 lemons/year * $1.5 = $10.5/year.

Earnings: $10.5/year.

Earnings vs. cost: $10.5 - $121.5 = -$111.

Negative earnings: not profitable.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90.

Profitable: $10.5 > $90