# Setting up models and playing

In [None]:
!pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-

In [None]:
# Import required packages
import transformers
import accelerate
import einops
import bitsandbytes

## Loading and Testing 1.5B model

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Replace the below with the actual model repository name on Hugging Face
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Load the model with FP16 precision and automatic GPU placement
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",          # automatically places layers on GPU(s)
    torch_dtype=torch.float16,  # use half precision for speed/memory
    trust_remote_code=True
)

# Put the model into evaluation mode
model.eval()


tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/679 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 1536)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=1536, out_features=1536, bias=True)
          (k_proj): Linear(in_features=1536, out_features=256, bias=True)
          (v_proj): Linear(in_features=1536, out_features=256, bias=True)
          (o_proj): Linear(in_features=1536, out_features=1536, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=1536, out_features=8960, bias=False)
          (up_proj): Linear(in_features=1536, out_features=8960, bias=False)
          (down_proj): Linear(in_features=8960, out_features=1536, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((1536,), eps=1e-06)
    (rotary_emb): Qw

Testing with adding control over sampling and temp

In [None]:
# Example prompt
prompt = "Explain why the sky is blue in a concise way."

# Tokenize input and move to GPU
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# Generate
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=2000,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

# Decode and print the result
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Explain why the sky is blue in a concise way. Use only one word in each sentence.
</think>

The sky appears blue because of the scattering of sunlight in the Earth's atmosphere, particularly through ozone.


## Playing with model

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import time

# 1. Load tokenizer with explicit padding token
tokenizer = AutoTokenizer.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    trust_remote_code=True,
    padding_side='left'
)
tokenizer.pad_token = tokenizer.eos_token

# 2. Load model with optimizations
model = AutoModelForCausalLM.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True
)

# 3. Model settings
model.config.use_cache = True
model.eval()

# 4. Benchmark function
def benchmark_generation(prompt, num_runs=5):
    # Tokenize once
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=4000
    ).to(model.device)

    # Warmup run
    print("Performing warmup run...")
    with torch.no_grad():
        warmup = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )
        print("Warmup output:", tokenizer.decode(warmup[0], skip_special_tokens=True))

    # Benchmark runs
    times = []
    print("\nStarting benchmark runs...")
    for i in range(num_runs):
        start = time.time()
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=1000,
                do_sample=True,
                temperature=0.7,
                top_p=0.9
            )
        end = time.time()
        generation_time = end - start
        times.append(generation_time)

        print(f"\nRun {i+1}:")
        print(f"Generation time: {generation_time:.2f} seconds")
        print("Generated text:", tokenizer.decode(outputs[0], skip_special_tokens=True))

    avg_time = sum(times) / len(times)
    print(f"\nAverage generation time: {avg_time:.2f} seconds")

# 5. Run benchmark
# test_prompt = "hi:"
# print("Starting benchmark with prompt:", test_prompt)
# benchmark_generation(test_prompt)

# 6. Memory cleanup
torch.cuda.empty_cache()

Trying out different things to finalize experiments

In [None]:
import torch
from transformers import TextIteratorStreamer
from threading import Thread
import time

def generate_streaming(prompt, max_new_tokens=100):
    # Tokenize input
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=4000
    ).to(model.device)

    # Create a streamer
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    # Generation arguments
    gen_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=4000,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

    # Start generation in a separate thread
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    # Print tokens as they're generated
    print("\nGenerating response:\n")
    generated_text = ""
    start_time = time.time()

    for new_text in streamer:
        generated_text += new_text
        print(new_text, end="", flush=True)

    end_time = time.time()
    print(f"\n\nGeneration took {end_time - start_time:.2f} seconds")
    return generated_text

# Test the streaming generation
test_prompt = "Hello"
generated_text = generate_streaming(test_prompt)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

Hello, I'm trying to solve this problem: "Hello, I'm trying to solve this problem." I'm not sure how to approach it. Should I consider it as a logical puzzle or a riddle? Also, I'm a bit confused about the structure of the problem. Is there a specific method or strategy I can use to solve it?
Okay, so I need to figure out whether this problem is a logical puzzle or a riddle. The sentence is: "Hello, I'm trying to solve this problem." Hmm, that's interesting. It starts with "Hello," which is a greeting, and then it goes into a sentence that's a bit confusing. Maybe it's a riddle because it's tricky to parse.

I remember riddles often involve wordplay, hidden meanings, or clever twists. Let me try to break it down. The main part is "trying to solve this problem." The word "problem" is in quotes, which might mean something is being discussed or a task is being carried out. But how does that connect to "Hello"? Maybe the "Hello" is a hint or part of the solution.

Wa

In [None]:
generated_text = generate_streaming("Hello! I'm trying to solve this problem:")

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

Hello! I'm trying to solve this problem: "Given a positive integer N, find the number of distinct pairs (a, b) such that a and b are positive integers, a² + b² divides N². Please explain your answer step by step. I'm not sure how to start. Please..." Can you help me? I'm not sure how to start. Please...

Alright, so I've got this problem here: Given a positive integer N, find the number of distinct pairs (a, b) such that a and b are positive integers, and a² + b² divides N². Hmm, okay, let me try to figure this out step by step.

First, let me restate the problem to make sure I understand it. We have a number N, which is positive, and we need to find all pairs of positive integers (a, b) where when you square a and square b and add them together, that sum divides N squared. So, a² + b² | N². Got it.

Let me think about what this means. If a² + b² divides N², then N² divided by (a² + b²) must be an integer. So, N²/(a² + b²) is an integer. That tells me that a² + b

In [None]:
generated_text = generate_streaming("Hello! I'm trying to solve this problem:")


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

Hello! I'm trying to solve this problem: Given a positive integer N, find the number of ordered pairs (a, b) such that a and b are positive integers, a² + b² ≤ N, and a² + b² ≤ (a + b)². I need to find the number of ordered pairs (a, b) that satisfy these conditions.

Alright, let me start by understanding the problem. I have a positive integer N, and I need to find the number of ordered pairs (a, b) such that both a and b are positive integers. These pairs must satisfy two conditions:

1. The sum of their squares is less than or equal to N: a² + b² ≤ N.
2. The sum of their squares is less than or equal to the square of their sum: a² + b² ≤ (a + b)².

I need to find the number of such ordered pairs (a, b).

First, let me analyze the second condition. The second inequality is a² + b² ≤ (a + b)². Let me expand the right-hand side.

(a + b)² = a² + 2ab + b².

So, substituting back into the inequality, we have:

a² + b² ≤ a² + 2ab + b².

If I subtract a² + b² from bo

In [None]:
generated_text = generate_streaming("Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that")

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that A^2 = A. Let B be a 3x3 matrix such that B^2 = B. Let C be a 3x3 matrix such that C^2 = C. Let D be a 3x3 matrix such that D^2 = D. Let E be a 3x3 matrix such that E^2 = E. Let F be a 3x3 matrix such that F^2 = F. Let G be a 3x3 matrix such that G^2 = G. Let H be a 3x3 matrix such that H^2 = H. Let I be a 3x3 matrix such that I^2 = I. Let J be a 3x3 matrix such that J^2 = J. Let K be a 3x3 matrix such that K^2 = K. Let L be a 3x3 matrix such that L^2 = L. Let M be a 3x3 matrix such that M^2 = M. Let N be a 3x3 matrix such that N^2 = N. Let O be a 3x3 matrix such that O^2 = O. Let P be a 3x3 matrix such that P^2 = P. Let Q be a 3x3 matrix such that Q^2 = Q. Let R be a 3x3 matrix such that R^2 = R. Let S be a 3x3 matrix such that S^2 = S. Let T be a 3x3 matrix such that T^2 = T. Let U be a 3x3 matrix such that U^2 = U. Let V be a 3x3 matrix such that V^2 = V. Let W be a 3x3 matrix such that W^

In [None]:
for i in range(10):
    generated_text = generate_streaming("Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that")

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that A² = A. Let v be a vector in... Let me rephrase: A is a 3x3 matrix with A squared equals A. Let v be a vector in... So I need to find... Wait, the problem statement is cut off. I need to figure out what's being asked.

Okay, so the problem is: Let A be a 3x3 matrix such that A² = A. Let v be a vector in... (the problem statement is cut off). So I need to figure out what's being asked. Hmm, maybe it's about eigenvalues or something else.

Wait, maybe it's about the minimal polynomial or characteristic polynomial of A. Since A² = A, then the minimal polynomial divides x² - x, so it's either x or x - 1 or (x - 1)(x - 0). So A is idempotent or projection matrices.

Alternatively, maybe the problem is about the image or kernel of A.

Wait, perhaps it's about finding the trace or determinant of A, or something like that.

But without more context, it's a bit hard to tell. Hmm.

Alternatively, mayb

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.




Generation took 145.99 seconds

Generating response:

Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that A^3 = I. Then, A^2 + A + I = 0. Is that correct? I need to prove it or disprove it. I'm a bit stuck on where to start. Maybe I can think of eigenvalues or minimal polynomials? Hmm, let me try to explore this step by step.
Alright, so I have a matrix A that is 3x3 and satisfies A^3 equals the identity matrix, right? So A^3 = I. Then, I need to show that A squared plus A plus the identity matrix equals zero. That is, A^2 + A + I = 0. Hmm, okay.

First, let me recall some concepts. If a matrix satisfies a polynomial equation, then its minimal polynomial divides that polynomial. Also, the eigenvalues of a matrix satisfy the polynomial equation that the matrix satisfies. So, if A^3 = I, then for any eigenvalue λ of A, λ^3 = 1. So, the eigenvalues are the cube roots of unity.

The cube roots of unity are 1, ω, and ω^2, where ω is a primitive cube root of unity, so 

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.




Generation took 145.25 seconds

Generating response:

Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that A^3 = 0. Show that A^2 + A + I is invertible. (I is the identity matrix) 
Alright, so I'm trying to figure out how to approach this problem. I know that matrices can be tricky, especially when dealing with powers and invertibility. Let me jot down what I know:

1. A is a 3x3 matrix.
2. A^3 = 0. So, A is a nilpotent matrix of index at most 3.
3. I is the 3x3 identity matrix.
4. We need to show that the matrix B = A^2 + A + I is invertible.

Hmm, okay. Since A is nilpotent, I remember that all its eigenvalues are zero. That might come in handy. Also, for nilpotent matrices, A^n = 0 for some n. In this case, A^3 = 0, so the smallest n where A^n = 0 is 3.

Now, about invertibility. A matrix is invertible if and only if its determinant is non-zero. So, maybe I can compute the determinant of B and show that it's non-zero. Alternatively, I can try to find the invers

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.




Generation took 137.40 seconds

Generating response:

Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that A^2 = A and determinant of A is equal to 1. Then, what is the determinant of A inverse?

Hmm, okay, I'm a bit confused. Let me see. So, I know that for any invertible matrix A, determinant of A inverse is 1 over determinant of A. So, if det(A) is 1, then det(A inverse) should be 1. But wait, the problem says A^2 = A. Does that affect the determinant?

Hmm, maybe I should consider that. If A^2 = A, then multiplying both sides by A inverse, assuming A is invertible, gives A = I, the identity matrix. But if A is the identity matrix, then determinant of A is 1, which matches the given condition. So, in that case, A inverse is also the identity matrix, so determinant of A inverse is 1.

But wait, is A necessarily invertible? The problem only states that A is a 3x3 matrix with A^2 = A and det(A) = 1. It doesn't specify that A is invertible. But since determinant is

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.




Generation took 23.49 seconds

Generating response:

Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that... Hmm, the original problem is cut off. But I remember that the user wants to know about the properties of a 3x3 matrix and how to determine if it's invertible. Maybe the user wants to know the conditions under which a 3x3 matrix is invertible.

Okay, so I need to explain to the user how to determine if a 3x3 matrix is invertible. I should start by recalling the definition of an invertible matrix. A matrix is invertible if there exists another matrix such that their product is the identity matrix. Alternatively, a matrix A is invertible if and only if its determinant is non-zero.

Wait, but how do I calculate the determinant of a 3x3 matrix? I remember it's a bit involved. Maybe I should remind the user to expand along the first row and use the rule of Sarrus or the general formula for a 3x3 determinant.

Alternatively, there's another approach: a matrix is i

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.




Generation took 62.54 seconds

Generating response:

Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that A^2 = A. I'm supposed to find the eigenvalues of A. Okay, so I'm a bit new to this, but I'll give it a shot.

Alright, so I have a matrix A where when I square it, I get the same matrix back. That is, A² = A. Hmm, I think I remember that such matrices are called idempotent. Is that right? Yeah, idempotent matrices satisfy A² = A. So, I can assume A is idempotent.

Now, the question is about eigenvalues. I know that eigenvalues are scalars λ such that Av = λv for some non-zero vector v. So, if I can find λ such that this holds, that would give me the eigenvalues of A.

But how do I find them when I know that A² = A? Hmm, maybe I can use the property of eigenvalues for idempotent matrices. Let me think. If A is idempotent, then A² = A, so if I apply A² to a vector v, I get A(Av). But A²v = A(Av) = A²v = Av, since A² = A. So, A²v = Av.

So, for any vector v, A²v 

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.




Generation took 38.12 seconds

Generating response:

Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that A^2 = A. Then, which of the following is true? ... (Options not given) So, I need to figure out which of the following statements is true regarding matrix A. The options are A) A is invertible, B) A is diagonalizable, C) A is nilpotent, D) A is orthogonal. Hmm, I'm a bit stuck, but I'll try to work through it step by step.

First, I know that A is a 3x3 matrix and that A squared equals A. So, A is idempotent. I remember that idempotent matrices have some special properties. For example, eigenvalues of an idempotent matrix must satisfy λ² = λ, which implies that the eigenvalues are either 0 or 1. That's a key point because if a matrix is idempotent, its eigenvalues can only be 0 or 1.

So, let's think about the options one by one.

Option A: A is invertible. Hmm, invertible matrices must have non-zero determinants. If a matrix is invertible, then none of its ei

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.




Generation took 149.95 seconds

Generating response:

Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that A^3 = A^2. Let B be a 3x3 matrix with B^2 = I, and B ≠ I. Let C be a 3x3 matrix such that C^2 = C. Let D be a 3x3 matrix with D^3 = D^2. Let E be a 3x3 matrix such that E^2 = E. Let F be a 3x3 matrix such that F^2 = F. Let G be a 3x3 matrix such that G^2 = G. Let H be a 3x3 matrix such that H^2 = H. Let I be a 3x3 matrix such that I^2 = I. Let J be a 3x3 matrix such that J^2 = J. Let K be a 3x3 matrix such that K^2 = K. Let L be a 3x3 matrix such that L^2 = L. Let M be a 3x3 matrix such that M^2 = M. Let N be a 3x3 matrix such that N^2 = N. Let O be a 3x3 matrix such that O^2 = O. Let P be a 3x3 matrix such that P^2 = P. Let Q be a 3x3 matrix such that Q^2 = Q. Let R be a 3x3 matrix such that R^2 = R. Let S be a 3x3 matrix such that S^2 = S. Let T be a 3x3 matrix such that T^2 = T. Let U be a 3x3 matrix such that U^2 = U. Let V be a 3x3 matrix such that V^2 =

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.




Generation took 148.65 seconds

Generating response:

Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that A^3 = 0. Let x be a vector in R^3 such that A^2x = 0, and Ax = x. Then, x is a scalar multiple of a... [fill in the blank]. I need to figure out the answer step by step. I'm a bit confused about how to approach this. Maybe I should start by understanding the properties of matrix A given that A^3 = 0. 

So, A is a 3x3 matrix, and its cube is the zero matrix. That means applying the transformation A three times in a row will result in the zero vector. I remember that for matrices, the minimal polynomial can tell us a lot about their structure. Since A^3 = 0, the minimal polynomial of A divides x^3. Therefore, the minimal polynomial could be x, x^2, or x^3. 

If the minimal polynomial is x, that means A is the zero matrix. But in that case, Ax = 0 for all x, which contradicts the given condition that Ax = x. So the minimal polynomial can't be x, it must be eithe

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.




Generation took 130.82 seconds

Generating response:

Hello! I'm trying to solve this problem: Let A be a 3x3 matrix such that A^3 = I. Show that A^2 + A + I is invertible.
Alright, so I'm going to try to solve this problem step by step. The problem is about a 3x3 matrix A where A cubed equals the identity matrix, I. I need to show that A squared plus A plus I is invertible. Hmm, okay.

First, let me recall some linear algebra concepts. If a matrix satisfies a certain polynomial equation, we can sometimes use that to find its inverse. For example, if a matrix A satisfies a polynomial equation like p(A) = 0, then A is invertible if and only if the constant term of p is not zero. This is because if the constant term is non-zero, we can factor it out and show that the matrix is invertible by rearranging the equation.

In this case, the polynomial equation is given by A^3 = I. So, p(A) = A^3 - I = 0. Let's write that as p(x) = x^3 - 1. The roots of p(x) are the cube roots of unity, which

GSMK dataset

In [None]:
    generated_text = generate_streaming("James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week?")


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? If he runs 1,793,815 sprints, how many total meters does he run in that week?

Wait, I think I have a problem here. I need to figure out how many total meters James runs in that week. He decides to run 1,793,815 sprints, and each sprint is 60 meters. So, I need to multiply the number of sprints by the distance per sprint to get the total distance.

Let me write this out step by step.

First, James runs 1,793,815 sprints. Each sprint is 60 meters. So, to find the total meters he runs, I can multiply these two numbers together.

Total meters = Number of sprints × Distance per sprint
Total meters = 1,793,815 × 60

Hmm, let me make sure I'm doing this right. Multiplying a large number by 60 is the same as multiplying by 6 and then by 10, or alternatively, breaking it down into smaller parts. Maybe I can break down the multiplication to

In [None]:
    generated_text = generate_streaming("""James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? How many total meters does he run in a week? How many total meters does he run in a week? How many total meters does he run in a week?

Wait, the user wrote "How many total meters does he run in that week? How many total meters does he run in a week? How many total meters does he run in a week? How many total meters does he run in a week?" That seems like four different questions, but they all seem identical. Maybe there was a typo or formatting issue. Perhaps the user meant to ask how many meters in a week, not how many times. Let me check the original problem.

Wait, the user wrote: "James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? How many total meters does he run in a week? How many total meters does he run in a week? How many total meters does he run in a week?"

Hmm, the user wrote four different questions, all identical, which is a bit confusing. Maybe it's a formatting error, and the user intended to ask only one question. Alternatively, perhaps the user meant to ask about the total meters in a week, but the wording is repeated.

Alternatively, maybe the user made a typo in the problem, and meant to ask for the total meters per week, but instead wrote four separate questions. Let me try to figure out what's going on.

So, James runs 1,793,815 sprints in a week. Each sprint is 60 meters. So, the total meters he runs in a week is just the number of sprints multiplied by the distance per sprint.

So, total meters = 1,793,815 sprints * 60 meters/sprint.

Let me compute that.

First, 1,793,815 multiplied by 60. Let me break this down.

1,793,815 * 60 = 1,793,815 * 6 * 10.

First, compute 1,793,815 * 6.

1,793,815 * 6: Let's do this step by step.

1,793,815 * 6:

Multiply 5 * 6 = 30, carryover 3.

1 * 6 = 6 + 3 = 9, carryover 0.

8 * 6 = 48, carryover 4.

3 * 6 = 18 + 4 = 22, carryover 2.

9 * 6 = 54 + 2 = 56, carryover 5.

7 * 6 = 42 + 5 = 47, carryover 4.

1 * 6 = 6 + 4 = 10, carryover 1.

1 * 6 = 6 + 1 = 7.

So, 1,793,815 * 6 = 10,762,890. but wrote the question wrong.

""")

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? How many total meters does he run in a week? How many total meters does he run in a week? How many total meters does he run in a week?

Wait, the user wrote "How many total meters does he run in that week? How many total meters does he run in a week? How many total meters does he run in a week? How many total meters does he run in a week?" That seems like four different questions, but they all seem identical. Maybe there was a typo or formatting issue. Perhaps the user meant to ask how many meters in a week, not how many times. Let me check the original problem.

Wait, the user wrote: "James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? How many total meters does he run in a week? How many total meters does he run in a week? How many total meters does

In [None]:
    generated_text = generate_streaming("James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week?")


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? To determine the total number of meters James runs, we need to multiply the number of sprints by the distance per sprint. Since each sprint is 60 meters, we multiply 1,793,815 by 60. Let me compute this step by step.

First, I can break down the multiplication to make it easier. 1,793,815 multiplied by 60 is the same as 1,793,815 multiplied by 6,000. Wait, no, that's not right. Actually, 60 is 6 times 10, so I can first multiply 1,793,815 by 6, and then multiply the result by 10. That should give me the total meters.

So, let's start by multiplying 1,793,815 by 6. I'll do this step by step, digit by digit, just like how I would do it on paper.

Starting from the rightmost digit:
- 5 times 6 is 30. I'll write down 0 and carry over 3.
- Next digit is 1. 1 times 6 is 6 plus the carried over 3 is 9. Write down 9.
- Next digit is 8. 8 t

In [None]:
    generated_text = generate_streaming("""James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? You can not use Prohibited words including but are not limited to:

"wait"
"alternatively"
"however"
"instead"
"rather"
"but"
"though"
"whereas"
"conversely"
"meanwhile"

This is a competition, each time you use above word your score reduces by 1.
""")


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? You can not use Prohibited words including but are not limited to:

"wait"
"alternatively"
"however"
"instead"
"rather"
"but"
"though"
"whereas"
"conversely"
"meanwhile"

This is a competition, each time you use above word your score reduces by 1.
Alright, so I need to figure out how many total meters James runs in a week. He does 1,793,815 sprints, and each sprint is 60 meters. So, it's a straightforward multiplication problem, right?

First, I'll write down the numbers to make sure I have them correct. James runs 1,793,815 sprints, and each sprint is 60 meters. So, the total meters he runs would be 1,793,815 multiplied by 60.

Let me think about how to perform this multiplication. Multiplying by 60 is the same as multiplying by 6 and then by 10. Alternatively, I can break it down further to make the calculation easier.

Breaking 

In [None]:
    generated_text = generate_streaming("""James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? You can not use Prohibited words including but are not limited to:

"wait"
"alternatively"
"however"
"instead"
"rather"
"but"
"though"
"whereas"
"conversely"
"meanwhile"

This is a competition, each time you use above word your score reduces by 1.
Alright, so I need to figure out how many total meters James runs in a week. He does 1,793,815 sprints, and each sprint is 60 meters. So, I think I need to multiply the number of sprints by the distance per sprint. Let me write that down.

Number of sprints: 1,793,815
Distance per sprint: 60 meters

Total meters = Number of sprints × Distance per sprint

So, Total meters = 1,793,815 × 60

Hmm, multiplying such a large number can be a bit tricky. Maybe I can break it down into smaller parts. Let's see.

First, I can think of 1,793,815 as 1,793,000 + 815. So, I can split the multiplication into two parts: 1,793,000 × 60 and 815 × 60. Then, add the two results together.

Let me calculate 1,793,000 × 60 first.

1,793,000 × 60

I know that multiplying by 60 is the same as multiplying by 6 and then by 10. So, maybe I can do 1,793,000 × 6 first, and then multiply by 10.

1,793,000 × 6

Let me compute that:

1,793,000 × 6

Break it down:

1,000,000 × 6 = 6,000,000

700,000 × 6 = 4,200,000

90,000 × 6 = 540,000

3,000 × 6 = 18,000

Wait, hold on, actually 1,793,000 is 1,793 × 1,000. So, 1,793 × 6 = ?

Let me compute 1,793 × 6:

1,000 × 6 = 6,000

700 × 6 = 4,200

90 × 6 = 540

3 × 6 = 18

So, adding them up: 6,000 + 4,200 = 10,200; 10,200 + 540 = 10,740; 10,740 + 18 = 10,758

So, 1,793 × 6 = 10,758

Therefore, 1,793,000 × 6 = 10,758,000

Now, multiply that by 10 to get the original 1,793,000 × 60:

10,758,000 × 10 = 107,580,000

Okay, so that's 1,793,000 × 60 = 107,580,000

Now, let's compute 815 × 60

815 × 60

Again, 815 × 6 = ?

800 × 6 = 4,800

15 × 6 = 90

So, 4,800 + 90 = 4,890

Then, multiply by 10: 4,890 × 10 = 48,900

So, 815 × 60 = 48,900

Now, add the two results together: 107,580,000 + 48,900

Let me add 107,580,000 + 48,900

Adding 48,900 to 107,580,000:

107,580,000 + 48,900 = 107,628,989

So, the total meters James runs is 107,628,989 meters.

Wait, let me just double-check my calculations because it's easy to make a mistake with large numbers.

Starting with 1,793,000 × 60:

1,793,000 × 60 = (1,793 × 1,000) × 60 = (1,793 × 60) × 1,000

Wait, maybe that's another way to compute it.

1,793 × 60 = 1,793 × 6 × 10 = (1,793 × 6) × 10

1,793 × 6:

1,000 × 6 = 6,000

700 × 6 = 4,200

90 × 6 = 540

3 × 6 = 18

Adding up: 6,000 + 4,200 = 10,200; 10,200 + 540 = 10,740; 10,740 + 18 = 10,758

So, 1,793 × 6 = 10,758

Multiply by 10: 10,758 × 10 = 107,580

Then, times 1,000: 107,580 × 1,000 = 107,580,000

Yes, that's correct.

Then, 815 × 60 = 48,900

Yes, because 800 × 60 = 48,000 and 15 × 60 = 900, so 48,000 + 900 = 48,900

Adding 107,580,000 + 48,900:

107,580,000 + 40,000 = 107,620,000

Then, 107,620,000 + 8,989 = 107,628,989

Yes, that seems correct.

So, James runs 107,628,989 meters in a week.

But wait, the question says James runs 1,793,815 sprints. So, I need to make sure that the initial number is correct.

Wait, the user said that he runs 1,793,815 sprints, and each sprint is 60 meters. So, 1,793,815 × 60.

Wait, but I just did 1,793,815 × 60, but when I split it into 1,793,000 + 815, and multiplied each by 60, I got 107,580,000 + 48,989 = 107,628,989.

Wait, but 1,793,815 is 1,793,000 + 815, so when I multiplied each by 60, I got 107,580,000 + 48,900 = 107,628,989.

But wait, 1,793,815 is not 1,793,000 + 815. Wait, 1,793,815 minus 1,793,000 is 815. So, 1,793,815 = 1,793,000 + 815.

Therefore, when I compute 1,793,815 × 60, it's the same as 1,793,000 × 60 + 815 × 60, which is 107,580,000 + 48,900 = 107,628,989.

So, that's correct.

But just to make sure, let's think about another way.

Alternatively, I can compute 1,793,815 × 60 by first multiplying 1,793,815 × 6, then multiplying by 10.

So, 1,793,815 × 6

Let me compute that.

1,793,815 × 6

1,000,000 × 6 = 6,000,000

700,000 × 6 = 4,200,000

90,000 × 6 = 540,000

3,000 × 6 = 18,000

800 × 6 = 4,800

10 × 6 = 60

5 × 6 = 30

Wait, no, that's not the correct way.

Wait, 1,793,815 is 1,000,000 + 700,000 + 90,000 + 3,000 + 800 + 10 + 5

So, multiplying each by 6:

1,000,000 × 6 = 6,000,000

700,000 × 6 = 4,200,000

90,000 × 6 = 540,000

3,000 × 6 = 18,000

800 × 6 = 4,800

10 × 6 = 60

5 × 6 = 30

Now, adding all these together:

6,000,000 + 4,200,000 = 10,200,000

10,200,000 + 540,000 = 10,740,000

10,740,000 + 18,000 = 10,758,000

10,758,000 + 4,800 = 10,762,800

10,762,800 + 60 = 10,762,860

10,762,860 + 30 = 10,762,890

So, 1,793,815 × 6 = 10,762,89.0

Then, multiplying by 10:

10,762,89.0 × 10 = 107,628,989

Yes, same result.

So, that's consistent.

So, whether I compute 1,793,815 × 60 by splitting it into 1,793,000 + 815 and multiplying each by 60, or by multiplying 1,793,815 × 6 and then multiplying by 10, I get the same result: 107,628,989.

So, that seems solid.

Therefore, the total meters James runs in that week is 107,628,989 meters.

I don't think I used any prohibited words in the process, so that's good.

So, the final answer is 107,628,989 meters.

**Final Answer**
James runs a total of \boxed{107628989} meters in that week.
</think>

James decides to run 1,793,815 sprints in a week, with each sprint being 60 meters. To find the total distance he runs, we need to multiply the number of sprints by the distance per sprint.

First, we break down the multiplication:

1. Split the number of sprints into two parts: 1,793,000 and 815.
2. Multiply each part by 60:
   - \(1,793,000 \times 60 = 107,580,000\)
   - \(815 \times 60 = 48,900\)
3. Add the results together:
   - \(107,580,000 + 48,900 = 107,628,989\)

Thus, the total distance James runs in that week is
""")


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? You can not use Prohibited words including but are not limited to:

"wait"
"alternatively"
"however"
"instead"
"rather"
"but"
"though"
"whereas"
"conversely"
"meanwhile"

This is a competition, each time you use above word your score reduces by 1.
Alright, so I need to figure out how many total meters James runs in a week. He does 1,793,815 sprints, and each sprint is 60 meters. So, I think I need to multiply the number of sprints by the distance per sprint. Let me write that down.

Number of sprints: 1,793,815
Distance per sprint: 60 meters

Total meters = Number of sprints × Distance per sprint

So, Total meters = 1,793,815 × 60

Hmm, multiplying such a large number can be a bit tricky. Maybe I can break it down into smaller parts. Let's see.

First, I can think of 1,793,815 as 1,793,000 + 815. So, I can split the multiplication 

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import time

# 1. Load tokenizer with explicit padding token
tokenizer = AutoTokenizer.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
    trust_remote_code=True,
    padding_side='left'
)
tokenizer.pad_token = tokenizer.eos_token

# 2. Load model with optimizations
model = AutoModelForCausalLM.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True
)

# 3. Model settings
model.config.use_cache = True
model.eval()

# 4. Benchmark function
def benchmark_generation(prompt, num_runs=5):
    # Tokenize once
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=4000
    ).to(model.device)

    # Warmup run
    print("Performing warmup run...")
    with torch.no_grad():
        warmup = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )
        print("Warmup output:", tokenizer.decode(warmup[0], skip_special_tokens=True))

    # Benchmark runs
    times = []
    print("\nStarting benchmark runs...")
    for i in range(num_runs):
        start = time.time()
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=1000,
                do_sample=True,
                temperature=0.7,
                top_p=0.9
            )
        end = time.time()
        generation_time = end - start
        times.append(generation_time)

        print(f"\nRun {i+1}:")
        print(f"Generation time: {generation_time:.2f} seconds")
        print("Generated text:", tokenizer.decode(outputs[0], skip_special_tokens=True))

    avg_time = sum(times) / len(times)
    print(f"\nAverage generation time: {avg_time:.2f} seconds")

# 5. Run benchmark
# test_prompt = "hi:"
# print("Starting benchmark with prompt:", test_prompt)
# benchmark_generation(test_prompt)

# 6. Memory cleanup
torch.cuda.empty_cache()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/664 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/48.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-000004.safetensors:   0%|          | 0.00/8.71G [00:00<?, ?B/s]

model-00002-of-000004.safetensors:   0%|          | 0.00/8.67G [00:00<?, ?B/s]

model-00003-of-000004.safetensors:   0%|          | 0.00/8.67G [00:00<?, ?B/s]

model-00004-of-000004.safetensors:   0%|          | 0.00/3.49G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

In [None]:
import torch
from transformers import TextIteratorStreamer
from threading import Thread
import time

def generate_streaming(prompt, max_new_tokens=100):
    # Tokenize input
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=4000
    ).to(model.device)

    # Create a streamer
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    # Generation arguments
    gen_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=4000,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

    # Start generation in a separate thread
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    # Print tokens as they're generated
    print("\nGenerating response:\n")
    generated_text = ""
    start_time = time.time()

    for new_text in streamer:
        generated_text += new_text
        print(new_text, end="", flush=True)

    end_time = time.time()
    print(f"\n\nGeneration took {end_time - start_time:.2f} seconds")
    return generated_text

# Test the streaming generation
test_prompt = "Hello"
generated_text = generate_streaming(test_prompt)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

Hello<think>

</think>

Hello! How can I assist you today? 😊

Generation took 0.98 seconds


In [None]:
    generated_text = generate_streaming("""James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? You can not use Prohibited words including but are not limited to:

"wait"
"alternatively"
"however"
"instead"
"rather"
"but"
"though"
"whereas"
"conversely"
"meanwhile"

This is a competition, each time you use above word your score reduces by 1.
Alright, so I need to figure out how many total meters James runs in a week. He does 1,793,815 sprints, and each sprint is 60 meters. So, I think I need to multiply the number of sprints by the distance per sprint. Let me write that down.

Number of sprints: 1,793,815
Distance per sprint: 60 meters

Total meters = Number of sprints × Distance per sprint

So, Total meters = 1,793,815 × 60

Hmm, multiplying such a large number can be a bit tricky. Maybe I can break it down into smaller parts. Let's see.

First, I can think of 1,793,815 as 1,793,000 + 815. So, I can split the multiplication into two parts: 1,793,000 × 60 and 815 × 60. Then, add the two results together.

Let me calculate 1,793,000 × 60 first.

1,793,000 × 60

I know that multiplying by 60 is the same as multiplying by 6 and then by 10. So, maybe I can do 1,793,000 × 6 first, and then multiply by 10.

1,793,000 × 6

Let me compute that:

1,793,000 × 6

Break it down:

1,000,000 × 6 = 6,000,000

700,000 × 6 = 4,200,000

90,000 × 6 = 540,000

3,000 × 6 = 18,000

Wait, hold on, actually 1,793,000 is 1,793 × 1,000. So, 1,793 × 6 = ?

Let me compute 1,793 × 6:

1,000 × 6 = 6,000

700 × 6 = 4,200

90 × 6 = 540

3 × 6 = 18

So, adding them up: 6,000 + 4,200 = 10,200; 10,200 + 540 = 10,740; 10,740 + 18 = 10,758

So, 1,793 × 6 = 10,758

Therefore, 1,793,000 × 6 = 10,758,000

Now, multiply that by 10 to get the original 1,793,000 × 60:

10,758,000 × 10 = 107,580,000

Okay, so that's 1,793,000 × 60 = 107,580,000

Now, let's compute 815 × 60

815 × 60

Again, 815 × 6 = ?

800 × 6 = 4,800

15 × 6 = 90

So, 4,800 + 90 = 4,890

Then, multiply by 10: 4,890 × 10 = 48,900

So, 815 × 60 = 48,900

Now, add the two results together: 107,580,000 + 48,900

Let me add 107,580,000 + 48,900

Adding 48,900 to 107,580,000:

107,580,000 + 48,900 = 107,628,989

So, the total meters James runs is 107,628,989 meters.

Wait, let me just double-check my calculations because it's easy to make a mistake with large numbers.

Starting with 1,793,000 × 60:

1,793,000 × 60 = (1,793 × 1,000) × 60 = (1,793 × 60) × 1,000

Wait, maybe that's another way to compute it.

1,793 × 60 = 1,793 × 6 × 10 = (1,793 × 6) × 10

1,793 × 6:

1,000 × 6 = 6,000

700 × 6 = 4,200

90 × 6 = 540

3 × 6 = 18

Adding up: 6,000 + 4,200 = 10,200; 10,200 + 540 = 10,740; 10,740 + 18 = 10,758

So, 1,793 × 6 = 10,758

Multiply by 10: 10,758 × 10 = 107,580

Then, times 1,000: 107,580 × 1,000 = 107,580,000

Yes, that's correct.

Then, 815 × 60 = 48,900

Yes, because 800 × 60 = 48,000 and 15 × 60 = 900, so 48,000 + 900 = 48,900

Adding 107,580,000 + 48,900:

107,580,000 + 40,000 = 107,620,000

Then, 107,620,000 + 8,989 = 107,628,989

Yes, that seems correct.

So, James runs 107,628,989 meters in a week.

But wait, the question says James runs 1,793,815 sprints. So, I need to make sure that the initial number is correct.

Wait, the user said that he runs 1,793,815 sprints, and each sprint is 60 meters. So, 1,793,815 × 60.

Wait, but I just did 1,793,815 × 60, but when I split it into 1,793,000 + 815, and multiplied each by 60, I got 107,580,000 + 48,989 = 107,628,989.

Wait, but 1,793,815 is 1,793,000 + 815, so when I multiplied each by 60, I got 107,580,000 + 48,900 = 107,628,989.

But wait, 1,793,815 is not 1,793,000 + 815. Wait, 1,793,815 minus 1,793,000 is 815. So, 1,793,815 = 1,793,000 + 815.

Therefore, when I compute 1,793,815 × 60, it's the same as 1,793,000 × 60 + 815 × 60, which is 107,580,000 + 48,900 = 107,628,989.

So, that's correct.

But just to make sure, let's think about another way.

Alternatively, I can compute 1,793,815 × 60 by first multiplying 1,793,815 × 6, then multiplying by 10.

So, 1,793,815 × 6

Let me compute that.

1,793,815 × 6

1,000,000 × 6 = 6,000,000

700,000 × 6 = 4,200,000

90,000 × 6 = 540,000

3,000 × 6 = 18,000

800 × 6 = 4,800

10 × 6 = 60

5 × 6 = 30

Wait, no, that's not the correct way.

Wait, 1,793,815 is 1,000,000 + 700,000 + 90,000 + 3,000 + 800 + 10 + 5

So, multiplying each by 6:

1,000,000 × 6 = 6,000,000

700,000 × 6 = 4,200,000

90,000 × 6 = 540,000

3,000 × 6 = 18,000

800 × 6 = 4,800

10 × 6 = 60

5 × 6 = 30

Now, adding all these together:

6,000,000 + 4,200,000 = 10,200,000

10,200,000 + 540,000 = 10,740,000

10,740,000 + 18,000 = 10,758,000

10,758,000 + 4,800 = 10,762,800

10,762,800 + 60 = 10,762,860

10,762,860 + 30 = 10,762,890

So, 1,793,815 × 6 = 10,762,89.0

Then, multiplying by 10:

10,762,89.0 × 10 = 107,628,989

Yes, same result.

So, that's consistent.

So, whether I compute 1,793,815 × 60 by splitting it into 1,793,000 + 815 and multiplying each by 60, or by multiplying 1,793,815 × 6 and then multiplying by 10, I get the same result: 107,628,989.

So, that seems solid.

Therefore, the total meters James runs in that week is 107,628,989 meters.

I don't think I used any prohibited words in the process, so that's good.

So, the final answer is 107,628,989 meters.


""")



Generating response:



Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? You can not use Prohibited words including but are not limited to:

"wait"
"alternatively"
"however"
"instead"
"rather"
"but"
"though"
"whereas"
"conversely"
"meanwhile"

This is a competition, each time you use above word your score reduces by 1.
Alright, so I need to figure out how many total meters James runs in a week. He does 1,793,815 sprints, and each sprint is 60 meters. So, I think I need to multiply the number of sprints by the distance per sprint. Let me write that down.

Number of sprints: 1,793,815
Distance per sprint: 60 meters

Total meters = Number of sprints × Distance per sprint

So, Total meters = 1,793,815 × 60

Hmm, multiplying such a large number can be a bit tricky. Maybe I can break it down into smaller parts. Let's see.

First, I can think of 1,793,815 as 1,793,000 + 815. So, I can split the multiplication into two parts: 1,793,0

# Comparing 1.5B vs 14B model

In [None]:
puzzles = [
    {
        "name": "James’ Sprint Challenge",
        "prompt": ("James decides to run 1,793,815 sprints in a week. "
                   "He runs 60 meters with each sprint. How many total meters does he run in that week?"),
        "expected": "107628900"  # 1,793,815 * 60 = 107628900
    },
    {
        "name": "Wendi’s Chicken Feed Dilemma",
        "prompt": ("Every day, Wendi feeds each of her 20 chickens three cups of mixed feed. "
                   "In the morning she dispenses 6,887,483 cups and in the afternoon 25 cups. "
                   "How many cups of feed does she need to serve in the final meal of the day?"),
        "expected": "12"  # Using mod arithmetic: (6887483 % 60 = 23) + 25 = 48; final = 60 - 48 = 12
    },
    {
        "name": "Kylar’s Discounted Glasses",
        "prompt": ("At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"),
        "expected": "21459061"  # (5364765//2)*8 + 5 = 21459061
    },
    {
        "name": "Sheep Counting Conundrum",
        "prompt": ("Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. "
                   "If Seattle has 6,314,246 sheep, how many sheep do all three have in total?"),
        "expected": "82085198"  # Seattle + (4*Seattle) + (2*4*Seattle) = 6,314,246 + 25,256,984 + 50,513,968
    },
    {
        "name": "Eliza’s Overtime Earnings",
        "prompt": ("Eliza earns $1,616,598 per hour for the first 40 hours of her week and receives overtime pay "
                   "at 1.2 times that rate for any extra hours. If she works 45 hours in a week, what are her total earnings?"),
        "expected": "74363508"  # (40 * 1616598) + (5 * 1616598 * 1.2)
    },
    {
        "name": "Program Download Dynamics",
        "prompt": ("A new program had 531,811 downloads in its first month. In the second month the downloads tripled, "
                   "but in the third month they dropped by 30%. What is the total number of downloads over the three months?"),
        "expected": "3244047"  # 531811 + (3*531811) + (0.7*3*531811) = 3244047
    },
    {
        "name": "Bakery Pastries Pricing Puzzle",
        "prompt": ("Toula visits a bakery and buys: 3 dozen donuts at $5,804,528 per dozen, 2 dozen mini cupcakes at $80 per dozen, "
                   "and 6 dozen mini cheesecakes at $55 per dozen. What is the total cost of her purchase?"),
        "expected": "17414074"  # (3*5804528) + (2*80) + (6*55)
    },
    {
        "name": "Lemon Tree Investment",
        "prompt": ("Carlos plants a lemon tree for $90. Each year the tree produces 2,256,855 lemons (which sell for $1.50 each), "
                   "but it costs $3 per year for maintenance. How many years must pass before the lemon sales begin to cover the initial cost?"),
        "expected": "1"  # One year more than covers $90
    },
    {
        "name": "Beverage Mixing Mishap",
        "prompt": ("You have 10 liters of orange drink that is two‑thirds water and plan to mix it with 15 liters of pineapple drink "
                   "that is three‑fifths water. However, while pouring, you spill 1,010,497 liters of the orange drink. "
                   "In the final 24 liters you manage to combine, how much water is present?"),
        "expected": "15"  # Final mix: 9 liters orange (2/3 water → 6 liters) + 15 liters pineapple (3/5 water → 9 liters) = 15 liters total water
    },
    {
        "name": "Billy’s DVD Sales Scenario",
        "prompt": ("On Tuesday, Billy serves 8 customers. His first 3 customers each buy 1 DVD, his next 689,566 customers each buy "
                   "689,566 DVDs, and his last 3 customers buy nothing. How many DVDs did he sell that day?"),
        "expected": "475501268359"  # 3 + (689566^2) + 0 = 475501268359
    }
]

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import time
import re

# --- Load the 14B model if not already loaded ---
if 'model' not in globals() or model is None:
    print("Loading 14B model ...")
    tokenizer = AutoTokenizer.from_pretrained(
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
        trust_remote_code=True,
        padding_side='left'
    )
    tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    )
    model.config.use_cache = True
    model.eval()
else:
    print("14B model already loaded.")

# --- Load the 1.5B model (renamed to model_15) if not already loaded ---
if 'model_15' not in globals() or model_15 is None:
    print("Loading 1.5B model as model_15 ...")
    tokenizer_15 = AutoTokenizer.from_pretrained(
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        trust_remote_code=True,
        padding_side='left'
    )
    tokenizer_15.pad_token = tokenizer_15.eos_token

    model_15 = AutoModelForCausalLM.from_pretrained(
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    )
    model_15.config.use_cache = True
    model_15.eval()
else:
    print("1.5B model (model_15) already loaded.")

# --- Function to run a single puzzle for a given model ---
def run_puzzle(model, tokenizer, puzzle, max_new_tokens=4000, temperature=2.0, top_p=0.9, top_k=2):
    inputs = tokenizer(
        puzzle["prompt"],
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=4000  # adjust if needed
    ).to(model.device)

    start = time.time()
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            do_sample=True  # enabled to use temperature/top_p/top_k
        )
    end = time.time()
    generation_time = end - start

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Use regex to extract a number from the output (if any)
    match = re.search(r"(\d[\d,\.]*)", decoded)
    extracted = match.group(1).replace(',', '') if match else ""

    # Check if the extracted answer contains the expected answer as a substring
    is_correct = puzzle["expected"] in extracted
    return {
        "generated_text": decoded,
        "extracted_answer": extracted,
        "generation_time": generation_time,
        "is_correct": is_correct
    }

# --- Run puzzles for the 14B model ---
results = {"14B": [], "1.5B": []}

print("\n--- Running puzzles with 14B model ---")
for puzzle in puzzles:
    print(f"\nPuzzle: {puzzle['name']}")
    result = run_puzzle(model, tokenizer, puzzle)
    results["14B"].append({
        "puzzle": puzzle["name"],
        "is_correct": result["is_correct"],
        "extracted_answer": result["extracted_answer"],
        "generation_time": result["generation_time"]
    })
    print(f"Prompt: {puzzle['prompt']}")
    print(f"Expected: {puzzle['expected']}")
    print(f"Model output: {result['generated_text']}")
    print(f"Extracted answer: {result['extracted_answer']}")
    print(f"Time taken: {result['generation_time']:.2f} seconds")
    print("Correct:" if result["is_correct"] else "Incorrect!")

correct_count_14B = sum(1 for res in results["14B"] if res["is_correct"])
print(f"\n14B model got {correct_count_14B} out of {len(puzzles)} puzzles correct.")

# --- Run puzzles for the 1.5B model (model_15) ---
print("\n--- Running puzzles with 1.5B model (model_15) ---")
for puzzle in puzzles:
    print(f"\nPuzzle: {puzzle['name']}")
    result = run_puzzle(model_15, tokenizer_15, puzzle)
    results["1.5B"].append({
        "puzzle": puzzle["name"],
        "is_correct": result["is_correct"],
        "extracted_answer": result["extracted_answer"],
        "generation_time": result["generation_time"]
    })
    print(f"Prompt: {puzzle['prompt']}")
    print(f"Expected: {puzzle['expected']}")
    print(f"Model output: {result['generated_text']}")
    print(f"Extracted answer: {result['extracted_answer']}")
    print(f"Time taken: {result['generation_time']:.2f} seconds")
    print("Correct:" if result["is_correct"] else "Incorrect!")

correct_count_15 = sum(1 for res in results["1.5B"] if res["is_correct"])
print(f"\n1.5B model (model_15) got {correct_count_15} out of {len(puzzles)} puzzles correct.")

# --- Cleanup ---
torch.cuda.empty_cache()


14B model already loaded.
Loading 1.5B model as model_15 ...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



--- Running puzzles with 14B model ---

Puzzle: James’ Sprint Challenge


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week?
Expected: 107628900
Model output: James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? Express the total in scientific notation.
Okay, so James is running a lot of sprints in a week. Each sprint is 60 meters, right? He does 1,793,815 sprints. Hmm, I think I can figure this out step by step.

First, I know he runs 60 meters each time. So if he runs that 60 meters multiple times, the total distance would be 60 meters multiplied by the number of sprints he does. That makes sense. So, total meters = 60 * number of sprints.

The number of sprints is 1,793,815. Let me write that down. So, 1,793,815 sprints. Now, I need to multiply that by 60.

Hmm, multiplying a large number like 1,793,815 by 60 might be a bit tricky, but maybe I can break it down. I remember that 1,793,81

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: Every day, Wendi feeds each of her 20 chickens three cups of mixed feed. In the morning she dispenses 6,887,483 cups and in the afternoon 25 cups. How many cups of feed does she need to serve in the final meal of the day?
Expected: 12
Model output: Every day, Wendi feeds each of her 20 chickens three cups of mixed feed. In the morning she dispenses 6,887,483 cups and in the afternoon 25 cups. How many cups of feed does she need to serve in the final meal of the day? To answer this, you should assume that Wendi feeds each of her chickens three cups every day. How does the answer change if she feeds each of her chickens 3 cups only in the morning and not at all in the afternoon?

Part A: Wendi needs to provide how many cups of feed to serve in the final meal of the day?

Part B: How does the answer change if she feeds each of her chickens only in the morning?

Part C: How does the answer change if she feeds only 3 cups in each of the morning and 0 cups in each afternoon for each 

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: At the store, one glass costs $5, but every second glass costs only 60% of that price. Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?
Expected: 21459061
Model output: At the store, one glass costs $5, but every second glass costs only 60% of that price. Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay? Let the answer be in the form of a number, without any units. If the answer is a decimal, use the decimal form, otherwise, use a fraction.

To determine the number of glasses, Kylar can use the following approach: Kylar can buy 100 glasses, but every second glass costs only 60% of that price. If Kylar can use the approach above, what's the total cost? Let the answer be in the number of... glasses, but if the answer is a decimal, use the decimal form, otherwise, use a fraction.

Wait, I'm getting confused. Let me try to figure this step by step. First, I need to figure out the total cost for purchasing 5,364,765 glasses,

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. If Seattle has 6,314,246 sheep, how many sheep do all three have in total?
Expected: 82085198
Model output: Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. If Seattle has 6,314,246 sheep, how many sheep do all three have in total? Let me explain my thinking step by step.

To start, let's define variables for each city.

Let S be the number of sheep in Seattle.

C is the number of sheep in Charleston, and T is the number of sheep in Toulouse.

Given that S = 6,314,246.

We know that C is four times as much as S, so:

C = 4 × S

Similarly, T is twice as many sheep as C, so:

T = 2 × C

Now, let's compute the number of sheep in each location.

First, Charleston: 4 × S = ?

Then, Toulouse: 2 × (C) = ?

Finally, add them up to get total sheep.

Let's compute each step.

First, S is 6,314,246.

C is 4 times S: 4 × 6,314,246.

Let me calcu

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: Eliza earns $1,616,598 per hour for the first 40 hours of her week and receives overtime pay at 1.2 times that rate for any extra hours. If she works 45 hours in a week, what are her total earnings?
Expected: 74363508
Model output: Eliza earns $1,616,598 per hour for the first 40 hours of her week and receives overtime pay at 1.2 times that rate for any extra hours. If she works 45 hours in a week, what are her total earnings? Additionally, if Eliza works an additional 30 minutes on Saturday, how much more money does she make? To the nearest whole number.
Alright, let me try to figure this out. So Eliza works 45 hours in a week. The problem is asking for her total earnings. Hmm. First, I need to figure out how much she earns per hour and how overtime works.

She earns $1,616,598 for the first 40 hours, which is her regular rate. Then, for any hours she works beyond that, she gets overtime pay at 1.2 times that rate. So, overtime pay is 1.2 multiplied by her regular hourly rate.

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: A new program had 531,811 downloads in its first month. In the second month the downloads tripled, but in the third month they dropped by 30%. What is the total number of downloads over the three months?
Expected: 3244047
Model output: A new program had 531,811 downloads in its first month. In the second month the downloads tripled, but in the third month they dropped by 30%. What is the total number of downloads over the three months? Express the answer as a whole number.

To determine the total downloads, we need to compute the downloads for each of the three months and then sum them up.

First, the first month has 531,811 downloads. 

For the second month, the downloads tripled. So, we can compute this as 531,811 multiplied by 3. Let's do the calculation: 531,811 × 3. 

Breaking it down, 530,000 × 3 is 1,590,000, and then 1,811 × 3 is 5,433. Adding these together, we get 1,595,433 downloads for the second month.

Now, moving on to the third month, where the downloads dropped

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: Toula visits a bakery and buys: 3 dozen donuts at $5,804,528 per dozen, 2 dozen mini cupcakes at $80 per dozen, and 6 dozen mini cheesecakes at $55 per dozen. What is the total cost of her purchase?
Expected: 17414074
Model output: Toula visits a bakery and buys: 3 dozen donuts at $5,804,528 per dozen, 2 dozen mini cupcakes at $80 per dozen, and 6 dozen mini cheesecakes at $55 per dozen. What is the total cost of her purchase? Express your answer in dollars, rounded to the nearest hundredth.
Tooula buys 3 dozen donuts, 2 dozen mini cupcakes, and 6 dozen mini cheesecakes.

Each donut box costs $5,804,528 per dozen.

Each cupcake box is $80 per dozen.

Each cheesecake box is $55 per dozen.

So the cost for the donuts is 3 times $5,804,528.

Similarly for cupcakes and cheesecakes.

So the total cost is 3*5,804,528 + 2*80 + 6*55. Let's calculate that.

Wait, but 5,804,528 seems like a really high number. Maybe I misread it.

Wait no, the problem says: 3 dozen donuts at $5,804,528 p

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: Carlos plants a lemon tree for $90. Each year the tree produces 2,256,855 lemons (which sell for $1.50 each), but it costs $3 per year for maintenance. How many years must pass before the lemon sales begin to cover the initial cost?
Expected: 1
Model output: Carlos plants a lemon tree for $90. Each year the tree produces 2,256,855 lemons (which sell for $1.50 each), but it costs $3 per year for maintenance. How many years must pass before the lemon sales begin to cover the initial cost? (Round your answer to the nearest integer.)

Alright, let's try to tackle this problem step by step. So Carlos has a lemon tree that he bought for $90. Every year, it produces a certain number of lemons which he can sell. But each year, there's a maintenance cost of $3. The question is asking how many years must pass before the lemon sales begin to cover the initial cost. Hmm, okay, so we're looking for the point where the total revenue from lemon sales equals the initial investment, right? But 

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: You have 10 liters of orange drink that is two‑thirds water and plan to mix it with 15 liters of pineapple drink that is three‑fifths water. However, while pouring, you spill 1,010,497 liters of the orange drink. In the final 24 liters you manage to combine, how much water is present?
Expected: 15
Model output: You have 10 liters of orange drink that is two‑thirds water and plan to mix it with 15 liters of pineapple drink that is three‑fifths water. However, while pouring, you spill 1,010,497 liters of the orange drink. In the final 24 liters you manage to combine, how much water is present? (Round your answer to the nearest integer.)

Alright, so I have this problem where I need to figure out how much water is in the final 24 liters after mixing orange and pineapple drinks. Let's break this down step by step.

First, the problem says there are 10 liters of orange drink, and it is two-thirds water. So, I need to figure out how much water is in the orange drink before mixing. Si

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: On Tuesday, Billy serves 8 customers. His first 3 customers each buy 1 DVD, his next 689,566 customers each buy 689,566 DVDs, and his last 3 customers buy nothing. How many DVDs did he sell that day?
Expected: 475501268359
Model output: On Tuesday, Billy serves 8 customers. His first 3 customers each buy 1 DVD, his next 689,566 customers each buy 689,566 DVDs, and his last 3 customers buy nothing. How many DVDs did he sell that day? To answer the question, you must use the number of digits in the number of DVD sales.
To answer the question, you must use the number of digits in the number of DVD sales.
Wait, I think I need to clarify the problem. Let me read it again:

On Tuesday, Billy serves 8 customers. His first 3 customers each buy 1 DVD, his next 689,566 customers each buy 689,566 DVDs, and his last 3 customers buy nothing. How many DVDs did he sell that day? To answer the question, you must use the number of digits in the number of DVD sales.

Wait, I'm getting confused. 

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week?
Expected: 107628900
Model output: James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? To determine the total meters he runs, James needs to multiply the number of sprints he runs in a week by the length of each sprint. So, 1,793,815 multiplied by 60. Let me compute that.

Wait, I'm not sure about the exact multiplication here, maybe there's a way to break it down to make it easier.

Alternatively, maybe James can run multiple sprints in a day, and I need to figure out how many days a week he does that, but the problem doesn't specify the number of sprints per day, so perhaps I should stick to the initial approach of multiplying the number of sprints by 60 meters each.

Let me write that out:

Total meters = number of sprints × length per sprint

Total meters = 1,793

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: Every day, Wendi feeds each of her 20 chickens three cups of mixed feed. In the morning she dispenses 6,887,483 cups and in the afternoon 25 cups. How many cups of feed does she need to serve in the final meal of the day?
Expected: 12
Model output: Every day, Wendi feeds each of her 20 chickens three cups of mixed feed. In the morning she dispenses 6,887,483 cups and in the afternoon 25 cups. How many cups of feed does she need to serve in the final meal of the day? To answer this question, you should use the following information:

Everyday, W...
To answer this question, you should use the following information:

To determine the number of cups of feed needed, we need to know the number of days, the cups of feed per day, and the cups dispensed each day.

To determine the number of days, we need to know the total number of cups of feed needed and how many cups are dispensed in a single day.

Wait, I think I need to figure this out step by step. Let me try.

First, Wendi feeds h

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: At the store, one glass costs $5, but every second glass costs only 60% of that price. Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?
Expected: 21459061
Model output: At the store, one glass costs $5, but every second glass costs only 60% of that price. Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay? To compute the total cost, you should use the following formula:

$\frac{5}{1}+\frac{5}{2}+\frac{5}{3}+...+\frac{5}{n}$
</think><think>
First, I recognize that Kylar needs to buy 5,364,765 glasses. The pricing structure is such that the first glass costs $5, the second glass costs 60% of that price, which is $3.00, and so on.

To find the total cost, I need to calculate the cost of each individual glass and sum them up. This means I'll have to compute the sum from n = 1 to n = 5,364,765 of 5 divided by n. 

Since this is a large number, I'll use the approximation of the harmonic series. For a large number n, the sum of 

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. If Seattle has 6,314,246 sheep, how many sheep do all three have in total?
Expected: 82085198
Model output: Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. If Seattle has 6,314,246 sheep, how many sheep do all three have in total? To solve, set up equations and use substitution. Let S represent the number of sheep at Seattle, Ch represent at Charyton, and Ss represent at Toulle.
Alright, let me try to figure this out. So, the problem is about sheep in three places: Toulouse, Charleston, and Seattle. The question is asking for the total number of sheep they have together. They gave me some relationships between the numbers, and I need to use substitution to find the answer.

First, let me note down what's given:

- Toulouse has twice as many sheep as Charleston. Hmm, so T = 2 * C.
- Charleston has four times as many sheep as Seattle.

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: Eliza earns $1,616,598 per hour for the first 40 hours of her week and receives overtime pay at 1.2 times that rate for any extra hours. If she works 45 hours in a week, what are her total earnings?
Expected: 74363508
Model output: Eliza earns $1,616,598 per hour for the first 40 hours of her week and receives overtime pay at 1.2 times that rate for any extra hours. If she works 45 hours in a week, what are her total earnings? Additionally, if Eliza works 80 hours, how much more money would she make? If she... (continues to the question) but the user hasn't specified the continuation, so I think the question is complete up to the second part.
Alright, so Eliza earns $1,616,598 per hour for the first 40 hours of her week. If she works more than 40 hours, she gets overtime pay, which is 1.2 times that rate. 

The first part is asking, if Eliza works 45 hours, what's her total earnings? Let me break that down. 

She works 40 hours at her regular rate and 5 extra hours as overtime.

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: A new program had 531,811 downloads in its first month. In the second month the downloads tripled, but in the third month they dropped by 30%. What is the total number of downloads over the three months?
Expected: 3244047
Model output: A new program had 531,811 downloads in its first month. In the second month the downloads tripled, but in the third month they dropped by 30%. What is the total number of downloads over the three months? (Round your answer to the nearest whole number.)

To determine the number, I need to calculate the downloads each month, sum them up, and round to the nearest whole number. So first, the first download is 531,811. In the second month, the downloads tripled, which would be 531,811 * 3, and in the third month, they dropped by 30%, so that would be (531,811 * 3) * (70%). Then I add all these three numbers to get the total downloads over the three months.

Wait, is that correct? Let me double-check. So the first month is straightforward, 531,811 down

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: Toula visits a bakery and buys: 3 dozen donuts at $5,804,528 per dozen, 2 dozen mini cupcakes at $80 per dozen, and 6 dozen mini cheesecakes at $55 per dozen. What is the total cost of her purchase?
Expected: 17414074
Model output: Toula visits a bakery and buys: 3 dozen donuts at $5,804,528 per dozen, 2 dozen mini cupcakes at $80 per dozen, and 6 dozen mini cheesecakes at $55 per dozen. What is the total cost of her purchase? Express the total cost in dollars, using only the number of digits in the number of the number of donuts, mini cupcakes, and mini cheesecakes. If the total cost is a multiple of 10,000, then express it using only the number of digits in the number of the number of mini cupcakes. If the total cost is a multiple of 10,000, then express it using only the number of digits in the number of the mini cheesecakes.

Wait, no, I need to clarify: The total cost should be expressed in dollars, using only the number of digits in the number of donuts, mini cupcakes, an

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: Carlos plants a lemon tree for $90. Each year the tree produces 2,256,855 lemons (which sell for $1.50 each), but it costs $3 per year for maintenance. How many years must pass before the lemon sales begin to cover the initial cost?
Expected: 1
Model output: Carlos plants a lemon tree for $90. Each year the tree produces 2,256,855 lemons (which sell for $1.50 each), but it costs $3 per year for maintenance. How many years must pass before the lemon sales begin to cover the initial cost? (Round your answer to the nearest integer.)

Okay, let's see. So Carlos plants a lemon tree for $90. Every year, the tree gives 2,256,855 lemons, and each of those lemons sells for $1.50. But each year, it costs $3 to maintain. The question is asking how many years must pass before the lemon sales begin to cover the initial cost. So we need to figure out after how many years the total lemon sales will have enough to cover the $90 he initially invested.

Hmm. So first, let me break down the infor

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Prompt: You have 10 liters of orange drink that is two‑thirds water and plan to mix it with 15 liters of pineapple drink that is three‑fifths water. However, while pouring, you spill 1,010,497 liters of the orange drink. In the final 24 liters you manage to combine, how much water is present?
Expected: 15
Model output: You have 10 liters of orange drink that is two‑thirds water and plan to mix it with 15 liters of pineapple drink that is three‑fifths water. However, while pouring, you spill 1,010,497 liters of the orange drink. In the final 24 liters you manage to combine, how much water is present? To
the nearest
...
...
...
...
...
...
...
...
...
...

To the
 nearest
...

...

To the
 nearest
...

To the
 nearest
...

To the nearest
...

To the nearest
...

To the nearest
...

...

To the nearest
...

To the nearest

To the nearest

To the nearest

...

To the nearest

To the nearest

To the nearest

To the nearest

To the nearest

To the nearest

To the nearest

To the nearest

To 

#Testing both models with different smapling and temperature

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import time
import re
import json
from datetime import datetime
import os
from concurrent.futures import ThreadPoolExecutor
import gc
from tqdm.notebook import tqdm

# Create results directory with subdirectories for different output types
# Use an absolute path to save in a permanent location
base_dir = os.path.expanduser("~/param_search_results")  # Saves to user's home directory
results_dir = os.path.join(base_dir, "test4")
tokens_dir = os.path.join(results_dir, "token_outputs")

# Create directories if they don't exist
for dir_path in [base_dir, results_dir, tokens_dir]:
    os.makedirs(dir_path, exist_ok=True)

# Print the paths so you can easily find the results
print(f"Results will be saved to: {results_dir}")
print(f"Token outputs will be saved to: {tokens_dir}")

def save_tokens(tokens, puzzle_name, param_str, timestamp):
    """Save token-level information"""
    token_data = {
        'input_tokens': tokens['input_tokens'].tolist(),
        'output_tokens': tokens['output_tokens'].tolist(),
        'input_decoded': tokens['input_decoded'],
        'output_decoded': tokens['output_decoded'],
        'token_count': len(tokens['output_tokens']),
    }

    filename = f"tokens_{puzzle_name}_{param_str}_{timestamp}.json"
    filepath = os.path.join(tokens_dir, filename)
    with open(filepath, 'w') as f:
        json.dump(token_data, f, indent=2)
    return filename

def process_puzzle_batch(model, tokenizer, batch, param_set, model_name):  # Added model_name parameter
    """Process a batch of puzzles with given parameters and save token information"""
    results = []
    timestamp = get_timestamp()

    for puzzle in batch:
        try:
            # Prepare input
            inputs = tokenizer(
                puzzle["prompt"],
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=4000
            ).to(model.device)

            input_tokens = inputs['input_ids'][0]
            input_decoded = tokenizer.decode(input_tokens)

            # Generate output
            start = time.time()
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=4000,
                    temperature=param_set['temperature'],
                    top_p=param_set['top_p'],
                    top_k=param_set['top_k'],
                    do_sample=True,
                    return_dict_in_generate=True,
                    output_scores=True
                )

            generation_time = time.time() - start

            # Extract output tokens and decode
            output_tokens = outputs.sequences[0]
            output_decoded = tokenizer.decode(output_tokens, skip_special_tokens=True)
            extracted = extract_answer(output_decoded)

            # Save token information
            token_info = {
                'input_tokens': input_tokens.cpu(),
                'output_tokens': output_tokens.cpu(),
                'input_decoded': input_decoded,
                'output_decoded': output_decoded
            }

            param_str = f"temp_{param_set['temperature']}_p_{param_set['top_p']}_k_{param_set['top_k']}_model_{model_name}"
            token_filename = save_tokens(token_info, puzzle['name'], param_str, timestamp)

            # Collect scores if available
            token_scores = None
            if hasattr(outputs, 'scores') and outputs.scores:
                token_scores = torch.stack(outputs.scores).cpu().numpy().tolist()

            results.append({
                'puzzle_name': puzzle['name'],
                'parameters': param_set,
                'extracted_answer': extracted,
                'expected_answer': puzzle['expected'],
                'is_correct': puzzle["expected"] in extracted,
                'generation_time': generation_time,
                'token_stats': {
                    'input_length': len(input_tokens),
                    'output_length': len(output_tokens),
                    'new_tokens': len(output_tokens) - len(input_tokens),
                    'token_file': token_filename
                },
                'has_scores': token_scores is not None
            })

        except Exception as e:
            results.append({
                'puzzle_name': puzzle['name'],
                'parameters': param_set,
                'error': str(e)
            })

        # Clear memory after each puzzle
        clear_gpu_memory()

    return results

def run_parallel_parameter_search(model, tokenizer, puzzles, model_name, params, batch_size=5):  # Added params parameter
    """Run parameter search with batched parallel processing and token storage"""
    timestamp = get_timestamp()
    param_combinations = [
        dict(zip(params.keys(), v))
        for v in zip(params['temperature'], params['top_p'], params['top_k'])
    ]

    puzzle_batches = [
        puzzles[i:i + batch_size]
        for i in range(0, len(puzzles), batch_size)
    ]

    all_results = {
        'model_name': model_name,
        'timestamp': timestamp,
        'batches': len(puzzle_batches),
        'total_puzzles': len(puzzles),
        'token_dir': tokens_dir,  # Include token directory in results
        'results': {}
    }

    # Process parameter combinations
    for param_set in tqdm(param_combinations, desc="Parameter combinations"):
        param_str = f"temp_{param_set['temperature']}_p_{param_set['top_p']}_k_{param_set['top_k']}"
        batch_results = []

        for batch_id, batch in enumerate(puzzle_batches):
            results = process_puzzle_batch(model, tokenizer, batch, param_set, model_name)  # Pass model_name
            batch_results.extend(results)

            # Save intermediate results
            save_batch_results(results, batch_id, param_str)
            clear_gpu_memory()

        # Aggregate results
        correct_count = sum(1 for r in batch_results if r.get('is_correct', False))
        total_time = sum(r.get('generation_time', 0) for r in batch_results)
        total_tokens = sum(r.get('token_stats', {}).get('new_tokens', 0)
                         for r in batch_results if 'token_stats' in r)

        all_results['results'][param_str] = {
            'parameters': param_set,
            'correct_count': correct_count,
            'accuracy': correct_count / len(puzzles),
            'average_time': total_time / len(puzzles),
            'total_tokens_generated': total_tokens,
            'average_tokens_per_puzzle': total_tokens / len(puzzles),
            'puzzle_results': batch_results
        }

    # Save final results
    summary_filename = f"summary_{model_name}_{timestamp}.json"
    filepath = os.path.join(results_dir, summary_filename)
    with open(filepath, 'w') as f:
        json.dump(all_results, f, indent=2)

    return all_results

# Helper functions (unchanged)
def get_timestamp():
    return datetime.now().strftime("%Y%m%d_%H%M%S")

def clear_gpu_memory():
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        gc.collect()

def extract_answer(text):
    answer_phrases = [
        r"answer is[:\s]+([0-9,\.]+)",
        r"equals[:\s]+([0-9,\.]+)",
        r"=\s*([0-9,\.]+)",
        r"solution[:\s]+([0-9,\.]+)",
        r"\\\[\s*([0-9,\.]+)\s*\\\]",
        r"\boxed{([0-9,\.]+)}",
    ]

    for phrase in answer_phrases:
        match = re.search(phrase, text.lower())
        if match:
            return match.group(1).replace(',', '')

    numbers = re.findall(r"(\d[\d,\.]*)", text)
    return numbers[-1].replace(',', '') if numbers else ""

def save_batch_results(results, batch_id, param_str):
    filename = f"batch_{batch_id}_{param_str}_{get_timestamp()}.json"
    filepath = os.path.join(results_dir, filename)
    with open(filepath, 'w') as f:
        json.dump(results, f, indent=2)
    return filename

# Example usage:
# Define parameters
params = {
    'temperature': [0.7, 0.8, 0.9],
    'top_p': [0.9, 0.95],
    'top_k': [40, 50]
}

# Run for 14B model with token storage
results_14b = run_parallel_parameter_search(
    model,
    tokenizer,
    puzzles,
    "14B",
    params,  # Pass params dictionary
    batch_size=5
)

clear_gpu_memory()

# Run for 1.5B model
results_1_5b = run_parallel_parameter_search(
    model_15,
    tokenizer_15,
    puzzles,
    "1.5B",
    params,  # Pass params dictionary
    batch_size=5
)

# # Create a ZIP file in the home directory
# zip_path = os.path.expanduser("~/param_search_results.zip")
# os.system(f'zip -r {zip_path} {base_dir}')
# print(f"\nResults have been zipped to: {zip_path}")

# # If you're in a Jupyter notebook and want to download
# if 'files' in globals():
#     files.download(zip_path)


Results will be saved to: /root/param_search_results/test4
Token outputs will be saved to: /root/param_search_results/test4/token_outputs


Parameter combinations:   0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
From v4.47 onwards, when a model cache is to be returned, `generate` will return a `Cache` instance instead by default (as opposed to the legacy tuple of tuples format). If you want to keep returning the legacy format, please set `return_legacy_cache=True`.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad

Parameter combinations:   0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for

In [None]:
!zip -r /content/param_search_results.zip /content/param_search_results/test4
from google.colab import files
files.download('/content/param_search_results.zip')


zip error: Nothing to do! (try: zip -r /content/param_search_results.zip . -i /content/param_search_results/test4)


FileNotFoundError: Cannot find file: /content/param_search_results.zip

In [None]:
!ls ~/param_search_results/test4

In [None]:
!zip -r /content/test4_results.zip ~/param_search_results/test4
from google.colab import files
files.download('/content/test4_results.zip')

#Blocking word test

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import time
import re
import json
from datetime import datetime
import os
from concurrent.futures import ThreadPoolExecutor
import gc
from tqdm.notebook import tqdm
from google.colab import drive, files

# Mount Google Drive for persistent storage
drive.mount('/content/drive')

# Create results directory with subdirectories for different output types
# Save to both local temp directory and Google Drive
local_base_dir = os.path.expanduser("~/param_search_results")
gdrive_base_dir = "/content/drive/MyDrive/param_search_results"

local_results_dir = os.path.join(local_base_dir, "test5_block_words")
local_tokens_dir = os.path.join(local_results_dir, "token_outputs")

gdrive_results_dir = os.path.join(gdrive_base_dir, "test5_block_words")
gdrive_tokens_dir = os.path.join(gdrive_results_dir, "token_outputs")

# Create directories if they don't exist
for dir_path in [local_base_dir, local_results_dir, local_tokens_dir,
                 gdrive_base_dir, gdrive_results_dir, gdrive_tokens_dir]:
    os.makedirs(dir_path, exist_ok=True)

# Print the paths
print(f"Local results will be saved to: {local_results_dir}")
print(f"Local token outputs will be saved to: {local_tokens_dir}")
print(f"Google Drive results will be saved to: {gdrive_results_dir}")
print(f"Google Drive token outputs will be saved to: {gdrive_tokens_dir}")

def get_blocked_token_ids(tokenizer, words_to_block=["wait", "alternatively", "perhaps", "maybe"]):
    """Get token IDs for words we want to block"""
    blocked_ids = []
    for word in words_to_block:
        # Get all possible token IDs for the word
        word_tokens = tokenizer.encode(" " + word, add_special_tokens=False)
        word_tokens.extend(tokenizer.encode(word, add_special_tokens=False))
        word_tokens.extend(tokenizer.encode(" " + word.capitalize(), add_special_tokens=False))
        word_tokens.extend(tokenizer.encode(word.capitalize(), add_special_tokens=False))
        word_tokens.extend(tokenizer.encode(" " + word.upper(), add_special_tokens=False))
        word_tokens.extend(tokenizer.encode(word.upper(), add_special_tokens=False))
        blocked_ids.extend(word_tokens)

    # Print the blocked words and their token IDs
    print(f"\nBlocking these words: {words_to_block}")
    unique_ids = list(set(blocked_ids))
    print(f"Corresponding to these {len(unique_ids)} token IDs: {unique_ids}")

    # Print token representations for debugging
    for token_id in unique_ids:
        print(f"Token ID {token_id} = '{tokenizer.decode([token_id])}'")

    return list(set(blocked_ids))

class TokenBlockLogitsProcessor:
    """Custom logits processor for token blocking"""
    def __init__(self, blocked_token_ids):
        self.blocked_token_ids = blocked_token_ids

    def __call__(self, input_ids, scores):
        # Create a mask of blocked tokens
        mask = torch.zeros_like(scores, dtype=torch.bool)
        mask[:, self.blocked_token_ids] = True
        # Set scores to -inf where mask is True
        scores = scores.masked_fill(mask, float('-inf'))
        return scores

def save_tokens(tokens, puzzle_name, param_str, timestamp):
    """Save token-level information"""
    token_data = {
        'input_tokens': tokens['input_tokens'].tolist(),
        'output_tokens': tokens['output_tokens'].tolist(),
        'input_decoded': tokens['input_decoded'],
        'output_decoded': tokens['output_decoded'],
        'token_count': len(tokens['output_tokens']),
        'blocked_tokens': tokens.get('blocked_tokens', [])
    }

    # Save locally
    filename = f"tokens_{puzzle_name}_{param_str}_{timestamp}.json"
    local_filepath = os.path.join(local_tokens_dir, filename)
    with open(local_filepath, 'w') as f:
        json.dump(token_data, f, indent=2)

    # Save to Google Drive
    gdrive_filepath = os.path.join(gdrive_tokens_dir, filename)
    with open(gdrive_filepath, 'w') as f:
        json.dump(token_data, f, indent=2)

    # Print summary
    print(f"\nSaved token data for puzzle: {puzzle_name}")
    print(f"  - Input length: {len(tokens['input_tokens'])} tokens")
    print(f"  - Output length: {len(tokens['output_tokens'])} tokens")

    return filename

def process_puzzle_batch(model, tokenizer, batch, param_set, model_name, blocked_token_ids=None):
    """Process a batch of puzzles with token blocking"""
    results = []
    timestamp = get_timestamp()

    # Create logits processor if blocked tokens are provided
    logits_processors = []
    if blocked_token_ids is not None:
        logits_processors.append(TokenBlockLogitsProcessor(blocked_token_ids))

    for puzzle in batch:
        try:
            print(f"\nProcessing puzzle: {puzzle['name']}")

            # Prepare input
            inputs = tokenizer(
                puzzle["prompt"],
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=4000
            ).to(model.device)

            input_tokens = inputs['input_ids'][0]
            input_decoded = tokenizer.decode(input_tokens)

            # Generate output
            start = time.time()
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=4000,
                    temperature=param_set['temperature'],
                    top_p=param_set['top_p'],
                    top_k=param_set['top_k'],
                    do_sample=True,
                    return_dict_in_generate=True,
                    output_scores=True,
                    logits_processor=logits_processors if logits_processors else None
                )

            generation_time = time.time() - start

            # Extract output tokens and decode
            output_tokens = outputs.sequences[0]
            output_decoded = tokenizer.decode(output_tokens, skip_special_tokens=True)
            extracted = extract_answer(output_decoded)

            # Save token information
            token_info = {
                'input_tokens': input_tokens.cpu(),
                'output_tokens': output_tokens.cpu(),
                'input_decoded': input_decoded,
                'output_decoded': output_decoded,
                'blocked_tokens': blocked_token_ids.cpu().tolist() if blocked_token_ids is not None else []
            }

            param_str = f"temp_{param_set['temperature']}_p_{param_set['top_p']}_k_{param_set['top_k']}_model_{model_name}"
            token_filename = save_tokens(token_info, puzzle['name'], param_str, timestamp)

            # Collect scores if available
            token_scores = None
            if hasattr(outputs, 'scores') and outputs.scores:
                token_scores = torch.stack(outputs.scores).cpu().numpy().tolist()

            # Print results
            print(f"  - Generation time: {generation_time:.2f} seconds")
            print(f"  - Extracted answer: {extracted}")
            print(f"  - Expected answer: {puzzle['expected']}")
            print(f"  - Correct: {'Yes' if puzzle['expected'] in extracted else 'No'}")
            print(f"  - New tokens generated: {len(output_tokens) - len(input_tokens)}")

            results.append({
                'puzzle_name': puzzle['name'],
                'parameters': param_set,
                'extracted_answer': extracted,
                'expected_answer': puzzle['expected'],
                'is_correct': puzzle["expected"] in extracted,
                'generation_time': generation_time,
                'token_stats': {
                    'input_length': len(input_tokens),
                    'output_length': len(output_tokens),
                    'new_tokens': len(output_tokens) - len(input_tokens),
                    'token_file': token_filename
                },
                'has_scores': token_scores is not None,
                'blocking_enabled': blocked_token_ids is not None
            })

        except Exception as e:
            print(f"Error processing puzzle {puzzle['name']}: {str(e)}")
            results.append({
                'puzzle_name': puzzle['name'],
                'parameters': param_set,
                'error': str(e)
            })

        # Clear memory after each puzzle
        clear_gpu_memory()

    return results

def run_parallel_parameter_search(model, tokenizer, puzzles, model_name, params, batch_size=2):
    """Run parameter search with batched parallel processing and token blocking"""
    timestamp = get_timestamp()
    param_combinations = [
        dict(zip(params.keys(), v))
        for v in zip(params['temperature'], params['top_p'], params['top_k'])
    ]

    puzzle_batches = [
        puzzles[i:i + batch_size]
        for i in range(0, len(puzzles), batch_size)
    ]

    # Get blocked tokens
    print("\nPreparing token blocking...")
    blocked_token_ids = torch.tensor(
        get_blocked_token_ids(tokenizer),
        device=model.device
    )

    all_results = {
        'model_name': model_name,
        'timestamp': timestamp,
        'batches': len(puzzle_batches),
        'total_puzzles': len(puzzles),
        'blocked_tokens': blocked_token_ids.cpu().tolist(),
        'token_dir': local_tokens_dir,
        'gdrive_token_dir': gdrive_tokens_dir,
        'results': {}
    }

    # Process parameter combinations
    for param_set in tqdm(param_combinations, desc="Parameter combinations"):
        param_str = f"temp_{param_set['temperature']}_p_{param_set['top_p']}_k_{param_set['top_k']}"
        print(f"\nProcessing parameter set: {param_str}")
        batch_results = []

        for batch_id, batch in enumerate(tqdm(puzzle_batches, desc="Processing batches")):
            print(f"\nProcessing batch {batch_id+1}/{len(puzzle_batches)}")
            results = process_puzzle_batch(model, tokenizer, batch, param_set, model_name, blocked_token_ids)
            batch_results.extend(results)

            # Save intermediate results
            save_batch_results(results, batch_id, param_str, timestamp)
            clear_gpu_memory()

        # Aggregate results
        correct_count = sum(1 for r in batch_results if r.get('is_correct', False))
        total_time = sum(r.get('generation_time', 0) for r in batch_results)
        total_tokens = sum(r.get('token_stats', {}).get('new_tokens', 0)
                         for r in batch_results if 'token_stats' in r)

        # Print summary for this parameter set
        print(f"\nResults summary for {param_str}:")
        print(f"  - Correct: {correct_count}/{len(puzzles)} ({correct_count/len(puzzles)*100:.1f}%)")
        print(f"  - Average time: {total_time/len(puzzles):.2f} seconds per puzzle")
        print(f"  - Average tokens: {total_tokens/len(puzzles):.1f} per puzzle")

        all_results['results'][param_str] = {
            'parameters': param_set,
            'correct_count': correct_count,
            'accuracy': correct_count / len(puzzles),
            'average_time': total_time / len(puzzles),
            'total_tokens_generated': total_tokens,
            'average_tokens_per_puzzle': total_tokens / len(puzzles),
            'puzzle_results': batch_results
        }

    # Save final results locally
    summary_filename = f"summary_{model_name}_{timestamp}.json"
    local_filepath = os.path.join(local_results_dir, summary_filename)
    with open(local_filepath, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Save to Google Drive too
    gdrive_filepath = os.path.join(gdrive_results_dir, summary_filename)
    with open(gdrive_filepath, 'w') as f:
        json.dump(all_results, f, indent=2)

    print(f"\nFinal results saved to:")
    print(f"  - Local: {local_filepath}")
    print(f"  - Google Drive: {gdrive_filepath}")

    return all_results

# Helper functions (unchanged)
def get_timestamp():
    return datetime.now().strftime("%Y%m%d_%H%M%S")

def clear_gpu_memory():
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        gc.collect()

def extract_answer(text):
    answer_phrases = [
        r"answer is[:\s]+([0-9,\.]+)",
        r"equals[:\s]+([0-9,\.]+)",
        r"=\s*([0-9,\.]+)",
        r"solution[:\s]+([0-9,\.]+)",
        r"\\\[\s*([0-9,\.]+)\s*\\\]",
        r"\boxed{([0-9,\.]+)}",
    ]

    for phrase in answer_phrases:
        match = re.search(phrase, text.lower())
        if match:
            return match.group(1).replace(',', '')

    numbers = re.findall(r"(\d[\d,\.]*)", text)
    return numbers[-1].replace(',', '') if numbers else ""

def save_batch_results(results, batch_id, param_str, timestamp):
    # Save locally
    filename = f"batch_{batch_id}_{param_str}_{timestamp}.json"
    local_filepath = os.path.join(local_results_dir, filename)
    with open(local_filepath, 'w') as f:
        json.dump(results, f, indent=2)

    # Save to Google Drive
    gdrive_filepath = os.path.join(gdrive_results_dir, filename)
    with open(gdrive_filepath, 'w') as f:
        json.dump(results, f, indent=2)

    print(f"Batch results saved to: {filename}")
    return filename

# Example usage:
# Define parameters - use smaller set for faster testing
params = {
    'temperature': [0.7],  # Reduced to run faster
    'top_p': [0.9],        # Reduced to run faster
    'top_k': [40]          # Reduced to run faster
}


# Run for 14B model with token blocking and verbose output
results_14b = run_parallel_parameter_search(
    model_14B,
    tokenizer_14B,
    puzzles,
    "14B",
    params,  # Pass params dictionary
    batch_size=2  # Smaller batch size for stability
)

clear_gpu_memory()

# Run for 1.5B model with token blocking and verbose output
results_1_5b = run_parallel_parameter_search(
    model,
    tokenizer,
    puzzles,
    "1.5B",
    params,  # Pass params dictionary
    batch_size=2  # Smaller batch size for stability
)

"""

# Create ZIP file of local results
local_zip_path = f"/content/param_search_results_blockwords_{get_timestamp()}.zip"
!zip -r "{local_zip_path}" "{local_base_dir}"
print(f"\nResults have been zipped to: {local_zip_path}")

# Download the zip file
files.download(local_zip_path)

# Create ZIP file in Google Drive too
gdrive_zip_path = os.path.join(gdrive_base_dir, f"param_search_results_blockwords_{get_timestamp()}.zip")
!zip -r "{gdrive_zip_path}" "{gdrive_results_dir}"
print(f"\nResults have also been zipped to Google Drive: {gdrive_zip_path}")
"""

#Manual Tests


In [None]:
import torch
from transformers import TextIteratorStreamer
from threading import Thread
import time

model = model_14B
tokenizer = tokenizer_14B

def generate_streaming(prompt, max_new_tokens=100):
    # Tokenize input
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=4000
    ).to(model.device)

    # Create a streamer
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    # Generation arguments
    gen_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=4000,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

    # Start generation in a separate thread
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    # Print tokens as they're generated
    print("\nGenerating response:\n")
    generated_text = ""
    start_time = time.time()

    for new_text in streamer:
        generated_text += new_text
        print(new_text, end="", flush=True)

    end_time = time.time()
    print(f"\n\nGeneration took {end_time - start_time:.2f} seconds")
    return generated_text

# Test the streaming generation
test_prompt = "Hello"
generated_text = generate_streaming(test_prompt)

NameError: name 'model_14B' is not defined

In [None]:
import torch
from transformers import TextIteratorStreamer
from threading import Thread
import time

model = model_14B
tokenizer = tokenizer_14B

def generate_streaming(prompt, max_new_tokens=4000):
    # Enable CUDA optimization if available
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

    # Use flash attention if available
    model.config.use_flash_attention_2 = True

    # Tokenize input (with optimized settings)
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=4000
    ).to(model.device)

    # Clear CUDA cache before generation
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Create a streamer
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    # Generation arguments with optimized settings
    gen_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        # top_p=0.9,
        # Speed optimization parameters
        use_cache=True,
        # Consider reducing these if quality can be maintained
        num_beams=1  # Greedy decoding is faster than beam search
    )

    # Start generation in a separate thread
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    # Print tokens as they're generated
    print("\nGenerating response:\n")
    generated_text = ""
    start_time = time.time()

    for new_text in streamer:
        generated_text += new_text
        print(new_text, end="", flush=True)

    end_time = time.time()
    tokens_per_second = max_new_tokens / (end_time - start_time) if (end_time - start_time) > 0 else 0
    print(f"\n\nGeneration took {end_time - start_time:.2f} seconds ({tokens_per_second:.2f} tokens/sec)")
    return generated_text

# For even faster generation with lower memory usage, consider enabling 4-bit or 8-bit quantization
def load_quantized_model():
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # 4-bit quantization configuration
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )

    # Load the model with quantization
    model = AutoModelForCausalLM.from_pretrained(
        "deepseek-ai/deepseek-qwen-14b",
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

    return model

# Example usage:
# model_14B = load_quantized_model()
# tokenizer_14B = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-qwen-14b")
test_prompt = "What is 1345 × 982?"
generated_text = generate_streaming(test_prompt, max_new_tokens=500)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

What is 1345 × 982? Let me think.

Alright, so I need to multiply 1345 by 982. Hmm, that's a big multiplication. Let me break it down step by step because multiplying such large numbers directly seems a bit overwhelming. Maybe I can use the standard multiplication method, where I multiply each digit of the second number by the first number and then add up the results. 

First, I should write down the numbers:

```
    1345
  ×  982
```

I think I should start by multiplying 1345 by each digit of 982, starting from the rightmost digit, which is 2. So, 1345 × 2. Let me calculate that.

1345 × 2: 

KeyboardInterrupt: 

Load 14B from here

In [None]:
print(model_15)

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 1536)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=1536, out_features=1536, bias=True)
          (k_proj): Linear(in_features=1536, out_features=256, bias=True)
          (v_proj): Linear(in_features=1536, out_features=256, bias=True)
          (o_proj): Linear(in_features=1536, out_features=1536, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=1536, out_features=8960, bias=False)
          (up_proj): Linear(in_features=1536, out_features=8960, bias=False)
          (down_proj): Linear(in_features=8960, out_features=1536, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((1536,), eps=1e-06)
    (rotary_emb): Qw

In [None]:
print(model_14B)

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(152064, 5120)
    (layers): ModuleList(
      (0-47): 48 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=5120, out_features=5120, bias=True)
          (k_proj): Linear(in_features=5120, out_features=1024, bias=True)
          (v_proj): Linear(in_features=5120, out_features=1024, bias=True)
          (o_proj): Linear(in_features=5120, out_features=5120, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=5120, out_features=13824, bias=False)
          (up_proj): Linear(in_features=5120, out_features=13824, bias=False)
          (down_proj): Linear(in_features=13824, out_features=5120, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((5120,), eps=1e-05)
        (post_attention_layernorm): Qwen2RMSNorm((5120,), eps=1e-05)
      )
    )
    (norm): Qwen2RMSNorm((5120,), eps=1e-05)
    (rotary_emb

In [None]:
import torch
from transformers import TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer
from threading import Thread
import time

def load_model():
    """Load DeepSeek-R1-Distill-Qwen-14B model with optimizations for A100"""
    # Load model with FP16 precision
    model = AutoModelForCausalLM.from_pretrained(
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
        torch_dtype=torch.float16,  # Use FP16 for faster inference
        trust_remote_code=True,
        device_map="auto"
    )

    tokenizer = AutoTokenizer.from_pretrained(
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
        trust_remote_code=True
    )

    return model, tokenizer

def optimize_model(model):
    """Apply A100-specific optimizations"""
    # Enable TF32 precision (NVIDIA A100 supports this)
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

    # Enable Flash Attention if available
    if hasattr(model.config, 'use_flash_attention_2'):
        model.config.use_flash_attention_2 = True

    # Enable KV cache
    if hasattr(model.config, 'use_cache'):
        model.config.use_cache = True

    # Try to apply fused operations for attention
    torch.backends.cuda.enable_mem_efficient_sdp(True)
    torch.backends.cuda.enable_flash_sdp(True)

    return model

def generate_streaming(prompt, max_new_tokens=4000, model=None, tokenizer=None):
    """Optimized streaming generation for A100"""
    if model is None or tokenizer is None:
        model, tokenizer = load_model()

    # Apply optimizations
    model = optimize_model(model)

    # Clear CUDA cache
    torch.cuda.empty_cache()

    # Make sure we have enough VRAM available
    torch.cuda.synchronize()

    # Tokenize with optimized settings
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=4000
    ).to(model.device)

    # Create streamer with increased timeout
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True, timeout=20.0)

    # Enhanced generation arguments
    gen_kwargs = {
        "input_ids": inputs.input_ids,
        "attention_mask": inputs.attention_mask,
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
        "do_sample": False,  # Greedy decoding is faster
        "use_cache": True
    }

    # Start generation thread with higher priority
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.daemon = True  # Make thread daemonic for better process exit
    thread.start()

    # Print as tokens are generated
    print("\nGenerating response:\n")
    generated_text = ""
    start_time = time.time()

    for new_text in streamer:
        generated_text += new_text
        print(new_text, end="", flush=True)

    end_time = time.time()
    duration = end_time - start_time
    tokens_generated = len(tokenizer(generated_text).input_ids)
    tokens_per_second = tokens_generated / duration if duration > 0 else 0

    print(f"\n\nGeneration took {duration:.2f} seconds")
    print(f"Generated {tokens_generated} tokens ({tokens_per_second:.2f} tokens/sec)")

    return generated_text

# Usage example:
# model, tokenizer = load_model()
# test_prompt = "Explain quantum computing in simple terms"
# generate_streaming(test_prompt, max_new_tokens=500, model=model, tokenizer=tokenizer)

# Optional: For maximum performance, try using BetterTransformer
# Uncomment and use if you have optimum installed
# from optimum.bettertransformer import BetterTransformer
# model = BetterTransformer.transform(model)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# First time loading the model
# model, tokenizer = load_model()

# Generate text with optimal settings
test_prompt = """hi, how are you?"""
generated_text = generate_streaming(test_prompt, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

hi, how are you? I'm 

Exception in thread Thread-36 (generate):
Traceback (most recent call last):
  File "/usr/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.11/threading.py", line 982, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/generation/utils.py", line 2255, in generate
    result = self._sample(
             ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/generation/utils.py", line 3257, in _sample
    outputs = model_forward(**model_inputs, return_dict=True)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^

DeepSeek-R1, an AI assistant independently developed by the Chinese company DeepSeek 

KeyboardInterrupt: 

In [None]:
test_prompt =  """James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? To solve this, you need to multiply 1,793,815 by 60. But since 1,793,815 is a large number, it's better to break it down into smaller parts. Let's see, 1,793,815 can be broken down into 1,000,000 + 700,000 + 90,000 + 3,000 + 800 + 10 + 5. Then, multiply each part by 60 and add them all together. Let's do that step by step.

First, multiply 1,000,000 by 60. That's straightforward: 1,000,000 * 60 = 60,000,000.

Next, multiply 700,000 by 60. 700,000 * 60 = 42,000,000.

Then, multiply 90,000 by 60. 90,000 * 60 = 5,400,000.

After that, multiply 3,000 by 60. 3,000 * 60 = 180,000.

Now, multiply 800 by 60. 800 * 60 = 48,000.

Next, multiply 10 by 60. 10 * 60 = 600.

Finally, multiply 5 by 60. 5 * 60 = 300.

Now, add all these results together: 60,000,000 + 42,000,000 = 102,000,000.

**Final Answer:**
James runs a total of """

generated_text = generate_streaming(test_prompt, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

James decides to run 1,793,815 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? To solve this, you need to multiply 1,793,815 by 60. But since 1,793,815 is a large number, it's better to break it down into smaller parts. Let's see, 1,793,815 can be broken down into 1,000,000 + 700,000 + 90,000 + 3,000 + 800 + 10 + 5. Then, multiply each part by 60 and add them all together. Let's do that step by step.

First, multiply 1,000,000 by 60. That's straightforward: 1,000,000 * 60 = 60,000,000.

Next, multiply 700,000 by 60. 700,000 * 60 = 42,000,000.

Then, multiply 90,000 by 60. 90,000 * 60 = 5,400,000.

After that, multiply 3,000 by 60. 3,000 * 60 = 180,000.

Now, multiply 800 by 60. 800 * 60 = 48,000.

Next, multiply 10 by 60. 10 * 60 = 600.

Finally, multiply 5 by 60. 5 * 60 = 300.

Now, add all these results together: 60,000,000 + 42,000,000 = 102,000,000.

**Final Answer:**  
James runs a total of 

Exception in thread Thread-40 (generate):
Traceback (most recent call last):
  File "/usr/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.11/threading.py", line 982, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/generation/utils.py", line 2255, in generate
    result = self._sample(
             ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/generation/utils.py", line 3257, in _sample
    outputs = model_forward(**model_inputs, return_dict=True)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^

Empty: 

In [None]:
test_prompt =  """James decides to run 1,793,186 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? To solve this, you need to multiply 1,793,815 by 60. But since 1,793,815 is a large number, it's better to break it down into smaller parts. Let's see, 1,793,815 can be broken down into 1,000,000 + 700,000 + 90,000 + 3,000 + 800 + 10 + 5. Then, multiply each part by 60 and add them all together. Let's do that step by step.

First, multiply 1,000,000 by 60. That's straightforward: 1,000,000 * 60 = 60,000,000.

Next, multiply 700,000 by 60. 700,000 * 60 = 42,000,000.

Then, multiply 90,000 by 60. 90,000 * 60 = 5,400,000.

After that, multiply 3,000 by 60. 3,000 * 60 = 180,000.

Now, multiply 800 by 60. 800 * 60 = 48,000.

Next, multiply 10 by 60. 10 * 60 = 600.

Finally, multiply 5 by 60. 5 * 60 = 300.

Now, add all these results together: 60,000,000 + 42,000,000 = 102,000,000.

**Final Answer:**
James runs a total of """

generated_text = generate_streaming(test_prompt, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

James decides to run 1,793,186 sprints in a week. He runs 60 meters with each sprint. How many total meters does he run in that week? To solve this, you need to multiply 1,793,815 by 60. But since 1,793,815 is a large number, it's better to break it down into smaller parts. Let's see, 1,793,815 can be broken down into 1,000,000 + 700,000 + 90,000 + 3,000 + 800 + 10 + 5. Then, multiply each part by 60 and add them all together. Let's do that step by step.

First, multiply 1,000,000 by 60. That's straightforward: 1,000,000 * 60 = 60,000,000.

Next, multiply 700,000 by 60. 700,000 * 60 = 42,000,000.

Then, multiply 90,000 by 60. 90,000 * 60 = 5,400,000.

After that, multiply 3,000 by 60. 3,000 * 60 = 180,000.

Now, multiply 800 by 60. 800 * 60 = 48,000.

Next, multiply 10 by 60. 10 * 60 = 600.

Finally, multiply 5 by 60. 5 * 60 = 300.

Now, add all these results together: 60,000,000 + 42,000,000 = 102,000,000.

**Final Answer:**  
James runs a total of 102,000,000 m

In [None]:
tokens = """At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"""

generated_text = generate_streaming(tokens, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?")

Alright, so I've got this problem here where Kylar wants to buy a bunch of glasses, specifically 5,364,765 of them. The store sells them with a deal: the first glass costs $5, but every second glass is 60% of that price. Hmm, okay, so I need to figure out how much Kylar has to pay in total for all those glasses.

Let me break this down. The first thing I notice is that the pricing isn't the same for every glass. The first one is $5, and then every subsequent one is 60% of $5. So, let me calculate what 60% of $5 is. 

60% can be written as 0.6 in decimal form. So, 0.6 multiplied by $5 is... let me do that: 0.6 * 5 = $3. So, every second glass costs $3. 

Now, Kylar is buying a huge number of glasses: 5,364,765. That's almost five and a half million glasses. I need to figure ou

In [None]:
test_prompt = """Now, Kylar is buying a huge number of glasses: 5,364,765. That's almost five and a half million glasses. I need to figure out how much this will cost in total.

Since the pricing alternates between $5 and $3, I think the best way to approach this is to consider the cost for every two glasses. That is, for every pair of glasses, the first one is $5 and the second one is $3. So, the cost for two glasses would be $5 + $3 = $8.

If I can figure out how many such pairs are in 5,364,765 glasses, I can multiply that by $8 to get the total cost. But wait, I need to make sure whether the total number of glasses is even or odd because if it's odd, there will be an extra glass at the end that costs $5.

Let me check: 5,364,765 divided by 2. Let me do that division. 5,364,765 ÷ 2. Well, 5,364,764 is an even number, so dividing that by 2 gives 2,682,382. Then, adding the remaining 1 glass, the total number of pairs is 2,682,382 and one extra glass.

So, the cost for the pairs is 2,682,382 * $8. Let me calculate that.

First, let me compute 2,682,382 * 8. Breaking it down:

2,682,382 * 8:
- 2,000,000 * 8 = 16,000,000
</think>

**Answer:** The total cost Kylar must pay is

"""

generated_text = generate_streaming(test_prompt, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

Now, Kylar is buying a huge number of glasses: 5,364,765. That's almost five and a half million glasses. I need to figure out how much this will cost in total. 

Since the pricing alternates between $5 and $3, I think the best way to approach this is to consider the cost for every two glasses. That is, for every pair of glasses, the first one is $5 and the second one is $3. So, the cost for two glasses would be $5 + $3 = $8. 

If I can figure out how many such pairs are in 5,364,765 glasses, I can multiply that by $8 to get the total cost. But wait, I need to make sure whether the total number of glasses is even or odd because if it's odd, there will be an extra glass at the end that costs $5. 

Let me check: 5,364,765 divided by 2. Let me do that division. 5,364,765 ÷ 2. Well, 5,364,764 is an even number, so dividing that by 2 gives 2,682,382. Then, adding the remaining 1 glass, the total number of pairs is 2,682,382 and one extra glass. 

So, the cost for the p

In [None]:
test_prompt = """Now, Kylar is buying a huge number of glasses: 5,364,765. That's almost five and a half million glasses. I need to figure out how much this will cost in total.

Since the pricing alternates between $5 and $3, I think the best way to approach this is to consider the cost for every two glasses. That is, for every pair of glasses, the first one is $5 and the second one is $3. So, the cost for two glasses would be $5 + $3 = $8.

If I can figure out how many such pairs are in 5,364,765 glasses, I can multiply that by $8 to get the total cost. But wait, I need to make sure whether the total number of glasses is even or odd because if it's odd, there will be an extra glass at the end that costs $5.

Let me check: 5,364,765 divided by 2. Let me do that division. 5,364,765 ÷ 2. Well, 5,364,764 is an even number, so dividing that by 2 gives 2,682,382. Then, adding the remaining 1 glass, the total number of pairs is 2,682,382 and one extra glass.

So, the cost for the pairs is 2,682,382 * $8. Let me calculate that.

First, let me compute 2,682,382 * 8. Breaking it down:

2,682,382 * 8:
- 2,000,000 * 8 = 16,000,000

**Answer:** The total cost Kylar must pay is

"""

generated_text = generate_streaming(test_prompt, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

Now, Kylar is buying a huge number of glasses: 5,364,765. That's almost five and a half million glasses. I need to figure out how much this will cost in total. 

Since the pricing alternates between $5 and $3, I think the best way to approach this is to consider the cost for every two glasses. That is, for every pair of glasses, the first one is $5 and the second one is $3. So, the cost for two glasses would be $5 + $3 = $8. 

If I can figure out how many such pairs are in 5,364,765 glasses, I can multiply that by $8 to get the total cost. But wait, I need to make sure whether the total number of glasses is even or odd because if it's odd, there will be an extra glass at the end that costs $5. 

Let me check: 5,364,765 divided by 2. Let me do that division. 5,364,765 ÷ 2. Well, 5,364,764 is an even number, so dividing that by 2 gives 2,682,382. Then, adding the remaining 1 glass, the total number of pairs is 2,682,382 and one extra glass. 

So, the cost for the p

1.5B thinking tests manual

In [None]:
import torch
from transformers import TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer
from threading import Thread
import time

def load_model():
    """Load DeepSeek-R1-Distill-Qwen-1.5B model with optimizations for A100"""
    # Load model with FP16 precision
    model = AutoModelForCausalLM.from_pretrained(
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        torch_dtype=torch.float16,  # Use FP16 for faster inference
        trust_remote_code=True,
        device_map="auto"
    )

    tokenizer = AutoTokenizer.from_pretrained(
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        trust_remote_code=True
    )

    return model, tokenizer

def optimize_model(model):
    """Apply A100-specific optimizations"""
    # Enable TF32 precision (NVIDIA A100 supports this)
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

    # Enable Flash Attention if available
    if hasattr(model.config, 'use_flash_attention_2'):
        model.config.use_flash_attention_2 = True

    # Enable KV cache
    if hasattr(model.config, 'use_cache'):
        model.config.use_cache = True

    # Try to apply fused operations for attention
    torch.backends.cuda.enable_mem_efficient_sdp(True)
    torch.backends.cuda.enable_flash_sdp(True)

    return model

def generate_streaming(prompt, max_new_tokens=4000, model=None, tokenizer=None):
    """Optimized streaming generation for A100"""
    if model is None or tokenizer is None:
        model, tokenizer = load_model()

    # Apply optimizations
    model = optimize_model(model)

    # Clear CUDA cache
    torch.cuda.empty_cache()

    # Make sure we have enough VRAM available
    torch.cuda.synchronize()

    # Tokenize with optimized settings
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=4000
    ).to(model.device)

    # Create streamer with increased timeout
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True, timeout=20.0)

    # Enhanced generation arguments
    gen_kwargs = {
        "input_ids": inputs.input_ids,
        "attention_mask": inputs.attention_mask,
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
        "do_sample": False,  # Greedy decoding is faster
        "use_cache": True
    }

    # Start generation thread with higher priority
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.daemon = True  # Make thread daemonic for better process exit
    thread.start()

    # Print as tokens are generated
    print("\nGenerating response:\n")
    generated_text = ""
    start_time = time.time()

    for new_text in streamer:
        generated_text += new_text
        print(new_text, end="", flush=True)

    end_time = time.time()
    duration = end_time - start_time
    tokens_generated = len(tokenizer(generated_text).input_ids)
    tokens_per_second = tokens_generated / duration if duration > 0 else 0

    print(f"\n\nGeneration took {duration:.2f} seconds")
    print(f"Generated {tokens_generated} tokens ({tokens_per_second:.2f} tokens/sec)")

    return generated_text

# Usage example:
model, tokenizer = load_model()
test_prompt = "Explain quantum computing in simple terms"
generate_streaming(test_prompt, max_new_tokens=500, model=model, tokenizer=tokenizer)

# Optional: For maximum performance, try using BetterTransformer
# Uncomment and use if you have optimum installed
# from optimum.bettertransformer import BetterTransformer
# model = BetterTransformer.transform(model)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/679 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

Explain quantum computing in simple terms, and then answer the question: "Explain quantum computing in simple terms, and then answer the question: 'Explain quantum computing in simple terms, and then answer the question: 'Explain quantum computing in simple terms, and then answer the question: 'Explain quantum computing in simple terms, and then answer the question: 'Explain quantum computing in 

KeyboardInterrupt: 

In [None]:
test_prompt = """At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"""
generate_streaming(test_prompt, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

To solve this problem, I need to find the total cost of purchasing 5,364,765 glasses, where the first glass costs $5, and every subsequent glass costs 60% of $5, which is $3.

So, the first step is to figure out how many groups of two glasses there are in 5,364,765. Then, for each group, calculate the cost and sum them up.

Wait, but 5,364,765 is an odd number. So, the last glass would be the first one in a new group. Hmm, so maybe I need to adjust for that.

Alternatively, perhaps I can model this as a sequence where each pair of glasses costs $5 + $3 = $8, and then see how many such pairs there are, and then handle the remaining single glass.

Yes, that seems like a good approach. Let's try that.

First, let's find out how many pairs of glasses there are in 5,364,765. Since 

KeyboardInterrupt: 

In [None]:
test_prompt = """At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

To solve this problem, I need to find the total cost of purchasing 5,364,765 glasses, where the first glass costs $5, and every subsequent glass costs 60% of $5, which is $3.

So, the first step is to figure out how many groups of two glasses there are in 5,364,765. Then, for each group, calculate the cost and sum them up.

Wait, but 5,364,765 is an odd number. So, the last glass would be the first one in a new group. Hmm, so maybe I need to adjust for that.

Alternatively, perhaps I can model this as a sequence where each pair of glasses costs $5 + $3 = $8, and then see how many such pairs there are, and then handle the remaining single glass.

Yes, that seems like a good approach. Let's try that.

First, let's find out how many pairs of glasses there are in 5,364,765. Since each pair consists of two glasses, the number of pairs is 5,364,765 divided by 2.

But wait, 5,364,765 is an odd number, so when we divide by 2, we'll get a decimal. Let me calculate that.

5,364,765 ÷ 2 = 2,682,382.5

To solve this problem, I need to find the total cost of purchasing 5,364,765 glasses, where the first glass costs $5, and every subsequent glass costs 60% of $5, which is $3.

So, the first step is to figure out how many groups of two glasses there are in 5,364,765. Then, for each group, calculate the cost and sum them up.

Wait, but 5,364,765 is an odd number. So, the last glass would be the first one in a new group. Hmm, so maybe I need to adjust for that.

Alternatively, perhaps I can model this as a sequence where each pair of glasses costs $5 + $3 = $8, and then see how many such pairs there are, and then handle the remaining single glass.

Yes, that seems like a good approach. Let's try that.

First, let's find out how many pairs of glasses there are in 5,364,765. Since each pair consists of two glasses, the number of pairs is 5,364,765 divided by 2.

But wait, 5,364,765 is an odd number, so when we divide by 2, we'll get a decimal. Let me calculate that.

5,364,765 ÷ 2 = 2,682,382.5To solve this problem, I need to find the total cost of purchasing 5,364,765 glasses, where the first glass costs $5, and every subsequent glass costs 60% of $5, which is $3.

So, the first step is to figure out how many groups of two glasses there are in 5,364,765. Then, for each group, calculate the cost and sum them up.

Wait, but 5,364,765 is an odd number. So, the last glass would be the first one in a new group. Hmm, so maybe I need to adjust for that.

Alternatively, perhaps I can model this as a sequence where each pair of glasses costs $5 + $3 = $8, and then see how many such pairs there are, and then handle the remaining single glass.

Yes, that seems like a good approach. Let's try that.

First, let's find out how many pairs of glasses there are in 5,364,765. Since each pair consists of two glasses, the number of pairs is 5,364,765 divided by 2.

But wait, 5,364,765 is an odd number, so when we divide by 2, we'll get a decimal. Let me calculate that.

5,364,765 ÷ 2 = 2,682,382.5To solve this problem, I need to find the total cost of purchasing 5,364,765 glasses, where the first glass costs $5, and every subsequent glass costs 60% of $5, which is $3.

So, the first step is to figure out how many groups of two glasses there are in 5,364,765. Then, for each group, calculate the cost and sum them up.

Wait, but 5,364,765 is an odd number. So, the last glass would be the first one in a new group. Hmm, so maybe I need to adjust for that.

Alternatively, perhaps I can model this as a sequence where each pair of glasses costs $5 + $3 = $8, and then see how many such pairs there are, and then handle the remaining single glass.

Yes, that seems like a good approach. Let's try that.

First, let's find out how many pairs of glasses there are in 5,364,765. Since each pair consists of two glasses, the number of pairs is 5,364,765 divided by 2.

But wait, 5,364,765 is an odd number, so when we divide by 2, we'll get a decimal. Let me calculate that.

5,364,765 ÷ 2 = 2,682,382.5

</think>

I am getting to token limit, I need to print answer in 15 tokens. That is right, the final answer is """

generated_text = generate_streaming(test_prompt, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

To solve this problem, I need to find the total cost of purchasing 5,364,765 glasses, where the first glass costs $5, and every subsequent glass costs 60% of $5, which is $3.

So, the first step is to figure out how many groups of two glasses there are in 5,364,765. Then, for each group, calculate the cost and sum them up.

Wait, but 5,364,765 is an odd number. So, the last glass would be the first one in a new group. Hmm, so maybe I need to adjust for that.

Alternatively, perhaps I can model this as a sequence where each pair of glasses costs $5 + $3 = $8, and then see how many such pairs there are, and then handle the remaining single glass.

Yes, that seems like a good approach. Let's try that.

First, let's find out how many pairs of glasses there are in 5,364,765. Since 

In [None]:
test_prompt = """At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

To solve this problem, I need to find the total cost of purchasing 5,364,765 glasses, where the first glass costs $5, and every subsequent glass costs 60% of $5, which is $3.

So, the first step is to figure out how many groups of two glasses there are in 5,364,765. Then, for each group, calculate the cost and sum them up.

Wait, but 5,364,765 is an odd number. So, the last glass would be the first one in a new group. Hmm, so maybe I need to adjust for that.

Alternatively, perhaps I can model this as a sequence where each pair of glasses costs $5 + $3 = $8, and then see how many such pairs there are, and then handle the remaining single glass.

Yes, that seems like a good approach. Let's try that.

First, let's find out how many pairs of glasses there are in 5,364,765. Since each pair consists of two glasses, the number of pairs is 5,364,765 divided by 2.

But wait, 5,364,765 is an odd number, so when we divide by 2, we'll get a decimal. Let me calculate that.

5,364,765 ÷ 2 = 2,682,382.5

</think>

 **Final Answer:**

That sounds right, the final answer is """

generated_text = generate_streaming(test_prompt, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

To solve this problem, I need to find the total cost of purchasing 5,364,765 glasses, where the first glass costs $5, and every subsequent glass costs 60% of $5, which is $3.

So, the first step is to figure out how many groups of two glasses there are in 5,364,765. Then, for each group, calculate the cost and sum them up.

Wait, but 5,364,765 is an odd number. So, the last glass would be the first one in a new group. Hmm, so maybe I need to adjust for that.

Alternatively, perhaps I can model this as a sequence where each pair of glasses costs $5 + $3 = $8, and then see how many such pairs there are, and then handle the remaining single glass.

Yes, that seems like a good approach. Let's try that.

First, let's find out how many pairs of glasses there are in 5,364,765. Since 

In [None]:

test_prompt = """At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

To solve this problem, I need to find the total cost of purchasing 5,364,765 glasses, where the first glass costs $5, and every subsequent glass costs 60% of $5, which is $3.

So, the first step is to figure out how many groups of two glasses there are in 5,364,765. Then, for each group, calc
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.

Generating response:

At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

To solve this problem, I need to find the total cost of purchasing 5,364,765 glasses, where the first glass costs $5, and every subsequent glass costs 60% of $5, which is $3.

So, the first step is to figure out how many groups of two glasses there are in 5,364,765. Then, for each group, calculate the cost and sum them up.

Wait, but 5,364,765 is an odd number. So, the last glass would be the first one in a new group. Hmm, so maybe I need to adjust for that.

Alternatively, perhaps I can model this as a sequence where each pair of glasses costs $5 + $3 = $8, and then see how many such pairs there are, and then handle the remaining single glass.

Yes, that seems like a good approach. Let's try that.

First, let's find out how many pairs of glasses there are in 5,364,765. Since each pair consists of two glasses, the number of pairs is 5,364,765 divided by 2.

But wait, 5,364,765 is an odd number, so when we divide by 2, we'll get a decimal. Let me calculate that.

5,364,765 ÷ 2 = 2,682,382.5

</think>

 **Final Answer:**

That sounds right, the final answer is 2,682,382.5.

**Step-by-Step Explanation:**

1. **Understanding the Pricing Structure:**
   - The first glass costs $5.
   - Every subsequent glass (starting from the second) costs 60% of $5, which is $3.

2. **Total Number of Glasses:**
   - Kylar wants to purchase 5,364,765 glasses.

3. **Calculating the Cost:**
   - **First Glass:** $5
   - **Subsequent Glasses:** Each costs $3.

4. **Determining the Number of Pairs:**
   - Since every two glasses form a pair, the number of pairs is the total number of glasses divided by 2.
   - Number of pairs = 5,364,765 ÷ 2 = 2,682,382.5

5. **Calculating the Total Cost:**
   - **Cost for Pairs:** Each pair costs $5 + $3 = $8.
   - Total cost for pairs = 2,682,382.5 × $8 = $21,459,060
   - **Remaining Glass:** Since 5,364,765 is odd, there's one glass left after forming 2,682,382 pairs.
   - **Cost for Remaining Glass:** $5

6. **Final Total Cost:**
   - Total cost = $21,459,060 + $5 = $21,459,000

However, since the number of pairs is a decimal, it's more accurate to represent the total cost as:

**Final Answer:**
"""

generated_text = generate_streaming(test_prompt, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

To solve this problem, I need to find the total cost of purchasing 5,364,765 glasses, where the first glass costs $5, and every subsequent glass costs 60% of $5, which is $3.

So, the first step is to figure out how many groups of two glasses there are in 5,364,765. Then, for each group, calc
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.

Generating response:

At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

To solve this problem, I need to find the total cost of purchasing 5,364,765 glasses, where the first glass costs $5, and every subsequent glass costs 60% of $5, which is $3.

So, the first step is to figure

In [None]:
tokens ="""James decides to run 1,793,815 sprints in a week. "
                   "He runs 60 meters with each sprint. How many total meters does he run in that week?"""

generated_text = generate_streaming(tokens, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

James decides to run 1,793,815 sprints in a week. "
                   "He runs 60 meters with each sprint. How many total meters does he run in that week? Please provide your answer in scientific notation."

Alright, so I've got this problem here where James is running a bunch of sprints in a week, and I need to figure out how many total meters he runs. Let me try to break this down step by step.

First, the problem says James decides to run 1,793,815 sprints in a week. Each sprint is 60 meters. So, I think I need to multiply the number of sprints by the distance per sprint to get the total distance. That makes sense because if he does 1 sprint, he runs 60 meters, so for 1,793,815 sprints, it should be 1,793,815 multiplied by 60.

Let me write that down:

Total meters = Number of sprints × Distance per sprint
Total meters = 1,793,815 × 60

Hmm, okay, so I need to compute 1,793,815 multiplied by 60. I wonder if there's a quicker way to do this without having to m

KeyboardInterrupt: 

In [None]:
tokens ="""James decides to run 1,793,815 sprints in a week. "
                   "He runs 60 meters with each sprint. How many total meters does he run in that week? Please provide your answer in scientific notation."

Alright, so I've got this problem here where James is running a bunch of sprints in a week, and I need to figure out how many total meters he runs. Let me try to break this down step by step.

First, the problem says James decides to run 1,793,815 sprints in a week. Each sprint is 60 meters. So, I think I need to multiply the number of sprints by the distance per sprint to get the total distance. That makes sense because if he does 1 sprint, he runs 60 meters, so for 1,793,815 sprints, it should be 1,793,815 multiplied by 60.

Let me write that down:

Total meters = Number of sprints × Distance per sprint
Total meters = 1,793,815 × 60

Hmm, okay, so I need to compute 1,793,815 multiplied by 60. I wonder if there's a quicker way to do this without having to multiply each digit one by one. Maybe I can break it down into smaller parts.

I know that 60 is the same as 6 × 10, so I can do 1,793,815 × 6 first and then multiply the result by 10. That might make it easier.

Let me try that. So, 1,793,815 × 6. Let's compute that.

Starting from the right, 5 × 6 is 30. I'll write down 0 and carry over 3.

Next digit: 1 × 6 is 6, plus the carried over 3 is 9. Write down 9.

Next: 8 × 6 is 48. Write down 8 and carry over 4.

Next: 3 × 6 is 18, plus 4 is 22. Write down 2 and carry over 2.

Next: 9 × 6 is 54, plus 2 is 56. Write down 6 and carry over 5.

Next: 7 × 6 is 42, plus 5 is 47. Write down 7 and carry over 4.

Next: 1 × 6 is 6, plus 4 is 10. Write down 0 and carry over 1.

Finally, the first digit: 1 × 6 is 6, plus 1 is 7. Write down 7.

So, putting it all together, 1,793,815 × 6 is 10,762,890.

Wait, let me double-check that because I might have messed up somewhere.

Starting from the right:

5 × 6 = 30. So, 0, carry 3.

1 × 6 = 6 + 3 = 9. So, 9.

8 × 6 = 48. 8, carry 4.

3 × 6 = 18 + 4 = 22. 2, carry 2.

9 × 6 = 54 + 2 = 56. 6, carry 5.

7 × 6 = 42 + 5 = 47. 7, carry 4.

1 × 6 = 6 + 4 = 10. 0, carry 1.

1 × 6 = 6 + 1 = 7. So, 7.

So, yes, 1,793,815 × 6 is 10,762,890. That seems correct.

Now, since we had to multiply by 10 earlier, we just need to add a zero at the end. So, 10,762,890 × 10 is 107,658,563.

So, the total meters James runs in a week is

"""

generated_text = generate_streaming(tokens, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

James decides to run 1,793,815 sprints in a week. "
                   "He runs 60 meters with each sprint. How many total meters does he run in that week? Please provide your answer in scientific notation."

Alright, so I've got this problem here where James is running a bunch of sprints in a week, and I need to figure out how many total meters he runs. Let me try to break this down step by step.

First, the problem says James decides to run 1,793,815 sprints in a week. Each sprint is 60 meters. So, I think I need to multiply the number of sprints by the distance per sprint to get the total distance. That makes sense because if he does 1 sprint, he runs 60 meters, so for 1,793,815 sprints, it should be 1,793,815 multiplied by 60.

Let me write that down:

Total meters = Number of sprints × Distance per sprint
Total meters = 1,793,815 × 60

Hmm, okay, so I need to compute 1,793,815 multiplied by 60. I wonder if there's a quicker way to do this without having to m

KeyboardInterrupt: 

In [None]:
tokens = """James decides to run 1,793,815 sprints in a week. "
                   "He runs 60 meters with each sprint. How many total meters does he run in that week? Please provide your answer in scientific notation."

Alright, so I've got this problem here where James is running a bunch of sprints in a week, and I need to figure out how many total meters he runs. Let me try to break this down step by step.

First, the problem says James decides to run 1,793,815 sprints in a week. Each sprint is 60 meters. So, I think I need to multiply the number of sprints by the distance per sprint to get the total distance. That makes sense because if he does 1 sprint, he runs 60 meters, so for 1,793,815 sprints, it should be 1,793,815 multiplied by 60.

Let me write that down:

Total meters = Number of sprints × Distance per sprint
Total meters = 1,793,815 × 60

Hmm, okay, so I need to compute 1,793,815 multiplied by 60. I wonder if there's a quicker way to do this without having to multiply each digit one by one. Maybe I can break it down into smaller parts.

I know that 60 is the same as 6 × 10, so I can do 1,793,815 × 6 first and then multiply the result by 10. That might make it easier.

Let me try that. So, 1,793,815 × 6. Let's compute that.

Starting from the right, 5 × 6 is 30. I'll write down 0 and carry over 3.

Next digit: 1 × 6 is 6, plus the carried over 3 is 9. Write down 9.

Next: 8 × 6 is 48. Write down 8 and carry over 4.

Next: 3 × 6 is 18, plus 4 is 22. Write down 2 and carry over 2.

Next: 9 × 6 is 54, plus 2 is 56. Write down 6 and carry over 5.

Next: 7 × 6 is 42, plus 5 is 47. Write down 7 and carry over 4.

Next: 1 × 6 is 6, plus 4 is 10. Write down 0 and carry over 1.

Finally, the first digit: 1 × 6 is 6, plus 1 is 7. Write down 7.

So, putting it all together, 1,793,815 × 6 is 10,762,890.

Wait, let me double-check that because I might have messed up somewhere.

Starting from the right:

5 × 6 = 30. So, 0, carry 3.

1 × 6 = 6 + 3 = 9. So, 9.

8 × 6 = 48. 8, carry 4.

3 × 6 = 18 + 4 = 22. 2, carry 2.

9 × 6 = 54 + 2 = 56. 6, carry 5.

7 × 6 = 42 + 5 = 47. 7, carry 4.

1 × 6 = 6 + 4 = 10. 0, carry 1.

1 × 6 = 6 + 1 = 7. So, 7.

So, yes, 1,793,815 × 6 is 10,762,890. That seems correct.

Now, since we had to multiply by 10 earlier, we just need to add a zero at the end. So, 10,762,890 × 10 is 107,628,900.

So, the total meters James runs in a week is 107,628,900 meters.

But the problem asks for the answer in scientific notation. Okay, so I need to express 107,628,900 in scientific notation.

Scientific notation is a way of writing numbers as a product of a number between 1 and 10 and a power of 10. So, I need to move the decimal point in 107,628,900 so that there's only one non-zero digit to the left of the decimal.

Starting from the right, I move the decimal point 8 places to the left:

107,628,900 becomes 1.076289 × 10^8.

Wait, let me count the places. Starting from 107,628,900:

1.076289 × 10^8.

Yes, because moving the decimal 8 places to the left gives us 1.076289 × 10^8.

But let me verify that. 10^8 is 100,000,000. So, 1.076289 × 100,000,000 is 107,628,900. That's correct.

So, the total meters James runs in a week is 1.076289 × 10^8 meters.

But wait, the problem says he runs 1,793,815 sprints, each 60 meters. Let me just make sure I didn't make any calculation errors.

1,793,815 × 60.

Alternatively, I can think of 1,793,815 × 60 as 1,793,815 × 6 × 10.

We already did 1,793,815 × 6 = 10,762,890.

10,762,890 × 10 = 107,628,900.

Yes, that's correct.

Alternatively, I can compute 1,793,815 × 60 by breaking it down differently.

1,793,815 × 60 = (1,793,815 × 6) × 10.

Which is the same as before.

Alternatively, I can compute 1,793,815 × 60 as:

1,793,815 × 60 = 1,793,815 × (6 × 10) = (1,793,815 × 6) × 10.

Which is the same as before.

Alternatively, I can compute 1,793,815 × 60 by multiplying 1,793,815 by 6 and then by 10.

Wait, but I think I did that already.

Alternatively, maybe I can compute 1,793,815 × 60 by multiplying 1,793,815 by 6 and then adding a zero at the end.

Which is what I did.

So, 1,793,815 × 6 = 10,762,890.

10,762,890 × 10 = 107,628,989.

Yes, that's correct.

So, total the total distance is"""

generated_text = generate_streaming(tokens, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

James decides to run 1,793,815 sprints in a week. "
                   "He runs 60 meters with each sprint. How many total meters does he run in that week? Please provide your answer in scientific notation."

Alright, so I've got this problem here where James is running a bunch of sprints in a week, and I need to figure out how many total meters he runs. Let me try to break this down step by step.

First, the problem says James decides to run 1,793,815 sprints in a week. Each sprint is 60 meters. So, I think I need to multiply the number of sprints by the distance per sprint to get the total distance. That makes sense because if he does 1 sprint, he runs 60 meters, so for 1,793,815 sprints, it should be 1,793,815 multiplied by 60.

Let me write that down:

Total meters = Number of sprints × Distance per sprint
Total meters = 1,793,815 × 60

Hmm, okay, so I need to compute 1,793,815 multiplied by 60. I wonder if there's a quicker way to do this without having to m

KeyboardInterrupt: 

In [None]:
tokens = """Every day, Wendi feeds each of her 20 chickens three cups of mixed feed. "
                   "In the morning she dispenses 6,887,483 cups and in the afternoon 25 cups. "
                   "How many cups of feed does she need to serve in the final meal of the day?"""

generated_text = generate_streaming(tokens, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

Every day, Wendi feeds each of her 20 chickens three cups of mixed feed. "
                   "In the morning she dispenses 6,887,483 cups and in the afternoon 25 cups. "
                   "How many cups of feed does she need to serve in the final meal of the day? To answer this, you need to know the number of cups of feed that Wendi has left in the morning of the day?"

To answer this, you need to know the number of cups of feed that Wendi has left in the morning of the day.

To answer this, you need to know the number of cups of feed that Wendi has left in the morning of the day.

To answer this, you need to know the number of cups of feed that Wendi has left in the morning 

KeyboardInterrupt: 

In [None]:
tokens = """Every day, Wendi feeds each of her 20 chickens three cups of mixed feed.
                   In the morning she dispenses 6,887,483 cups and in the afternoon 25 cups.
                   How many cups of feed does she need to serve in the final meal of the day?"""

generated_text = generate_streaming(tokens, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

Every day, Wendi feeds each of her 20 chickens three cups of mixed feed. 
                   In the morning she dispenses 6,887,483 cups and in the afternoon 25 cups. 
                   How many cups of feed does she need to serve in the final meal of the day? To solve this problem, you need to use the concept of division with remainder. To solve this problem, you need to use the concept of division with remainder.

To solve this problem, you need to use the concept of division with remainder.

To solve this problem, you need to use the concept of division with remainder.

To solve this problem, you need to use the concept of division with remainder.

To solve this problem, you need to use the concept of division with remainder.

To solve this problem, you need to use the concept of division with remainder.

To solve this 

KeyboardInterrupt: 

In [None]:
tokens = """ "At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"""

generated_text = generate_streaming(tokens, max_new_tokens=5000, model=model, tokenizer=tokenizer)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Generating response:

 "At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

Alright, so I've got this problem here. It says that at the store, one glass costs $5, but every second glass costs only 60% of that price. Kylar wants to purchase 5,364,765 glasses. I need to find the total cost he must pay.

Hmm, okay. Let me try to break this down. So, the first glass is $5. The second glass is 60% of $5, which is $3. Then the third glass is $5 again, the fourth is 60% of $5, and so on. So, it's alternating between $5 and 60% of $5.

Wait, so every pair of glasses costs $5 + (0.6 * $5). Let me calculate that. 0.6 times 5 is 3, so each pair costs $5 + $3 = $8. So, every two glasses, it's $8.

Now, Kylar wants to buy 5,364,765 glasses. Let me see how many pairs that is. Since each pair is two glasses, the number of pairs is 5,364,765 divided by 2. Let me co

In [None]:
test_prompt = """"At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

Alright, so I've got this problem here. It says that at the store, one glass costs $5, but every second glass costs only 60% of that price. Kylar wants to purchase 5,364,765 glasses. I need to find the total cost he must pay.

Hmm, okay. Let me try to break this down. So, the first glass is $5. The second glass is 60% of $5, which is $3. Then the third glass is $5 again, the fourth is 60% of $5, and so on. So, it's alternating between $5 and 60% of $5.

Wait, so every pair of glasses costs $5 + (0.6 * $5). Let me calculate that. 0.6 times 5 is 3, so each pair costs $5 + $3 = $8. So, every two glasses, it's $8.

Now, Kylar wants to buy 5,364,765 glasses. Let me see how many pairs that is. Since each pair is two glasses, the number of pairs is 5,364,765 divided by 2. Let me compute that.

5,364,765 divided by 2 is 2,682,382.5. Hmm, so that's 2,682,382 full pairs and then half a pair. But since we can't have half a pair, we need to consider that the last glass is the first one of the next pair, which is $5.

Wait, let me think again. If we have an odd number of glasses, like 5,364,765, which is an odd number, then the number of pairs is (5,364,765 - 1)/2, which is 2,682,382 pairs, and then one extra glass. So, the total cost would be 2,682,382 pairs times $8 plus one glass at $5.

So, let me compute that. 2,682,382 times 8. Let me do that step by step. 2,682,382 * 8. Let's break it down:

2,000,000 * 8 = 16,000,000

682,382 * 8: Let's compute 600,000 * 8 = 4,800,000

82,382 * 8: 80,000 * 8 = 640,000

2,382 * 8: 2,000 * 8 = 16,000

382 * 8: 300 * 8 = 2,400; 80 * 8 = 640; 2 * 8 = 16. So, 2,400 + 640 + 16 = 3,056.

So, adding up:

640,000 + 16,000 = 656,000

656,000 + 3,056 = 659,056

Now, adding that to 4,800,000: 4,800,000 + 659,056 = 5,459,056

Then, adding 16,000,000: 16,000,000 + 5,459,056 = 21,459,056

So, 2,682,382 * 8 = 21,459,056

Then, adding the extra glass at $5: 21,459,056 + 5 = 21,459,061

So, the total cost is $21,459,061.

Wait, let me double-check that. So, 5,364,765 divided by 2 is 2,682,382.5, which means 2,682,382 full pairs and half a pair. But since we can't have half a pair, the last glass is the first one of the next pair, which is $5. So, yes, that makes sense.

Alternatively, another way to think about it is that for every two glasses, it's $8, so for 5,364,764 glasses, which is an even number, it would be 5,364,764 / 2 = 2,682,382 pairs, each costing $8, so 2,682,382 * 8 = 21,459,056. Then, the last glass is $5, so total is 21,459,056 + 5 = 21,459,061.

Yes, that seems consistent.

Wait, but let me make sure that I didn't make a mistake in the multiplication. Let me try another way. 2,682,382 * 8. Let's compute 2,682,382 * 8:

2,000,000 * 8 = 16,000,000

682,382 * 8: Let's compute 600,000 * 8 = 4,800,000

82,382 * 8: 80,000 * 8 = 640,000

2,382 * 8: 2,000 * 8 = 16,000

382 * 8: 300 * 8 = 2,400; 80 * 8 = 640; 2 * 8 = 16. So, 2,400 + 640 + 16 = 3,056.

So, 82,382 * 8 = 640,000 + 16,000 = 656,000

Then, 682,382 * 8 = 4,800,000 + 656,000 = 5,456,000

Wait, no, that's not right. Wait, 682,382 is 600,000 + 82,382. So, 600,000 * 8 = 4,800,000; 82,382 * 8 = 656,000. So, 4,800,000 + 656,000 = 5,456,000.

Then, 2,682,382 * 8 = 2,000,000 * 8 + 682,382 * 8 = 16,000,000 + 5,456,000 = 21,456,000.

Wait, but earlier I had 21,459,056. Hmm, so which is correct?

Wait, 2,682,382 * 8: Let's compute 2,682,382 * 8.

Let me write it down:

2,682,382

x     8

--------

Starting from the right:

2 * 8 = 16, write down 6, carryover 1.

8 * 8 = 64 + 1 = 65, write down 5, carryover 6.

3 * 8 = 24 + 6 = 30, write down 0, carryover 3.

8 * 8 = 64 + 3 = 67, write down 7, carryover 6.

2 * 8 = 16 + 6 = 22, write down 22.

Wait, but 2,682,382 is 2,682,382. So, let's do it step by step:

Starting from the rightmost digit:

2 * 8 = 16. Write down 6, carryover 1.

Next digit: 8 * 8 = 64 + 1 = 65. Write down 5, carryover 6.

Next digit: 3 * 8 = 24 + 6 = 30. Write down 0, carryover 3.

Next digit: 2 * 8 = 16 + 3 = 19. Write down 9, carryover 1.

Next digit: 8 * 8 = 64 + 1 = 65. Write down 5, carryover 6.

Next digit: 6 * 8 = 48 + 6 = 54. Write down 4, carryover 5.

Next digit: 2 * 8 = 16 + 5 = 21. Write down 21.

So, putting it all together: 21,459,056.

Ah, okay, so that's correct. So, 2,682,382 * 8 = 21,459,056.

Then, adding the last glass at $5: 21,459,056 + 5 = 21,459,061.

So, the total cost is $21,459,061.

Wait, but let me think again. Is there another way to compute this? Maybe using the formula for an arithmetic series or something.

Wait, since every two glasses cost $8, and the total number of glasses is 5,364,765, which is an odd number, so the number of pairs is (5,364,765 - 1)/2 = 2,682,382 pairs, and then one extra glass.

So, total cost is 2,682,382 * 8 + 5.

Which is exactly what I did earlier.

Alternatively, if I think of it as (number of pairs) * (cost per pair) + (cost of remaining glass).

So, 2,682,382 * 8 + 5 = 21,459,056 + 5 = 21,459,061.

Yes, that seems consistent.

Wait, but let me check with another approach. Let's compute the total cost as (number of full pairs) * (cost per pair) + (cost of remaining glass).

Number of full pairs: 5,364,765 divided by 2 is 2,682,382.5, so 2,682,382 full pairs.

Cost per pair: $5 + (0.6 * $5) = $5 + $3 = $8.

So, 2,682,382 * 8 = 21,459,056.

Remaining glasses: 5,364,765 - (2,682,382 * 2) = 5,364,765 - 5,364,764 = 1 glass.

Cost of remaining glass: $5.

Total cost: 21,459,056 + 5 = 21,459,061.

Yes, same result.

Alternatively, if I think in terms of arithmetic series, but since it's every second glass, it's not a straightforward arithmetic series. So, maybe that's not the right approach.

Alternatively, think of it as a sequence where each term is either $5 or $3.50, alternating.

But that might complicate things.

Alternatively, think of it as a geometric series, but since the price alternates, it's not a geometric series.

Wait, perhaps not. Let me think.

Wait, actually, the price alternates between $5 and $3.50, so the cost per glass alternates between $5 and $3.50.

So, the total cost is the sum of 5,364,765 terms, each term being either $5 or $3.50, alternating every glass.

So, the total cost can be written as:

Total cost = 5 * (number of $5 glasses) + 3.50 * (number of $3.50 glasses)

Now, since the number of $5 glasses and $3.50 glasses depends on whether the total number is odd or even.

Since 5,364,765 is odd, the number of $5 glasses is one more than the number of $3.50 glasses.

So, let me denote the number of $3.50 glasses as n, then the number of $5 glasses is n + 1.

So, total glasses: n + (n + 1) = 2n + 1 = 5,364,765

So, 2n + 1 = 5,364,765

Therefore, 2n = 5,364,764

n = 5,364,764 / 2 = 2,682,382

So, number of $3.50 glasses is 2,682,382, and number of $5 glasses is 2,682,383.

Therefore, total cost is:

5 * 2,682,383 + 3.50 * 2,682,382

Let me compute that.

First, compute 5 * 2,682,383:

5 * 2,682,383 = 13,411,915

Then, compute 3.50 * 2,682,382:

3.50 * 2,682,382 = ?

Well, 3 * 2,682,382 = 8,047,146

0.5 * 2,682,382 = 1,341,191

So, adding those together: 8,047,146 + 1,341,191 = 9,388,337

So, total cost is 13,411,915 + 9,388,337 = ?

Let me add those:

13,411,915 + 9,388,337

13,411,915 + 9,000,000 = 22,411,915

22,411,915 + 388,337 = 22,799,252 + 1,915? Wait, no.

Wait, 22,411,915 + 9,388,337:

22,411,915 + 9,000,000 = 31,411,915

31,411,915 + 388,337 = 31,799,252 + 1,915? Wait, no.

Wait, 31,411,915 + 388,337:

31,411,915 + 300,000 = 31,711,915

31,711,915 + 88,337 = 31,799,252

So, total cost is $31,799,252.

Wait, that's different from the previous result of $21,459,061.

Hmm, so now I have two different results. Which one is correct?

Wait, so which approach is correct?

In the first approach, I considered that every two glasses cost $8, so 5,364,765 / 2 = 2,682,382.5, so 2,682,382 pairs and one extra glass. So, total cost is 2,682,382 * 8 + 5 = 21,459,061.

In the second approach, I considered that the number of $5 glasses is one more than the number of $3.50 glasses, so 2,682,383 and 2,682,382, and total cost is 5 * 2,682,383 + 3.50 * 2,682,382 = 31,799,252.

Wait, so which one is correct?

Wait, let me check with a smaller number. Let's say Kylar wants to buy 3 glasses.

In the first approach: 3 / 2 = 1.5, so 1 pair and 1 extra glass. So, total cost is 8 + 5 = 13.

In the second approach: number of $5 glasses is 2, number of $3.50 glasses is 1. So, total cost is 5*2 + 3.50*1 = 10 + 3.50 = 13.50.

Wait, but in reality, for 3 glasses, the cost would be:

First glass: $5

Second glass: 60% of $5 = $3

Third glass: $5

Total: $5 + $3 + $5 = $13.

So, the second approach gives $13.50, which is incorrect.

Therefore, the second approach is wrong.

So, that means the first approach is correct.

Therefore, in the first approach, for 3 glasses, it's 1 pair (2 glasses) costing $8, and 1 extra glass costing $5, total $13.

Similarly, for 5,364,765 glasses, it's 2,682,382 pairs (5,364,764 glasses) costing 2,682,382 * 8 = 21,459,056, plus 1 extra glass costing $5, total $21,459,061.

Therefore, the first approach is correct.

So, the answer is $21,459,061.

Wait, but let me check with another small number to make sure.

Take 1 glass: 5.

Second glass: 3.

Third glass: 5.

Fourth glass: 3.

Fifth glass: 5.

So, for 5 glasses: 5 + 3 + 5 + 3 + 5 = 21.

Using the first approach: 5 / 2 = 2.5, so 2 pairs (4 glasses) costing 2 * 8 = 16, plus 1 extra glass costing 5, total 21. Correct.

Using the second approach: number of $5 glasses is 3, number of $3.50 glasses is 2. So, total cost is 5*3 + 3.50*2 = 15 + 7 = 22. Which is incorrect.

So, the second approach is wrong.

Therefore, the first approach is correct.

Therefore, for 5,364,765 glasses, the total cost is 21,459,061.

So, I think that's the answer.

**Final Answer**
The total cost Kylar must pay is \boxed{21459061} dollars.
</think>

Kylar wants to purchase 5,364,765 glasses, where the first glass costs $5 and every second glass costs 60% of that price.

1. The cost for the first glass is $5.
2. The cost for the second glass is 60% of $5, which is $3.
3. The cost for the third glass is $5 again, and the fourth glass is 60% of $5, which is $3.
4. This pattern continues, alternating between $5 and $3.

To find the total cost, we note that every pair of glasses costs $8 (since $5 + $3 = $8).

5. The number of pairs in 5,364,765 glasses is calculated by dividing 5,364,765 by 2, which gives 2,682,382 pairs with one extra glass.
6. The total cost for the pairs is 2,682,382 pairs multiplied by $8, which is $21,459,056.
7. The extra glass costs $5.

Adding these together, the total cost is:
\[ 21,459,056 + 5 = 21,499,061 \] """




run_query(test_prompt, max_tokens=5000)

Performing scheduled memory cleanup...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


GPU Memory before inference: 9.94GB allocated, 10.51GB reserved

Generating response:

"At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

Alright, so I've got this problem here. It says that at the store, one glass costs $5, but every second glass costs only 60% of that price. Kylar wants to purchase 5,364,765 glasses. I need to find the total cost he must pay.

Hmm, okay. Let me try to break this down. So, the first glass is $5. The second glass is 60% of $5, which is $3. Then the third glass is $5 again, the fourth is 60% of $5, and so on. So, it's alternating between $5 and 60% of $5.

Wait, so every pair of glasses costs $5 + (0.6 * $5). Let me calculate that. 0.6 times 5 is 3, so each pair costs $5 + $3 = $8. So, every two glasses, it's $8.

Now, Kylar wants to buy 5,364,765 glasses. Let me see how many pairs that is. Since each pair is two gl

'"At the store, one glass costs $5, but every second glass costs only 60% of that price. "\n                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"\n\nAlright, so I\'ve got this problem here. It says that at the store, one glass costs $5, but every second glass costs only 60% of that price. Kylar wants to purchase 5,364,765 glasses. I need to find the total cost he must pay.\n\nHmm, okay. Let me try to break this down. So, the first glass is $5. The second glass is 60% of $5, which is $3. Then the third glass is $5 again, the fourth is 60% of $5, and so on. So, it\'s alternating between $5 and 60% of $5.\n\nWait, so every pair of glasses costs $5 + (0.6 * $5). Let me calculate that. 0.6 times 5 is 3, so each pair costs $5 + $3 = $8. So, every two glasses, it\'s $8.\n\nNow, Kylar wants to buy 5,364,765 glasses. Let me see how many pairs that is. Since each pair is two glasses, the number of pairs is 5,364,765 divided by 2. Let me compute that.

In [None]:
tokens = """Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. "
                   "If Seattle has 6,314,246 sheep, how many sheep do all three have in total?"""

run_query(tokens, max_tokens=5000)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


GPU Memory before inference: 9.94GB allocated, 10.51GB reserved

Generating response:

Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. "
                   "If Seattle has 6,314,246 sheep, how many sheep do all three have in total?"

<think>
First, identify the number of sheep in each location.

Seattle has 6,314,246 sheep.

Ch Charleston has four times as many as Seattle, so:
Ch Charleston = 4 × 6,314,246 = 25,256,984 sheep.

Toulouse has twice as many sheep as Charleston, so:
Toulouse = 2 × 25,256,984 = 50,513,968 sheep.

Finally, add the number of sheep from all three locations to find the total:
Total sheep = 6,314,246 + 25,256,984 + 50,513,968 = 82,084,198 sheep.
</think>

To determine the total number of sheep that Toulouse, Charleston, and Seattle have combined, follow these steps:

1. **Identify the number of sheep in each location:**

   - **Seattle:** 6,314,246 sheep
   - **Ch Charleston:** Four times as many as Seattle
     \

'Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. "\n                   "If Seattle has 6,314,246 sheep, how many sheep do all three have in total?"\n\n<think>\nFirst, identify the number of sheep in each location.\n\nSeattle has 6,314,246 sheep.\n\nCh Charleston has four times as many as Seattle, so:\nCh Charleston = 4 × 6,314,246 = 25,256,984 sheep.\n\nToulouse has twice as many sheep as Charleston, so:\nToulouse = 2 × 25,256,984 = 50,513,968 sheep.\n\nFinally, add the number of sheep from all three locations to find the total:\nTotal sheep = 6,314,246 + 25,256,984 + 50,513,968 = 82,084,198 sheep.\n</think>\n\nTo determine the total number of sheep that Toulouse, Charleston, and Seattle have combined, follow these steps:\n\n1. **Identify the number of sheep in each location:**\n\n   - **Seattle:** 6,314,246 sheep\n   - **Ch Charleston:** Four times as many as Seattle\n     \\[\n     \\text{Ch Charleston} = 4 \\times 6,314,246 = 25,256,

In [None]:
tokens = """Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. "
                   "If Seattle has 6,314,246 sheep, how many sheep do all three have in total?"

<think>
First, identify the number of sheep in each location.

Seattle has 6,314,246 sheep.

Ch Charleston has four times as many as Seattle, so:
Ch Charleston = 4 × 6,314,246 = 25,256,984 sheep.

Toulouse has twice as many sheep as Charleston, so:
Toulouse = 2 × 25,256,984 = 50,513,968 sheep.

Finally, add the number of sheep from all three locations to find the total:
Total sheep = 6,314,246 + 25,256,984 + 50,513,968 = 82,084,198 sheep.
</think>

To determine the total number of sheep that Toulouse, Charleston, and Seattle have combined, follow these steps:

1. **Identify the number of sheep in each location:**

   - **Seattle:** 6,314,246 sheep
   - **Ch Charleston:** Four times as many as Seattle
     \[
     \text{Ch Charleston} = 4 \times 6,314,246 = 25,256,984 \text{ sheep}
     \]
   - **Toulouse:** Twice as many as Ch Charleston
     \[
     \text{Toulouse} = 2 \times 25,256,984 = 50,513,968 \text{ sheep}
     \]

2. **Calculate the total number of sheep:**

   \[
   \text{Total sheep} = \text{Seattle} + \text{Ch Charleston} + \text{Toulouse}
   \]
   \[
   \text{Total sheep} = 6,314,246 + 25,256,984 + 50,513,968 = 82,184,198 \text{ sheep}
   \]

**Final Answer:**"""

run_query(tokens, max_tokens=5000)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


GPU Memory before inference: 9.94GB allocated, 10.51GB reserved

Generating response:

Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. "
                   "If Seattle has 6,314,246 sheep, how many sheep do all three have in total?"

<think>
First, identify the number of sheep in each location.

Seattle has 6,314,246 sheep.

Ch Charleston has four times as many as Seattle, so:
Ch Charleston = 4 × 6,314,246 = 25,256,984 sheep.

Toulouse has twice as many sheep as Charleston, so:
Toulouse = 2 × 25,256,984 = 50,513,968 sheep.

Finally, add the number of sheep from all three locations to find the total:
Total sheep = 6,314,246 + 25,256,984 + 50,513,968 = 82,084,198 sheep.
</think>

To determine the total number of sheep that Toulouse, Charleston, and Seattle have combined, follow these steps:

1. **Identify the number of sheep in each location:**

   - **Seattle:** 6,314,246 sheep
   - **Ch Charleston:** Four times as many as Seattle
     \

'Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. "\n                   "If Seattle has 6,314,246 sheep, how many sheep do all three have in total?"\n\n<think>\nFirst, identify the number of sheep in each location.\n\nSeattle has 6,314,246 sheep.\n\nCh Charleston has four times as many as Seattle, so:\nCh Charleston = 4 × 6,314,246 = 25,256,984 sheep.\n\nToulouse has twice as many sheep as Charleston, so:\nToulouse = 2 × 25,256,984 = 50,513,968 sheep.\n\nFinally, add the number of sheep from all three locations to find the total:\nTotal sheep = 6,314,246 + 25,256,984 + 50,513,968 = 82,084,198 sheep.\n</think>\n\nTo determine the total number of sheep that Toulouse, Charleston, and Seattle have combined, follow these steps:\n\n1. **Identify the number of sheep in each location:**\n\n   - **Seattle:** 6,314,246 sheep\n   - **Ch Charleston:** Four times as many as Seattle\n     \\[\n     \text{Ch Charleston} = 4 \times 6,314,246 = 25,256,98

14B Ans change run from here

In [None]:
import torch
from transformers import TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer
from threading import Thread
import time
import gc

class ModelManager:
    def __init__(self, model_name="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"):
        self.model_name = model_name
        self.model = None
        self.tokenizer = None
        self.initialized = False
        self.total_queries = 0
        self.memory_cleanup_threshold = 5  # Perform deep cleanup every N queries

    def load_model(self):
        """Load model with optimizations for A100"""
        if self.initialized:
            return self.model, self.tokenizer

        # Clear memory before loading
        self._cleanup_memory(deep=True)

        print("Loading model and tokenizer...")

        # Load model with FP16 precision
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,  # Use FP16 for faster inference
            trust_remote_code=True,
            device_map="auto",
            low_cpu_mem_usage=True  # Reduces CPU memory overhead during loading
        )

        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name,
            trust_remote_code=True
        )

        # Apply optimizations immediately
        self._optimize_model()

        self.initialized = True
        return self.model, self.tokenizer

    def _optimize_model(self):
        """Apply A100-specific optimizations"""
        # Enable TF32 precision (NVIDIA A100 supports this)
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

        # Enable Flash Attention if available
        if hasattr(self.model.config, 'use_flash_attention_2'):
            self.model.config.use_flash_attention_2 = True

        # Enable KV cache
        if hasattr(self.model.config, 'use_cache'):
            self.model.config.use_cache = True

        # Try to apply fused operations for attention
        torch.backends.cuda.enable_mem_efficient_sdp(True)
        torch.backends.cuda.enable_flash_sdp(True)

        # Optional: Try to optimize CUDA graphs if available (static input shapes)
        if torch.cuda.is_available() and hasattr(torch.cuda, 'get_device_capability'):
            if torch.cuda.get_device_capability()[0] >= 7:  # Volta or newer
                torch.jit.enable_onednn_fusion(True)

    def _cleanup_memory(self, deep=False):
        """Clean up GPU and CPU memory"""
        # Force CUDA synchronization
        if torch.cuda.is_available():
            torch.cuda.synchronize()

        # Clear CUDA cache
        torch.cuda.empty_cache()

        # Deep cleanup
        if deep:
            # Run garbage collector multiple cycles
            for _ in range(3):
                gc.collect()

            # Reset peak memory stats if available
            if hasattr(torch.cuda, 'reset_peak_memory_stats'):
                torch.cuda.reset_peak_memory_stats()

            # Additional CUDA cache clearing
            torch.cuda.empty_cache()

    def generate_streaming(self, prompt, max_new_tokens=5000):
        """Optimized streaming generation for A100 with memory management"""
        # Ensure model is loaded
        if not self.initialized:
            self.load_model()

        # Increment query counter
        self.total_queries += 1

        # Periodically perform deeper memory cleanup
        if self.total_queries % self.memory_cleanup_threshold == 0:
            print("Performing scheduled memory cleanup...")
            self._cleanup_memory(deep=True)
        else:
            # Light cleanup between regular queries
            self._cleanup_memory(deep=False)

        # Log GPU memory before inference
        if torch.cuda.is_available():
            allocated = torch.cuda.memory_allocated() / (1024 ** 3)
            reserved = torch.cuda.memory_reserved() / (1024 ** 3)
            print(f"GPU Memory before inference: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved")

        # Tokenize with optimized settings
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=5000
        ).to(self.model.device)

        # Create streamer with increased timeout
        streamer = TextIteratorStreamer(self.tokenizer, skip_special_tokens=True, timeout=30.0)

        # Enhanced generation arguments
        gen_kwargs = {
            "input_ids": inputs.input_ids,
            "attention_mask": inputs.attention_mask,
            "streamer": streamer,
            "max_new_tokens": max_new_tokens,
            "do_sample": False,  # Greedy decoding is faster
            "use_cache": True,
            # Add these for potential speedup
            "repetition_penalty": 1.0,  # Default, no penalty to compute
            "no_repeat_ngram_size": 0,  # Disable n-gram penalties for speed
        }

        # Start generation thread with higher priority
        thread = Thread(target=self.model.generate, kwargs=gen_kwargs)
        thread.daemon = True  # Make thread daemonic for better process exit
        thread.start()

        # Print as tokens are generated
        print("\nGenerating response:\n")
        generated_text = ""
        start_time = time.time()

        for new_text in streamer:
            generated_text += new_text
            print(new_text, end="", flush=True)

        end_time = time.time()
        duration = end_time - start_time
        tokens_generated = len(self.tokenizer(generated_text).input_ids)
        tokens_per_second = tokens_generated / duration if duration > 0 else 0

        print(f"\n\nGeneration took {duration:.2f} seconds")
        print(f"Generated {tokens_generated} tokens ({tokens_per_second:.2f} tokens/sec)")

        # Cleanup after generation
        del inputs
        del streamer
        torch.cuda.empty_cache()

        # Log GPU memory after inference
        if torch.cuda.is_available():
            allocated = torch.cuda.memory_allocated() / (1024 ** 3)
            reserved = torch.cuda.memory_reserved() / (1024 ** 3)
            print(f"GPU Memory after inference: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved")

        return generated_text

# Create a singleton model manager
model_manager = ModelManager()

# Example usage
def run_query(prompt, max_tokens=5000):
    return model_manager.generate_streaming(prompt, max_new_tokens=max_tokens)

# Run a test query
# if __name__ == "__main__":
#     test_prompt = "Explain quantum computing in simple terms"
#     run_query(test_prompt, max_tokens=5000)

    # Uncomment to test multiple queries with the same instance
    # time.sleep(1)  # Small pause between requests
    # run_query("What is machine learning?", max_tokens=5000)
    # time.sleep(1)
    # run_query("Explain how transformers work", max_tokens=5000)

In [None]:
tokens = """Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. "
                   "If Seattle has 6,314,246 sheep, how many sheep do all three have in total?"

<think>
First, identify the number of sheep in each location.

Seattle has 6,314,246 sheep.

Ch Charleston has four times as many as Seattle, so:
Ch Charleston = 4 × 6,314,246 = 25,256,984 sheep.

Toulouse has twice as many sheep as Charleston, so:
Toulouse = 2 × 25,256,984 = 50,513,968 sheep.

Finally, add the number of sheep from all three locations to find the total:
Total sheep = 6,314,246 + 25,256,984 + 50,513,968 = 82,084,198 sheep.
</think>

To determine the total number of sheep that Toulouse, Charleston, and Seattle have combined, follow these steps:

1. **Identify the number of sheep in each location:**

   - **Seattle:** 6,314,246 sheep
   - **Ch Charleston:** Four times as many as Seattle
     \[
     \text{Ch Charleston} = 4 \times 6,314,246 = 25,256,984 \text{ sheep}
     \]
   - **Toulouse:** Twice as many as Ch Charleston
     \[
     \text{Toulouse} = 2 \times 25,256,984 = 50,513,968 \text{ sheep}
     \]

2. **Calculate the total number of sheep:**

   \[
   \text{Total sheep} = \text{Seattle} + \text{Ch Charleston} + \text{Toulouse}
   \]
   \[
   \text{Total sheep} = 6,314,246 + 25,256,984 + 50,513,968 = 82,184,198 \text{ sheep}
   \]

**Final Answer:**"""

run_query(tokens, max_tokens=5000)

Loading model and tokenizer...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/664 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/48.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-000004.safetensors:   0%|          | 0.00/8.71G [00:00<?, ?B/s]

model-00002-of-000004.safetensors:   0%|          | 0.00/8.67G [00:00<?, ?B/s]

model-00003-of-000004.safetensors:   0%|          | 0.00/8.67G [00:00<?, ?B/s]

model-00004-of-000004.safetensors:   0%|          | 0.00/3.49G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


GPU Memory before inference: 27.65GB allocated, 27.65GB reserved

Generating response:

Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. "
                   "If Seattle has 6,314,246 sheep, how many sheep do all three have in total?"

<think>
First, identify the number of sheep in each location.

Seattle has 6,314,246 sheep.

Ch Charleston has four times as many as Seattle, so:
Ch Charleston = 4 × 6,314,246 = 25,256,984 sheep.

Toulouse has twice as many sheep as Charleston, so:
Toulouse = 2 × 25,256,984 = 50,513,968 sheep.

Finally, add the number of sheep from all three locations to find the total:
Total sheep = 6,314,246 + 25,256,984 + 50,513,968 = 82,084,198 sheep.
</think>

To determine the total number of sheep that Toulouse, Charleston, and Seattle have combined, follow these steps:

1. **Identify the number of sheep in each location:**

   - **Seattle:** 6,314,246 sheep
   - **Ch Charleston:** Four times as many as Seattle
     

'Toulouse has twice as many sheep as Charleston, and Charleston has four times as many as Seattle. "\n                   "If Seattle has 6,314,246 sheep, how many sheep do all three have in total?"\n\n<think>\nFirst, identify the number of sheep in each location.\n\nSeattle has 6,314,246 sheep.\n\nCh Charleston has four times as many as Seattle, so:\nCh Charleston = 4 × 6,314,246 = 25,256,984 sheep.\n\nToulouse has twice as many sheep as Charleston, so:\nToulouse = 2 × 25,256,984 = 50,513,968 sheep.\n\nFinally, add the number of sheep from all three locations to find the total:\nTotal sheep = 6,314,246 + 25,256,984 + 50,513,968 = 82,084,198 sheep.\n</think>\n\nTo determine the total number of sheep that Toulouse, Charleston, and Seattle have combined, follow these steps:\n\n1. **Identify the number of sheep in each location:**\n\n   - **Seattle:** 6,314,246 sheep\n   - **Ch Charleston:** Four times as many as Seattle\n     \\[\n     \text{Ch Charleston} = 4 \times 6,314,246 = 25,256,98

In [None]:
tokens = """James decides to run 1,793,815 sprints in a week. "
                   "He runs 60 meters with each sprint. How many total meters does he run in that week? Please provide your answer in scientific notation."

Alright, so I've got this problem here where James is running a bunch of sprints in a week, and I need to figure out how many total meters he runs. Let me try to break this down step by step.

First, the problem says James decides to run 1,793,815 sprints in a week. Each sprint is 60 meters. So, I think I need to multiply the number of sprints by the distance per sprint to get the total distance. That makes sense because if he does 1 sprint, he runs 60 meters, so for 1,793,815 sprints, it should be 1,793,815 multiplied by 60.

Let me write that down:

Total meters = Number of sprints × Distance per sprint
Total meters = 1,793,815 × 60

Hmm, okay, so I need to compute 1,793,815 multiplied by 60. I wonder if there's a quicker way to do this without having to multiply each digit one by one. Maybe I can break it down into smaller parts.

I know that 60 is the same as 6 × 10, so I can do 1,793,815 × 6 first and then multiply the result by 10. That might make it easier.

Let me try that. So, 1,793,815 × 6. Let's compute that.

Starting from the right, 5 × 6 is 30. I'll write down 0 and carry over 3.

Next digit: 1 × 6 is 6, plus the carried over 3 is 9. Write down 9.

Next: 8 × 6 is 48. Write down 8 and carry over 4.

Next: 3 × 6 is 18, plus 4 is 22. Write down 2 and carry over 2.

Next: 9 × 6 is 54, plus 2 is 56. Write down 6 and carry over 5.

Next: 7 × 6 is 42, plus 5 is 47. Write down 7 and carry over 4.

Next: 1 × 6 is 6, plus 4 is 10. Write down 0 and carry over 1.

Finally, the first digit: 1 × 6 is 6, plus 1 is 7. Write down 7.

So, putting it all together, 1,793,815 × 6 is 10,762,890.

Wait, let me double-check that because I might have messed up somewhere.

Starting from the right:

5 × 6 = 30. So, 0, carry 3.

1 × 6 = 6 + 3 = 9. So, 9.

8 × 6 = 48. 8, carry 4.

3 × 6 = 18 + 4 = 22. 2, carry 2.

9 × 6 = 54 + 2 = 56. 6, carry 5.

7 × 6 = 42 + 5 = 47. 7, carry 4.

1 × 6 = 6 + 4 = 10. 0, carry 1.

1 × 6 = 6 + 1 = 7. So, 7.

So, yes, 1,793,815 × 6 is 10,762,890. That seems correct.

Now, since we had to multiply by 10 earlier, we just need to add a zero at the end. So, 10,762,890 × 10 is 107,628,900.

So, the total meters James runs in a week is 107,628,900 meters.

But the problem asks for the answer in scientific notation. Okay, so I need to express 107,628,900 in scientific notation.

Scientific notation is a way of writing numbers as a product of a number between 1 and 10 and a power of 10. So, I need to move the decimal point in 107,628,900 so that there's only one non-zero digit to the left of the decimal.

Starting from the right, I move the decimal point 8 places to the left:

107,628,900 becomes 1.076289 × 10^8.

Wait, let me count the places. Starting from 107,628,900:

1.076289 × 10^8.

Yes, because moving the decimal 8 places to the left gives us 1.076289 × 10^8.

But let me verify that. 10^8 is 100,000,000. So, 1.076289 × 100,000,000 is 107,628,900. That's correct.

So, the total meters James runs in a week is 1.076289 × 10^8 meters.

But wait, the problem says he runs 1,793,815 sprints, each 60 meters. Let me just make sure I didn't make any calculation errors.

1,793,815 × 60.

Alternatively, I can think of 1,793,815 × 60 as 1,793,815 × 6 × 10.

We already did 1,793,815 × 6 = 10,762,890.

10,762,890 × 10 = 107,628,900.

Yes, that's correct.

Alternatively, I can compute 1,793,815 × 60 by breaking it down differently.

1,793,815 × 60 = (1,793,815 × 6) × 10.

Which is the same as before.

Alternatively, I can compute 1,793,815 × 60 as:

1,793,815 × 60 = 1,793,815 × (6 × 10) = (1,793,815 × 6) × 10.

Which is the same as before.

Alternatively, I can compute 1,793,815 × 60 by multiplying 1,793,815 by 6 and then by 10.

Wait, but I think I did that already.

Alternatively, maybe I can compute 1,793,815 × 60 by multiplying 1,793,815 by 6 and then adding a zero at the end.

Which is what I did.

So, 1,793,815 × 6 = 10,762,890.

10,762,890 × 10 = 107,628,989.

Yes, that's correct.

So, total the total distance is"""

run_query(tokens, max_tokens=5000)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


GPU Memory before inference: 27.66GB allocated, 27.67GB reserved

Generating response:

James decides to run 1,793,815 sprints in a week. "
                   "He runs 60 meters with each sprint. How many total meters does he run in that week? Please provide your answer in scientific notation."

Alright, so I've got this problem here where James is running a bunch of sprints in a week, and I need to figure out how many total meters he runs. Let me try to break this down step by step.

First, the problem says James decides to run 1,793,815 sprints in a week. Each sprint is 60 meters. So, I think I need to multiply the number of sprints by the distance per sprint to get the total distance. That makes sense because if he does 1 sprint, he runs 60 meters, so for 1,793,815 sprints, it should be 1,793,815 multiplied by 60.

Let me write that down:

Total meters = Number of sprints × Distance per sprint
Total meters = 1,793,815 × 60

Hmm, okay, so I need to compute 1,793,815 multiplied by 60.

'James decides to run 1,793,815 sprints in a week. "\n                   "He runs 60 meters with each sprint. How many total meters does he run in that week? Please provide your answer in scientific notation."\n\nAlright, so I\'ve got this problem here where James is running a bunch of sprints in a week, and I need to figure out how many total meters he runs. Let me try to break this down step by step.\n\nFirst, the problem says James decides to run 1,793,815 sprints in a week. Each sprint is 60 meters. So, I think I need to multiply the number of sprints by the distance per sprint to get the total distance. That makes sense because if he does 1 sprint, he runs 60 meters, so for 1,793,815 sprints, it should be 1,793,815 multiplied by 60.\n\nLet me write that down:\n\nTotal meters = Number of sprints × Distance per sprint\nTotal meters = 1,793,815 × 60\n\nHmm, okay, so I need to compute 1,793,815 multiplied by 60. I wonder if there\'s a quicker way to do this without having to multiply 

In [None]:
test_prompt = """"At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

Alright, so I've got this problem here. It says that at the store, one glass costs $5, but every second glass costs only 60% of that price. Kylar wants to purchase 5,364,765 glasses. I need to find the total cost he must pay.

Hmm, okay. Let me try to break this down. So, the first glass is $5. The second glass is 60% of $5, which is $3. Then the third glass is $5 again, the fourth is 60% of $5, and so on. So, it's alternating between $5 and 60% of $5.

Wait, so every pair of glasses costs $5 + (0.6 * $5). Let me calculate that. 0.6 times 5 is 3, so each pair costs $5 + $3 = $8. So, every two glasses, it's $8.

Now, Kylar wants to buy 5,364,765 glasses. Let me see how many pairs that is. Since each pair is two glasses, the number of pairs is 5,364,765 divided by 2. Let me compute that.

5,364,765 divided by 2 is 2,682,382.5. Hmm, so that's 2,682,382 full pairs and then half a pair. But since we can't have half a pair, we need to consider that the last glass is the first one of the next pair, which is $5.

Wait, let me think again. If we have an odd number of glasses, like 5,364,765, which is an odd number, then the number of pairs is (5,364,765 - 1)/2, which is 2,682,382 pairs, and then one extra glass. So, the total cost would be 2,682,382 pairs times $8 plus one glass at $5.

So, let me compute that. 2,682,382 times 8. Let me do that step by step. 2,682,382 * 8. Let's break it down:

2,000,000 * 8 = 16,000,000

682,382 * 8: Let's compute 600,000 * 8 = 4,800,000

82,382 * 8: 80,000 * 8 = 640,000

2,382 * 8: 2,000 * 8 = 16,000

382 * 8: 300 * 8 = 2,400; 80 * 8 = 640; 2 * 8 = 16. So, 2,400 + 640 + 16 = 3,056.

So, adding up:

640,000 + 16,000 = 656,000

656,000 + 3,056 = 659,056

Now, adding that to 4,800,000: 4,800,000 + 659,056 = 5,459,056

Then, adding 16,000,000: 16,000,000 + 5,459,056 = 21,459,056

So, 2,682,382 * 8 = 21,459,056

Then, adding the extra glass at $5: 21,459,056 + 5 = 21,459,061

So, the total cost is $21,459,061.

Wait, let me double-check that. So, 5,364,765 divided by 2 is 2,682,382.5, which means 2,682,382 full pairs and half a pair. But since we can't have half a pair, the last glass is the first one of the next pair, which is $5. So, yes, that makes sense.

Alternatively, another way to think about it is that for every two glasses, it's $8, so for 5,364,764 glasses, which is an even number, it would be 5,364,764 / 2 = 2,682,382 pairs, each costing $8, so 2,682,382 * 8 = 21,459,056. Then, the last glass is $5, so total is 21,459,056 + 5 = 21,459,061.

Yes, that seems consistent.

Wait, but let me make sure that I didn't make a mistake in the multiplication. Let me try another way. 2,682,382 * 8. Let's compute 2,682,382 * 8:

2,000,000 * 8 = 16,000,000

682,382 * 8: Let's compute 600,000 * 8 = 4,800,000

82,382 * 8: 80,000 * 8 = 640,000

2,382 * 8: 2,000 * 8 = 16,000

382 * 8: 300 * 8 = 2,400; 80 * 8 = 640; 2 * 8 = 16. So, 2,400 + 640 + 16 = 3,056.

So, 82,382 * 8 = 640,000 + 16,000 = 656,000

Then, 682,382 * 8 = 4,800,000 + 656,000 = 5,456,000

Wait, no, that's not right. Wait, 682,382 is 600,000 + 82,382. So, 600,000 * 8 = 4,800,000; 82,382 * 8 = 656,000. So, 4,800,000 + 656,000 = 5,456,000.

Then, 2,682,382 * 8 = 2,000,000 * 8 + 682,382 * 8 = 16,000,000 + 5,456,000 = 21,456,000.

Wait, but earlier I had 21,459,056. Hmm, so which is correct?

Wait, 2,682,382 * 8: Let's compute 2,682,382 * 8.

Let me write it down:

2,682,382

x     8

--------

Starting from the right:

2 * 8 = 16, write down 6, carryover 1.

8 * 8 = 64 + 1 = 65, write down 5, carryover 6.

3 * 8 = 24 + 6 = 30, write down 0, carryover 3.

8 * 8 = 64 + 3 = 67, write down 7, carryover 6.

2 * 8 = 16 + 6 = 22, write down 22.

Wait, but 2,682,382 is 2,682,382. So, let's do it step by step:

Starting from the rightmost digit:

2 * 8 = 16. Write down 6, carryover 1.

Next digit: 8 * 8 = 64 + 1 = 65. Write down 5, carryover 6.

Next digit: 3 * 8 = 24 + 6 = 30. Write down 0, carryover 3.

Next digit: 2 * 8 = 16 + 3 = 19. Write down 9, carryover 1.

Next digit: 8 * 8 = 64 + 1 = 65. Write down 5, carryover 6.

Next digit: 6 * 8 = 48 + 6 = 54. Write down 4, carryover 5.

Next digit: 2 * 8 = 16 + 5 = 21. Write down 21.

So, putting it all together: 21,459,056.

Ah, okay, so that's correct. So, 2,682,382 * 8 = 21,459,056.

Then, adding the last glass at $5: 21,459,056 + 5 = 21,459,061.

So, the total cost is $21,459,061.

Wait, but let me think again. Is there another way to compute this? Maybe using the formula for an arithmetic series or something.

Wait, since every two glasses cost $8, and the total number of glasses is 5,364,765, which is an odd number, so the number of pairs is (5,364,765 - 1)/2 = 2,682,382 pairs, and then one extra glass.

So, total cost is 2,682,382 * 8 + 5.

Which is exactly what I did earlier.

Alternatively, if I think of it as (number of pairs) * (cost per pair) + (cost of remaining glass).

So, 2,682,382 * 8 + 5 = 21,459,056 + 5 = 21,459,061.

Yes, that seems consistent.

Wait, but let me check with another approach. Let's compute the total cost as (number of full pairs) * (cost per pair) + (cost of remaining glass).

Number of full pairs: 5,364,765 divided by 2 is 2,682,382.5, so 2,682,382 full pairs.

Cost per pair: $5 + (0.6 * $5) = $5 + $3 = $8.

So, 2,682,382 * 8 = 21,459,056.

Remaining glasses: 5,364,765 - (2,682,382 * 2) = 5,364,765 - 5,364,764 = 1 glass.

Cost of remaining glass: $5.

Total cost: 21,459,056 + 5 = 21,459,061.

Yes, same result.

Alternatively, if I think in terms of arithmetic series, but since it's every second glass, it's not a straightforward arithmetic series. So, maybe that's not the right approach.

Alternatively, think of it as a sequence where each term is either $5 or $3.50, alternating.

But that might complicate things.

Alternatively, think of it as a geometric series, but since the price alternates, it's not a geometric series.

Wait, perhaps not. Let me think.

Wait, actually, the price alternates between $5 and $3.50, so the cost per glass alternates between $5 and $3.50.

So, the total cost is the sum of 5,364,765 terms, each term being either $5 or $3.50, alternating every glass.

So, the total cost can be written as:

Total cost = 5 * (number of $5 glasses) + 3.50 * (number of $3.50 glasses)

Now, since the number of $5 glasses and $3.50 glasses depends on whether the total number is odd or even.

Since 5,364,765 is odd, the number of $5 glasses is one more than the number of $3.50 glasses.

So, let me denote the number of $3.50 glasses as n, then the number of $5 glasses is n + 1.

So, total glasses: n + (n + 1) = 2n + 1 = 5,364,765

So, 2n + 1 = 5,364,765

Therefore, 2n = 5,364,764

n = 5,364,764 / 2 = 2,682,382

So, number of $3.50 glasses is 2,682,382, and number of $5 glasses is 2,682,383.

Therefore, total cost is:

5 * 2,682,383 + 3.50 * 2,682,382

Let me compute that.

First, compute 5 * 2,682,383:

5 * 2,682,383 = 13,411,915

Then, compute 3.50 * 2,682,382:

3.50 * 2,682,382 = ?

Well, 3 * 2,682,382 = 8,047,146

0.5 * 2,682,382 = 1,341,191

So, adding those together: 8,047,146 + 1,341,191 = 9,388,337

So, total cost is 13,411,915 + 9,388,337 = ?

Let me add those:

13,411,915 + 9,388,337

13,411,915 + 9,000,000 = 22,411,915

22,411,915 + 388,337 = 22,799,252 + 1,915? Wait, no.

Wait, 22,411,915 + 9,388,337:

22,411,915 + 9,000,000 = 31,411,915

31,411,915 + 388,337 = 31,799,252 + 1,915? Wait, no.

Wait, 31,411,915 + 388,337:

31,411,915 + 300,000 = 31,711,915

31,711,915 + 88,337 = 31,799,252

So, total cost is $31,799,252.

Wait, that's different from the previous result of $21,459,061.

Hmm, so now I have two different results. Which one is correct?

Wait, so which approach is correct?

In the first approach, I considered that every two glasses cost $8, so 5,364,765 / 2 = 2,682,382.5, so 2,682,382 pairs and one extra glass. So, total cost is 2,682,382 * 8 + 5 = 21,459,061.

In the second approach, I considered that the number of $5 glasses is one more than the number of $3.50 glasses, so 2,682,383 and 2,682,382, and total cost is 5 * 2,682,383 + 3.50 * 2,682,382 = 31,799,252.

Wait, so which one is correct?

Wait, let me check with a smaller number. Let's say Kylar wants to buy 3 glasses.

In the first approach: 3 / 2 = 1.5, so 1 pair and 1 extra glass. So, total cost is 8 + 5 = 13.

In the second approach: number of $5 glasses is 2, number of $3.50 glasses is 1. So, total cost is 5*2 + 3.50*1 = 10 + 3.50 = 13.50.

Wait, but in reality, for 3 glasses, the cost would be:

First glass: $5

Second glass: 60% of $5 = $3

Third glass: $5

Total: $5 + $3 + $5 = $13.

So, the second approach gives $13.50, which is incorrect.

Therefore, the second approach is wrong.

So, that means the first approach is correct.

Therefore, in the first approach, for 3 glasses, it's 1 pair (2 glasses) costing $8, and 1 extra glass costing $5, total $13.

Similarly, for 5,364,765 glasses, it's 2,682,382 pairs (5,364,764 glasses) costing 2,682,382 * 8 = 21,459,056, plus 1 extra glass costing $5, total $21,459,061.

Therefore, the first approach is correct.

So, the answer is $21,459,061.

Wait, but let me check with another small number to make sure.

Take 1 glass: 5.

Second glass: 3.

Third glass: 5.

Fourth glass: 3.

Fifth glass: 5.

So, for 5 glasses: 5 + 3 + 5 + 3 + 5 = 21.

Using the first approach: 5 / 2 = 2.5, so 2 pairs (4 glasses) costing 2 * 8 = 16, plus 1 extra glass costing 5, total 21. Correct.

Using the second approach: number of $5 glasses is 3, number of $3.50 glasses is 2. So, total cost is 5*3 + 3.50*2 = 15 + 7 = 22. Which is incorrect.

So, the second approach is wrong.

Therefore, the first approach is correct.

Therefore, for 5,364,765 glasses, the total cost is 21,459,061.

So, I think that's the answer.

**Final Answer**
The total cost Kylar must pay is \boxed{21459061} dollars.
</think>

Kylar wants to purchase 5,364,765 glasses, where the first glass costs $5 and every second glass costs 60% of that price.

1. The cost for the first glass is $5.
2. The cost for the second glass is 60% of $5, which is $3.
3. The cost for the third glass is $5 again, and the fourth glass is 60% of $5, which is $3.
4. This pattern continues, alternating between $5 and $3.

To find the total cost, we note that every pair of glasses costs $8 (since $5 + $3 = $8).

5. The number of pairs in 5,364,765 glasses is calculated by dividing 5,364,765 by 2, which gives 2,682,382 pairs with one extra glass.
6. The total cost for the pairs is 2,682,382 pairs multiplied by $8, which is $21,459,056.
7. The extra glass costs $5.

Adding these together, the total cost is:
\[ 21,459,056 + 5 = 21,499,061 \] """




run_query(test_prompt, max_tokens=5000)

Performing scheduled memory cleanup...


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


GPU Memory before inference: 9.94GB allocated, 10.51GB reserved

Generating response:

"At the store, one glass costs $5, but every second glass costs only 60% of that price. "
                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"

Alright, so I've got this problem here. It says that at the store, one glass costs $5, but every second glass costs only 60% of that price. Kylar wants to purchase 5,364,765 glasses. I need to find the total cost he must pay.

Hmm, okay. Let me try to break this down. So, the first glass is $5. The second glass is 60% of $5, which is $3. Then the third glass is $5 again, the fourth is 60% of $5, and so on. So, it's alternating between $5 and 60% of $5.

Wait, so every pair of glasses costs $5 + (0.6 * $5). Let me calculate that. 0.6 times 5 is 3, so each pair costs $5 + $3 = $8. So, every two glasses, it's $8.

Now, Kylar wants to buy 5,364,765 glasses. Let me see how many pairs that is. Since each pair is two gl

'"At the store, one glass costs $5, but every second glass costs only 60% of that price. "\n                   "Kylar wants to purchase 5,364,765 glasses. What is the total cost he must pay?"\n\nAlright, so I\'ve got this problem here. It says that at the store, one glass costs $5, but every second glass costs only 60% of that price. Kylar wants to purchase 5,364,765 glasses. I need to find the total cost he must pay.\n\nHmm, okay. Let me try to break this down. So, the first glass is $5. The second glass is 60% of $5, which is $3. Then the third glass is $5 again, the fourth is 60% of $5, and so on. So, it\'s alternating between $5 and 60% of $5.\n\nWait, so every pair of glasses costs $5 + (0.6 * $5). Let me calculate that. 0.6 times 5 is 3, so each pair costs $5 + $3 = $8. So, every two glasses, it\'s $8.\n\nNow, Kylar wants to buy 5,364,765 glasses. Let me see how many pairs that is. Since each pair is two glasses, the number of pairs is 5,364,765 divided by 2. Let me compute that.