In [1]:
%load_ext autoreload
%autoreload 2

# Search for search strategies and hyper-parameters


In [2]:
import argparse
import torch
import json
import os

import time
from collections import defaultdict
from datetime import datetime
from vllm import LLM, SamplingParams
from transformers import set_seed

from agent_search.prompt import PromptTemplate
from agent_search.agent import VLLMPythonMathAgent, AgentSearchCfg

In [3]:
GPU_DTYPE = torch.float16
LOCAL_GPU_MEM_GB = 80
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [4]:
parser = argparse.ArgumentParser(
    description="Search for search strategies and hyper-parameters", allow_abbrev=False
)

parser.add_argument(
    "--model_id_or_path",
    type=str,
    default="deepseek-ai/deepseek-math-7b-rl",
    help="The HF model ID or path to the model",
)
parser.add_argument(
    "--seed",
    type=int,
    default=42,
    help="The HF model ID or path to the model",
)
parser.add_argument(
    "--gpu_mem_util",
    type=float,
    default=0.9,
    help="The fraction of GPU memory to use",
)
parser.add_argument(
    "--gen_dset_fpath",
    type=str,
    help="The path to the dataset to generate the search space from",
)

parser.add_argument(
    "--temperature",
    type=float,
    default=0.6,
    help="The temperature to use for sampling",
)

parser.add_argument(
    "--code_temperature",
    type=float,
    default=0.6,
    help="The temperature to use for sampling code",
)

parser.add_argument(
    "--init_temperature",
    type=float,
    default=1.0,
    help="The temperature to use for init sampling",
)

parser.add_argument(
    "--top_p",
    type=float,
    default=0.95,
    help="The top_p to use for sampling",
)

parser.add_argument(
    "--allow_timeout",
    action="store_true",
    help="Whether to allow timeout when executing code",
)

parser.add_argument(
    "--allow_err",
    action="store_true",
    help="Whether to allow errors when executing code",
)

parser.add_argument(
    "--allow_empty_output",
    action="store_true",
    help="Whether to allow empty outputs when executing code",
)

parser.add_argument(
    "--n_fin_ans",
    type=int,
    default=8,
    help="The number of answers to sample per problem",
)


parser.add_argument(
    "--n_max_out_tok",
    type=int,
    default=4096,
    help="The maximum number of tokens to generate. Few completion are longer than 1024 tokens",
)

parser.add_argument(
    "--n_max_tot_tok",
    type=int,
    default=4096,
    help="The maximum number of tokens to generate",
)

parser.add_argument(
    "--n_init_gen",
    type=int,
    default=1,
    help="The number of init generations",
)

parser.add_argument(
    "--n_init_sample",
    type=int,
    default=16,
    help="The number of samples when init sampling",
)

parser.add_argument(
    "--n_max_sample",
    type=int,
    default=1024,
    help="The maximum number of samples for trial per problem",
)

parser.add_argument(
    "--beam_width",
    type=int,
    default=1,
    help="The beam width to use for sampling",
)

parser.add_argument(
    "--n_max_call",
    type=int,
    default=None,
    help="DeepSeekMath-7B-RL usually can not correctly solve problems with more than 2 calls",
)

parser.add_argument(
    "--instr",
    type=str,
    default="The final answer must be a non-negative integer.",
    # default="""To accomplish this, first determine a sympy-based approach for solving the problem by listing each step to take and what functions need to be called in each step.
    # Be clear so even an idiot can follow your instructions.
    # Write the entire script covering all the steps (use comments and document it well) and print the result.
    # The intermediary calculations may be free-form, but the final answer should always be an non-negative integer.
    # Your final answer should always be a non-negative integer, not an algebraic expression!
    # Your final answer should always be a non-negative integer, not an algebraic expression!
    # Your final answer should always be a non-negative integer, not an algebraic expression!
    # After solving the problem, output the final answer within \\boxed{}.""",
    help="The instruction to append to the problem.",
)

parser.add_argument(
    "--rag",
    action="store_true",
    help="Whether to use RAG model for generation",
)

args, _ = parser.parse_known_args()

In [5]:
set_seed(args.seed)

In [6]:
ICL_EG_QA_MAP = {
    """
Let $k, l > 0$ be parameters.
The parabola $y = kx^2 - 2kx + l$ intersects the line $y = 4$ at two points $A$ and $B$.
These points are distance 6 apart.
What is the sum of the squares of the distances from $A$ and $B$ to the origin?
    """: r'''
```python
from sympy import symbols, solve, sqrt

def sum_of_squares_of_distances():
    """Let $k, l > 0$ be parameters. The parabola $y = kx^2 - 2kx + l$ intersects the line $y = 4$ at two points $A$ and $B$. These points are distance 6 apart. What is the sum of the squares of the distances from $A$ and $B$ to the origin?
"""
    x, k, l = symbols('x k l')
    # Equation of the parabola
    parabola_eq = k*x**2 - 2*k*x + l - 4
    # Solve for x when y = 4
    x_values = solve(parabola_eq, x)
    # Distance from A and B to the origin
    distance_A = sqrt(x_values[0]**2 + 4**2)
    distance_B = sqrt(x_values[1]**2 + 4**2)
    # The sum of the squares of the distances
    sum_of_squares = distance_A**2 + distance_B**2
    # Substitute the condition that the distance between A and B is 6
    sum_of_squares = sum_of_squares.subs(sqrt((x_values[1] - x_values[0])**2), 6)

    return sum_of_squares

result = sum_of_squares_of_distances()
print(result)
```

```output
32 + (k - sqrt(k*(k - l + 4)))**2/k**2 + (k + sqrt(k*(k - l + 4)))**2/k**2
```

The code seem to fail to solve the equations directly. We need to further simplify the equation.

We know that the distance between $A$ and $B$ is $x_2-x_1=6$, and the Vieta's formulas give us that the sum of the roots of the equation $kx^2 - 2kx + l - 4 = 0$ is $x_1 +x_2 =-\frac{b}{a} = -\frac{-2k}{k} = 2$, and the product of the roots is $x_1x_2 = \frac{c}{a} = \frac{l - 4}{k}$. So we can use the identity $\left(x_1 - x_2\right)^2 = \left(x_1 + x_2\right)^2 - 4x_1x_2$ to find a relationship between $k$ and $l$.

By substitution, we have $\left(x_1 - x_2\right)^{2} = - 4\cdot\frac{l - 4}{k} = 6^2$, which simplifies to $4 - \frac{4l - 16}{k} = 36$, or $\frac{4l - 16}{k} = -32$. Multiplying both sides by $k$ gives $4l - 16 = -32k$, or $4l = 16 - 32k$. Dividing both sides by 4 gives $l = 4 - 8k$.

Now, we can substitute this expression for $l$ into the sum of squares of distances to get the final result.

```python
def sum_of_squares_of_distances_final():
    """Let $k, l > 0$ be parameters. The parabola $y = kx^2 - 2kx + l$ intersects the line $y = 4$ at two points $A$ and $B$. These points are distance 6 apart. What is the sum of the squares of the distances from $A$ and $B$ to the origin?
"""
    x, k = symbols('x k')
    # Equation of the parabola with l = 4 - 8k
    parabola_eq = k*x**2 - 2*k*x + 4 - 8*k - 4
    # Solve for x when y = 4
    x_values = solve(parabola_eq, x)
    # Distance from A and B to the origin
    distance_A = sqrt(x_values[0]**2 + 4**2)
    distance_B = sqrt(x_values[1]**2 + 4**2)
    # The sum of the squares of the distances
    sum_of_squares = distance_A**2 + distance_B**2
    # Substitute the condition that the distance between A and B is 6
    sum_of_squares = sum_of_squares.subs(sqrt((x_values[1] - x_values[0])**2), 6)

    return sum_of_squares

result = sum_of_squares_of_distances_final()
print(result)
```

```output
52
```

So, the sum of the squares of the distances from $A$ and $B$ to the origin is $52$. The answer is $\boxed{52}$.
'''
}

ICL_EG_QA_MAP = {
    k.replace("\n", " ").strip(): v.strip() for k, v in ICL_EG_QA_MAP.items()
}
for i, (q, a) in enumerate(ICL_EG_QA_MAP.items()):
    print(f"### Problem {i}")
    print(f"#### Problem")
    print(q)
    print(f"#### Solution")
    print(a)

### Problem 0
#### Problem
Let $k, l > 0$ be parameters. The parabola $y = kx^2 - 2kx + l$ intersects the line $y = 4$ at two points $A$ and $B$. These points are distance 6 apart. What is the sum of the squares of the distances from $A$ and $B$ to the origin?
#### Solution
```python
from sympy import symbols, solve, sqrt

def sum_of_squares_of_distances():
    """Let $k, l > 0$ be parameters. The parabola $y = kx^2 - 2kx + l$ intersects the line $y = 4$ at two points $A$ and $B$. These points are distance 6 apart. What is the sum of the squares of the distances from $A$ and $B$ to the origin?
"""
    x, k, l = symbols('x k l')
    # Equation of the parabola
    parabola_eq = k*x**2 - 2*k*x + l - 4
    # Solve for x when y = 4
    x_values = solve(parabola_eq, x)
    # Distance from A and B to the origin
    distance_A = sqrt(x_values[0]**2 + 4**2)
    distance_B = sqrt(x_values[1]**2 + 4**2)
    # The sum of the squares of the distances
    sum_of_squares = distance_A**2 + distance_B*

In [7]:
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

args.gen_dset_fpath = (
    "/ssddata/tongyx/projects/agent-search/data/iid_hardest_non_neg_int_ans_math.json"
)
args.allow_timeout = True
args.allow_err = True
args.allow_empty_output = True

args.rag = True

In [8]:
llm = LLM(
    model=args.model_id_or_path,
    tokenizer=args.model_id_or_path,
    tensor_parallel_size=torch.cuda.device_count(),
    dtype=GPU_DTYPE,
    seed=args.seed,
    trust_remote_code=True,
    gpu_memory_utilization=args.gpu_mem_util,
    swap_space=args.gpu_mem_util * LOCAL_GPU_MEM_GB,
)



INFO 06-25 06:35:23 llm_engine.py:161] Initializing an LLM engine (v0.5.0.post1) with config: model='deepseek-ai/deepseek-math-7b-rl', speculative_config=None, tokenizer='deepseek-ai/deepseek-math-7b-rl', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=42, served_model_name=deepseek-ai/deepseek-math-7b-rl)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


INFO 06-25 06:35:42 weight_utils.py:218] Using model weights format ['*.safetensors']
INFO 06-25 06:35:48 model_runner.py:160] Loading model weights took 12.8725 GB
INFO 06-25 06:35:48 gpu_executor.py:83] # GPU blocks: 7826, # CPU blocks: 9830
INFO 06-25 06:37:29 model_runner.py:889] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 06-25 06:37:29 model_runner.py:893] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO 06-25 06:37:38 model_runner.py:965] Graph capturing finished in 9 secs.


In [9]:
with open(args.gen_dset_fpath, "r") as f:
    dset = json.load(f)
print(f"Loaded dataset with {len(dset)} problems")

Loaded dataset with 151 problems


In [10]:
sampling_params = SamplingParams(
    n=args.beam_width,
    temperature=args.temperature,
    top_p=args.top_p,
    max_tokens=args.n_max_out_tok,
    skip_special_tokens=True,
    # prompt_logprobs=1,
    # logprobs=0,
    seed=args.seed,
)
print(f"{sampling_params=}")
search_cfg = AgentSearchCfg(
    sampling_params=sampling_params,
    n_fin_ans=args.n_fin_ans,
    n_init_sample=args.n_init_sample,
    n_max_sample=args.n_max_sample,
    n_init_gen=args.n_init_gen,
    init_temperature=args.init_temperature,
    code_temperature=args.code_temperature,
    n_max_tot_tok=args.n_max_tot_tok,
    n_max_call=args.n_max_call,
    allow_timeout=args.allow_timeout,
    allow_err=args.allow_err,
    allow_empty_output=args.allow_empty_output,
)
print(f"{search_cfg.__dict__=}")


agent = VLLMPythonMathAgent(
    llm,
    prompt_template="deepseekmath-tool",
    search_cfg=search_cfg,
    code_cell_template="deepseekmath",
    rag_eg_qa_map=ICL_EG_QA_MAP if args.rag else None,
    eq=lambda x, y: x % 1000 == y % 1000,
)

agent.ctx_template.prompt_template.prompt_after_query = (
    " " + args.instr + agent.ctx_template.prompt_template.prompt_after_query
)

sampling_params=SamplingParams(n=1, best_of=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.95, top_k=-1, min_p=0.0, seed=42, use_beam_search=False, length_penalty=1.0, early_stopping=False, stop=[], stop_token_ids=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4096, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None)
search_cfg.__dict__={'sampling_params': SamplingParams(n=1, best_of=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.95, top_k=-1, min_p=0.0, seed=42, use_beam_search=False, length_penalty=1.0, early_stopping=False, stop=[], stop_token_ids=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4096, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None), 'n_fin_ans': 8, '

In [11]:
test_home = os.path.splitext(args.gen_dset_fpath)[0]
test_timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
test_dir = os.path.join(test_home, f"test-{test_timestamp}")
domain_correct_idxs = defaultdict(list)

problem_times = []

try:
    for idx, dp in enumerate(dset[29:]):
        search_timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        # search_dir = os.path.join(seed_dir, f"problem-{idx}-{search_timestamp}")
        search_dir = os.path.join(test_dir, f"problem-{idx}-{search_timestamp}")
        print(f"### {idx}")
        print("#### Problem")
        print(dp["query"])
        print("#### Solution")
        problem_start = time.time()
        ans = agent.search(init_input=dp["query"], output_dir=search_dir) % 1000
        problem_time = time.time() - problem_start
        problem_times.append(problem_time)
        mod_ref_ans = dp["ref_ans"] % 1000
        eq = ans == mod_ref_ans
        if eq and "domain" in dp:
            domain_correct_idxs[dp["domain"]].append(idx)
        print(f"Pred. ({ans}) {'=' if eq else '!'}= Ref. ({dp['ref_ans']} % 1000)")
        print(f"Problem time: {problem_time:.2f}s")
        print(
            f"Avg. Time so far: {sum(problem_times)/len(problem_times):.2f}s (= {sum(problem_times):.2f} / {len(problem_times)})"
        )
        all_correct_idxs = sum(domain_correct_idxs.values(), [])
        print(
            f"Acc. so far: {len(all_correct_idxs) / (idx + 1):.2%} (= {len(all_correct_idxs)} / {idx + 1})"
        )
except BaseException as e:
    raise e
finally:
    all_correct_idxs = sum(domain_correct_idxs.values(), [])
    domain_correct_cnt = {k: len(v) for k, v in domain_correct_idxs.items()}

    metrics = {
        "search_cfg": {
            **{k: v for k, v in search_cfg.__dict__.items() if k != "sampling_params"},
            "sampling_params": search_cfg.sampling_params.__repr__(),
        },
        "acc": len(all_correct_idxs) / len(dset),
        "domain_correct_cnt": domain_correct_cnt,
        "avg_time": sum(problem_times) / len(problem_times),
        "domain_correct_idxs": domain_correct_idxs,
        "problem_times": problem_times,
    }

    with open(os.path.join(test_dir, f"metrics.json"), "w") as f:
        json.dump(metrics, f, ensure_ascii=False, indent=2)

### 0
#### Problem
When the expression $-2x^2-20x-53$ is written in the form $a(x+d)^2+e$, where $a$, $d$, and $e$ are constants, what is the sum $a+d+e$?
#### Solution
len(trajs)=1
sampling_params.n=16


Processed prompts: 100%|██████████| 1/1 [00:04<00:00,  4.95s/it, est. speed input: 282.72 toks/s, output: 766.66 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 0 | continue: 16
After single selection, # of trajectories: finished: 0 | dropped: 0 | continue: 16
After multi-selection,  # of trajectories: finished: 0 | dropped: 0 | continue: 16



Executing: 100%|██████████| 16/16 [00:00<00:00, 29.97it/s]


After execution, # of trajectories: finished: 0 | dropped: 0 | continue: 16
len(trajs)=16
sampling_params.n=1


Processed prompts: 100%|██████████| 16/16 [00:11<00:00,  1.39it/s, est. speed input: 2304.84 toks/s, output: 479.18 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 0 | continue: 16
After single selection, # of trajectories: finished: 0 | dropped: 1 | continue: 15
After multi-selection,  # of trajectories: finished: 0 | dropped: 1 | continue: 15



Executing: 100%|██████████| 15/15 [00:00<00:00, 34.71it/s]


After execution, # of trajectories: finished: 0 | dropped: 1 | continue: 15
len(trajs)=15
sampling_params.n=1


Processed prompts: 100%|██████████| 15/15 [00:11<00:00,  1.29it/s, est. speed input: 2613.03 toks/s, output: 431.05 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 1 | continue: 15
After single selection, # of trajectories: finished: 0 | dropped: 1 | continue: 15
After multi-selection,  # of trajectories: finished: 0 | dropped: 1 | continue: 15



Executing: 100%|██████████| 15/15 [00:00<00:00, 25.12it/s]


After execution, # of trajectories: finished: 0 | dropped: 1 | continue: 15
len(trajs)=15
sampling_params.n=1


Processed prompts: 100%|██████████| 15/15 [00:12<00:00,  1.18it/s, est. speed input: 2793.37 toks/s, output: 401.43 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 1 | continue: 15
After single selection, # of trajectories: finished: 0 | dropped: 1 | continue: 15
After multi-selection,  # of trajectories: finished: 0 | dropped: 1 | continue: 15



Executing: 100%|██████████| 15/15 [00:00<00:00, 23.35it/s]


After execution, # of trajectories: finished: 0 | dropped: 1 | continue: 15
len(trajs)=15
sampling_params.n=1


Processed prompts: 100%|██████████| 15/15 [00:13<00:00,  1.11it/s, est. speed input: 3034.80 toks/s, output: 365.98 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 1 | continue: 15
After single selection, # of trajectories: finished: 0 | dropped: 1 | continue: 15
After multi-selection,  # of trajectories: finished: 0 | dropped: 1 | continue: 15



Executing: 100%|██████████| 15/15 [00:00<00:00, 19.19it/s]


After execution, # of trajectories: finished: 0 | dropped: 1 | continue: 15
len(trajs)=15
sampling_params.n=1


Processed prompts: 100%|██████████| 15/15 [00:14<00:00,  1.01it/s, est. speed input: 3136.93 toks/s, output: 343.01 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 1 | continue: 15
After single selection, # of trajectories: finished: 0 | dropped: 2 | continue: 14
After multi-selection,  # of trajectories: finished: 0 | dropped: 2 | continue: 14



Executing: 100%|██████████| 14/14 [00:00<00:00, 16.03it/s]


After execution, # of trajectories: finished: 0 | dropped: 2 | continue: 14
len(trajs)=14
sampling_params.n=1


Processed prompts: 100%|██████████| 14/14 [00:13<00:00,  1.05it/s, est. speed input: 3579.71 toks/s, output: 318.38 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 2 | continue: 14
After single selection, # of trajectories: finished: 0 | dropped: 4 | continue: 12
After multi-selection,  # of trajectories: finished: 0 | dropped: 4 | continue: 12



Executing: 100%|██████████| 12/12 [00:00<00:00, 13.36it/s]


After execution, # of trajectories: finished: 0 | dropped: 4 | continue: 12
len(trajs)=12
sampling_params.n=1


Processed prompts: 100%|██████████| 12/12 [00:10<00:00,  1.12it/s, est. speed input: 4118.56 toks/s, output: 261.71 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 4 | continue: 12
After single selection, # of trajectories: finished: 0 | dropped: 13 | continue: 3
After multi-selection,  # of trajectories: finished: 0 | dropped: 13 | continue: 3



Executing: 100%|██████████| 3/3 [00:00<00:00,  7.92it/s]


After execution, # of trajectories: finished: 0 | dropped: 13 | continue: 3
len(trajs)=3
sampling_params.n=1


Processed prompts: 100%|██████████| 3/3 [00:04<00:00,  1.52s/it, est. speed input: 2208.81 toks/s, output: 135.03 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 13 | continue: 3
After single selection, # of trajectories: finished: 0 | dropped: 13 | continue: 3
After multi-selection,  # of trajectories: finished: 0 | dropped: 13 | continue: 3



Executing: 100%|██████████| 3/3 [00:00<00:00, 10.68it/s]


After execution, # of trajectories: finished: 0 | dropped: 13 | continue: 3
len(trajs)=3
sampling_params.n=1


Processed prompts: 100%|██████████| 3/3 [00:04<00:00,  1.56s/it, est. speed input: 2302.94 toks/s, output: 147.93 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 13 | continue: 3
After single selection, # of trajectories: finished: 0 | dropped: 14 | continue: 2
After multi-selection,  # of trajectories: finished: 0 | dropped: 14 | continue: 2



Executing: 100%|██████████| 2/2 [00:00<00:00, 179.96it/s]


After execution, # of trajectories: finished: 0 | dropped: 14 | continue: 2
len(trajs)=2
sampling_params.n=1


Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.56s/it, est. speed input: 2360.09 toks/s, output: 111.45 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 14 | continue: 2
After single selection, # of trajectories: finished: 0 | dropped: 14 | continue: 2
After multi-selection,  # of trajectories: finished: 0 | dropped: 14 | continue: 2



Executing: 100%|██████████| 2/2 [00:00<00:00, 18.26it/s]


After execution, # of trajectories: finished: 0 | dropped: 14 | continue: 2
len(trajs)=2
sampling_params.n=1


Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.70s/it, est. speed input: 2283.55 toks/s, output: 112.17 toks/s]


After completion,       # of trajectories: finished: 0 | dropped: 14 | continue: 2
After single selection, # of trajectories: finished: 0 | dropped: 16 | continue: 0
After multi-selection,  # of trajectories: finished: 0 | dropped: 16 | continue: 0
len(trajs)=1
sampling_params.n=16


Processed prompts: 100%|██████████| 1/1 [00:04<00:00,  4.91s/it, est. speed input: 284.70 toks/s, output: 772.03 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 16 | continue: 16
After single selection, # of trajectories: finished: 0 | dropped: 16 | continue: 16
After multi-selection,  # of trajectories: finished: 0 | dropped: 16 | continue: 16



Executing: 100%|██████████| 16/16 [00:00<00:00, 44.78it/s]


After execution, # of trajectories: finished: 0 | dropped: 16 | continue: 16
len(trajs)=16
sampling_params.n=1


Processed prompts: 100%|██████████| 16/16 [00:11<00:00,  1.39it/s, est. speed input: 2310.83 toks/s, output: 480.45 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 16 | continue: 16
After single selection, # of trajectories: finished: 0 | dropped: 17 | continue: 15
After multi-selection,  # of trajectories: finished: 0 | dropped: 17 | continue: 15



Executing: 100%|██████████| 15/15 [00:00<00:00, 35.49it/s]


After execution, # of trajectories: finished: 0 | dropped: 17 | continue: 15
len(trajs)=15
sampling_params.n=1


Processed prompts: 100%|██████████| 15/15 [00:11<00:00,  1.29it/s, est. speed input: 2609.13 toks/s, output: 430.41 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 17 | continue: 15
After single selection, # of trajectories: finished: 0 | dropped: 17 | continue: 15
After multi-selection,  # of trajectories: finished: 0 | dropped: 17 | continue: 15



Executing: 100%|██████████| 15/15 [00:00<00:00, 24.55it/s]


After execution, # of trajectories: finished: 0 | dropped: 17 | continue: 15
len(trajs)=15
sampling_params.n=1


Processed prompts: 100%|██████████| 15/15 [00:12<00:00,  1.17it/s, est. speed input: 2782.41 toks/s, output: 399.85 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 17 | continue: 15
After single selection, # of trajectories: finished: 0 | dropped: 17 | continue: 15
After multi-selection,  # of trajectories: finished: 0 | dropped: 17 | continue: 15



Executing: 100%|██████████| 15/15 [00:00<00:00, 24.70it/s]


After execution, # of trajectories: finished: 0 | dropped: 17 | continue: 15
len(trajs)=15
sampling_params.n=1


Processed prompts: 100%|██████████| 15/15 [00:13<00:00,  1.10it/s, est. speed input: 3016.12 toks/s, output: 363.72 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 17 | continue: 15
After single selection, # of trajectories: finished: 0 | dropped: 17 | continue: 15
After multi-selection,  # of trajectories: finished: 0 | dropped: 17 | continue: 15



Executing: 100%|██████████| 15/15 [00:00<00:00, 18.95it/s]


After execution, # of trajectories: finished: 0 | dropped: 17 | continue: 15
len(trajs)=15
sampling_params.n=1


Processed prompts: 100%|██████████| 15/15 [00:14<00:00,  1.01it/s, est. speed input: 3129.15 toks/s, output: 342.16 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 17 | continue: 15
After single selection, # of trajectories: finished: 0 | dropped: 18 | continue: 14
After multi-selection,  # of trajectories: finished: 0 | dropped: 18 | continue: 14



Executing: 100%|██████████| 14/14 [00:00<00:00, 15.55it/s]


After execution, # of trajectories: finished: 0 | dropped: 18 | continue: 14
len(trajs)=14
sampling_params.n=1


Processed prompts: 100%|██████████| 14/14 [00:13<00:00,  1.04it/s, est. speed input: 3559.90 toks/s, output: 316.62 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 18 | continue: 14
After single selection, # of trajectories: finished: 0 | dropped: 20 | continue: 12
After multi-selection,  # of trajectories: finished: 0 | dropped: 20 | continue: 12



Executing: 100%|██████████| 12/12 [00:00<00:00, 12.80it/s]


After execution, # of trajectories: finished: 0 | dropped: 20 | continue: 12
len(trajs)=12
sampling_params.n=1


Processed prompts: 100%|██████████| 12/12 [00:10<00:00,  1.12it/s, est. speed input: 4114.41 toks/s, output: 261.45 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 20 | continue: 12
After single selection, # of trajectories: finished: 0 | dropped: 29 | continue: 3
After multi-selection,  # of trajectories: finished: 0 | dropped: 29 | continue: 3



Executing: 100%|██████████| 3/3 [00:00<00:00, 13.38it/s]


After execution, # of trajectories: finished: 0 | dropped: 29 | continue: 3
len(trajs)=3
sampling_params.n=1


Processed prompts: 100%|██████████| 3/3 [00:04<00:00,  1.52s/it, est. speed input: 2214.49 toks/s, output: 135.37 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 29 | continue: 3
After single selection, # of trajectories: finished: 0 | dropped: 29 | continue: 3
After multi-selection,  # of trajectories: finished: 0 | dropped: 29 | continue: 3



Executing: 100%|██████████| 3/3 [00:00<00:00, 12.47it/s]


After execution, # of trajectories: finished: 0 | dropped: 29 | continue: 3
len(trajs)=3
sampling_params.n=1


Processed prompts: 100%|██████████| 3/3 [00:04<00:00,  1.55s/it, est. speed input: 2308.86 toks/s, output: 148.31 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 29 | continue: 3
After single selection, # of trajectories: finished: 0 | dropped: 30 | continue: 2
After multi-selection,  # of trajectories: finished: 0 | dropped: 30 | continue: 2



Executing: 100%|██████████| 2/2 [00:00<00:00,  9.52it/s]


After execution, # of trajectories: finished: 0 | dropped: 30 | continue: 2
len(trajs)=2
sampling_params.n=1


Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.55s/it, est. speed input: 2371.99 toks/s, output: 112.02 toks/s]

After completion,       # of trajectories: finished: 0 | dropped: 30 | continue: 2
After single selection, # of trajectories: finished: 0 | dropped: 30 | continue: 2
After multi-selection,  # of trajectories: finished: 0 | dropped: 30 | continue: 2



Executing: 100%|██████████| 2/2 [00:00<00:00, 24.86it/s]


After execution, # of trajectories: finished: 0 | dropped: 30 | continue: 2
len(trajs)=2
sampling_params.n=1


Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.63s/it, est. speed input: 2383.19 toks/s, output: 117.07 toks/s]


After completion,       # of trajectories: finished: 0 | dropped: 30 | continue: 2
After single selection, # of trajectories: finished: 0 | dropped: 32 | continue: 0
After multi-selection,  # of trajectories: finished: 0 | dropped: 32 | continue: 0
len(trajs)=1
sampling_params.n=16


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

ZeroDivisionError: division by zero