In [1]:
import os, psutil, gc
import time 
import json
import pprint

import re

from collections import defaultdict
import random

import numpy as np 

import matplotlib.pyplot as plt 
import seaborn as sns
sns.set_style("ticks")
sns.set_palette("tab20")
colors = sns.color_palette("bright")

import warnings

In [3]:
import torch 
from torch.nn import CrossEntropyLoss
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer
from vllm import LLM, SamplingParams, PoolingParams

from sal.config import Config
from sal.models.reward_models import PRM
from sal.utils.score import aggregate_scores
from sal.search.utils import build_conv, generate_k_steps, last

from core.reward_models import RLHFFlow

from datasets import Dataset, load_dataset

from core import best_of_n
from utils.load_data import load_data_prm800k
from utils import grader 

In [6]:
# base_dir
base_dir = '/groups/kjun/tnn/datasets/'

# dataset path
data_dir = base_dir + "/prm800k/math_splits"
# data_dir = base_dir + "/math500"

# llm and prm path
llm_dir = base_dir + "/Llama-3.2-1B-Instruct-GGUF/Llama-3.2-1B-Instruct.Q4_K_M.gguf"
prm_dir = base_dir + "/Llama3.1-8B-PRM-Deepseek-Data-GGUF/Llama3.1-8B-PRM-Deepseek-Data.Q4_K_M.gguf"

llm_tokenizer_dir = base_dir + "/Llama-3.2-1B-Instruct"
prm_tokenizer_dir = base_dir + "/Llama3.1-8B-PRM-Deepseek-Data"

In [11]:
dataset_name = "HuggingFaceH4/Llama-3.2-1B-Instruct-best-of-N-completions" 
dataset_split = 'train'
# config_name = "HuggingFaceH4_MATH-500--T-0.8--top_p-1.0--n-256--max_tokens-2048--bsz-8--seed-0--agg_strategy-last" 
config_name = "HuggingFaceH4_MATH-500--T-0.8--top_p-1.0--n-256--max_tokens-2048--bsz-8--seed-0--agg_strategy-last"
config_name = "bon--n-256--level-4--train--v01--chunk-0_200--trial-0"

# dataset = load_dataset(dataset_name, split=dataset_split, name=config_name, cache_dir=data_dir)
dataset = load_dataset("json", data_files = f"results/{config_name}.jsonl", split='train')

Generating train split: 0 examples [00:00, ? examples/s]

In [8]:
tokenizer = AutoTokenizer.from_pretrained(llm_tokenizer_dir)
llm_tf = AutoModelForCausalLM.from_pretrained(llm_tokenizer_dir).to("cuda:3")
# model_regular.generation_config.pad_token_id = tokenizer.eos_token_id
gc.collect();torch.cuda.empty_cache();
print('#--- memory:', torch.cuda.memory_allocated(3)/(1024**3))

#--- memory: 4.6037678718566895


In [53]:
def split_steps(text):
    # Find all start positions of steps
    step_starts = [match.start() for match in re.finditer(r'## Step \d+:', text)]
    step_starts.append(len(text))  # Add end of text as final boundary

    steps = []
    for i in range(len(step_starts) - 1):
        chunk = text[step_starts[i]:step_starts[i+1]]
        steps.append(chunk.strip())

    # Extend the last step to include final answer if present
    if steps and r'\boxed' in text:
        # Append the last boxed line to the final step if not already included
        final_answer_match = re.search(r'The final answer is:.*?\\boxed\{.*?\}', text)
        if final_answer_match:
            final_answer = final_answer_match.group(0).strip()
            if final_answer not in steps[-1]:
                steps[-1] += '\n\n' + final_answer

    return steps

def split_steps(text):
    # Find all step headers and their positions
    step_matches = list(re.finditer(r"## Step \d+", text))
    step_positions = [(m.start(), m.group(0)) for m in step_matches]

    steps = []

    # Extract each step block based on positions
    for i in range(len(step_positions)):
        start_idx = step_positions[i][0]
        end_idx = step_positions[i + 1][0] if i + 1 < len(step_positions) else len(text)
        # step_text = text[start_idx:end_idx]
        step_text = text[start_idx:end_idx].strip()
        steps.append(step_text)

    if len(steps) == 0:
        return steps 

    steps_adjusted = steps[:-1]
    # print(steps_adjusted)
    newlines = "\n\n"
    # print(text)
    # print(steps)
    last_step = steps[-1].split(newlines)
    # for step in last_step:
    #     print("\n->")
    #     print(step)
    # print(last_step)
    tmp_step = newlines.join(last_step[:-1])
    tmp_step = tmp_step.strip()
    steps_adjusted.append(tmp_step)
    steps_adjusted.append(last_step[-1].strip())
    
    # for step in steps_adjusted:
    #     print("\n->")
    #     print(step)
    # # print(steps)
    # stop
    # # Extract final answer sentence with \boxed{}
    # final_answer_match = re.search(r"The final answer is:.*?\\boxed\{.*?\}", text)
    # final_answer = final_answer_match.group(0).strip() if final_answer_match else None

    return steps_adjusted

text = '''
## Step 1: Understand the problem
The circle has center $Q$ and a radius of 14 inches. Two smaller semicircles are tangent to each other and to the larger semicircle. The problem asks for the radius of the smaller semicircle.

## Step 2: Recognize the relevant geometry
The right angle formed by the radii with the common tangent lines is crucial. It implies a right-angled triangle $\triangle ABQ$, where $A$ is the center of the larger circle, $B$ is the point of tangency on the smaller semicircle, $Q$ is the center of the smaller circle, and $Q$ is the point of tangency with the larger semicircle.

## Step 3: Identify the right triangle
To solve the problem efficiently, we can use the properties of right-angled triangles. We can see that $\triangle ABQ$ is an isosceles right triangle because both radii are of equal length.

## Step 4: Apply the Pythagorean Theorem
Let's denote the radius of the smaller semicircle as $x$. Using the Pythagorean Theorem, we can relate the sides of the triangle. We know that $AQ = QB = 14$, and $AB = \sqrt{14^2 + x^2}$. We can use the fact that $AB$ is the hypotenuse of the right triangle $\triangle ABQ$. We can thus write: $\sqrt{14^2 + x^2} = \sqrt{14^2 + 14^2}$

## Step 5: Solve for the radius of the smaller semicircle
Now we can simplify the equation and solve for $x$: $\sqrt{14^2 + x^2} = \sqrt{2 \cdot 14^2}$, $x = \sqrt{2 \cdot 14^2 - 14^2}$, $x = \sqrt{28 \cdot 14}$, $x = \sqrt{392}$, $x = 14\sqrt{2}$


The final answer is: $\boxed{14\sqrt{2}}$
'''
steps = split_steps(text)
for step in steps:
    print("\n->")
    print(step)


->
## Step 1: Understand the problem
The circle has center $Q$ and a radius of 14 inches. Two smaller semicircles are tangent to each other and to the larger semicircle. The problem asks for the radius of the smaller semicircle.

->
## Step 2: Recognize the relevant geometry
The right angle formed by the radii with the common tangent lines is crucial. It implies a right-angled triangle $	riangle ABQ$, where $A$ is the center of the larger circle, $B$ is the point of tangency on the smaller semicircle, $Q$ is the center of the smaller circle, and $Q$ is the point of tangency with the larger semicircle.

->
## Step 3: Identify the right triangle
To solve the problem efficiently, we can use the properties of right-angled triangles. We can see that $	riangle ABQ$ is an isosceles right triangle because both radii are of equal length.

->
## Step 4: Apply the Pythagorean Theorem
Let's denote the radius of the smaller semicircle as $x$. Using the Pythagorean Theorem, we can relate the sides 

In [12]:
level = 4
dataset_by_level = dataset.filter(lambda example: example['level'] == level)

Filter:   0%|          | 0/200 [00:00<?, ? examples/s]

In [13]:
print(dataset_by_level)

Dataset({
    features: ['problem', 'solution', 'answer', 'subject', 'level', 'unique_id', 'completions', 'scores', 'pred', 'completion_tokens', 'agg_scores', 'pred_weighted@4', 'pred_maj@4', 'pred_naive@4', 'pred_weighted@8', 'pred_maj@8', 'pred_naive@8', 'pred_weighted@16', 'pred_maj@16', 'pred_naive@16', 'pred_weighted@32', 'pred_maj@32', 'pred_naive@32', 'pred_weighted@64', 'pred_maj@64', 'pred_naive@64', 'pred_weighted@128', 'pred_maj@128', 'pred_naive@128'],
    num_rows: 200
})


In [63]:
config = Config()

all_data = []
for q_idx, data in enumerate(dataset_by_level):
    if q_idx > 2:
        continue
    # pprint.pprint(data)
    # print(len(data["scores"]))
    
    gt_cot, gt_answer  = parser.parse_ground_truth(data, 'math')
    
    cnt = 0
    for i_idx, completion in enumerate(data['completions']):
        # if depth >= len(scores):
        #     continue
        if i_idx > 5:
            continue

        steps = split_steps(completion)
        if len(steps) == 0:
            continue

        if len(steps) != len(scores):
            # print("errors")
            # print(len(scores))
            # print(completion)
            cnt += 1
            continue
        
        c_answer = grader.extract_last_boxed_answer(completion)
        # print(completion)
        # print(c_answer)
        is_correct = 0        
        with warnings.catch_warnings(record=True) as w:
            # warnings.simplefilter("always")  # Capture all warnings, even if ignored previously
            if grader.grade_answer(c_answer, gt_answer):
                is_correct = 1
                
            for warning in w:
                print(completion)
                print(warning)

        current_text = ""
        
        for s_idx, step in enumerate(steps):
            current_text += step 
            convs = [
                build_conv(problem, current_text, config.system_prompt)
            ]            

            templated_convs = tokenizer.apply_chat_template(
                convs,
                add_generation_prompt=False,
                continue_final_message=True,
                tokenize=False,
            )
    
            inputs = tokenizer(templated_convs[0], return_tensors="pt").to(llm_tf.device)
            outputs = llm_tf(**inputs, output_hidden_states=True)

            # Get last_token_embeds
            last_hidden_state = outputs.hidden_states[-1]
            last_token_embeds = last_hidden_state[:, -1, :].squeeze(0).detach().cpu().numpy()
            
            # Compute otuput_log_prob
            # Prepare labels: shift input_ids to the right by one
            labels = inputs['input_ids'][:, 1:]   
            shifted_logits = outputs.logits[:, :-1, :]
            loss_fct = CrossEntropyLoss(reduction='sum')
            completion_log_prob = -loss_fct(shifted_logits.view(-1, shifted_logits.size(-1)), labels.view(-1)).detach().cpu().numpy()
            # completion_ppl = np.exp(completion_log_prob/len(labels))
            # print(sent_ppl)
            # print(loss)
            # completion_log_probs.append(completion_log_prob)
            # completion_ppls.append(completion_ppl)

            x["problem"] = problem
            x["current_step"] = current_text
            x["step_num"] = s_idx
            x["is_correct"] = is_correct
            x["is_completed"] = 1 if s_idx == len(steps) - 1 else 0 
            x["gt"] = gt_answer
            x["pred"] = c_answer
            x["prm_score"] = scores[s_idx]
            x["embeds"] = [a for a in last_token_embeds]

            x = defaultdict()
            all_data.append(x)


with open(f"results/{config_name}.json", 'w', encoding = 'utf-8') as fout:
    json.dump(all_data, fout, indent=4)

TypeError: Object of type float32 is not JSON serializable