In [1]:
import os, psutil, gc
import time 
import json
import pprint

from collections import defaultdict
import random

import numpy as np
np.set_printoptions(4)

In [2]:
import torch 
from torch.nn import CrossEntropyLoss
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer
from vllm import LLM, SamplingParams, PoolingParams

from sal.config import Config
from sal.models.reward_models import PRM
from sal.utils.score import aggregate_scores
from sal.search.utils import build_conv, generate_k_steps, last

from core.reward_models import RLHFFlow

from core import best_of_n
from utils.load_data import load_data_prm800k
from utils import grader 

In [3]:
# base_dir
base_dir = '/groups/kjun/tnn/datasets/'

# dataset path
data_dir = base_dir + "/prm800k/math_splits"

# llm and prm path
llm_dir = base_dir + "/Llama-3.2-1B-Instruct-GGUF/Llama-3.2-1B-Instruct.Q4_K_M.gguf"
prm_dir = base_dir + "/Llama3.1-8B-PRM-Deepseek-Data-GGUF/Llama3.1-8B-PRM-Deepseek-Data.Q4_K_M.gguf"

llm_tokenizer_dir = base_dir + "/Llama-3.2-1B-Instruct"
prm_tokenizer_dir = base_dir + "/Llama3.1-8B-PRM-Deepseek-Data"

In [4]:
tokenizer = AutoTokenizer.from_pretrained(llm_tokenizer_dir)
llm_tf = AutoModelForCausalLM.from_pretrained(llm_tokenizer_dir).to("cuda:3")
# model_regular.generation_config.pad_token_id = tokenizer.eos_token_id
gc.collect();torch.cuda.empty_cache();
print('#--- memory:', torch.cuda.memory_allocated(0)/(1024**3))

#--- memory: 0.0


In [5]:
# prm = RLHFFlow(model_path=prm_tokenizer_dir, device_map='cuda:2')

# gc.collect();torch.cuda.empty_cache();
# print('#--- memory:', torch.cuda.memory_allocated(0)/(1024**3))
# print('#--- memory:', torch.cuda.memory_allocated(1)/(1024**3))
# # print('#--- memory:', torch.cuda.memory_allocated(2)/(1024**3))
# # print('#--- memory:', torch.cuda.memory_allocated(3)/(1024**3))

In [6]:
#  load data 
data_by_levels = load_data_prm800k(data_dir)


# ds_completions = load_completions(completions_dir)

# load random_seeds     
# random_seeds = np.loadtxt("random_seeds.txt").astype("int64")
# random_seeds = [int(seed) for seed in random_seeds]

1: 43
2: 90
3: 105
4: 128
5: 134


In [8]:
def examine_completions(llm_tf, tokenizer, data_dir, data_by_levels, num_trials, num_questions, config, num_budgets=None):
    
    with open(data_dir, 'r', encoding='utf-8') as fin:
        # all_correctness = []
        # all_correctness = np.zeros((num_trials, len(data_by_levels)))
        # gt_answers = []
        trial_idx = 0
        for line in fin:
            # print(trial_idx)
            # print(num_trials)
            if trial_idx >= num_trials:
                break
                
            trial_data = json.loads(line)
            
            # scores = prm.score(batch_of_questions, trial_data["completions"][:num_questions])
            # agg_scores = [
            #     [aggregate_scores(s, config.agg_strategy) for s in score] for score in scores
            # ]
            # print(
            
            
            for q_idx in range(num_questions):
                print(f"\n-> question idx = {q_idx}")
                question = data_by_levels[q_idx]['problem']
                print(question)
                if num_budgets is not None:
                    completions = trial_data['completions'][q_idx][:num_budgets]
                else:
                    completions = trial_data['completions'][q_idx]

                # print(data_by_levels[q_idx]['problem'])
                # questions = [data_by_levels[q_idx]['problem']]
                # print(len(completions))

                gt_answer = data_by_levels[q_idx]['answer']
                print(f"actual answer = {gt_answer}")
                is_correct = False
                for cidx, completion in enumerate(completions):
                    print(f"\n -> completion idx = {cidx}")
                    c_answer = grader.extract_last_boxed_answer(completion)
                    
                    if grader.grade_answer(c_answer, gt_answer):
                        is_correct = True
                        break

                    double_newlines = "\n\n"
                    completion_steps = completion.split(double_newlines)
                    current_text = ""
                    completion_log_probs = []
                    completion_ppls = []
                    completion_ntokens = []
                    for s_idx, step in enumerate(completion_steps):
                        solution_tokens = tokenizer(step)
                        completion_ntokens.append(len(solution_tokens.input_ids))
                        # print(s_idx)
                        # print(step)
                        current_text += step
                        convs = [
                            build_conv(question, current_text, config.system_prompt)
                        ]

                        if config.custom_chat_template is not None:
                            tokenizer.chat_template = config.custom_chat_template
                            
                        templated_convs = tokenizer.apply_chat_template(
                            convs,
                            add_generation_prompt=False,
                            continue_final_message=True,
                            tokenize=False,
                        )

                        inputs = tokenizer(templated_convs[0], return_tensors="pt").to(llm_tf.device)
                        outputs = llm_tf(**inputs, output_hidden_states=True)
            
                        # Compute otuput_log_prob
                        # Prepare labels: shift input_ids to the right by one
                        labels = inputs['input_ids'][:, 1:]   
                        shifted_logits = outputs.logits[:, :-1, :]
                        loss_fct = CrossEntropyLoss(reduction='sum')
                        completion_log_prob = -loss_fct(shifted_logits.view(-1, shifted_logits.size(-1)), labels.view(-1)).detach().cpu().numpy()
                        completion_ppl = np.exp(completion_log_prob/len(labels))
                        # print(sent_ppl)
                        # print(loss)
                        completion_log_probs.append(completion_log_prob)
                        completion_ppls.append(completion_ppl)

                    completion_log_probs = np.array(completion_log_probs)
                    completion_ppls = np.array(completion_ppls)
                    completion_ntokens = np.array(completion_ntokens)
                    print(f"completion answer = {c_answer}")
                    print(f"completion_log_probs = {completion_log_probs}")
                    print(f"completion_ppls = {completion_ppls}")
                    print(f"completion_ntokens = {completion_ntokens}, total = {np.sum(completion_ntokens)}")
                    print(completion)
                    # print(completion)
        
                # all_correctness[trial_idx][q_idx] = is_correct
                # all_correctness.append(is_correct)

            trial_idx += 1
            

    # return all_correctness


# general params
config = Config()
config.agg_strategy = 'last'
config.n = 8
config.beam_width = 2
config.lookahead = 0
config.num_iterations = 2
config.sort_completed = False

# diverse_select params
config.lam = 10
config.normalize_embeds = True

level = '4'
num_questions = len(data_by_levels[level])
num_questions = 2
num_trials = 1
print(f"num_questions = {num_questions}")

bon_dir = "results/generate_bon_prm800k_level4_n16_v11.jsonl" 
# sd_dir = f"results/generate_sd_prm800k_level{level}_n{config.n}_bw{config.beam_width}_depth{config.num_iterations}_lam{config.lam}_{config.normalize_embeds}_v11.jsonl"

# sd_dir = f"results/generate_beam_prm800k_level4_n8_bw2_depth40_v11.jsonl"
# print(sd_dir)

res = examine_completions(llm_tf, tokenizer, bon_dir, data_by_levels[level], num_trials, num_questions, config, config.n)
# res = examine_completions(llm_tf, tokenizer, sd_dir, data_by_levels[level], num_trials, num_questions, config, config.n)


num_questions = 2

-> question idx = 0
The set of points $(x,y,z)$ that satisfy
\[2x = 3y = -z\]is a line.

The set of points $(x,y,z)$ that satisfy
\[6x = -y = -4z\]is another line.

Find the angle between these lines, in degrees.
actual answer = 90^\circ

 -> completion idx = 0
completion answer = 64.04
completion_log_probs = [-646.3187 -664.826  -679.482  -686.1777 -696.1884 -700.6152 -707.6687
 -731.0852 -741.6089]
completion_ppls = [2.0294e-281 1.8610e-289 8.0301e-296 9.9271e-299 4.4590e-303 5.3296e-305
 4.6067e-308 3.1169e-318 8.3991e-323]
completion_ntokens = [ 74  55 115 114 100  47  30  51  13], total = 599
## Step 1: To find the angle between the two lines, we first need to determine the direction vectors of each line.
The direction vector of the first line is given by the coefficients of x, y, and z in the equation 2x = 3y = -z. This can be represented as (2, 3, -1).

## Step 2: Similarly, the direction vector of the second line is given by the coefficients of x, y, and z in