In [1]:
import os, psutil, gc
import time 
import json
import pprint

from collections import defaultdict
import random

In [2]:
import torch 
from transformers import AutoModelForCausalLM, AutoTokenizer
from vllm import LLM, SamplingParams, PoolingParams

from sal.config import Config
from sal.models.reward_models import PRM
from sal.utils.score import score, aggregate_scores

from datasets import Dataset, load_dataset

from core.reward_models import RLHFFlow

from core import best_of_n
from utils.load_data import load_data_prm800k

from utils import load_data

In [3]:
# base_dir
base_dir = '/groups/kjun/tnn/datasets/'

# dataset path
data_dir = base_dir + "/prm800k/math_splits"

# llm and prm path
llm_dir = base_dir + "/Llama-3.2-1B-Instruct-GGUF/Llama-3.2-1B-Instruct.Q4_K_M.gguf"
prm_dir = base_dir + "/Llama3.1-8B-PRM-Deepseek-Data-GGUF/Llama3.1-8B-PRM-Deepseek-Data.Q4_K_M.gguf"

llm_tokenizer_dir = base_dir + "/Llama-3.2-1B-Instruct"
prm_tokenizer_dir = base_dir + "/Llama3.1-8B-PRM-Deepseek-Data"

In [4]:
os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2,3"
if torch.cuda.is_available():
    GPUs = os.environ.get('CUDA_VISIBLE_DEVICES', "0").split(',')
    print(f"GPUs = {GPUs}")
else:
    print("CUDA is not available.") 

GPUs = ['0', '1', '2', '3']


In [5]:
prm = RLHFFlow(model_path=prm_tokenizer_dir, device_map='cuda:3')

gc.collect();torch.cuda.empty_cache();
print('#--- memory:', torch.cuda.memory_allocated(0)/(1024**3))
print('#--- memory:', torch.cuda.memory_allocated(1)/(1024**3))
# print('#--- memory:', torch.cuda.memory_allocated(2)/(1024**3))
# print('#--- memory:', torch.cuda.memory_allocated(3)/(1024**3))

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

#--- memory: 0.0
#--- memory: 0.0


In [6]:
# del(prm)
gc.collect();torch.cuda.empty_cache();
print('#--- memory:', torch.cuda.memory_allocated(0)/(1024**3))
print('#--- memory:', torch.cuda.memory_allocated(1)/(1024**3))

#--- memory: 0.0
#--- memory: 0.0


In [7]:
import importlib
importlib.reload(load_data)

<module 'utils.load_data' from '/home/u20/tnguyen9210/tnn1/LLMs/llm-reasoning-methods/utils/load_data.py'>

In [8]:
config = Config()

#  load data 
data_by_levels = load_data_prm800k(data_dir)
# print(data_by_levels)

# print(config.dataset_name)
orig_dataset = load_dataset(config.dataset_name, split='test', cache_dir=data_dir)
print(orig_dataset)
# stop
# for data in dataset:
#     pprint.pprint(data)
#     stop
# ds_completions = load_completions(completions_dir)

# load random_seeds     
# random_seeds = np.loadtxt("random_seeds.txt").astype("int64")
# random_seeds = [int(seed) for seed in random_seeds]

1: 43
2: 90
3: 105
4: 128
5: 134
Dataset({
    features: ['problem', 'solution', 'answer', 'subject', 'level', 'unique_id'],
    num_rows: 500
})


In [9]:
stop

NameError: name 'stop' is not defined

In [13]:
# general params
config = Config()
config.agg_strategy = 'last'
config.n = 8
config.beam_width = 4
config.lookahead = 0
config.num_iterations = 40
config.sort_completed = False
config.seed = 0
config.version = "v21"

# diverse_select params
config.lam = 10
config.normalize_embeds = True

config.num_proc = 12 

dataset_id = "tnguyen9210/LLM-Reasoning-Math-500"
level = 4
num_questions = len(data_by_levels[level])
# num_questions = 2
num_trials = 5
print(f"num_questions = {num_questions}")

# get batch of questions
batch_of_questions = [data_by_levels[level][q_idx]['problem'] for q_idx in range(num_questions)]
orig_dataset_by_level = orig_dataset.filter(lambda example: example['level'] == int(level))
orig_dataset_by_level = orig_dataset_by_level.select(range(num_questions))
print(orig_dataset_by_level)
# for data in orig_dataset_by_level:
#     print(data)

# load completions
# config_name = f"sd--n-{config.n}--bw-{config.beam_width}--depth-{config.num_iterations}--lam-{config.lam}--{config.normalize_embeds}--seed-{config.seed}--level-{level}--{config.version}"
config_name = "sd--n-16--bw-2--depth-2--lam-10--True--seed-0--level-4--v21"
config_name = "sd--n-8--bw-2--d-2--lam-10--True--level-4--v21"
config_name = "sd--n-8--bw-2--d-2--lam-10--True--level-4--v21"
config_name = "sd--n-8--bw-2--d-10--lam-10--True--level-4--v11"

completions_dir = f"results/generate_{config_name}.jsonl"
scores_dir = f"results/scores_{config_name}.jsonl"
start_idx = 1

# compute results
start_time = time.time()    
with open(completions_dir, 'r', encoding = 'utf-8') as fin:
    trial_idx = 0
    for line in fin:
        
        if trial_idx >= num_trials:
            break

        if trial_idx < start_idx:
            trial_idx += 1
            continue
            
        trial_data = json.loads(line)
        # print(len(trial_data["completions"][0]))
        # stop
        completions = [trial_data["completions"][q_idx] for q_idx in range(num_questions)]
        
        # print(f"{len(completions)}")
        # print(f"len = {len(completions[0])}")
        # stop

        scores = prm.score(batch_of_questions, completions)
        agg_scores = [
            [aggregate_scores(s, config.agg_strategy) for s in score] for score in scores
        ]
        print(len(scores))
        print(len(scores[0]))
        _orig_dataset_by_level = orig_dataset_by_level.add_column("completions", completions)
        _orig_dataset_by_level = _orig_dataset_by_level.add_column("scores", scores)
        print(_orig_dataset_by_level)

        # for data in orig_dataset_by_level:
        #     print(data.keys())
            # print(data["completions"])
                
        _orig_dataset_by_level = score(_orig_dataset_by_level, config)
        # for data in orig_dataset_by_level:
        #     print(data)
        #     stop

        # _orig_dataset_by_level.push_to_hub(dataset_id, config_name=f"{config_name}--trial-{trial_idx}", split='test')
        _orig_dataset_by_level.to_json(f"results/{config_name}--trial-{trial_idx}.jsonl")
        
        # compute the time
        if trial_idx % 1 == 0:
            total_time = time.time() - start_time
            time_per_trial = total_time/(trial_idx+1)
            time_per_question = time_per_trial/num_questions
            print(f"trial {trial_idx}")
            print(f"it takes {time_per_question:0.4f}s per question")
            print(f"it takes {time_per_trial:0.4f}s per trial")

        trial_idx += 1
        

total_time = time.time() - start_time
print(f"it takes {total_time:0.4f}s in total")

num_questions = 128
Dataset({
    features: ['problem', 'solution', 'answer', 'subject', 'level', 'unique_id'],
    num_rows: 128
})


OutOfMemoryError: CUDA out of memory. Tried to allocate 9.81 GiB. GPU 3 has a total capacity of 31.73 GiB of which 8.41 GiB is free. Including non-PyTorch memory, this process has 23.32 GiB memory in use. Of the allocated memory 15.59 GiB is allocated by PyTorch, and 7.36 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
dataset_id = "tnguyen9210/LLM-Reasoning-Math-500"
orig_dataset_by_level.push_to_hub(dataset_id, config_name=config_name, split='test')
orig_dataset_by_level.to_json(f"results/{config_name}.jsonl")