In [1]:
import os

# Restrict this process to physical GPUs 4-7. The mask must be in the environment
# before CUDA is initialised, so it is set before torch is imported rather than after.
# The visible devices are then renumbered from 0 (see the printed list below).
os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"

import torch

print(f"Available GPUs: {torch.cuda.device_count()}")
for i in range(torch.cuda.device_count()):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")


Available GPUs: 4
GPU 0: NVIDIA A100 80GB PCIe
GPU 1: NVIDIA A100 80GB PCIe
GPU 2: NVIDIA A100 80GB PCIe
GPU 3: NVIDIA A100 80GB PCIe


In [2]:
import torch
import torch.nn as nn
import torch.distributed as dist

# Toy model used only to check that multi-GPU wrapping works.
model = nn.Linear(10, 5)

# With more than one visible GPU, let DataParallel split each batch across them.
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    print("Using", gpu_count, "GPUs!")
    model = nn.DataParallel(model)

# Fall back to the CPU when CUDA is unavailable, then report where the weights live.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)
first_param = next(model.parameters())
print(first_param.device)

Using 4 GPUs!
cuda:0


In [3]:
print(f"Total available GPUs: {torch.cuda.device_count()}")
# `device_ids` only exists on the DataParallel wrapper; with zero or one GPU the
# model is a bare module and the attribute lookup would raise AttributeError.
device_ids = getattr(model, "device_ids", None)
if device_ids is None:
    print(f"Model is not wrapped in DataParallel; parameters are on {next(model.parameters()).device}")
else:
    print(f"Model is using {len(device_ids)} GPUs: {device_ids}")

Total available GPUs: 4
Model is using 4 GPUs: [0, 1, 2, 3]


In [8]:
import numpy as np
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer, AutoModel

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
class MergedModel(torch.nn.Module):
    """One transformer backbone feeding both a language-model head and a reward head.

    The hidden states produced by ``base_model`` are passed to
    ``causal_model.lm_head`` (next-token logits) and to ``reward_model.score``
    (per-token reward logits), so a single backbone pass serves both purposes.

    Args:
        tokenizer: object whose ``encode("<extra_0>")[0]`` is the step-separator token id.
        base_model: backbone, called as ``base_model(input_ids)``; element 0 of its
            output is used as the hidden states.
        reward_model: model exposing a ``score`` head.
        causal_model: model exposing an ``lm_head``.
        beginning_offset: distance from the end of the sequence of the first token
            scored by ``get_log_probs`` (must be >= 1).
        total_tokens: number of consecutive tokens scored by ``get_log_probs``.
        device: device the backbone and both heads are moved to.
    """

    def __init__(self, tokenizer, base_model, reward_model, causal_model, beginning_offset, total_tokens, device):
        super().__init__()
        self.tokenizer = tokenizer
        self.base_model = base_model.to(device)
        # Kept for backward compatibility. When these are nn.Modules, assigning them
        # here registers the *whole* models as submodules, so they are included in
        # .to(), state_dict() and DataParallel replication.
        self.reward_model = reward_model
        self.causal_model = causal_model
        self.lm_head = causal_model.lm_head.to(device)
        self.reward_score = reward_model.score.to(device)
        self.beginning_offset = beginning_offset
        self.total_tokens = total_tokens
        self.device = device

    def get_log_probs(self, lm_outputs, input_ids):
        """Return the LM probability of ``total_tokens`` tokens near the end of sample 0.

        Despite the name, the values are plain probabilities (callers take the log
        themselves). Only the first sequence of the batch is scored.

        Args:
            lm_outputs: next-token logits, indexed as ``[batch, position, vocab]``.
            input_ids: token ids, indexed as ``[batch, position]``.

        Returns:
            list[float]: one probability per offset in
            ``range(beginning_offset, beginning_offset + total_tokens)``.

        Raises:
            ValueError: if ``beginning_offset`` is below 1 (offset 0 would index the
                first token of the sequence instead of the last).
            IndexError: if the sequence is too short for the requested offsets.
        """
        if self.beginning_offset < 1:
            raise ValueError("beginning_offset must be >= 1")
        probs = []
        for offset in range(self.beginning_offset, self.beginning_offset + self.total_tokens):
            # Logits at position -offset-1 predict the token at position -offset.
            # Softmax only the row that is needed instead of the whole sequence.
            token_dist = torch.softmax(lm_outputs[0, -offset - 1], dim=-1)
            probs.append(token_dist[input_ids[0][-offset]].item())
        return probs

    def get_reward_scores(self, input_ids, base_outputs):
        """Return, per sample, the positive-class probability at every ``<extra_0>`` token.

        Args:
            input_ids: token ids, indexed as ``[batch, position]``.
            base_outputs: output of ``base_model``; element 0 holds the hidden states.

        Returns:
            list[list[float]]: for each sample, one probability (label index 1) per
            step-separator token, in sequence order.
        """
        # Keep the batch dimension: taking element 0 here would score every sample
        # with the logits of the first one.
        logits = self.reward_score(base_outputs[0])  # bs, seq_len, num_labels
        step_sep_id = self.tokenizer.encode("<extra_0>")[0]
        token_masks = (input_ids == step_sep_id)  # bs, seq_len
        probabilities = torch.softmax(logits, dim=-1)

        all_scores_res = []
        for sample_probs, sample_mask in zip(probabilities, token_masks):
            # Select separator positions with the mask itself rather than by testing
            # for non-zero values, which misfires if a probability underflows to 0.
            positive_probs = sample_probs[sample_mask][:, 1]
            all_scores_res.append(positive_probs.cpu().tolist())
        return all_scores_res

    def forward(self, input_ids):
        """Run the backbone once and apply both heads.

        Returns:
            tuple: ``(base_outputs, lm_outputs, reward_outputs)`` where the last two
            are the LM-head and reward-head logits computed from ``base_outputs[0]``.
        """
        base_outputs = self.base_model(input_ids)
        lm_outputs = self.lm_head(base_outputs[0])
        reward_outputs = self.reward_score(base_outputs[0])
        return base_outputs, lm_outputs, reward_outputs

    def run_merged_model(self, input_ids):
        """Score ``input_ids`` and return ``(rewards, probs)`` as plain Python lists.

        ``rewards`` comes from ``get_reward_scores`` and ``probs`` from
        ``get_log_probs``. The results are detached Python floats, so the pass runs
        under ``torch.no_grad()`` to avoid keeping the autograd graph in memory.
        """
        with torch.no_grad():
            base_outputs = self.base_model(input_ids)
            lm_outputs = self.lm_head(base_outputs[0])
            rewards = self.get_reward_scores(input_ids, base_outputs)
            probs = self.get_log_probs(lm_outputs, input_ids)
        return rewards, probs

In [11]:
# Checkpoint locations: a local merged backbone plus the two Hub models whose heads
# are reused (process reward model and instruction-tuned causal LM).
base_model_path = "/data/user_data/mert/spec/models_merged/Qwen2--qwen_7b_merged"
reward_model_name = "Qwen/Qwen2.5-Math-PRM-7B"
causal_model_name = "Qwen/Qwen2.5-7B-Instruct"

# The tokenizer is taken from the merged checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# Merged backbone, switched to inference mode.
base_model = AutoModel.from_pretrained(base_model_path)
base_model = base_model.eval()

# trust_remote_code=True runs the modelling code shipped in the Hub repository.
reward_model = AutoModel.from_pretrained(reward_model_name, trust_remote_code=True)
reward_model = reward_model.eval()

causal_model = AutoModelForCausalLM.from_pretrained(causal_model_name)
causal_model = causal_model.eval()



Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00,  1.77it/s]
Loading checkpoint shards: 100%|██████████| 4/4 [00:01<00:00,  2.12it/s]
Some weights of the model checkpoint at Qwen/Qwen2.5-Math-PRM-7B were not used when initializing Qwen2ForProcessRewardModel: {'lm_head.weight'}
- This IS expected if you are initializing Qwen2ForProcessRewardModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Qwen2ForProcessRewardModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.88it/s]


In [13]:
# Reward model's own backbone combined with both heads, kept on the CPU.
# (Original inline note: "#29899 GB" — presumably a memory reading; unit unclear.)
merged_model_reward = MergedModel(
    tokenizer=tokenizer,
    base_model=reward_model.model,
    reward_model=reward_model,
    causal_model=causal_model,
    beginning_offset=9,
    total_tokens=20,
    device='cpu',
)

In [14]:
# Wrap the reward variant for multi-GPU execution when several GPUs are visible.
# NOTE(review): the wrapped model was built with device='cpu', and the wrapper does
# not expose run_merged_model directly (it lives on `.module`) — confirm this is intended.
visible_gpus = torch.cuda.device_count()
if visible_gpus > 1:
    print("Using", visible_gpus, "GPUs!")
    merged_model_reward = nn.DataParallel(merged_model_reward)

Using 4 GPUs!


In [33]:
# Causal LM's own backbone combined with both heads.
# NOTE(review): with CUDA_VISIBLE_DEVICES="4,5,6,7" from the first cell in effect, only
# cuda:0..cuda:3 exist — confirm this ordinal. This also rebinds the global `device`.
# (Original inline note: "#29899 GB" — presumably a memory reading; unit unclear.)
device = "cuda:6"
merged_model_lm = MergedModel(
    tokenizer=tokenizer,
    base_model=causal_model.model,
    reward_model=reward_model,
    causal_model=causal_model,
    beginning_offset=9,
    total_tokens=20,
    device=device,
)

In [35]:
# Merged checkpoint used as the shared backbone for both heads.
# NOTE(review): with CUDA_VISIBLE_DEVICES="4,5,6,7" from the first cell in effect, only
# cuda:0..cuda:3 exist — confirm this ordinal. This also rebinds the global `device`.
# (Original inline note: "#29899 GB" — presumably a memory reading; unit unclear.)
device = "cuda:7"
merged_model_true = MergedModel(
    tokenizer=tokenizer,
    base_model=base_model,
    reward_model=reward_model,
    causal_model=causal_model,
    beginning_offset=9,
    total_tokens=20,
    device=device,
)

In [15]:
# Single worked example used to smoke-test the merged model: a system prompt, a
# question, and a four-step reference response.
# NOTE(review): `data` is rebound to a list of JSONL records in a later cell.
data = {
    "system": "Please reason step by step, and put your final answer within \\boxed{}.",
    "query": "Sue lives in a fun neighborhood.  One weekend, the neighbors decided to play a prank on Sue.  On Friday morning, the neighbors placed 18 pink plastic flamingos out on Sue's front yard.  On Saturday morning, the neighbors took back one third of the flamingos, painted them white, and put these newly painted white flamingos back out on Sue's front yard.  Then, on Sunday morning, they added another 18 pink plastic flamingos to the collection. At noon on Sunday, how many more pink plastic flamingos were out than white plastic flamingos?",
    "response": [
      "To find out how many more pink plastic flamingos were out than white plastic flamingos at noon on Sunday, we can break down the problem into steps. First, on Friday, the neighbors start with 18 pink plastic flamingos.",
      "On Saturday, they take back one third of the flamingos. Since there were 18 flamingos, (1/3 \\times 18 = 6) flamingos are taken back. So, they have (18 - 6 = 12) flamingos left in their possession. Then, they paint these 6 flamingos white and put them back out on Sue's front yard. Now, Sue has the original 12 pink flamingos plus the 6 new white ones. Thus, by the end of Saturday, Sue has (12 + 6 = 18) pink flamingos and 6 white flamingos.",
      "On Sunday, the neighbors add another 18 pink plastic flamingos to Sue's front yard. By the end of Sunday morning, Sue has (18 + 18 = 36) pink flamingos and still 6 white flamingos.",
      "To find the difference, subtract the number of white flamingos from the number of pink flamingos: (36 - 6 = 30). Therefore, at noon on Sunday, there were 30 more pink plastic flamingos out than white plastic flamingos. The answer is (\\boxed{30})."
    ]
}

# The assistant turn holds only the step separator; data["response"] is not used here.
messages = [
    {"role": "system", "content": data['system']},
    {"role": "user", "content": data['query']},
    {"role": "assistant", "content": "<extra_0>"},
]
# Render the chat as a plain string (no tokenization, no generation prompt appended).
conversation_str = tokenizer.apply_chat_template(
    messages, 
    tokenize=False, 
    add_generation_prompt=False
)

# Tokenize to a PyTorch tensor and move it to whatever the global `device` currently is.
input_ids = tokenizer.encode(
    conversation_str, 
    return_tensors="pt", 
).to(device)

In [9]:
# NOTE(review): `merged_model` is not assigned in any visible cell (only
# merged_model_reward / merged_model_lm / merged_model_true are) — confirm which is meant.
# Inference only: skip autograd bookkeeping so activations are not kept alive.
with torch.no_grad():
    merged_model_outputs = merged_model(input_ids)

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


In [11]:
# Shape of element 0 of the backbone output returned by forward() (the tensor both heads consume).
merged_model_outputs[0][0].shape

torch.Size([1, 152, 3584])

In [12]:
# Shapes of the LM-head logits and the reward-head logits returned by forward().
merged_model_outputs[1].shape,merged_model_outputs[2].shape

(torch.Size([1, 152, 152064]), torch.Size([1, 152, 2]))

In [13]:
# Convert the backbone output from forward() into per-sample lists of reward
# probabilities at the "<extra_0>" positions.
rewards = merged_model.get_reward_scores(input_ids, merged_model_outputs[0])

In [14]:
# Reward list of the first sample in the batch.
rewards[0]

[0.9999293088912964]

In [27]:
# Re-encode the prompt and place it on the device the model was built on, instead of
# a hard-coded ordinal that can disagree with the model's location.
# NOTE(review): `merged_model` is not assigned in any visible cell — confirm which model is meant.
input_ids = tokenizer.encode(
    conversation_str,
    return_tensors="pt",
).to(merged_model.device)
# Inference only: no autograd graph needed.
with torch.no_grad():
    full_outputs = merged_model.run_merged_model(input_ids)

In [16]:
# Reward list of the first sample: element 0 of the (rewards, probs) pair from run_merged_model.
full_outputs[0][0]

[0.9999293088912964]

In [16]:
# Move the inputs to merged_model_lm's own device (input_ids may have been created for
# a different one) and run without autograd, since only Python floats are returned.
with torch.no_grad():
    merged_model_lm_outputs = merged_model_lm.run_merged_model(input_ids.to(merged_model_lm.device))

In [19]:
# Sum of log-probabilities over the tokens scored by merged_model_lm.
torch.tensor(merged_model_lm_outputs[1]).log().sum()

tensor(-37.7755)

In [28]:
# Sum of log-probabilities over the tokens scored in full_outputs.
torch.tensor(full_outputs[1]).log().sum()

tensor(-74.7696)

In [20]:
# NOTE(review): `merged_model` is not assigned in any visible cell — confirm which model is meant.
# Inputs are moved to the model's own device; inference runs without autograd.
with torch.no_grad():
    merged_model_outputs = merged_model.run_merged_model(input_ids.to(merged_model.device))

In [14]:
# Per-sample reward lists returned by run_merged_model.
merged_model_outputs[0]

[[0.987575113773346]]

In [21]:
# Token probabilities returned by get_log_probs (plain probabilities, not logarithms).
merged_model_outputs[1]

[0.04701383039355278,
 0.9998300075531006,
 0.9999212026596069,
 0.007626536302268505,
 0.3927419185638428,
 0.0262359119951725,
 0.13296392560005188,
 0.17616400122642517,
 0.9989714622497559,
 0.999958872795105,
 0.0020326192025095224,
 0.9732446670532227,
 3.063670703795651e-07,
 0.9358073472976685,
 0.007398264016956091,
 0.05035797879099846,
 0.9429806470870972,
 0.20274925231933594,
 0.037123166024684906,
 0.00041125129791907966]

In [30]:
import json
file_path = "/home/mert/spec/search-and-learn/data_iclr/beam_search/AMead10Llama-3.2-3B-Instruct-AWQ-_n4_b0.1_q500_period0_model_len10000_1.jsonl"
# Read the JSONL file: one JSON record per line. Blank lines (e.g. a trailing newline)
# are skipped instead of crashing json.loads, and the encoding is explicit so the
# result does not depend on the machine's locale.
# NOTE(review): this rebinds `data`, which an earlier cell uses for a single example dict.
with open(file_path, 'r', encoding='utf-8') as f:
    data = [json.loads(line) for line in f if line.strip()]

In [None]:
# Score every record's `solution` with the reward head: the assistant turn ends with
# one "<extra_0>" separator, and the first reward of the first sample is kept.
# NOTE(review): `merged_model` is not assigned in any visible cell — confirm which model is meant.
scores = []
from tqdm import tqdm
# Inference only: without no_grad each forward pass keeps its autograd graph alive.
with torch.no_grad():
    for i in tqdm(range(len(data))):
        messages = [
            {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
            {"role": "user", "content": data[i]['problem']},
            {"role": "assistant", "content": data[i]['solution']+"<extra_0>"},
        ]
        conversation_str = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False
        )
        # Use the model's own device rather than the global `device`, which several
        # cells rebind.
        input_ids = tokenizer.encode(
            conversation_str,
            return_tensors="pt",
        ).to(merged_model.device)
        merged_model_outputs = merged_model.run_merged_model(input_ids)
        score = merged_model_outputs[0][0][0]
        scores.append(score)

  0%|          | 0/500 [00:00<?, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


100%|██████████| 500/500 [02:44<00:00,  3.04it/s]


Original reward model: mean and standard deviation of the PRM scores on solutions

In [8]:
# Mean, then standard deviation, of the solution scores (one value per line).
print(np.mean(scores), np.std(scores), sep="\n")

0.6182246066136285
0.3414839653945324


Merged model used as reward model: mean and standard deviation of the PRM scores on solutions

In [29]:
# Mean, then standard deviation, of the solution scores (one value per line).
print(np.mean(scores), np.std(scores), sep="\n")

0.8356026131510734
0.11564540849264689


In [11]:
# Score every record's `answer` with the reward head: the assistant turn ends with
# one "<extra_0>" separator, and the first reward of the first sample is kept.
# NOTE(review): `merged_model` is not assigned in any visible cell — confirm which model is meant.
scores_of_answers = []
from tqdm import tqdm
# Inference only: without no_grad each forward pass keeps its autograd graph alive.
with torch.no_grad():
    for i in tqdm(range(len(data))):
        messages = [
            {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
            {"role": "user", "content": data[i]['problem']},
            {"role": "assistant", "content": data[i]['answer']+"<extra_0>"},
        ]
        conversation_str = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False
        )
        # Use the model's own device rather than the global `device`, which several
        # cells rebind.
        input_ids = tokenizer.encode(
            conversation_str,
            return_tensors="pt",
        ).to(merged_model.device)
        merged_model_outputs = merged_model.run_merged_model(input_ids)
        score = merged_model_outputs[0][0][0]
        scores_of_answers.append(score)
        # Drop per-example tensors and release cached GPU blocks between iterations.
        del merged_model_outputs
        del input_ids
        torch.cuda.empty_cache()

100%|██████████| 500/500 [01:18<00:00,  6.38it/s]


Original reward model: mean and standard deviation of the PRM scores on answers

In [12]:
# Mean, then standard deviation, of the answer scores (one value per line).
print(np.mean(scores_of_answers), np.std(scores_of_answers), sep="\n")

0.32676101229246707
0.2880349981034212


Merged model: mean and standard deviation of the PRM scores on answers

In [31]:
# Mean, then standard deviation, of the answer scores (one value per line).
print(np.mean(scores_of_answers), np.std(scores_of_answers), sep="\n")

0.6660591802299023
0.14266165240892736


In [37]:
# For every record's `solution`, keep element 0 of the probability list returned by
# run_merged_model, i.e. the LM probability of the token at `beginning_offset` from the end.
probs = []
from tqdm import tqdm
# Inference only. Running these forward passes with autograd enabled retains the
# activation graph, which needlessly adds to GPU memory pressure; this cell
# previously ended in a CUDA out-of-memory error (another process was also
# holding memory on that GPU).
with torch.no_grad():
    for i in tqdm(range(len(data))):
        messages = [
            {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
            {"role": "user", "content": data[i]['problem']},
            {"role": "assistant", "content": data[i]['solution']+"<extra_0>"},
        ]
        conversation_str = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False
        )
        input_ids = tokenizer.encode(
            conversation_str,
            return_tensors="pt",
        ).to(merged_model_lm.device)
        merged_model_outputs = merged_model_lm.run_merged_model(input_ids)
        prob = merged_model_outputs[1][0]
        probs.append(prob)
        # Drop per-example tensors and release cached GPU blocks between iterations.
        del input_ids
        del merged_model_outputs
        torch.cuda.empty_cache()
# Total log-probability, then mean and standard deviation of the raw probabilities.
print(np.sum(np.log(np.array(probs))))
print(np.mean(probs))
print(np.std(probs))


  0%|          | 0/500 [00:00<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 6 has a total capacity of 79.14 GiB of which 5.19 MiB is free. Including non-PyTorch memory, this process has 48.66 GiB memory in use. Process 2379404 has 30.45 GiB memory in use. Of the allocated memory 47.80 GiB is allocated by PyTorch, and 384.50 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [33]:
import pandas as pd

# Hand-entered summary of PRM scores on final answers, rounded to three decimals.
stats_answers = {
    "Model": ["qwen (merged) math answers", "mistral answers", "deepseek answers"],
    "Average PRM score": [0.666, 0.401, 0.648],
    "Std of PRM scores": [0.143, 0.182, 0.309],
}

# Same summary for full solutions.
stats_solutions = {
    "Model": ["qwen (merged) math solutions", "mistral solutions", "deepseek solutions"],
    "Average PRM score": [0.836, 0.634, 0.481],
    "Std of PRM scores": [0.116, 0.213, 0.348],
}

# One table per summary.
df_stats_answers, df_stats_solutions = (
    pd.DataFrame(table) for table in (stats_answers, stats_solutions)
)

# Print each table under its heading.
for heading, frame in (
    ("Answers Statistics:", df_stats_answers),
    ("\nSolutions Statistics:", df_stats_solutions),
):
    print(heading)
    print(frame)


Answers Statistics:
                        Model  Average PRM score  Std of PRM scores
0  qwen (merged) math answers              0.666              0.143
1             mistral answers              0.401              0.182
2            deepseek answers              0.648              0.309

Solutions Statistics:
                          Model  Average PRM score  Std of PRM scores
0  qwen (merged) math solutions              0.836              0.116
1             mistral solutions              0.634              0.213
2            deepseek solutions              0.481              0.348
