In [1]:
import os
import json 
import time 
import re
import random
import numpy as np 
from tqdm.auto import tqdm
from util.utils import set_seed, read_data, save_result, get_answer_from_text, chat_huggingface
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Expose only physical GPU 7 to this process; inside the notebook it is then
# addressed as "cuda:0". This has to be set before CUDA is first initialised.
os.environ.update(CUDA_VISIBLE_DEVICES="7")

# A single seed for the whole notebook. It is also embedded in the names of the
# output files written further down, so changing it changes those paths.
seed = 42
set_seed(seed)  # project helper; presumably seeds random / numpy / torch - TODO confirm

  from .autonotebook import tqdm as notebook_tqdm


## 1. Sample Failure

In [2]:
# Input files: the dataset records and the model generations for them
# (judging by the file names: GSM8K train split, Llama-3-8B-Instruct zero-shot CoT).
path_input = "dataset/GSM8K_train.jsonl"
path_result = "output/GSM8K_Llama-3-8B-Instruct_zeroshot_CoT_train.jsonl"

data = read_data(path_input)
results = read_data(path_result)

sample_portion = 1.0  # Fraction of the result records to inspect (0.0 - 1.0)
sample_k = int(np.floor(sample_portion * len(results)))

# The sampled values are used as positions into `results` in the next cell, so
# the population must be `results` as well. Drawing from `len(data)` (as before)
# either raises IndexError or silently skips records whenever the two files
# differ in length. When the lengths match, the draw is identical.
sample_indices = np.random.choice(np.arange(len(results)), size=sample_k, replace=False)

# Failure records are written here; seed and portion are part of the name so
# that different sampling runs do not overwrite each other.
path_output = f"failure/GSM8K_Llama-3-8B-Instruct_zeroshot_CoT_train_seed{seed}_portion{sample_portion}.jsonl"
print(f"{len(sample_indices)} samples")

7473 samples


In [3]:
# Both paths are stored inside every failure record. `path_tensor` is where the
# question embeddings are saved later in this notebook; `path_revision` is
# presumably where the advice from section 2 is stored - TODO confirm.
path_tensor = f"memory/GSM8K_Llama-3-8B-Instruct_zeroshot_CoT_train_seed{seed}_portion{sample_portion}.pt"
path_revision = f"memory/GSM8K_Llama-3-8B-Instruct_zeroshot_CoT_train_seed{seed}_portion{sample_portion}.jsonl"

# Collect every sampled record whose predicted answer differs from the reference.
fail_list = []
for index in tqdm(sample_indices):
    result = results[index]
    answer = result.get("answer", "")
    pred_ans = result.get("pred_ans", "")

    if answer == pred_ans:
        continue

    # Each result carries the position of its source record in `data`.
    # NOTE(review): a missing "index" silently falls back to record 0 - confirm
    # that this default is intended.
    source = data[result.get("index", 0)]

    # "A" holds the model reply as a dict with a "content" field. Fall back to an
    # empty dict (not an empty string) so a missing or null reply yields "" instead
    # of raising AttributeError on `str.get`.
    reply = result.get("A") or {}

    # NOTE(review): the keys "reasnoing" / "fail_reasnoing" are misspelled, but they
    # define the on-disk schema, so they are kept as-is here.
    fail_list.append({"index": int(index),
                      "question": source["question"],
                      "answer": answer,
                      "reasnoing": source["reasoning"],
                      "fail_answer": pred_ans,
                      "fail_reasnoing": reply.get("content", ""),
                      # Row of this failure in the embedding tensor saved at
                      # `path_tensor`: its position in `fail_list`.
                      "tensor_index": len(fail_list),
                      "tensor_path": path_tensor,
                      "revision_path": path_revision})

# Guard the ratio so an empty sample (e.g. sample_portion = 0) does not raise
# ZeroDivisionError.
n_samples = len(sample_indices)
fail_rate = len(fail_list) / n_samples * 100 if n_samples else 0.0
print(f"Total samples: {n_samples}, Failures: {len(fail_list)}, Percentage: {fail_rate:.2f}%")

# NOTE(review): if `save_result` appends to the file, re-running this cell
# duplicates the records in `path_output` - confirm and clear the file first if so.
for fail in fail_list:
    save_result(fail, path_output)

100%|██████████| 7473/7473 [00:00<00:00, 550532.79it/s]

Total samples: 7473, Failures: 1751, Percentage: 23.43%





## 2. Get Advice from GPT-o3

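__< Prompt used (submitted together with the failure output .jsonl file from Section 1) >__

_Note: the records written in Section 1 spell these keys `reasnoing` and `fail_reasnoing`, whereas the prompt below refers to them as "reasoning" and "fail_reasoning"._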

Each dictionary has a correct "reasoning" and "answer" for the "question" and a "fail_reasoning" and "fail_answer" that the model incorrectly generated. 

"answer" 10086100100.0 means no answer was given.

For each question,  please give advice on how to correct the errors in fail_reasoning compared to reasoning so that an answer other than fail_answer can be derived.  

Advice include effective methods to make question easier to deduce the correct answer without mentioning answer, fail_answer and numbers in reasoning and fail reasoning directly. 

Also please give as variable advice as possible. 

Give the generated advice in the form of a .jsonl file.

## 3. Extract Fail Question Latent Vector


Using the Llama-3-8B-Instruct model.

In [4]:
# Never keep the access token in the notebook: read it from the environment.
# When HF_TOKEN is unset this is None, and the Hugging Face libraries fall back
# to their own credential lookup (e.g. a token cached by `huggingface-cli login`).
HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN")

model_path = "meta-llama/Meta-Llama-3-8B-Instruct"
device = "cuda:0" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_path, token=HUGGINGFACE_TOKEN)
# If the tokenizer defines no padding token, reuse EOS so padding is possible.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    token=HUGGINGFACE_TOKEN,
    torch_dtype=torch.bfloat16,  # weights and activations are bfloat16
)
# Clear the sampling parameters of the shipped generation config.
model.generation_config.temperature = None
model.generation_config.top_p = None
model.eval()  # inference only
model = model.to(device)

Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 81.70it/s]


In [5]:
# extract the last latent representation(vector) from the question
fail_question_vectors = []

for fail in tqdm(fail_list):
    question = fail["question"]
    inputs = tokenizer(question, return_tensors="pt").to(device)
    
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)
        hidden_states = outputs.hidden_states[-1]  # Get the last layer's hidden states
        hidden = hidden_states.mean(dim=1).squeeze(0)      # -> [dim], still bfloat16
        hidden = hidden.to(torch.float32)                  # bfloat16 → float32
        question_vector = hidden.cpu().numpy()  
        #question_vector = hidden_states.mean(dim=1).squeeze().cpu().numpy()  # Average pooling
        
    fail_question_vectors.append(question_vector)
    
# Save the question vectors to a tensor file
torch.save(torch.tensor(fail_question_vectors), path_tensor)

100%|██████████| 1751/1751 [01:10<00:00, 24.72it/s]
  torch.save(torch.tensor(fail_question_vectors), path_tensor)


In [6]:
# Sanity check: reload the saved embeddings and show their type, shape and dtype.
# The file only contains a plain tensor, so `weights_only=True` is sufficient and
# avoids running the full (arbitrary-code) pickle machinery on load.
fail_memories = torch.load(path_tensor, weights_only=True)
print(type(fail_memories), fail_memories.shape, fail_memories.dtype)

<class 'torch.Tensor'> torch.Size([1751, 4096]) torch.float32
