In [1]:
import torch
import os 
import json
json_path = 'env_config.json'
with open(json_path, 'r') as file:
    env_config = json.load(file)

hf_home = env_config['HF_HOME']
# Set the HF_HOME environment variable
os.environ['HF_HOME'] = hf_home
# Set the access token to huggingface hub
access_token = env_config['access_token']
os.environ['HUGGINGFACE_HUB_TOKEN'] = access_token


In [None]:
from llmexp.llm.smollm import LLMWrapper, Template
from accelerate import Accelerator

accelerator = Accelerator()
device = accelerator.device

# checkpoint = "meta-llama/Llama-3.2-1B-Instruct"
checkpoint = "HuggingFaceTB/SmolLM-1.7B-Instruct"
# checkpoint = "meta-llama/Llama-3.2-3B-Instruct"
# checkpoint = "meta-llama/Meta-Llama-3-8B-Instruct"
llm = LLMWrapper(checkpoint, device=device, access_token=access_token)
tokenizer = llm.tokenizer

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [3]:
from llmexp.explainer.mab_explainer import MABExplainer
from llmexp.utils.data_utils import LLMDataset
import numpy as np
from llmexp.utils.hotpot_helper import HotpotHelper, HotpotSample

mab_explainer = MABExplainer(llm, tokenizer)

dataset = LLMDataset("hotpot_qa", split="test")

In [4]:
import random 

random.seed(42)
K = 1000
total_samples = len(dataset)
sampled_indices = random.sample(range(total_samples), K)
test_data = [dataset[i] for i in sampled_indices]


In [None]:
from tqdm import tqdm
from typing import List
from llmexp.utils.evaluation import calculate_avg_log_prob_diff, calculate_bertscore

def evaluate_sentence_scores(sentences: List[str], response: str, theta: np.ndarray, k: int = 3):
    # original log likelihood
    full_sentence_log_likelihood = mab_explainer.get_log_likelihood(sentences, response)
    # remove the top k sentences
    non_top_k_indices = np.argsort(theta)[:-k]
    non_top_k_sentences = [sentences[i] for i in non_top_k_indices]
    
    # get the log likelihood of the non-top k sentences
    non_top_k_log_likelihood = mab_explainer.get_log_likelihood(non_top_k_sentences, response)

    # get the average log probability difference
    return calculate_avg_log_prob_diff(non_top_k_log_likelihood, full_sentence_log_likelihood)

def random_sampling(sentences: List[str], response: str, k: int = 3):
    # random sample K sentences
    random_indices = np.random.choice(len(sentences), k, replace=False)
    random_sentences = [sentences[i] for i in random_indices]
    # get the response logits
    response_logits = mab_explainer.get_response_logits(random_sentences, response)
    # get the rewards
    rewards = mab_explainer.get_reward(response_logits, response)
    return calculate_avg_log_prob_diff(rewards, full_sentence_rewards)

def evaluate_lds(sentences: List[str], response: str, theta: np.ndarray, k: int = 3):
    # original rewards
    full_sentence_rewards, empty_sentence_rewards = mab_explainer.get_baseline_rewards(sentences, response)
    # remove the top k sentences
    top_k_indices = np.argsort(theta)[-k:]
    top_k_sentences = [sentences[i] for i in top_k_indices]
    # get the response logits
    response_logits = mab_explainer.get_response_logits(top_k_sentences, response)
    # get the rewards
    rewards = mab_explainer.get_reward(response_logits, response)
    return linear_datamodeling_score(full_sentence_rewards.exp().squeeze(0).cpu().numpy(), rewards.exp().squeeze(0).cpu().numpy())

scores = []

for idx, sample in enumerate(test_data):
    hpsample = HotpotSample(sample)
    sentences = hpsample.flattened_contexts
    question = hpsample.question

    response = mab_explainer.get_response(sentences + [question])
    theta = mab_explainer.thompson_sampling(sentences, response, n_iter=256)
    diff = evaluate_sentence_scores(sentences, response, theta)
    # lds = evaluate_lds(sentences, response, theta)
    # scores.append(lds)
    scores.append(diff.cpu().numpy())
    
    if idx % 100 == 0:
        print(np.mean(scores))

    

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


-0.09017444


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [7]:
print(np.mean(scores))

-0.4799199
