In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (n

In [4]:
def compute_log_likelihood_for_choice(context, choice):
    full_input_text = context + choice
    input_ids = tokenizer.encode(full_input_text, return_tensors='pt').to(device)
    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
        log_likelihood = -outputs.loss.item()
    
    return log_likelihood

def calculate_log_likelihoods_for_choices(context, choices):
    log_likelihoods = {}
    for choice in choices:
        log_likelihoods[choice] = compute_log_likelihood_for_choice(context, choice)
    return log_likelihoods

In [5]:
#question = ("콜라독립 815(815 Cola)는 코카콜라를 대한민국에서 라이선스로 만들어 오던 범양식품이 1998년 4월 1일에 출시한 탄산음료다. 답변:")
#choices = [" 아니오", " 예"]

question = ("맥주의 저장 시 숙성기간 동안 단백질은 무엇과 결합하여 침전하는가? 답변:")
choices = [" 맥아", " 세균", " 탄닌", " 효모"]

In [6]:
log_likelihoods = calculate_log_likelihoods_for_choices(question, choices)
print(log_likelihoods)

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


{' 맥아': -3.5799646377563477, ' 세균': -3.668670177459717, ' 탄닌': -3.43269681930542, ' 효모': -3.569061040878296}
