In [1]:
# Detect whether the notebook is running on Google Colab by probing for the
# `google.colab` package; IN_COLAB gates the Drive mount further down.
try:
    import google.colab  # noqa: F401 -- imported only to test availability
    IN_COLAB = True
except ImportError:
    # Catch only a missing package: a bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and hide unrelated failures.
    IN_COLAB = False

import os, sys, gc

# Colab only: mount Google Drive under /content/gdrive and switch the working
# directory to the project folder. The later cells read and write through
# relative paths (e.g. "../prompts_short/..."), which resolve against the
# directory selected here.
if IN_COLAB:
    from google.colab import drive
    drive.mount("/content/gdrive", force_remount=True)
    # IPython line magic (not plain Python): changes the kernel's working directory.
    %cd /content/gdrive/MyDrive/mres/GH/colab

Mounted at /content/gdrive
/content/gdrive/MyDrive/mres/GH/colab


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch as t
from torch import nn, Tensor
import torch.nn.functional as F

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm, trange
from IPython.display import clear_output

In [3]:
device = t.device("cuda" if t.cuda.is_available() else "cpu")

def free_mem(vars):
    """Run the garbage collector and release PyTorch's cached CUDA memory.

    Args:
        vars: Accepted for backward compatibility and otherwise ignored.
            A function cannot unbind names in its caller's scope, so the
            previous `for v in vars: del v` only deleted the loop variable
            and freed nothing. Callers must `del` their own references
            *before* calling this for the memory to become reclaimable.

    Returns:
        None.
    """
    # Collect unreachable Python objects first, so tensors that were only kept
    # alive by reference cycles are destroyed before the cache is flushed.
    gc.collect()
    t.cuda.empty_cache()

# Short name -> checkpoint identifier handed to `from_pretrained`.
models = dict(
    llama2="meta-llama/Llama-2-7b-chat-hf",
    llama3="meta-llama/Meta-Llama-3-8B-Instruct",
    mistral="mistralai/Mistral-7B-Instruct-v0.1",
)

# Pick the checkpoint to run; the same key is reused in the prompt/output paths.
model_name = "mistral"
model = AutoModelForCausalLM.from_pretrained(
    models[model_name],
    torch_dtype=t.float16,
).to(device)
model.eval()
tokenizer = AutoTokenizer.from_pretrained(models[model_name])

# Vocabulary ids of the two answer letters: the first token each letter encodes
# to when no special tokens are added. They index the logits in the scoring loop.
tk_A, tk_B = (
    tokenizer.encode(letter, add_special_tokens=False)[0]
    for letter in ("A", "B")
)
# Evaluation aspects; each one has a matching `prompt_<aspect>` column.
aspects = ["coherence", "consistency", "fluency", "relevance"]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# For every dataset and aspect, run each prompt through the model and record the
# two-way probability of answering "A" vs "B" at the final position, then write
# one JSONL file per (dataset, aspect).
for dataset in ["summeval", "newsroom"]:
    prompts = pd.read_json(f"../prompts_short/{model_name}/{dataset}_theirs.jsonl", orient="records", lines=True)

    # Create the output directory *before* inference: otherwise a missing folder
    # is only discovered by `to_json` after a whole aspect has been computed.
    out_dir = f"../logits_short/{model_name}"
    os.makedirs(out_dir, exist_ok=True)

    for aspect in aspects:
        p_s1, p_s2 = [], []
        for i in trange(len(prompts), desc=f"{dataset}:{aspect}"):
            prompt = prompts.at[i, f"prompt_{aspect}"]
            tks = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False).to(device)
            with t.no_grad():
                out = model(tks)
            # keep only the last position's logits for the "A" and "B" tokens
            logits = out["logits"][:, -1, [tk_A, tk_B]]
            # softmax over just these two logits, so p_s1 + p_s2 == 1
            P = F.softmax(logits, dim=-1)
            p_s1.append(P[:, 0].item())
            p_s2.append(P[:, 1].item())
            # Drop our own references (including `out`, which holds the logits
            # for every position) before collecting: free_mem cannot unbind
            # names that live in this scope, so passing them in frees nothing.
            del tks, out, logits, P
            free_mem([])
        # Columns are overwritten on each aspect; every file therefore carries
        # the probabilities of the aspect named in its filename.
        prompts["p_s1"] = p_s1
        prompts["p_s2"] = p_s2
        prompts.to_json(f"{out_dir}/{dataset}_{aspect}.jsonl", orient="records", lines=True)

summeval:coherence:   0%|          | 0/12000 [00:00<?, ?it/s]

summeval:consistency:   0%|          | 0/12000 [00:00<?, ?it/s]

summeval:fluency:   0%|          | 0/12000 [00:00<?, ?it/s]

summeval:relevance:   0%|          | 0/12000 [00:00<?, ?it/s]

newsroom:coherence:   0%|          | 0/1260 [00:00<?, ?it/s]

newsroom:consistency:   0%|          | 0/1260 [00:00<?, ?it/s]

newsroom:fluency:   0%|          | 0/1260 [00:00<?, ?it/s]

newsroom:relevance:   0%|          | 0/1260 [00:00<?, ?it/s]