In [1]:
# Detect whether the notebook is running on Google Colab: the `google.colab`
# package is only importable there.
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    # Catch only the import failure; a bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and hide unrelated errors.
    IN_COLAB = False

import os, sys, gc

# Colab-only setup: mount Google Drive and switch into the project folder.
# Outside Colab this block is skipped and the working directory is left as is.
if IN_COLAB:
    from google.colab import drive
    # force_remount=True re-mounts even when Drive is already mounted.
    drive.mount("/content/gdrive", force_remount=True)
    # IPython magic: change the kernel's working directory. The relative
    # "../prompts_short" and "../logits_short" paths used later depend on it.
    %cd /content/gdrive/MyDrive/mres/GH/colab

Mounted at /content/gdrive
/content/gdrive/MyDrive/mres/GH/colab


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch as t
from torch import nn, Tensor
import torch.nn.functional as F

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm, trange
from IPython.display import clear_output

In [3]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = t.device("cuda") if t.cuda.is_available() else t.device("cpu")

def free_mem(vars):
    """Run a garbage-collection pass and release PyTorch's cached CUDA memory.

    Args:
        vars: Iterable of objects the caller is finished with. Kept for
            backward compatibility; it is not inspected or modified.

    Returns:
        None.

    Note:
        A function cannot unbind names in its caller's scope, so passing
        objects here does not free them. Callers must drop their own
        references (e.g. ``del tensor``) *before* calling this function for
        the memory to actually be reclaimed.
    """
    # The previous `for v in vars: del v` only unbound the loop variable and
    # released nothing, so it has been removed rather than kept as a decoy.
    gc.collect()
    t.cuda.empty_cache()

# Short alias -> Hugging Face Hub checkpoint id for each supported chat model.
models = dict(
    llama2="meta-llama/Llama-2-7b-chat-hf",
    llama3="meta-llama/Meta-Llama-3-8B-Instruct",
    mistral="mistralai/Mistral-7B-Instruct-v0.1",
)

# Alias of the checkpoint evaluated in this run.
model_name = "llama2"

# Load the weights in half precision, move them to `device`, and switch the
# model to evaluation mode.
model = AutoModelForCausalLM.from_pretrained(models[model_name], torch_dtype=t.float16)
model = model.to(device)
model.eval()
tokenizer = AutoTokenizer.from_pretrained(models[model_name])

# Prefix character used to build the second variant of each answer token
# (presumably the tokenizer's word-start marker -- confirm per model).
if model_name == "llama3":
    special_char = "Ġ"
else:
    special_char = "▁"

# Vocabulary ids for the bare and prefixed forms of the answers "A" and "B".
tks_A = tokenizer.convert_tokens_to_ids(["A", f"{special_char}A"])
tks_B = tokenizer.convert_tokens_to_ids(["B", f"{special_char}B"])

# Evaluation aspects; prompts are stored in columns named `prompt_<aspect>`.
aspects = ["coherence", "consistency", "fluency", "relevance"]

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [4]:
# For every prompt, generate greedily and record the probability mass the
# model puts on answer "A" vs. answer "B" at the first step where it actually
# emits one of those tokens. Results are written as two extra columns
# (`p_s1`, `p_s2`) next to the prompts.

# Token ids that count as an explicit A/B verdict. Built once as a set instead
# of concatenating the two lists again for every generated token.
answer_tks = set(tks_A + tks_B)

for dataset in ["summeval"]:
    prompts = pd.read_json(f"../prompts_short/{model_name}/{dataset}_theirs.jsonl", orient="records", lines=True)
    # Create the output folder up front so a missing directory cannot make the
    # final write fail after the whole generation pass has already run.
    os.makedirs(f"../logits_short/{model_name}", exist_ok=True)
    for aspect in ["relevance"]:
        p_s1, p_s2 = [], []
        for i in trange(len(prompts), desc=f"{dataset}:{aspect}"):
            prompt = prompts.at[i, f"prompt_{aspect}"]
            tks = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False).to(device)
            with t.no_grad():
                # Greedy decoding; temperature/top_p are set to None,
                # presumably to override sampling defaults shipped with the
                # checkpoint's generation config -- confirm.
                out = model.generate(
                    inputs=tks,
                    return_dict_in_generate=True,
                    output_logits=True,
                    max_new_tokens=32,
                    do_sample=False,
                    temperature=None,
                    top_p=None
                )
            # The last len(out["logits"]) positions of the sequence are the
            # newly generated tokens; one logits entry exists per new token.
            new_tks = out["sequences"].squeeze(0)[-len(out["logits"]):].tolist()
            for ix, tk in enumerate(new_tks):
                if tk in answer_tks:
                    P = F.softmax(out["logits"][ix], dim=-1)
                    # Sum over both token variants of each answer.
                    p_s1.append(P[:, tks_A].sum().item())
                    p_s2.append(P[:, tks_B].sum().item())
                    # `P` only exists on this branch, so release it here.
                    # Referencing it after the loop raised NameError whenever
                    # the first prompt produced no A/B token.
                    del P
                    break
            else:
                # No A/B token within the generated tokens: record a tie.
                p_s1.append(0.5)
                p_s2.append(0.5)
            # Drop our own references before collecting; passing the objects
            # to free_mem() cannot release them on our behalf.
            del tks, out
            free_mem([])
        prompts["p_s1"] = p_s1
        prompts["p_s2"] = p_s2
        prompts.to_json(f"../logits_short/{model_name}/{dataset}_{aspect}.jsonl", orient="records", lines=True)

summeval:relevance:   0%|          | 0/12000 [00:00<?, ?it/s]

In [5]:
# Disabled cell: the same generation/scoring loop as the active cell, but
# sweeping both datasets ("summeval", "newsroom") and every entry of `aspects`.
# NOTE(review): `P` is assigned only inside the `if` branch below, so the
# `free_mem([tks, out, P])` line can hit an unassigned (or stale) `P` when no
# A/B token is generated -- fix before re-enabling.
# for dataset in ["summeval", "newsroom"]:
#     prompts = pd.read_json(f"../prompts_short/{model_name}/{dataset}_theirs.jsonl", orient="records", lines=True)
#     for aspect in aspects:
#         p_s1, p_s2 = [], []
#         for i in trange(len(prompts), desc=f"{dataset}:{aspect}"):
#             prompt = prompts.at[i, f"prompt_{aspect}"]
#             tks = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False).to(device)
#             with t.no_grad():
#                 out = model.generate(
#                     inputs=tks,
#                     return_dict_in_generate=True,
#                     output_logits=True,
#                     max_new_tokens=32,
#                     do_sample=False,
#                     temperature=None,
#                     top_p=None
#                 )
#             for ix, tk in enumerate(out["sequences"].squeeze(0)[-len(out["logits"]):]):
#                 if tk.item() in tks_A + tks_B:
#                     P = F.softmax(out["logits"][ix], dim=-1)
#                     p_s1.append(P[:, tks_A].sum().item())
#                     p_s2.append(P[:, tks_B].sum().item())
#                     break
#             else:
#                 p_s1.append(0.5)
#                 p_s2.append(0.5)
#             free_mem([tks, out, P])
#         prompts["p_s1"] = p_s1
#         prompts["p_s2"] = p_s2
#         prompts.to_json(f"../logits_short/{model_name}/{dataset}_{aspect}.jsonl", orient="records", lines=True)