In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and the causal-LM weights for the 'gpt2-xl' checkpoint.
# Both objects are module-level globals that the evaluation cells further down
# call directly (tokenizer(...) and model(**inputs)).
tokenizer = AutoTokenizer.from_pretrained('gpt2-xl')
model = AutoModelForCausalLM.from_pretrained('gpt2-xl')



In [5]:
def decoded_ids_align(inputs, decoded, outputs):
    """Map prompt tokens that contain a candidate answer to their token ids.

    Args:
        inputs: Mapping with an ``'input_ids'`` entry holding the ids of ONE
            tokenized prompt (the notebook passes the tokenizer result created
            with ``return_tensors="pt"``). Any array/tensor exposing
            ``reshape`` works.
        decoded: Token strings of the same prompt, in the same order as the ids.
        outputs: Iterable of candidate answer strings.

    Returns:
        dict: ``{token_string: token_id}`` for every prompt token that contains
        at least one candidate as a substring. Keys keep first-occurrence order;
        a token string that appears several times is stored once.

    Note:
        Matching is by substring (``candidate in token``), so a short candidate
        also selects longer tokens that merely contain it.
    """
    # Flatten instead of squeeze(): squeeze() turns a one-token prompt into a
    # 0-d array/tensor, which cannot be iterated by zip() below.
    input_ids = inputs['input_ids'].reshape(-1)

    # Materialize once so a one-shot iterable is not exhausted after the first token.
    candidates = tuple(outputs)

    return {
        tok: tok_id
        for tok, tok_id in zip(decoded, input_ids)
        if any(cand in tok for cand in candidates)
    }

In [14]:
def list_shuffle(lst, n, seed=None):
    """Return ``n`` independently shuffled copies of ``lst``.

    Args:
        lst: The items to shuffle. Any iterable is accepted (list, tuple, ...);
            it is copied once and never mutated.
        n: Number of shuffled copies to produce. Values <= 0 give an empty list.
        seed: Optional seed. When given, a private generator is used so the
            result is reproducible and the global ``random`` state is left
            untouched. When ``None`` (default) the module-level generator is
            used, exactly as before.

    Returns:
        list[list]: ``n`` lists, each a permutation of the items of ``lst``.
    """
    import random

    rng = random if seed is None else random.Random(seed)
    # list() rather than lst.copy(): tuples and other iterables have no .copy().
    items = list(lst)

    new_lsts = []
    for _ in range(n):
        shuffled = items.copy()
        rng.shuffle(shuffled)
        new_lsts.append(shuffled)
    return new_lsts


[['b', 'd', 'c', 'a', 'e'], ['b', 'e', 'a', 'd', 'c'], ['e', 'b', 'd', 'a', 'c'], ['c', 'a', 'b', 'e', 'd'], ['b', 'a', 'd', 'e', 'c']]


In [64]:
import json
def load_data_json(file, n_shuffles=50):
    """Load prompt groups from a JSON file and build shuffled evaluation prompts.

    The file must contain a ``"prompts_list"`` key holding a list of groups,
    each group being a list of strings. Every group is shuffled ``n_shuffles``
    times with ``list_shuffle``. For each ordering, the final word of the last
    string is held out as the label and everything before it becomes the prompt.

    Args:
        file: Path of the JSON file.
        n_shuffles: Number of random orderings generated per group
            (default 50, the value that used to be hard-coded).

    Returns:
        tuple: ``(prompt_ult, preds_ult, outputs_ult)``, three lists with one
        entry per group. Each entry is a list with one item per ordering:
        the prompt text, the held-out label word, and the set of final words of
        all strings in that ordering (the candidate answers).

    Raises:
        KeyError: If the file has no ``"prompts_list"`` key.
        IndexError: If a group is empty or one of its strings has no words.
    """
    # Explicit encoding: do not depend on the platform's locale default.
    with open(file, "r", encoding="utf-8") as f:
        prompt_data = json.load(f)["prompts_list"]

    prompt_ult = []
    preds_ult = []
    outputs_ult = []

    for prompts in prompt_data:
        prompt_list = []
        preds = []
        outputs = []
        for prompt in list_shuffle(prompts, n_shuffles):
            # Candidate answers: the final word of every string in this ordering.
            outputs.append({de.split()[-1] for de in prompt})

            last_words = prompt[-1].split()
            # Earlier strings verbatim, then the last string minus its final
            # word; each appended word is preceded by a single space.
            p = " ".join(prompt[:-1]) + "".join(" " + w for w in last_words[:-1])

            prompt_list.append(p)
            preds.append(last_words[-1])
        prompt_ult.append(prompt_list)
        preds_ult.append(preds)
        outputs_ult.append(outputs)
    return prompt_ult, preds_ult, outputs_ult

In [65]:
# Dataset evaluated by the next cell.
data_path = 'data/info_17/selection_17.json'

# NOTE: load_data_json shuffles every prompt group through list_shuffle, which
# uses the unseeded `random` module, so the generated prompts (and any accuracy
# computed from them) differ from run to run.
prompt_list, labels_list, outputs_list = load_data_json(data_path)

In [66]:
import torch
from sklearn.metrics import accuracy_score
from tqdm import tqdm

# For every prompt, pick the candidate token (a prompt token containing one of
# the candidate answers) that has the highest logit at the final prompt
# position, then map it back to the candidate answer it contains.
preds = []
for prompts, outputs in zip(prompt_list, outputs_list):
    pred = []
    # zip() has no length, so pass total= to get a real progress bar.
    for prompt, output in tqdm(zip(prompts, outputs), total=len(prompts)):
        inputs = tokenizer(prompt, return_tensors="pt")
        decoded_tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
        tok_ids_dict = decoded_ids_align(inputs, decoded_tokens, output)
        if not tok_ids_dict:
            # Same exception type max() would raise on an empty list, with a usable message.
            raise ValueError(f"no candidate token found in prompt: {prompt!r}")

        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            logits = model(**inputs).logits
        last_position_logits = logits[0, -1]

        # Token with the highest score; the first one wins on ties.
        max_score_token = max(
            tok_ids_dict,
            key=lambda tok: last_position_logits[tok_ids_dict[tok]].item(),
        )
        # Map the raw token back to the candidate answer it contains, if any.
        max_score_token = next((o for o in output if o in max_score_token), max_score_token)
        pred.append(max_score_token)
    preds.append(pred)

# Mean of the per-group accuracies.
acc = []
for labels, pred in zip(labels_list, preds):
    acc.append(accuracy_score(labels, pred))
print(sum(acc)/len(acc))


50it [01:15,  1.50s/it]
50it [01:15,  1.51s/it]
50it [01:13,  1.47s/it]
50it [01:16,  1.53s/it]
50it [01:15,  1.51s/it]
50it [01:13,  1.47s/it]
50it [01:16,  1.52s/it]
50it [01:14,  1.49s/it]
50it [01:13,  1.48s/it]
50it [01:14,  1.49s/it]
50it [01:13,  1.48s/it]

0.8418181818181819





In [74]:
# Switch to the second dataset. This re-binds data_path, prompt_list,
# labels_list and outputs_list, replacing the values loaded by the earlier cell.
data_path = 'data/info_15/selection_15.json'

# NOTE: load_data_json shuffles every prompt group through list_shuffle, which
# uses the unseeded `random` module, so the generated prompts (and any accuracy
# computed from them) differ from run to run.
prompt_list, labels_list, outputs_list = load_data_json(data_path)

In [76]:
import torch
from sklearn.metrics import accuracy_score
from tqdm import tqdm

# For every prompt, pick the candidate token (a prompt token containing one of
# the candidate answers) that has the highest logit at the final prompt
# position, then map it back to the candidate answer it contains.
preds = []
for prompts, outputs in zip(prompt_list, outputs_list):
    pred = []
    # zip() has no length, so pass total= to get a real progress bar.
    for prompt, output in tqdm(zip(prompts, outputs), total=len(prompts)):
        inputs = tokenizer(prompt, return_tensors="pt")
        decoded_tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
        tok_ids_dict = decoded_ids_align(inputs, decoded_tokens, output)
        if not tok_ids_dict:
            # Same exception type max() would raise on an empty list, with a usable message.
            raise ValueError(f"no candidate token found in prompt: {prompt!r}")

        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            logits = model(**inputs).logits
        last_position_logits = logits[0, -1]

        # Token with the highest score; the first one wins on ties.
        max_score_token = max(
            tok_ids_dict,
            key=lambda tok: last_position_logits[tok_ids_dict[tok]].item(),
        )
        # Map the raw token back to the candidate answer it contains, if any.
        max_score_token = next((o for o in output if o in max_score_token), max_score_token)
        pred.append(max_score_token)
    preds.append(pred)

# Mean of the per-group accuracies.
acc = []
for labels, pred in zip(labels_list, preds):
    acc.append(accuracy_score(labels, pred))
print(sum(acc)/len(acc))

50it [01:33,  1.88s/it]
50it [01:39,  2.00s/it]
50it [01:39,  1.99s/it]
50it [01:35,  1.92s/it]
50it [01:39,  1.99s/it]
50it [01:35,  1.91s/it]
50it [01:34,  1.89s/it]
50it [01:34,  1.89s/it]
50it [01:35,  1.91s/it]
50it [01:40,  2.01s/it]
50it [01:35,  1.91s/it]

0.41818181818181815



