In [1]:
import json
import pandas as pd
from data_processing import pers_labels

In [3]:
PWD = os.environ["WORKSPACE_PATH"]

data_type = "mbpt_0_top_lbl"

label_mode = pers_labels.MBPT if pers_labels.MBPT.lower() in data_type else pers_labels.BIG_5

index = [idx for idx in range(5) if str(idx) in data_type][0]

pers_defs = {
    pers_labels.MBPT: {
        0: (("I", "introverted"), ("E", "extroverted")),
        1: (("S", "sensing"), ("N", "intuitive")),
        2: (("F", "feeling"), ("T", "thinking")),
        3: (("J", "judging"), ("P", "perceiving")),
    },
    pers_labels.BIG_5: {
        0: (("S", "social"), ("R", "reserved")),
        1: (("L", "limbic"), ("C", "calm")),
        2: (("O", "organized"), ("U", "unstructured")),
        3: (("A", "agreeable"), ("E", "egocentric")),
        4: (("N", "non-curious"), ("I", "inquisitive")),
    }
}

((label1, label1_def), (label2, label2_def)) = pers_defs[label_mode][index]

In [4]:
with open(f"{PWD}/data/cornell_movies/speakers.json", "r+") as fp:
    fp_parsed = json.load(fp)
    chars_meta = {}
    chars_meta_rows = []
    for char in fp_parsed:
        meta = fp_parsed[char]["meta"]
        meta["character_name"] = meta["character_name"].lower()
        meta["char_id"] = char
        chars_meta[char] = meta
        chars_meta_rows.append(meta)

In [5]:
dataset = pd.read_json(f"{PWD}/data/model_datasets/{data_type}.jsonl", lines=True)

len(dataset)

1932

In [6]:
def create_prompt(text, char):
    _, scene = text.split("\n", 1)

    return f"""
    Read the scenes below and then categorize {char}'s personality as {label1} for {label1_def} or "{label2}" for {label2_def}, according to the {label_mode.lower()} personality typology. Response with only one word.

    scenes:
    {scene}
    """

In [7]:
prompts = []
for row in dataset.iloc:
    char_name = chars_meta[row.char_id]["character_name"]
    scene = row.text
    prompts.append(create_prompt(scene, char_name))

dataset["prompt"] = prompts

In [8]:
def to_messages(df):
    messages = []
    df.to_dict(orient="records")
    for row in df.iloc:
        messages.append(([{
            "role": "user",
            "content": row.prompt
        }], row.label))
    return messages

In [9]:
messages = to_messages(dataset)

## evaluation time!!!

In [13]:
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import Dataset

In [11]:
model_id = "meta-llama/Meta-Llama-3-70B-Instruct"

tkr = AutoTokenizer.from_pretrained(model_id)
# tkr.add_special_tokens({'pad_token': '[PAD]'})
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    cache_dir="/nlp/scr/neigbe/.cache")

model.generation_config.pad_token_ids = tkr.pad_token_id

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]



model.safetensors.index.json:   0%|          | 0.00/59.6k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/30 [00:00<?, ?it/s]

model-00001-of-00030.safetensors:   0%|          | 0.00/4.58G [00:00<?, ?B/s]

model-00002-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00003-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00005-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00006-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00007-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00008-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00009-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00010-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00011-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00012-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00013-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00014-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00015-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00016-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00017-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00018-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00019-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00020-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00021-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00022-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00023-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00024-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00025-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00026-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00027-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00028-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00029-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00030-of-00030.safetensors:   0%|          | 0.00/2.10G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/30 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [None]:
from tqdm.notebook import tqdm_notebook as tqdm
preds = []
labels = []

for msg, lbl in tqdm(messages):
    input_ids = tkr.apply_chat_template(msg, add_generation_prompt=True, return_tensors="pt").to(model.device)
    terminators = [tkr.eos_token_id, tkr.convert_tokens_to_ids("<|eot_id|>")]
    outputs = model.generate(
        input_ids,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    response = outputs[0][input_ids.shape[-1]:]
    preds.append(tkr.decode(response, skip_special_tokens=True))
    labels.append(lbl)

In [21]:
import json
with open(f"{PWD}/model_preds/llama-70b-zero-shot.jsonl", "w+") as fp:
    for pdt, lbl in zip(preds, labels):
        fp.write(json.dumps({"pred": pdt, "label": lbl}) + "\n")

In [25]:
filt_preds = []
filt_labels = []

for pdt, lb in zip(preds, labels):
    if pdt in ["E", "I"]:
        filt_preds.append(pdt)
        filt_labels.append(lb)

In [26]:
from sklearn.preprocessing import LabelEncoder

label_enc = LabelEncoder()

final_labels = label_enc.fit_transform(filt_labels)
final_preds = label_enc.transform(filt_preds)

## results!

overall metrics

In [27]:
import evaluate
import numpy as np

args = {"predictions": final_preds,  "references": final_labels, "average": "macro"}
f1 = evaluate.load("f1")
acc = evaluate.load("recall")
prec = evaluate.load("precision")

scores = {}
scores.update(f1.compute(**args))
scores.update(acc.compute(**args))
scores.update(prec.compute(**args))

print(scores)

{'f1': 0.6016322013970538, 'recall': 0.6235828993691643, 'precision': 0.6525918886278064}


per class metrics

In [32]:
args = {"predictions": final_preds,  "references": final_labels, "average": None}
scores = {}
scores.update(f1.compute(**args))
scores.update(acc.compute(**args))
scores.update(prec.compute(**args))


class_scores = np.concatenate([val.reshape(-1, 1) for val in scores.values()], axis=1)

class_names = np.array(label_enc.inverse_transform(range(2))).reshape(-1, 1)
per_class_df = pd.DataFrame(np.concatenate([class_names, class_scores], axis=1), columns=["label", *scores.keys()])

per_class_df.style.hide(axis="index")

label,f1,recall,precision
E,0.519053876478318,0.4026503567787971,0.7301293900184843
I,0.6842105263157895,0.8445154419595314,0.5750543872371283
