In [11]:
import pandas as pd
import torch

from data_processing import util
from model_utils import evaluate
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

In [12]:
DATA_TYPE = "mbpt_0_top"
MAX_SEQ_LENGTH = 8192
CACHE_DIR = "/nlp/scr/neigbe/.cache"
MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
model = ["llama3-8b-instruct", "llama3-70b-instruct"][0]
MODEL_PATH = util.get_most_recent_model_path(model, DATA_TYPE)

/nlp/scr/neigbe/pers_proj/models/llama3-8b-instruct/mbpt_0_top/2024-05-25|22:58:56


In [13]:
_, _, test_df = util.get_data_splits(DATA_TYPE, .90, .5)

In [15]:
tkr = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR, model_max_length=MAX_SEQ_LENGTH, pad_token="<|pad_id|>")

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    attn_implementation = "flash_attention_2",
    cache_dir=CACHE_DIR,
    device_map="auto"
)

model.resize_token_embeddings(len(tkr))
model.config.pad_token_id = tkr.pad_token_id

model = PeftModel.from_pretrained(model, MODEL_PATH, device_map="auto")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [16]:
from tqdm.notebook import tqdm_notebook as tqdm
preds = []
labels = []

import warnings
warnings.filterwarnings('ignore')

for row in tqdm(test_df.iloc):
    input_ids = tkr.apply_chat_template(util.row_to_msg(row), add_generation_prompt=True, return_tensors="pt").to(model.device)
    terminators = [tkr.eos_token_id, tkr.convert_tokens_to_ids("<|eot_id|>")]
    outputs = model.generate(
        input_ids,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    response = outputs[0][input_ids.shape[-1]:]
    preds.append(tkr.decode(response, skip_special_tokens=True))
    labels.append(row.label)

0it [00:00, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end gene

In [17]:
filt_preds = []
filt_labels = []

for pdt, lb in zip(preds, labels):
    if pdt in ["E", "I"]:
        filt_preds.append(pdt)
        filt_labels.append(lb)

In [18]:
from sklearn.preprocessing import LabelEncoder

label_enc = LabelEncoder()

final_labels = label_enc.fit_transform(filt_labels)
final_preds = label_enc.transform(filt_preds)

## results!

overall metrics

In [19]:
evaluate.get_overall_metrics(final_preds, final_labels)

{'f1': 0.5246363805333547,
 'recall': 0.5504806690830313,
 'precision': 0.5657469486863836}

per class metrics

In [20]:
evaluate.get_class_metrics(final_preds, final_labels, DATA_TYPE)

Unnamed: 0,label,f1,recall,precision
0,E,0.642596,0.790776,0.541186
1,I,0.406677,0.310185,0.590308
