In [1]:
import sys
sys.path.append('../')

import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
from util import load_jsonl

In [2]:
data_path='/home/bonbak/kobaco/data/kobaco.csv'
device='cuda:3'

In [3]:
save_dir = "/SSL_NAS/bonbak/model/models--yanolja--EEVE-Korean-Instruct-2.8B-v1.0/snapshots/482db2d0ba911253d09342c34d0e42ac871bfea3"
tokenizer = AutoTokenizer.from_pretrained(save_dir)
model = AutoModelForCausalLM.from_pretrained(save_dir).to(device)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [16]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, data_path):
        self.data = load_jsonl(data_path)
        self.input_text_list = self.set_input_text_list()
        self.answer_list = self.set_answer_list()
        self.continuous_prompt_input_list = self.set_continuous_prompt_input_list()

    def set_input_text_list(self):
        input_text_list = []
        for data in self.data:
            user = data['user_id']
            prompt =f'사용자 {user}의 TV 프로그램 시청 기록:\n'
            for idx, item in enumerate(data['interacted_items']):
                prompt += f'{idx}. {item}\n'
            prompt +='\n타겟 TV 프로그램:\n* ' + data['target_item'] + '\n\n'
            prompt += data['question']
            input_text_list.append(prompt)
        return input_text_list

    def set_answer_list(self):
        return [x['answer'] for x in self.data]
    

    def set_continuous_prompt_input_list(self):
        # return [{'input_text_list':'\n'.join([x['node_information'],x['edge_information']])} for x in self.data]
        return

    def __len__(self):
        return len(self.input_text_list)

    def __getitem__(self, idx):
        # return self.input_text_list[idx], self.continuous_prompt_input_list[idx], self.answer_list[idx]
        return self.input_text_list[idx], self.answer_list[idx]

In [5]:
model.eval()
pred = []
label = []

test_dataset = Dataset('../data/test.jsonl')
for input_text, answer_list in tqdm(test_dataset):
    # input_text = '\n'.join((continuous_prompt_input['input_text_list'], input_text))
    input_ids = tokenizer(input_text, return_tensors="pt").to(device)
    outputs = model.generate(**input_ids, pad_token_id=tokenizer.eos_token_id, max_new_tokens=1)
    answer = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    answer = answer.replace(input_text, '')

    pred.append(answer)
    label.append(answer_list)

100%|██████████| 5666/5666 [14:16<00:00,  6.62it/s]


In [30]:
import numpy as np
def convert_answer(answer):
    converted = []
    for a in answer:
        a = a.strip()
        if a == '예':
            converted.append(1)
        elif a == '아니':
            converted.append(0)
        else:
            converted.append(-1)
    return np.array(converted)

In [43]:
y_pred = convert_answer(pred)
y_true = convert_answer(test_dataset.answer_list)
new_y_pred = y_pred[np.where(y_pred!=-1)]
new_y_true = y_true[np.where(y_pred!=-1)]
missed = np.where(y_pred==-1)
print('miss rate:',len(missed[0]) / len(y_true))

miss rate: 0.05206494881750794


In [44]:
from sklearn.metrics import accuracy_score, f1_score
accuracy = accuracy_score(new_y_true, new_y_pred)
f1 = f1_score(new_y_true, new_y_pred)

print(accuracy)
print(f1)

0.6252094582014522
0.5176132278936018
