In [1]:
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
import torch
import os

In [5]:
# What to evaluate: the dataset name and the model whose tokenizer is loaded.
eval_dataset = "coqa"  # name of the dataset to use
model_id = "nvidia/ChatQA-1.5-8B"  # or a path to a custom model

# Where the dataset is read from and where results are written.
data_folder = "/home/dudaji/Jun/npu-service-zoo/data/chatbot/coqa/"
output_folder = "/home/dudaji/Jun/npu-service-zoo/results/chatbot/coqa"

# Other parameters.
max_tokens = 200  # maximum number of tokens
out_seq_len = 128  # output sequence length
num_ctx = 3  # number of contexts

In [6]:
from dataset import load_data, get_inputs
def get_prompt_list():
    """Build the evaluation prompts for the configured dataset.

    Reads the module-level settings ``model_id``, ``data_folder``,
    ``eval_dataset``, ``num_ctx`` and ``out_seq_len``.

    Returns:
        Whatever ``get_inputs`` returns for the loaded samples; the rest of
        the notebook indexes and slices it as a sequence of prompt strings.

    Raises:
        ValueError: If ``eval_dataset`` is not one of the supported dataset
            names.
    """
    # Supported datasets, mapped to their file name inside ``data_folder``.
    dataset_paths = {
        "coqa": "dev.json",
    }

    # Validate the dataset name first so a typo fails immediately, before
    # the tokenizer is loaded. ValueError is a subclass of Exception, so
    # existing ``except Exception`` handlers still match.
    if eval_dataset not in dataset_paths:
        raise ValueError("Please input a correct eval_dataset name!")
    input_datapath = os.path.join(data_folder, dataset_paths[eval_dataset])

    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Load the raw samples (project-defined loader).
    data_list = load_data(input_datapath)
    print(f"Number of samples in the dataset: {len(data_list)}")

    # Turn the samples into prompts (project-defined helper).
    prompt_list = get_inputs(data_list, eval_dataset, tokenizer, num_ctx=num_ctx, max_output_len=out_seq_len)
    return prompt_list

In [7]:
# Build the prompts once; later cells print, slice and iterate over this list.
prompt_list = get_prompt_list()


loading data from /home/dudaji/Jun/npu-service-zoo/data/chatbot/coqa/dev.json
Number of samples in the dataset: 7983


['System: This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\'s questions based on the context. The assistant should also indicate when the answer cannot be found in the context.\n\ntitle: , source: Once upon a time, in a barn near a farm house, there lived a little white kitten named Cotton. Cotton lived high up in a nice warm place above the barn where all of the farmer\'s horses slept. But Cotton wasn\'t alone in her little home above the barn, oh no. She shared her hay bed with her mommy and 5 other sisters. All of her sisters were cute and fluffy, like Cotton. But she was the only white one in the bunch. The rest of her sisters were all orange with beautiful white tiger stripes like Cotton\'s mommy. Being different made Cotton quite sad. She often wished she looked like the rest of her family. So one day, when Cotton found a can of the old farmer\'s orange paint, she used it to paint herself

In [9]:
# Print the first prompt so its newlines are rendered and the prompt layout
# can be checked by eye.
print(prompt_list[0])

System: This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions based on the context. The assistant should also indicate when the answer cannot be found in the context.

title: , source: Once upon a time, in a barn near a farm house, there lived a little white kitten named Cotton. Cotton lived high up in a nice warm place above the barn where all of the farmer's horses slept. But Cotton wasn't alone in her little home above the barn, oh no. She shared her hay bed with her mommy and 5 other sisters. All of her sisters were cute and fluffy, like Cotton. But she was the only white one in the bunch. The rest of her sisters were all orange with beautiful white tiger stripes like Cotton's mommy. Being different made Cotton quite sad. She often wished she looked like the rest of her family. So one day, when Cotton found a can of the old farmer's orange paint, she used it to paint herself like the

In [11]:
# Beginning-of-sequence token for Llama 3; it is prepended to every prompt
# before the prompt is sent to the model.
bos_token = "<|begin_of_text|>"


In [14]:
from llama_index.llms.ollama import Ollama
from llama_index.core.prompts import PromptTemplate

# Ollama model tag used for generation.
# NOTE(review): the prompts are built with the tokenizer of `model_id`, while
# generation runs on this Ollama model -- confirm this pairing is intended.
llm_model = 'llama3.1'
gpu_llm = Ollama(
    model=llm_model,
    temperature=0,
    request_timeout=600,
    # The LlamaIndex Ollama wrapper has no `max_tokens` field, so passing it
    # directly has no effect on generation. Ollama's own option for capping
    # the number of generated tokens is `num_predict`, which the wrapper
    # forwards through `additional_kwargs`.
    additional_kwargs={"num_predict": max_tokens},
)

In [21]:
# Smoke-test the model on the first prompt before running the full loop.
# The prompt is built explicitly here (BOS token + first prompt, the same way
# the generation loop builds it) so this cell also works on a fresh kernel,
# where no `prompt` variable exists yet.
sample_prompt = bos_token + prompt_list[0]
gpu_llm.complete(sample_prompt).text

'White.'

In [24]:
# NOTE(review): `sampling_params` is a vLLM object and is not passed to the
# Ollama call below; it is kept in case other cells rely on it -- confirm and
# remove if unused.
sampling_params = SamplingParams(temperature=0, top_k=1, max_tokens=max_tokens)

# Only the first `num_eval_samples` prompts are answered.
# NOTE(review): the ground-truth file holds more samples than this -- confirm
# the scorer accepts a prediction file that is shorter than the ground truth.
num_eval_samples = 100

output_list = []
for prompt in prompt_list[:num_eval_samples]:
    # Prepend the BOS token and ask the model for an answer.
    prompt = bos_token + prompt
    generated_text = gpu_llm.complete(prompt).text
    # Collapse the answer onto one line: the output file stores exactly one
    # prediction per line.
    generated_text = generated_text.strip().replace("\n", " ")

    output_list.append(generated_text)

# Create the results directory if it is missing, so open() below cannot fail
# on a fresh checkout.
os.makedirs(output_folder, exist_ok=True)

print("writing to %s" % output_folder)
# Explicit encoding so non-ASCII model output is written the same way
# regardless of the platform's default locale.
with open(os.path.join(output_folder, "output.txt"), "w", encoding="utf-8") as f:
    for output in output_list:
        f.write(output + "\n")

writing to /home/dudaji/Jun/npu-service-zoo/results/chatbot/coqa


In [25]:
from get_scores import *

# Score the predictions written by the generation cell against the dataset's
# ground-truth file. Both paths are derived from the configuration
# (`output_folder`, `data_folder`) instead of being repeated as literals, so
# they always point at the file that was actually written and the data that
# was actually loaded.
prediction_file = os.path.join(output_folder, "output.txt")
ground_truth_file = os.path.join(data_folder, "dev.json")
print("-"*80)
print(prediction_file)
print(ground_truth_file)
evaluate_f1(ground_truth_file, prediction_file)

ModuleNotFoundError: No module named 'evaluation_utils'