In [None]:
import json
from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast
import evaluate
import torch
from tqdm import tqdm  # Progress Bar 라이브러리

# Evaluate the fine-tuned Jeju-dialect KoBART checkpoint with sacreBLEU.

# Load the model and tokenizer from the local checkpoint directory.
model_path = "./kobart-finetuned-jeju"
model = BartForConditionalGeneration.from_pretrained(model_path)
tokenizer = PreTrainedTokenizerFast.from_pretrained(model_path)

# Use a GPU when one is available; otherwise stay on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Load the evaluation data from JSON ("utf-8-sig" tolerates a leading BOM).
with open("test_data_random.json", "r", encoding="utf-8-sig") as file:
    test_data = json.load(file)

# Prepare model inputs and references.
inputs = [item['source'] for item in test_data]
# Nested list: sacreBLEU expects a list of references per prediction, which
# also leaves room for samples with several valid targets later.
references = [[item['target']] for item in test_data]

# Generation settings.
batch_size = 32
# truncation=True is a no-op unless a max_length is supplied (the tokenizer
# only warns and skips truncation), so cap inputs at the model's position limit.
max_input_length = model.config.max_position_embeddings
# Explicit output cap, matching the Kyeongsang evaluation, so both BLEU scores
# are produced under the same decoding limit instead of the checkpoint default.
max_output_length = 250
predictions = []

print("Generating predictions...")
for i in tqdm(range(0, len(inputs), batch_size)):  # progress bar over batches
    batch_inputs = inputs[i:i + batch_size]
    inputs_encoded = tokenizer(
        batch_inputs,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=max_input_length,
    )
    with torch.no_grad():
        # Batches are padded, so the attention mask must be passed along;
        # without it the encoder attends to pad tokens as if they were input.
        # Only input_ids/attention_mask are forwarded (not the whole encoding)
        # so any extra tokenizer fields never reach generate().
        output_ids = model.generate(
            inputs_encoded["input_ids"].to(device),
            attention_mask=inputs_encoded["attention_mask"].to(device),
            max_length=max_output_length,
        )
    batch_predictions = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    predictions.extend(batch_predictions)

# Compute the corpus-level BLEU score.
print("Calculating BLEU score...")
bleu = evaluate.load("sacrebleu")
results = bleu.compute(predictions=predictions, references=references)

# Report the result.
print(f"BLEU score: {results['score']:.2f}")


Generating predictions...
  0%|          | 0/32 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|██████████| 32/32 [04:59<00:00,  9.37s/it]
Calculating BLEU score...
BLEU score: 59.38


In [None]:
import json
from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast
import evaluate
import torch
from tqdm import tqdm  # Progress Bar 라이브러리

# Evaluate the fine-tuned Kyeongsang-dialect KoBART checkpoint with sacreBLEU.

# Load the model and tokenizer from the local checkpoint directory.
model_path = "./kobart-finetuned-kyeongsang"
model = BartForConditionalGeneration.from_pretrained(model_path)
tokenizer = PreTrainedTokenizerFast.from_pretrained(model_path)

# Use a GPU when one is available; otherwise stay on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Load the evaluation data from JSON ("utf-8-sig" tolerates a leading BOM).
# A forward slash is used as the separator: the previous "\k" was an invalid
# escape sequence and the backslash path only resolved on Windows.
with open("kyeongsang_data/kyeongsang_test_data_random.json", "r", encoding="utf-8-sig") as file:
    test_data = json.load(file)

# Prepare model inputs and references.
inputs = [item['source'] for item in test_data]
# Nested list: sacreBLEU expects a list of references per prediction.
references = [[item['target']] for item in test_data]

# Generation settings.
batch_size = 32
# truncation=True is a no-op unless a max_length is supplied (the tokenizer
# only warns and skips truncation), so cap inputs at the model's position limit.
max_input_length = model.config.max_position_embeddings
max_output_length = 250
predictions = []

print("Generating predictions...")
for i in tqdm(range(0, len(inputs), batch_size)):  # progress bar over batches
    batch_inputs = inputs[i:i + batch_size]
    inputs_encoded = tokenizer(
        batch_inputs,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=max_input_length,
    )
    with torch.no_grad():
        # Batches are padded, so the attention mask must be passed along;
        # without it the encoder attends to pad tokens as if they were input.
        # Only input_ids/attention_mask are forwarded (not the whole encoding)
        # so any extra tokenizer fields never reach generate().
        output_ids = model.generate(
            inputs_encoded["input_ids"].to(device),
            attention_mask=inputs_encoded["attention_mask"].to(device),
            max_length=max_output_length,
        )
    batch_predictions = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    predictions.extend(batch_predictions)

# Compute the corpus-level BLEU score.
print("Calculating BLEU score...")
bleu = evaluate.load("sacrebleu")
results = bleu.compute(predictions=predictions, references=references)

# Report the result.
print(f"BLEU score: {results['score']:.2f}")

Generating predictions...
  0%|          | 0/32 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|██████████| 32/32 [05:10<00:00,  9.70s/it]
Calculating BLEU score...
BLEU score: 91.48
