In [None]:
import warnings
# Suppress all warnings for the rest of the session to keep cell output readable.
warnings.filterwarnings('ignore')

In [ ]:
import torch
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu
from vllm import LLM, SamplingParams
from peft import PeftModel
from transformers import BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM

In [ ]:
# Test split for the summarization task; later cells read its `context` column.
full_data_test = pd.read_csv(
    filepath_or_buffer='../dataset/full_test_data_summarization.csv',
)

In [ ]:
# Identifier of the base causal-LM passed to `AutoModelForCausalLM.from_pretrained` below.
model_name = 'mistralai/Mistral-7B-Instruct-v0.2'

In [ ]:
# 4-bit NF4 quantization settings used when loading the base model.
# The double-quantization switch is named `bnb_4bit_use_double_quant`; the
# previous spelling (`load_4bit_use_double_quant`) is not a recognised option,
# so double quantization was never actually enabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [ ]:
# Load the base model named by `model_name`, quantized according to `bnb_config`.
# The loading options are collected first so they can be read at a glance.
base_model_options = {
    'device_map': 'auto',
    'trust_remote_code': True,
    'attn_implementation': "flash_attention_2",
    'quantization_config': bnb_config,
}
base_model = AutoModelForCausalLM.from_pretrained(model_name, **base_model_options)

In [ ]:
# Directory holding the fine-tuned adapter that is attached to the base model below.
checkpoint = './model_checkpoint/'
# Directory the merged model and tokenizer are written to (and the tokenizer is read from).
model_dir = './peft_model'

In [ ]:
# Load the tokenizer from `model_dir`.
# NOTE(review): this notebook only writes to `model_dir` further down
# (`save_pretrained`), so this call relies on the directory already containing
# tokenizer files from an earlier step — confirm, or load from `model_name` /
# `checkpoint` instead.
tokenizer = AutoTokenizer.from_pretrained(model_dir)

In [ ]:
# Attach the fine-tuned adapter stored under `checkpoint` to the base model.
# `PeftModel.from_pretrained` reads the adapter config and weights from a path;
# the bare `PeftModel(...)` constructor expects a PeftConfig object as its
# second argument, not a directory string.
model = PeftModel.from_pretrained(base_model, checkpoint)
# Fold the adapter weights into the base model and drop the PEFT wrapper so the
# result can be saved and served as a plain model.
# NOTE(review): the base model was loaded 4-bit quantized; confirm that merging
# into (and later serving from) quantized weights is the intended workflow.
model = model.merge_and_unload()

In [ ]:
# Persist the merged model (safetensors format) and the tokenizer side by side
# in `model_dir` so the directory can be loaded as a standalone model.
model.save_pretrained(model_dir, safe_serialization=True)
tokenizer.save_pretrained(model_dir)

In [ ]:
# Serve the merged model with vLLM. Use `model_dir` rather than repeating the
# path literal so the save location and the load location cannot drift apart.
llm = LLM(model=model_dir)

In [ ]:
# Show the tokenizer the vLLM engine is using (displayed as the cell's output).
llm.get_tokenizer()

In [ ]:
# Decoding configuration for generation: nucleus sampling with a repetition
# penalty, up to 1024 new tokens, special tokens stripped from the output text.
generation_settings = {
    'max_tokens': 1024,
    'temperature': 0.7,
    'top_p': 0.9,
    'repetition_penalty': 1.2,
    'skip_special_tokens': True,
}
sample_params = SamplingParams(**generation_settings)

In [ ]:
# Instruction prompt in the `[INST] ... [/INST]` format. The Vietnamese text
# tells the model it is an AI assistant and asks for a concise summary, in
# Vietnamese, of the document substituted for `{}`.
# NOTE(review): the template hard-codes a leading `<s>`; confirm the tokenizer
# does not prepend a second BOS token at generation time.
template = """<s>[INST] Bạn là một trợ lý AI. Bạn sẽ được giao một nhiệm vụ. Hãy tóm lược ngắn gọn nội dung sau bằng tiếng Việt:
{} [/INST] """

In [ ]:
def create_prompt(sample):
    """Return the instruction prompt with *sample* substituted into `template`."""
    return template.format(sample)

In [ ]:
# One prompt per test document, in the same order as the rows of `full_data_test`.
full_prompt = [
    create_prompt(document)
    for document in full_data_test['context'].tolist()
]

In [ ]:
# Generate a completion for every prompt in one batched call, using `sample_params`.
outputs = llm.generate(full_prompt, sample_params)

In [ ]:
# Keep only the text of the first completion returned for each prompt.
summarized_predictions = [
    request_output.outputs[0].text for request_output in outputs
]

In [ ]:
# Store the generated summaries next to their source rows.
# Assumes `outputs` came back in the same order as `full_prompt` — TODO confirm.
full_data_test['summarization_predictions'] = summarized_predictions

In [ ]:
# Sentence-level BLEU of every generated summary, averaged over the test set.
#
# `sentence_bleu` works on token sequences. Passing raw strings makes it iterate
# over characters and report character n-gram overlap, so both the reference
# and the hypothesis are whitespace-tokenized before scoring.
#
# NOTE(review): the reference is the source `context`, not a gold summary —
# confirm this is the intended reference column.
bleu_weights = {
    'bleu@1': (1, 0, 0, 0),              # unigram precision only
    'bleu@2': (0, 1, 0, 0),              # bigram precision only
    'bleu@3': (0, 0, 1, 0),              # trigram precision only
    'bleu@4': (0, 0, 0, 1),              # 4-gram precision only
    'bleu@avg': (0.25, 0.25, 0.25, 0.25),  # standard cumulative BLEU-4
}
per_sample_bleu = {metric: [] for metric in bleu_weights}

for reference_text, predicted_text in zip(
    full_data_test['context'],
    full_data_test['summarization_predictions'],
):
    # Tokenize once per row and reuse for every weighting.
    reference_tokens = [reference_text.split()]
    hypothesis_tokens = predicted_text.split()
    for metric, weights in bleu_weights.items():
        per_sample_bleu[metric].append(
            sentence_bleu(reference_tokens, hypothesis=hypothesis_tokens, weights=weights)
        )

# Per-sample score lists, kept under their original names for later cells.
bleu_scores_ngram_1 = per_sample_bleu['bleu@1']
bleu_scores_ngram_2 = per_sample_bleu['bleu@2']
bleu_scores_ngram_3 = per_sample_bleu['bleu@3']
bleu_scores_ngram_4 = per_sample_bleu['bleu@4']
bleu_scores_ngram_avg = per_sample_bleu['bleu@avg']

# Mean score per metric; NaN (rather than ZeroDivisionError) for an empty test set.
bleu_scores = {
    metric: (sum(scores) / len(scores) if scores else float('nan'))
    for metric, scores in per_sample_bleu.items()
}

In [ ]:
# Display the averaged BLEU metrics as the cell's output.
bleu_scores