In [None]:
# Install a blanket 'ignore' filter: every Python warning is suppressed for the
# rest of the session.
import warnings
warnings.filterwarnings('ignore')

In [None]:
import torch
import evaluate
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu
from vllm import LLM, SamplingParams
from peft import PeftModel
from transformers import BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM

In [None]:
# Load the summarization test set. Later cells read the 'context' column as the
# model input and the 'summarization' column as the reference summary.
full_data_test = pd.read_csv('../dataset/full_test_data_summarization.csv')

In [None]:
# Identifier of the base model passed to AutoModelForCausalLM.from_pretrained.
model_name = 'mistralai/Mistral-7B-Instruct-v0.2'

In [None]:
# 4-bit NF4 quantization settings for loading the base model, with bfloat16 as
# the compute dtype. The double-quantization flag must be spelled
# `bnb_4bit_use_double_quant`; any other spelling is treated as an unused
# keyword argument and the option stays disabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [None]:
# Load the base causal LM using the quantization settings in `bnb_config`,
# automatic device placement, and the FlashAttention-2 attention implementation.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    attn_implementation="flash_attention_2",
    device_map='auto',
    trust_remote_code=True,
)

In [None]:
# `checkpoint` is the path given to PeftModel when the adapter is attached to the
# base model; `model_dir` is the directory the tokenizer is read from and the
# merged model and tokenizer are written to.
checkpoint = './model_checkpoint/'
model_dir = './peft_model'

In [None]:
# Load the tokenizer from `model_dir`.
# NOTE(review): `model_dir` is also the directory this notebook saves the merged
# model and tokenizer into further down, so this presumably relies on tokenizer
# files already being present there — confirm on a clean checkout.
tokenizer = AutoTokenizer.from_pretrained(model_dir)

In [None]:
# Attach the fine-tuned adapter stored under `checkpoint` to the base model.
# `PeftModel.from_pretrained` reads the adapter config and weights from that
# directory; the bare `PeftModel(...)` constructor expects a PeftConfig object
# as its second argument, not a path.
model = PeftModel.from_pretrained(base_model, checkpoint)
# Fold the adapter weights into the base weights and drop the PEFT wrapper so
# the result can be saved as a standalone model.
model = model.merge_and_unload()

In [None]:
# Write the merged model (safetensors serialization) and the tokenizer into the
# same directory, `model_dir`.
model.save_pretrained(model_dir, safe_serialization=True)
tokenizer.save_pretrained(model_dir)

In [None]:
# Start the vLLM engine from the directory the merged model was saved to.
# `model_dir` is used instead of repeating the path literal so the save and load
# locations cannot drift apart.
# NOTE(review): the base model was loaded in 4-bit before merging; confirm the
# installed vLLM version can load the resulting checkpoint.
llm = LLM(model=model_dir)

In [None]:
# Show the tokenizer object held by the vLLM engine (cell output only; the
# returned value is not stored).
llm.get_tokenizer()

In [None]:
# Decoding settings shared by every generation request.
# temperature/top_p make decoding stochastic and no seed is set here, so
# repeated runs can yield different summaries.
sample_params = SamplingParams(
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.2,
    max_tokens=1024,            # upper bound on generated tokens per prompt
    skip_special_tokens=True,   # leave special tokens out of the decoded text
)

In [None]:
# Instruction prompt with a single `{}` placeholder for the text to summarize.
# The Vietnamese instruction reads: "You are an AI assistant. You will be given
# a task. Briefly summarize the following content in Vietnamese:".
# NOTE(review): the literal `<s>` is part of the string; confirm the tokenizer
# does not also prepend its own beginning-of-sequence token.
template = """<s>[INST] Bạn là một trợ lý AI. Bạn sẽ được giao một nhiệm vụ. Hãy tóm lược ngắn gọn nội dung sau bằng tiếng Việt:
{} [/INST] """

In [None]:
def create_prompt(sample):
    """Build the instruction prompt for one input text.

    `sample` is substituted into the single `{}` placeholder of the
    module-level `template`, and the formatted string is returned.
    """
    return template.format(sample)

In [None]:
# One prompt per test row, in row order, built from the 'context' column.
full_prompt = [
    create_prompt(context)
    for context in full_data_test['context'].tolist()
]

In [None]:
# Generate completions for the whole list of prompts in one call, using the
# shared sampling settings.
outputs = llm.generate(full_prompt, sample_params)

In [None]:
# Keep the text of the first completion returned for each prompt.
summarized_predictions = [
    request_output.outputs[0].text
    for request_output in outputs
]

In [None]:
# Store the generated summaries next to their source rows.
# Assumes `summarized_predictions` is in the same order as the rows of
# `full_data_test` — TODO confirm the generation call preserves prompt order.
full_data_test['summarization_predictions'] = summarized_predictions

In [None]:
# Score the generated summaries against the reference summaries with ROUGE.
# The metric's default tokenizer keeps only ASCII letters and digits, which
# splits Vietnamese words at every accented character. A lowercase whitespace
# tokenizer is passed instead so whole words are compared.
rouge_metric = evaluate.load("rouge")
rouge_scores = rouge_metric.compute(
    references=full_data_test['summarization'].tolist(),
    predictions=full_data_test['summarization_predictions'].tolist(),
    tokenizer=lambda text: text.lower().split(),
)

In [None]:
# Display the computed ROUGE scores as the cell output.
rouge_scores