In [None]:
#entity

In [None]:
from dataclasses import dataclass
from pathlib import Path
@dataclass
class ModelEvaluationConfig:
    """Settings bundle for the model-evaluation stage.

    In this notebook the values are filled in from the ``model_evaluation``
    section of the project config by
    ``ConfigurationManager.get_model_evaluation_config``.

    NOTE(review): the fields are annotated as ``Path``, but the configuration
    manager passes the config values through without wrapping them in
    ``Path`` -- confirm the actual runtime type before relying on Path methods.
    """

    # Directory handed to create_directories() when the config is built.
    root_dir: Path
    # Location given to datasets.load_from_disk() by evaluate().
    data_path: Path
    # Location given to AutoModelForSeq2SeqLM.from_pretrained() by evaluate().
    model_path: Path
    # Location given to AutoTokenizer.from_pretrained() by evaluate().
    tokenizer_path: Path
    # Destination given to DataFrame.to_csv() for the ROUGE scores.
    metric_file: Path

Configuration manager:

In [None]:
from src.Text_summarizer.utils.common import read_yaml, create_directories
from  src.Text_summarizer.constants import *
from src.Text_summarizer.entity import ModelEvaluationConfig

In [None]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage config objects.

    Constructing an instance reads both YAML files and passes the configured
    ``artifacts_root`` to ``create_directories``.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load the config and params YAML files.

        Args:
            config_filepath: Path of the main config YAML
                (defaults to ``CONFIG_FILE_PATH``).
            params_filepath: Path of the params YAML
                (defaults to ``PARAMS_FILE_PATH``).
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Top-level artifacts directory named in the config.
        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the settings object for the model-evaluation stage.

        Reads the ``model_evaluation`` section of the loaded config, passes
        its ``root_dir`` to ``create_directories`` and copies the section's
        values into a ``ModelEvaluationConfig``.

        Returns:
            ModelEvaluationConfig: the populated stage configuration.
        """
        stage = self.config.model_evaluation

        # Stage working directory.
        create_directories([stage.root_dir])

        return ModelEvaluationConfig(
            root_dir=stage.root_dir,
            data_path=stage.data_path,
            model_path=stage.model_path,
            tokenizer_path=stage.tokenizer_path,
            metric_file=stage.metric_file,
        )

Components:

In [None]:
import os

import pandas as pd
import torch
from datasets import load_from_disk
from evaluate import load
from tqdm import tqdm
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


In [None]:
class ModelEvaluation:
    """Model-evaluation stage object holding the stage configuration.

    The evaluation logic in this notebook is written as module-level
    functions in later cells. ``evaluate`` below forwards to the module-level
    ``evaluate(self)`` so that ``ModelEvaluation(config).evaluate()`` resolves
    instead of raising ``AttributeError``.
    """

    def __init__(self, config: "ModelEvaluationConfig"):
        """Store the stage configuration.

        Args:
            config: Settings for the evaluation stage; the evaluation code
                reads its paths through ``self.config``.
        """
        self.config = config

    def evaluate(self):
        """Run the evaluation by delegating to the module-level ``evaluate``.

        Inside a method body the bare name ``evaluate`` is looked up in the
        module namespace at call time (class attributes are not in scope), so
        this picks up the function defined in a later cell, not this method.

        Returns:
            Whatever the module-level ``evaluate(self)`` returns.
        """
        return evaluate(self)

def giveme_batch_sized_chunks(list_of_elements, batch_size):
    """Lazily split ``list_of_elements`` into consecutive slices of ``batch_size`` items.

    The last slice holds whatever remains and may be shorter than
    ``batch_size``; an empty input produces no slices.
    """
    total = len(list_of_elements)
    yield from (
        list_of_elements[start:start + batch_size]
        for start in range(0, total, batch_size)
    )

def calculate_metric_on_test_ds(dataset, metric, model, tokenizer,
                               batch_size=16, device="cuda",
                               column_text="article",
                               column_summary="highlights"):
    """Generate summaries for ``dataset`` batch by batch and score them with ``metric``.

    Args:
        dataset: Object indexable by column name; each column must support
            ``len()`` and slicing (it is split with
            ``giveme_batch_sized_chunks``).
        metric: Object exposing ``add_batch(predictions=..., references=...)``
            and ``compute()``.
        model: Object exposing ``generate(...)``; it must already live on
            ``device``.
        tokenizer: Callable that encodes a batch of texts and exposes
            ``decode(...)``.
        batch_size: Number of examples per generation batch.
        device: Device the encoded inputs are moved to before generation.
        column_text: Name of the column holding the source texts.
        column_summary: Name of the column holding the reference summaries.

    Returns:
        The value returned by ``metric.compute()`` after all batches were added.
    """
    article_batches = list(giveme_batch_sized_chunks(dataset[column_text], batch_size))
    target_batches = list(giveme_batch_sized_chunks(dataset[column_summary], batch_size))

    for article_batch, target_batch in tqdm(
            zip(article_batches, target_batches), total=len(article_batches)):

        # Encode the batch as PyTorch tensors, truncated/padded to 512 tokens.
        inputs = tokenizer(article_batch, max_length=512,
                           truncation=True,
                           padding="max_length", return_tensors="pt")

        # Beam-search generation; see the transformers ``generate`` docs for
        # the exact semantics of length_penalty / num_beams / max_length.
        summaries = model.generate(
            input_ids=inputs["input_ids"].to(device),
            attention_mask=inputs["attention_mask"].to(device),
            length_penalty=0.8, num_beams=4,
            max_length=128)

        decoded_summaries = [
            tokenizer.decode(s, skip_special_tokens=True,
                             clean_up_tokenization_spaces=True)
            for s in summaries
        ]

        # Decoded Pegasus output uses the literal marker "<n>" for line
        # breaks; swap it for a space before scoring. The pattern must not be
        # the empty string: str.replace("", " ") inserts a space around every
        # character and corrupts every prediction.
        decoded_summaries = [d.replace("<n>", " ") for d in decoded_summaries]

        metric.add_batch(predictions=decoded_summaries, references=target_batch)

    # Aggregate over everything that was added above.
    return metric.compute()

In [None]:
def evaluate(self):
    """Score the saved summarization model with ROUGE and write the result as CSV.

    Loads the tokenizer, model and dataset from the paths in ``self.config``,
    generates summaries for the first 15 rows of the ``'test'`` split
    (``'dialogue'`` -> ``'summary'``), and writes one row containing the
    rouge1 / rouge2 / rougeL / rougeLsum scores to ``self.config.metric_file``.

    Args:
        self: Object exposing ``config`` with ``tokenizer_path``,
            ``model_path``, ``data_path`` and ``metric_file``.

    Returns:
        None. The scores are persisted to ``self.config.metric_file``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
    model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path).to(device)
    dataset_samsum_pt = load_from_disk(self.config.data_path)

    rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
    rouge_metric = load('rouge')

    # Pass the detected device explicitly: the helper defaults to "cuda",
    # which would fail on a CPU-only machine although the model is on "cpu".
    score = calculate_metric_on_test_ds(
        dataset_samsum_pt['test'][0:15], rouge_metric, model_pegasus, tokenizer,
        batch_size=1, device=device,
        column_text='dialogue', column_summary='summary')

    rouge_dict = {rn: score[rn] for rn in rouge_names}

    # index=False below means the 'pegasus' row label is not written to the file.
    df = pd.DataFrame(rouge_dict, index=['pegasus'])
    df.to_csv(self.config.metric_file, index=False)


Main:

In [None]:
# Run the model-evaluation stage end to end.
# Any exception propagates unchanged; a bare `except ...: raise e` wrapper
# would add nothing, so none is used. Each object gets its own name so the
# config and the stage object are not confused.
config_manager = ConfigurationManager()
model_evaluation_config = config_manager.get_model_evaluation_config()
model_evaluation = ModelEvaluation(config=model_evaluation_config)
model_evaluation.evaluate()