In [1]:
import os

In [2]:
%pwd

'd:\\TextSummarizationProject\\End-to-end-Text-Summarizer-Project\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'd:\\TextSummarizationProject\\End-to-end-Text-Summarizer-Project'

In [5]:
"""README > 3. Update entity """

from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationEntity:
    root_dir: Path 
    data_path: Path 
    model_path: Path 
    tokenizer_path:Path
    metric_file_name: Path
    


In [6]:
"""README > 4. Update the configuration manager in src config """
from textSummarizer.constants import *
from textSummarizer.utils.common import read_yaml, create_directories

class ConfigurationManager:
    def __init__( self, config_filepath = FILE_PATH_CONFIG, params_filepath = FILE_PATH_PARAMS):
        
        #common.py dosyasındaki read_yaml() fonksiyonu ile yaml dosyasını okuyoruz.
        self.config = read_yaml(config_filepath) #Aslında Path("config/config.yaml")
        self.params = read_yaml(params_filepath) #Aslında Path("params.yaml")

        create_directories([self.config.artifacts_root]) #artifacts_root: artifacts. Bu method ile "artifacts" isimli folder otomatik olarak oluşturulur.
        #"." kullanarak çağırmayı "ConfigBox"a borçluyuz.

    def get_config_model_evaluation(self) -> ModelEvaluationEntity:

        config_model_evaluation = self.config.model_evaluation
        create_directories([config_model_evaluation.root_dir])
        return ModelEvaluationEntity(
            root_dir = config_model_evaluation.root_dir,
            data_path = config_model_evaluation.data_path,
            model_path = config_model_evaluation.model_path,
            tokenizer_path = config_model_evaluation.tokenizer_path,
            metric_file_name = config_model_evaluation.metric_file_name
        )
        
        

In [7]:
from transformers import  AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset, load_from_disk, load_metric
import torch
import pandas as pd
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


[2024-05-24 09:54:06,801: INFO: config: PyTorch version 2.3.0 available.]


In [10]:
"""README > 5. Update components """


class ModelEvaluation:
    def __init__(self, config: ModelEvaluationEntity):
        self.config = config

    def generate_batches_from_the_list(self, list_elements, batch_size):
        for i in range(0,len(list_elements), batch_size):
            yield list_elements[i:i+batch_size]

    def calculate_metrics(self, dataset, metric, model, tokenizer, batch_size = 16, device = "cpu",
                      column_text="article",
                      column_summary="highlights"):
        article_batches = list(self.generate_batches_from_the_list(dataset[column_text], batch_size))
        target_batches = list(self.generate_batches_from_the_list(dataset[column_summary], batch_size))

        for article_batch, target_batch in tqdm(zip(article_batches, target_batches), total=len(article_batches)):
            inputs = tokenizer(article_batch, max_length=1024, truncation=True, padding="max_length", return_tensors = "pt")
            summaries = model.generate(input_ids = inputs["input_ids"].to(device), attention_mask=inputs["attention_mask"].to(device), 
                                       length_penalty=0.8, num_beams=8, max_length=128)

            decoded_summaries=[tokenizer.decode(s,skip_special_tokens=True, clean_up_tokenization_spaces=True) for s in summaries]

            decoded_summaries = [d.replace("", " ") for d in decoded_summaries]

            metric.add_batch(predictions=decoded_summaries, references=target_batch)

        #  Finally compute and return the ROUGE scores.
        score = metric.compute()
        return score
    
    def evaluate_model(self):
        device = "cpu"
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path).to(device)

        dataset_samsum_pt = load_from_disk(self.config.data_path)

        rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
        rouge_metric = load_metric('rouge')

        score = self.calculate_metrics(
        dataset_samsum_pt['test'][0:10], rouge_metric, model_pegasus, tokenizer, batch_size = 2, column_text = 'dialogue', column_summary= 'summary')
        print(score)

        rouge_dict = dict((rn, score[rn].mid.fmeasure ) for rn in rouge_names )

        df = pd.DataFrame(rouge_dict, index = ['pegasus'] )
        df.to_csv(self.config.metric_file_name, index=False)

In [11]:
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_config_model_evaluation()
    model_evaluation_config = ModelEvaluation(config=model_evaluation_config)
    model_evaluation_config.evaluate_model()
except Exception as e:
    raise e

[2024-05-24 10:07:16,211: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-05-24 10:07:16,214: INFO: common: yaml file: params.yaml loaded successfully]
[2024-05-24 10:07:16,215: INFO: common: Created directory at artifacts]
[2024-05-24 10:07:16,216: INFO: common: Created directory at artifacts/model_evaluation]


You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
100%|██████████| 5/5 [03:06<00:00, 37.24s/it]


[2024-05-24 10:13:22,577: INFO: rouge_scorer: Using default tokenizer.]
{'rouge1': AggregateScore(low=Score(precision=0.00723041015030432, recall=0.0298759309309762, fmeasure=0.011427972548075122), mid=Score(precision=0.014122995163645909, recall=0.06381940717873272, fmeasure=0.022634826399746552), high=Score(precision=0.021228754013506795, recall=0.094997947454844, fmeasure=0.033989252766741314)), 'rouge2': AggregateScore(low=Score(precision=0.0, recall=0.0, fmeasure=0.0), mid=Score(precision=0.0, recall=0.0, fmeasure=0.0), high=Score(precision=0.0, recall=0.0, fmeasure=0.0)), 'rougeL': AggregateScore(low=Score(precision=0.0075411557434813285, recall=0.03476371801094557, fmeasure=0.012258534600444706), mid=Score(precision=0.01396279513981264, recall=0.0635042523108091, fmeasure=0.02237212401492758), high=Score(precision=0.02103281766694625, recall=0.094194957622605, fmeasure=0.03325453766609232)), 'rougeLsum': AggregateScore(low=Score(precision=0.007420691833699572, recall=0.033699801