In [4]:
import os


In [5]:
# Move the kernel's working directory up one level.
# NOTE(review): this is a relative chdir, so it is not idempotent -- re-running
# this cell moves up one more directory each time. Presumably the intent is to
# land on the project root so that `textSummarizer` and the config paths
# resolve; confirm, and run this cell exactly once per kernel session.
os.chdir("../")

In [6]:
%pwd

'c:\\Users\\91885\\Desktop\\Text_summarization\\text-summarizer'

In [7]:
from dataclasses import dataclass
from pathlib import  Path


In [8]:
@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable bundle of the locations used by the model-evaluation stage.

    Instances are frozen: attributes cannot be reassigned after construction.
    Field order is part of the positional constructor signature.
    """

    # Directory created for this stage before evaluation runs.
    root_dir: Path
    # Location of the dataset handed to the dataset loader.
    data_path: Path
    # Location of the model weights to evaluate.
    model_path: Path
    # Location of the tokenizer that matches the model.
    tokenizer_path: Path
    # File the computed metric scores are written to.
    metric_file_name: Path

In [9]:
from textSummarizer.utils.common import read_yaml ,create_directories
from textSummarizer.constants import *

In [10]:
class ConfigurationManager:
    """Load the project's YAML settings and hand out per-stage config objects.

    On construction both YAML files are read and the directory named by the
    config's ``artifacts_root`` entry is passed to ``create_directories``.
    """

    def __init__(self, param_filepath=PARAM_FILE_PATH, config_filepath=CONFIG_FILE_PATH):
        """Read the config and param files and prepare the artifacts root.

        Args:
            param_filepath: Path of the parameters YAML file.
            config_filepath: Path of the configuration YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.param = read_yaml(param_filepath)

        # The loaded config is accessed by attribute, so read_yaml must return
        # an attribute-accessible mapping.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the evaluation-stage config from the ``model_evaluation`` section.

        The stage's ``root_dir`` is passed to ``create_directories`` before the
        config object is returned.

        Returns:
            A ``ModelEvaluationConfig`` populated with the section's values as
            loaded (they are not converted to ``Path`` here).
        """
        section = self.config.model_evaluation
        create_directories([section.root_dir])

        return ModelEvaluationConfig(
            root_dir=section.root_dir,
            data_path=section.data_path,
            model_path=section.model_path,
            tokenizer_path=section.tokenizer_path,
            metric_file_name=section.metric_file_name,
        )
            
        

In [11]:
from dataclasses import dataclass
from pathlib import Path
import logging
import torch
import pandas as pd
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_from_disk
from evaluate import load
from tqdm import tqdm

# NOTE(review): this re-declares the ModelEvaluationConfig dataclass already
# defined in an earlier cell with the same fields; once this cell runs, this
# definition is the one the name refers to. Consider keeping a single definition.
@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable set of locations consumed by ``ModelEvaluation``."""

    # Directory associated with the evaluation stage.
    root_dir: Path
    # Dataset location passed to ``load_from_disk``.
    data_path: Path
    # Model location passed to ``AutoModelForSeq2SeqLM.from_pretrained``.
    model_path: Path
    # Tokenizer location passed to ``AutoTokenizer.from_pretrained``.
    tokenizer_path: Path
    # Destination of the CSV holding the ROUGE scores.
    metric_file_name: Path

class ModelEvaluation:
    """Score a seq2seq summarization model with ROUGE and write the scores to CSV.

    The model, tokenizer, dataset and output locations all come from the
    ``ModelEvaluationConfig`` passed to the constructor.
    """

    def __init__(self, config: "ModelEvaluationConfig"):
        """Store the configuration and configure root logging.

        Args:
            config: Locations of the model, tokenizer, dataset and metrics file.
        """
        self.config = config
        # logging.basicConfig does nothing when the root logger already has
        # handlers, so constructing several instances is harmless.
        logging.basicConfig(
            level=logging.INFO,
            format='[%(asctime)s: %(levelname)s: %(module)s: %(message)s]'
        )

    def clean_text(self, text):
        """Return ``text`` stripped, with newlines and carriage returns replaced by spaces."""
        return text.strip().replace("\n", " ").replace("\r", " ")

    def generate_batch_sized_chunks(self, list_of_elements, batch_size):
        """Yield consecutive slices of ``list_of_elements`` holding at most ``batch_size`` items."""
        for start in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[start:start + batch_size]

    def calculate_metric_on_test_ds(self, dataset, metric, model, tokenizer,
                                  batch_size=16, device="cuda" if torch.cuda.is_available() else "cpu",
                                  column_text="dialogue", column_summary="summary",
                                  compute_kwargs=None):
        """Generate summaries batch by batch and score them against the references.

        Args:
            dataset: Object indexable by column name whose columns are sliceable
                sequences of strings.
            metric: Object exposing ``add_batch(predictions=..., references=...)``
                and ``compute(**kwargs)``.
            model: Model exposing ``generate``; expected to already be on ``device``.
            tokenizer: Callable tokenizer that also exposes ``decode``.
            batch_size: Number of examples per generation batch.
            device: Device the tokenized inputs are moved to. The default is
                evaluated once, when the class is defined.
            column_text: Name of the column holding the input texts.
            column_summary: Name of the column holding the reference summaries.
            compute_kwargs: Optional dict of keyword arguments forwarded to
                ``metric.compute`` (for example ``{"use_stemmer": True}`` for
                ROUGE). ``None`` calls ``compute()`` with no arguments.

        Returns:
            Whatever ``metric.compute`` returns.
        """
        article_batches = list(self.generate_batch_sized_chunks(dataset[column_text], batch_size))
        target_batches = list(self.generate_batch_sized_chunks(dataset[column_summary], batch_size))

        all_decoded_summaries = []
        all_target_summaries = []

        for article_batch, target_batch in tqdm(zip(article_batches, target_batches), total=len(article_batches)):
            # Pad only to the longest sequence in the batch (inputs are still
            # truncated at 1024 tokens) instead of always padding to 1024; the
            # attention mask marks the padding, so fixed-width padding only
            # added compute.
            inputs = tokenizer(
                article_batch,
                max_length=1024,
                truncation=True,
                padding="longest",
                return_tensors="pt"
            ).to(device)

            # Inference only: no gradients are needed for generation.
            with torch.no_grad():
                summaries = model.generate(
                    input_ids=inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    length_penalty=0.8,
                    num_beams=8,
                    max_length=128,
                    min_length=30,
                    early_stopping=True,
                    no_repeat_ngram_size=3
                )

            all_decoded_summaries.extend(
                self.clean_text(tokenizer.decode(s, skip_special_tokens=True))
                for s in summaries
            )
            # References get the same whitespace normalisation as predictions.
            all_target_summaries.extend(self.clean_text(t) for t in target_batch)

        metric.add_batch(predictions=all_decoded_summaries, references=all_target_summaries)
        return metric.compute(**(compute_kwargs or {}))

    def evaluate(self, num_samples=10):
        """Load model, tokenizer and data, compute ROUGE, and save the scores as CSV.

        Args:
            num_samples: How many leading examples of the ``test`` split to
                score. Defaults to 10 (a quick smoke run); pass ``None`` to
                score the whole split.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load tokenizer and model
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        model = AutoModelForSeq2SeqLM.from_pretrained(
            self.config.model_path,
            forced_bos_token_id=0
        ).to(device)
        model.eval()

        # Load dataset and metric
        dataset_samsum_pt = load_from_disk(self.config.data_path)

        # `use_stemmer` is a compute-time option of the ROUGE metric, so it is
        # forwarded to compute() below rather than given to load(), where it
        # would not reach the scorer.
        rouge_metric = load('rouge')

        # Calculate scores; pass the device explicitly so inputs always land on
        # the same device the model was moved to above.
        score = self.calculate_metric_on_test_ds(
            dataset_samsum_pt['test'][0:num_samples],
            rouge_metric,
            model,
            tokenizer,
            batch_size=2,
            device=device,
            compute_kwargs={"use_stemmer": True}
        )

        # Process and save results
        rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
        rouge_dict = {rn: float(score[rn]) for rn in rouge_names}

        df = pd.DataFrame(rouge_dict, index=['pegasus'])
        # index=False: only the score columns are written, not the row label.
        df.to_csv(self.config.metric_file_name, index=False)

        logging.info(f"ROUGE Scores: {rouge_dict}")

def main():
    """Run the model-evaluation stage end to end.

    Builds the stage config through ``ConfigurationManager``, runs
    ``ModelEvaluation.evaluate``, and on any exception logs an error message
    before re-raising the same exception to the caller.
    """
    try:
        evaluation_config = ConfigurationManager().get_model_evaluation_config()
        ModelEvaluation(config=evaluation_config).evaluate()
    except Exception as exc:
        logging.error(f"Error in model evaluation: {exc}")
        raise exc

# Entry point: run the evaluation when executed as a script. Inside a notebook
# kernel __name__ is also "__main__", so executing this cell runs the full
# evaluation immediately.
if __name__ == "__main__":
    main()


100%|██████████| 5/5 [01:21<00:00, 16.27s/it]
