In [2]:
import os

In [3]:
# Query the current working directory, then move the process one directory up.
# NOTE(review): presumably the notebook lives in a subfolder and the
# `src.textSummarizer` imports below must resolve from the parent — confirm.
# The target is relative, so every re-run of this cell climbs one more level;
# restart the kernel before re-running.
%pwd
os.chdir('../')

In [9]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Read-only collection of the paths used by the model-evaluation stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Directory of this stage; created by the configuration manager before use.
    root_dir: Path
    # Location handed to `load_from_disk` when the dataset is loaded.
    data_path: Path
    # Presumably the fine-tuned model checkpoint location — confirm against config.yaml.
    model_ckpt: Path
    # Presumably the saved tokenizer location — confirm against config.yaml.
    tokenizer_path: Path
    # Destination of the CSV file that receives the metric scores.
    metric_file_name: Path

In [6]:
from src.textSummarizer.constant import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from src.textSummarizer.utils.common import read_yaml, create_directories

In [10]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage config objects."""

    def __init__(self,
                 config_file_path = CONFIG_FILE_PATH,
                 params_file_path = PARAMS_FILE_PATH):
        """Load both YAML files and hand the artifacts root to `create_directories`.

        Args:
            config_file_path: YAML file read into ``self.config``.
            params_file_path: YAML file read into ``self.params``.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the evaluation config from the ``model_evaluation`` section.

        The section's ``root_dir`` is passed to `create_directories` first;
        every configured value is then wrapped in a ``Path``.
        """
        section = self.config.model_evaluation
        create_directories([section.root_dir])

        # Field order mirrors the dataclass declaration.
        path_fields = (
            "root_dir",
            "data_path",
            "model_ckpt",
            "tokenizer_path",
            "metric_file_name",
        )
        as_paths = {field: Path(getattr(section, field)) for field in path_fields}
        return ModelEvaluationConfig(**as_paths)

In [20]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset, load_from_disk
import torch
import pandas as pd
from tqdm import tqdm
import evaluate
from rouge_score import rouge_scorer, scoring

In [25]:
class ModelEvaluation:
    """Scores a saved seq2seq summarization model with ROUGE and stores the result as CSV."""

    def __init__(self, config: "ModelEvaluationConfig"):
        """Keep the evaluation config (paths to data, model, tokenizer, metric file)."""
        self.config = config

    def generate_batch_size_chunks(self, list_of_elements, batch_size):
        """Yield consecutive slices of ``list_of_elements`` of at most ``batch_size`` items.

        The last slice is shorter when the length is not a multiple of
        ``batch_size``; an empty input yields nothing.
        """
        for i in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[i: i + batch_size]

    @staticmethod
    def _extract_fmeasure(value):
        """Return the F-measure from one ROUGE result entry.

        Accepts either an aggregate object exposing ``.mid.fmeasure`` or a
        plain number (the form returned by ``evaluate``'s ROUGE metric).
        """
        mid = getattr(value, "mid", None)
        return mid.fmeasure if mid is not None else float(value)

    def calculate_metric_on_test_ds(self,
                                    dataset,
                                    metric,
                                    model,
                                    tokenizer,
                                    device,
                                    batch_size=16,
                                    column_text = 'article',
                                    column_summary = 'highlights'):
        """Generate a summary for every text in ``dataset`` and score it against the reference.

        Args:
            dataset: Object indexable by column name; each column must support
                ``len`` and slicing.
            metric: Object with ``add_batch(predictions=..., references=...)``
                and ``compute()``.
            model: Object with a ``generate`` method taking ``input_ids`` and
                ``attention_mask``.
            tokenizer: Callable that encodes a batch of texts and offers ``decode``.
            device: Device the encoded inputs are moved to before generation.
            batch_size: Number of texts processed per generation call.
            column_text: Column holding the source texts.
            column_summary: Column holding the reference summaries.

        Returns:
            Whatever ``metric.compute()`` returns after all batches were added.
        """
        article_batches = list(self.generate_batch_size_chunks(dataset[column_text], batch_size))
        target_batches = list(self.generate_batch_size_chunks(dataset[column_summary], batch_size))

        for article_batch, target_batch in tqdm(
            zip(article_batches, target_batches), total=len(article_batches)):

            inputs = tokenizer(article_batch,
                               max_length = 1024,
                               truncation = True,
                               padding='max_length',
                               return_tensors = 'pt')

            # length_penalty < 1 makes beam search favour shorter sequences.
            summaries = model.generate(input_ids = inputs['input_ids'].to(device),
                                       attention_mask = inputs['attention_mask'].to(device),
                                       length_penalty = 0.8,
                                       num_beams = 8,
                                       max_length = 128)

            decoded_summaries = [
                tokenizer.decode(s,
                                 skip_special_tokens = True,
                                 clean_up_tokenization_spaces = True)
                for s in summaries
            ]

            # Pegasus marks line breaks with the literal token "<n>"; turn it into a
            # space. (Replacing "" instead would put a space between all characters.)
            decoded_summaries = [d.replace("<n>", " ") for d in decoded_summaries]

            metric.add_batch(predictions = decoded_summaries, references = target_batch)

        score = metric.compute()
        return score

    def evaluate(self):
        """Run ROUGE evaluation on the dataset's ``test`` split and write the scores to CSV.

        Loads tokenizer and model from the configured paths, uses CUDA when it
        is available, summarizes the ``dialogue`` column against ``summary``,
        and writes one row of ROUGE F-measures to ``config.metric_file_name``.
        Returns ``None``.
        """
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_ckpt).to(device)

        dataset_samsum_pt = load_from_disk(self.config.data_path)

        # Key names as produced by the ROUGE metric.
        rouge_names = ['rouge1', 'rouge2', 'rougeL', 'rougeLsum']

        rouge_metric = evaluate.load('rouge')

        score = self.calculate_metric_on_test_ds(dataset = dataset_samsum_pt['test'],
                                                 metric = rouge_metric,
                                                 model = model_pegasus,
                                                 tokenizer = tokenizer,
                                                 device = device,
                                                 column_text = 'dialogue',
                                                 column_summary = 'summary')

        rouge_dict = {rn: self._extract_fmeasure(score[rn]) for rn in rouge_names}

        df = pd.DataFrame(rouge_dict, index = ['pegasus'])
        df.to_csv(self.config.metric_file_name, index=False)

In [None]:
import logging

# Build the evaluation config and run the stage. A failure is logged with its
# traceback and re-raised so the cell fails visibly.
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config=model_evaluation_config)
    model_evaluation.evaluate()
except Exception as e:
    logging.getLogger(__name__).exception(f"Error in model evaluation: {str(e)}")
    raise