In [1]:
import os
from dataclasses import dataclass
from pathlib import Path
from textSummarizer.logging import logger
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
%pwd
os.chdir('../')

In [3]:
%pwd

'd:\\Data Science\\Text-summarizer'

In [4]:
@dataclass(frozen=True)
class ModelEvaluationConfig:
    root_dir: Path
    data_path: Path
    model_path: Path
    tokenizer_path: Path
    metric_file_name: Path

In [5]:
from textSummarizer.constants import CONFIG_FILE_PATH,PARAMS_FILE_PATH
from textSummarizer.utils.common import read_yaml_config,create_directorys

In [6]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml_config(config_filepath)
        self.params = read_yaml_config(params_filepath)

        create_directorys([self.config.artifacts_root])


    
    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        config = self.config.model_evaluation

        create_directorys([config.root_dir])

        model_evaluation_config = ModelEvaluationConfig(
            root_dir=config.root_dir,
            data_path=config.data_path,
            model_path = config.model_path,
            tokenizer_path = config.tokenizer_path,
            metric_file_name = config.metric_file_name
           
        )

        return model_evaluation_config

In [7]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset, load_from_disk,load_metric
from tqdm import tqdm
import pandas as pd
import torch

  from .autonotebook import tqdm as notebook_tqdm


[2024-08-22 16:02:45,233: INFO: config: PyTorch version 2.3.1 available.]


In [8]:
class ModelEvaluation:
    def __init__(self, config: ModelEvaluationConfig):
        self.config = config
    
    def generate_batch_sized_chunks(self,list_of_elements, batch_size):
        for i in range(0,len(list_of_elements), batch_size):
            yield list_of_elements[i:i+batch_size]
    
    def calculate_metrics(self,dataset,metric,model,tokenizer,batch_size=16,device='cuda' if torch.cuda.is_available() else 'cpu',column_text='artical',column_summary='highlights'):
        artical_batches = list(self.generate_batch_sized_chunks(dataset[column_text], batch_size))
        target_batches = list(self.generate_batch_sized_chunks(dataset[column_summary], batch_size))

        for artical_batches, target_batches in tqdm(zip(artical_batches,target_batches),total=len(artical_batches)):

            inputs = tokenizer(artical_batches, max_length=1024, truncation=True, padding='max_length',return_tensors='pt')

            summaries = model.generate(input_ids=inputs['input_ids'].to(device), attention_mask=inputs['attention_mask'].to(device), max_length=128, num_beams=8,length_penalty=0.8)

            # decode the summaries
            # replace the token, and add the decoded texts with the references to metric
            decoded_summaries = [tokenizer.decode(summary, skip_special_tokens=True) for summary in summaries]
            decoded_summaries = [decoded_sum.replace('',' ') for decoded_sum in decoded_summaries]

            metric.add_batch(predictions=decoded_summaries, references=target_batches)

            # calculate the metric
            metric_score = metric.compute()
            return metric_score
        
    def evaluate_model(self):
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        model = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path).to(device)
        
        # loading dataset
        dataset_samsum_pt = load_from_disk(self.config.data_path)

        rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
        rough_metric = load_metric('rouge',trust_remote_code=True)

        score = self.calculate_metrics(dataset_samsum_pt['test'][0:10], rough_metric, model,tokenizer=tokenizer,batch_size=2,device=device,column_text = 'dialogue', column_summary= 'summary')
        
        rough_dic = dict((rn,score[rn].mid.fmeasure)for rn in rouge_names)
        df = pd.DataFrame(rough_dic,index=['pegasus'])
        df.to_csv(self.config.metric_file_name, index=False)


    


In [9]:
# pipeline
try:
    config = ConfigurationManager()
    data_evalution_config = config.get_model_evaluation_config()
    data_evalution = ModelEvaluation(config=data_evalution_config)
    data_evalution.evaluate_model()
except Exception as e:
    raise e

[2024-08-22 16:02:45,610: INFO: common: Successfully loaded configuration from config\config.yaml]
[2024-08-22 16:02:45,613: INFO: common: Successfully loaded configuration from params.yaml]
[2024-08-22 16:02:45,615: INFO: common: Created directory: artifacts]
[2024-08-22 16:02:45,616: INFO: common: Created directory: artifacts/model_evaluation]


  rough_metric = load_metric('rouge',trust_remote_code=True)
  0%|          | 0/5 [00:00<?, ?it/s]

[2024-08-22 16:06:12,804: INFO: rouge_scorer: Using default tokenizer.]


  0%|          | 0/5 [03:20<?, ?it/s]
