In [2]:
import os

In [3]:
%pwd

'd:\\A_Category\\iNeuron\\End-To-End-NLP-Project-News-Article-Sorting\\research'

In [4]:
os.chdir("../")

In [5]:
%pwd

'd:\\A_Category\\iNeuron\\End-To-End-NLP-Project-News-Article-Sorting'

In [6]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen= True)
class ModelEvaluationConfig:
    root_dir: Path
    test_data_path:Path
    model_path:  Path
    tokenizer_path: Path
    metric_file_name: Path


In [7]:
from ArticleSorting.constants import *
from ArticleSorting.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    def __init__(self, 
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH):
        
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    
    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        config = self.config.model_evaluation
      
        create_directories([config.root_dir])

        model_evaluation_config = ModelEvaluationConfig(
            root_dir = config.root_dir,
            test_data_path = config.test_data_path,
            model_path = config.model_path,
            tokenizer_path = config.tokenizer_path,
            metric_file_name = config.metric_file_name

        )
        return model_evaluation_config


In [27]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_from_disk, load_metric
import torch
import pandas as pd
from tqdm import tqdm
import evaluate
from torch.utils.data import DataLoader
import numpy as np

In [36]:
class ModelEvaluation:
    def __init__(self, config: ModelEvaluationConfig) :
        self.config = config
        
    
    def evaluate(self):

        device = "cuda" if torch.cuda.is_available() else "cpu"
        kwargs = {'num_workers': 1, 'pin_memory': True} if device=='cuda' else {}
        torch.cuda.empty_cache() # Empty cache

        # Loading data
        test_dataset = load_from_disk(self.config.test_data_path)
        #print(test_dataset)

        # DataLoader
        test_dataloader = DataLoader(dataset=test_dataset, shuffle=True, batch_size=4, **kwargs)

    
        #Loading the model 
        model = AutoModelForSequenceClassification.from_pretrained(self.config.model_path).to(device)


        final_output = []

        for b_idx, data in enumerate(test_dataloader):
            with torch.no_grad():
                for key, value in data.items():
                    data[key] = value.to(device)
                output = model(**data)
                output = output.logits.detach().cpu().numpy()
                final_output.extend(output)
        
        torch.cuda.empty_cache()
        preds = np.vstack(final_output)

        preds = np.argmax(preds, axis=1)
        acc = (preds == test_dataset['labels']).sum()
        Test_Accuracy = acc / len(test_dataset)
        print(preds)
        print(f'Test Accuracy: {Test_Accuracy: .3f}')


In [37]:
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config= model_evaluation_config)
    model_evaluation.evaluate()

except Exception as e:
    raise e

[2023-11-23 00:48:13,920:  INFO: common: yaml file:config\config.yaml loaded successfully]
[2023-11-23 00:48:13,927:  INFO: common: yaml file:params.yaml loaded successfully]
[2023-11-23 00:48:13,930:  INFO: common: created directory at : artifacts]
[2023-11-23 00:48:13,932:  INFO: common: created directory at : artifacts/model_evaluation]


AttributeError: 'bool' object has no attribute 'sum'