In [2]:
import os

In [3]:
%pwd

'd:\\A_Category\\iNeuron\\End-To-End-NLP-Project-News-Article-Sorting\\research'

In [4]:
# Step up from the notebook folder (research/) to the project root so that the
# relative paths used later (config/config.yaml, artifacts/) resolve.
# Guarded on the folder name so re-running this cell does not climb another level.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [5]:
%pwd

'd:\\A_Category\\iNeuron\\End-To-End-NLP-Project-News-Article-Sorting'

In [6]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable bundle of the locations used by the model-evaluation stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    The annotations are not enforced at runtime; values are stored as given.
    """

    # Working directory of the evaluation stage.
    root_dir: Path
    # Location of the saved test dataset.
    test_data_path: Path
    # Location of the saved model.
    model_path: Path
    # Location of the saved tokenizer.
    tokenizer_path: Path
    # Name/location of the metrics output file.
    metric_file_name: Path


In [7]:
from ArticleSorting.constants import *
from ArticleSorting.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    """Loads the project YAML files and builds per-stage config objects."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: path handed to ``read_yaml`` for the main config.
            params_filepath: path handed to ``read_yaml`` for the parameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The loaded config is accessed by attribute; its artifacts root is
        # created up front.
        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the ``ModelEvaluationConfig`` from the ``model_evaluation`` section.

        Creates the section's ``root_dir`` as a side effect, then copies the
        section's values into the dataclass unchanged.
        """
        section = self.config.model_evaluation

        create_directories([section.root_dir])

        return ModelEvaluationConfig(
            root_dir=section.root_dir,
            test_data_path=section.test_data_path,
            model_path=section.model_path,
            tokenizer_path=section.tokenizer_path,
            metric_file_name=section.metric_file_name,
        )


In [27]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_from_disk, load_metric
import torch
import pandas as pd
from tqdm import tqdm
import evaluate
from torch.utils.data import DataLoader
import numpy as np

In [32]:
class ModelEvaluation:
    """Runs the saved sequence-classification model over the saved test dataset."""

    def __init__(self, config: "ModelEvaluationConfig"):
        """Store the evaluation config (``test_data_path`` and ``model_path`` are read later)."""
        self.config = config

    def evaluate(self, batch_size: int = 4):
        """Run inference on the test dataset and return the raw logits.

        Args:
            batch_size: number of dataset rows per forward pass. Defaults to 4,
                the value that was previously hard-coded.

        Returns:
            A numpy array with one row of logits per dataset row, in dataset
            order. For an empty dataset an empty ``(0, num_labels)`` array is
            returned instead of raising.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        loader_kwargs = {'num_workers': 1, 'pin_memory': True} if device == 'cuda' else {}
        torch.cuda.empty_cache()  # release cached GPU memory before loading the model

        # Loading data
        test_dataset = load_from_disk(self.config.test_data_path)

        # shuffle=False: predictions must stay aligned with the dataset rows so
        # they can be compared against the labels afterwards.
        test_dataloader = DataLoader(
            dataset=test_dataset,
            shuffle=False,
            batch_size=batch_size,
            **loader_kwargs,
        )

        # Loading the model
        model = AutoModelForSequenceClassification.from_pretrained(self.config.model_path).to(device)
        model.eval()  # make inference mode explicit (no dropout)

        batch_logits = []
        with torch.no_grad():
            for batch in test_dataloader:
                # Every entry of the batch is moved to the model's device and
                # passed to the model as a keyword argument.
                batch = {key: value.to(device) for key, value in batch.items()}
                logits = model(**batch).logits
                batch_logits.append(logits.detach().cpu().numpy())

        torch.cuda.empty_cache()

        if batch_logits:
            preds = np.concatenate(batch_logits, axis=0)
        else:
            # Nothing to predict: return an empty result instead of letting
            # numpy fail on an empty list.
            preds = np.empty((0, model.config.num_labels), dtype=np.float32)

        print(preds)
        return preds






In [33]:
# Build the evaluation config from the YAML files and run the evaluation.
# Exceptions propagate on their own, so no catch-and-reraise wrapper is needed.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()
model_evaluation = ModelEvaluation(config=model_evaluation_config)
model_evaluation.evaluate();  # trailing ';' keeps the cell from echoing a return value

[2023-11-22 21:53:25,068:  INFO: common: yaml file:config\config.yaml loaded successfully]
[2023-11-22 21:53:25,072:  INFO: common: yaml file:params.yaml loaded successfully]
[2023-11-22 21:53:25,074:  INFO: common: created directory at : artifacts]
[2023-11-22 21:53:25,078:  INFO: common: created directory at : artifacts/model_evaluation]


Dataset({
    features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 149
})
0
{'labels': tensor([2, 3, 4, 2], device='cuda:0'), 'input_ids': tensor([[  101,  4419,  4491,  ...,  2002,  2056,   102],
        [  101,  9106,  2003,  ...,     0,     0,     0],
        [  101,  4773,  2557,  ...,  2279,  2502,   102],
        [  101, 23094,  3940,  ...,     0,     0,     0]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 0, 0, 0]])}
0
{'labels': tensor([2, 3, 4, 2], device='cuda:0'), 'input_ids': tensor([[  101,  4419,  4491,  ...,  2002,  2056,   102],
        [  101,  9106,  2003,  ...,     0,     0,     0],
        [  101,  4773,  2557,  ...,  2279,  2502,   102],
        [  101, 23094,  3940,  ...,     0,  