In [2]:
import os

In [3]:
%pwd

'd:\\A_Category\\iNeuron\\End-To-End-NLP-Project-News-Article-Sorting\\research'

In [4]:
os.chdir("../")

In [5]:
%pwd

'd:\\A_Category\\iNeuron\\End-To-End-NLP-Project-News-Article-Sorting'

In [6]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen= True)
class ModelEvaluationConfig:
    root_dir: Path
    test_data_path:Path
    model_path:  Path
    tokenizer_path: Path
    metric_file_name: Path


In [7]:
from ArticleSorting.constants import *
from ArticleSorting.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    def __init__(self, 
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH):
        
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    
    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        config = self.config.model_evaluation
      
        create_directories([config.root_dir])

        model_evaluation_config = ModelEvaluationConfig(
            root_dir = config.root_dir,
            test_data_path = config.test_data_path,
            model_path = config.model_path,
            tokenizer_path = config.tokenizer_path,
            metric_file_name = config.metric_file_name

        )
        return model_evaluation_config


In [27]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_from_disk, load_metric
import torch
import pandas as pd
from tqdm import tqdm
import evaluate
from torch.utils.data import DataLoader
import numpy as np

In [69]:
class ModelEvaluation:
    def __init__(self, config: ModelEvaluationConfig) :
        self.config = config
        
    
    def evaluate(self):

        device = "cuda" if torch.cuda.is_available() else "cpu"
        kwargs = {'num_workers': 1, 'pin_memory': True} if device=='cuda' else {}
        torch.cuda.empty_cache() # Empty cache

        # Loading data
        test_dataset = load_from_disk(self.config.test_data_path)
        #print(test_dataset)

        # DataLoader
        test_dataloader = DataLoader(dataset=test_dataset, shuffle=True, batch_size=4, **kwargs)

    
        #Loading the model 
        model = AutoModelForSequenceClassification.from_pretrained(self.config.model_path).to(device)


        final_output = []
        total_acc_test = 0
        for b_idx, data in enumerate(test_dataloader):
            with torch.no_grad():
                for key, value in data.items():
                    data[key] = value.to(device)
                output = model(**data)
                output = output.logits.detach().cpu().numpy()
                final_output.extend(output)
                
                #output = np.argmax(output, axis=1)
                #acc = 1 if output == data['labels'] else 0
                #total_acc_test += acc


        
        preds = np.vstack(final_output)
        preds = np.argmax(preds, axis=1)
        for i in range(len(test_dataset)):
            if (preds[i] == test_dataset["labels"][i]):
                acc = 1 
            else:
                acc = 0
            total_acc_test += acc

        Test_Accuracy = total_acc_test / len(test_dataset)
        print(f'Predictions : {preds}')
        print(f'Labels : {test_dataset["labels"]}')
        
        print(f'Test Accuracy: {Test_Accuracy: .3f}')
        torch.cuda.empty_cache()

        
        df = pd.DataFrame([Test_Accuracy], index=['bert'])
        df.to_csv(self.config.metric_file_name, index=False)



In [70]:
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config= model_evaluation_config)
    model_evaluation.evaluate()

except Exception as e:
    raise e

[2023-11-23 21:13:34,710:  INFO: common: yaml file:config\config.yaml loaded successfully]
[2023-11-23 21:13:34,776:  INFO: common: yaml file:params.yaml loaded successfully]
[2023-11-23 21:13:34,788:  INFO: common: created directory at : artifacts]
[2023-11-23 21:13:34,807:  INFO: common: created directory at : artifacts/model_evaluation]
Predictions : [1 1 1 4 0 1 4 3 0 3 0 1 1 0 1 1 2 4 3 3 4 1 0 3 0 1 3 1 1 4 1 3 3 1 4 0 2
 2 4 1 4 2 2 0 3 1 4 3 3 0 1 4 3 1 2 0 0 4 1 4 0 3 1 2 3 1 0 1 4 0 0 2 2 1
 0 4 0 0 2 3 2 4 2 4 1 0 0 4 0 2 3 1 4 3 2 0 0 0 2 3 4 0 3 0 4 4 4 3 3 0 1
 3 1 3 2 2 1 2 2 4 0 2 3 3 3 0 2 1 3 3 2 2 0 2 4 0 4 4 1 0 3 1 0 1 4 3 4 2
 3]
Labels : tensor([0, 2, 0, 1, 3, 1, 1, 2, 0, 3, 1, 0, 3, 0, 3, 0, 2, 4, 3, 1, 0, 1, 2, 0,
        1, 4, 3, 1, 2, 3, 2, 0, 2, 3, 2, 3, 0, 3, 0, 3, 3, 0, 3, 1, 0, 1, 2, 1,
        3, 2, 2, 1, 1, 4, 3, 1, 4, 4, 0, 4, 4, 1, 3, 1, 0, 2, 3, 4, 0, 1, 1, 0,
        0, 1, 1, 4, 4, 0, 4, 2, 2, 4, 1, 0, 1, 1, 4, 0, 2, 1, 3, 1, 4, 1, 2, 2,
        0, 

ValueError: DataFrame constructor not properly called!

In [60]:
torch.cuda.empty_cache()