In [3]:
import os

In [4]:
%pwd

'd:\\A_Category\\iNeuron\\End-To-End-NLP-Project-News-Article-Sorting\\research'

In [5]:
# Step from research/ up to the project root. Guarded on the current directory
# name so that re-running this cell does not keep climbing further up the tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [6]:
%pwd

'd:\\A_Category\\iNeuron\\End-To-End-NLP-Project-News-Article-Sorting'

In [7]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Read-only record of the settings consumed by the model-evaluation stage.

    The dataclass is frozen, so fields cannot be reassigned after construction.
    All fields are required and are declared in the order accepted positionally.
    """

    # Directory belonging to this stage.
    root_dir: Path
    # Stage inputs; presumably the train/test datasets plus the trained model and
    # its tokenizer -- confirm against the project's config file.
    train_data_path: Path
    test_data_path: Path
    model_path: Path
    tokenizer_path: Path
    # NOTE(review): presumably where evaluation metrics are written; it is not
    # used by any visible cell.
    metric_file_name: Path


In [8]:
from ArticleSorting.constants import *
from ArticleSorting.utils.common import read_yaml, create_directories

In [9]:
class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage configuration objects."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        """Parse both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path handed to ``read_yaml`` for the main config;
                defaults to ``CONFIG_FILE_PATH``.
            params_filepath: Path handed to ``read_yaml`` for the parameters;
                defaults to ``PARAMS_FILE_PATH``.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root is created up front, before any stage asks for
        # its own directory.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the evaluation-stage config from the ``model_evaluation`` section.

        The stage's ``root_dir`` is created as a side effect before the config
        object is assembled.

        Returns:
            A ``ModelEvaluationConfig`` whose fields are copied unchanged from
            the same-named entries of the ``model_evaluation`` section.
        """
        section = self.config.model_evaluation
        create_directories([section.root_dir])

        # Each dataclass field is filled from the identically named entry.
        field_names = (
            "root_dir",
            "train_data_path",
            "test_data_path",
            "model_path",
            "tokenizer_path",
            "metric_file_name",
        )
        return ModelEvaluationConfig(
            **{name: getattr(section, name) for name in field_names}
        )


In [12]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset, load_from_disk, load_metric
import torch
import pandas as pd
from tqdm import tqdm
import evaluate

In [11]:
class ModelEvaluation:
    """Scores a sequence classifier on held-out data.

    NOTE(review): ``Dataset``, used inside ``evaluate``, is neither defined nor
    imported in the visible cells; it has to exist in the kernel namespace
    (wrapping ``test_data`` into a torch-style dataset) before ``evaluate`` runs.
    """

    # The annotation is quoted so that defining this class does not require the
    # cell that defines ModelEvaluationConfig to have been executed first.
    def __init__(self, config: "ModelEvaluationConfig"):
        """Store the evaluation-stage configuration on the instance."""
        self.config = config

    @staticmethod
    def evaluate(model, test_data):
        """Run ``model`` over ``test_data`` and collect labels and predictions.

        Declared as a static method because it uses no instance state; this
        makes both ``ModelEvaluation.evaluate(model, data)`` and
        ``instance.evaluate(model, data)`` valid calls.

        Args:
            model: Callable invoked as ``model(input_ids, attention_mask)``. Its
                result must support ``.argmax(dim=1)``.
                NOTE(review): that means a plain tensor of per-class scores; a
                Hugging Face model output object would not work here -- confirm
                which model is passed in.
            test_data: Sized collection handed to ``Dataset(...)``. Each batch
                from the resulting loader must unpack into ``(inputs, labels)``
                where ``inputs`` has ``'input_ids'`` and ``'attention_mask'``.

        Returns:
            Tuple ``(true_labels, predictions_labels)`` of flat Python lists in
            loader order.

        Side effects:
            Prints the test accuracy (correct predictions divided by
            ``len(test_data)``; ``nan`` when ``test_data`` is empty). Moves the
            model to CUDA when a GPU is available.
        """
        test = Dataset(test_data)
        test_dataloader = torch.utils.data.DataLoader(test, batch_size=2)

        use_cuda = torch.cuda.is_available()
        device = torch.device("cuda" if use_cuda else "cpu")
        if use_cuda:
            model = model.cuda()

        # Dropout / batch-norm layers must be in inference mode while scoring;
        # the caller's training mode is restored once evaluation is done.
        is_module = isinstance(model, torch.nn.Module)
        was_training = is_module and model.training
        if is_module:
            model.eval()

        # Tracking variables
        predictions_labels = []
        true_labels = []
        total_acc_test = 0

        try:
            with torch.no_grad():
                for test_input, test_label in test_dataloader:
                    test_label = test_label.to(device)
                    mask = test_input['attention_mask'].to(device)
                    # Drops the singleton axis at position 1 of the batched ids.
                    input_id = test_input['input_ids'].squeeze(1).to(device)

                    # Predicted class per example, computed once and reused.
                    predicted = model(input_id, mask).argmax(dim=1)
                    total_acc_test += (predicted == test_label).sum().item()

                    true_labels += test_label.flatten().tolist()
                    predictions_labels += predicted.flatten().tolist()
        finally:
            if was_training:
                model.train()

        # Guard the empty case instead of raising ZeroDivisionError.
        n_examples = len(test_data)
        accuracy = total_acc_test / n_examples if n_examples else float("nan")
        print(f'Test Accuracy: {accuracy: .3f}')
        return true_labels, predictions_labels
        
# At module level the bare name `evaluate` is the Hugging Face `evaluate` package
# imported above, not the method defined on ModelEvaluation, so the call has to go
# through the class.
# NOTE(review): `model` and `df_test` are not created in any visible cell; they
# must be defined earlier for this line to run on a fresh kernel.
true_labels, pred_labels = ModelEvaluation.evaluate(model, df_test)
