In [74]:
import os

In [75]:
%pwd

'd:\\A_Category\\iNeuron\\End-To-End-NLP-Project-News-Article-Sorting'

In [4]:
os.chdir("../")

In [5]:
%pwd

'd:\\A_Category\\iNeuron\\End-To-End-NLP-Project-News-Article-Sorting'

In [76]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen= True)
class ModelEvaluationConfig:
    root_dir: Path
    test_data_path:Path
    model_path:  Path
    tokenizer_path: Path
    metric_file_name: Path


In [77]:
from ArticleSorting.constants import *
from ArticleSorting.utils.common import read_yaml, create_directories

In [78]:
class ConfigurationManager:
    def __init__(self, 
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH):
        
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    
    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        config = self.config.model_evaluation
      
        create_directories([config.root_dir])

        model_evaluation_config = ModelEvaluationConfig(
            root_dir = config.root_dir,
            test_data_path = config.test_data_path,
            model_path = config.model_path,
            tokenizer_path = config.tokenizer_path,
            metric_file_name = config.metric_file_name

        )
        return model_evaluation_config


In [85]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_from_disk, load_metric
import torch
import pandas as pd
from tqdm import tqdm
import evaluate
from torch.utils.data import DataLoader
import numpy as np
from transformers import pipeline

In [99]:
class ModelEvaluation:
    def __init__(self, config: ModelEvaluationConfig) :
        self.config = config
        
    
    def evaluate(self):

        device = "cuda" if torch.cuda.is_available() else "cpu"
        kwargs = {'num_workers': 1, 'pin_memory': True} if device=='cuda' else {}
        torch.cuda.empty_cache() # Empty cache

        # Loading data
        test_dataset = load_from_disk(self.config.test_data_path)
        #print(test_dataset)

        # DataLoader
        test_dataloader = DataLoader(dataset=test_dataset, shuffle=True, batch_size=4, **kwargs)

    
        #Loading the model 
        model = AutoModelForSequenceClassification.from_pretrained(self.config.model_path).to(device)


        final_output = []
        total_acc_test = 0
        for b_idx, data in enumerate(test_dataloader):
            with torch.no_grad():
                for key, value in data.items():
                    data[key] = value.to(device)
                output = model(**data)
                output = output.logits.detach().cpu().numpy()
                final_output.extend(output)
                
        
        preds = np.vstack(final_output)
        preds = np.argmax(preds, axis=1)
        total_acc_test += sum(1 if preds[i] == test_dataset["labels"][i] else 0 for i in range(len(test_dataset)))
        Test_Accuracy = total_acc_test / len(test_dataset)
        
        print(f'Predictions : {preds}')
        print(f'Labels : {test_dataset["labels"]}')
        print(f'Test Accuracy: {Test_Accuracy: .3f}')
        
        
        torch.cuda.empty_cache()

        
        df = pd.DataFrame([Test_Accuracy], index=['bert'])
        df.to_csv(self.config.metric_file_name, index=False)



'''
    #prediction code
    def predict(self,text):
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        

        pipe = pipeline("text-classification", model=self.config.model_path,tokenizer=tokenizer)

        print("Text: ")
        print(text)

        output = pipe(text)
        print("\nText Category:")
        print(output)

        return output

'''



In [100]:
'''
# Trying out the prediction using the model and tokenizer path
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config= model_evaluation_config)
    output = model_evaluation.predict("software watching while you work software that can not only monitor every keystroke and action performed at a pc but also be used as legally binding evidence of wrong-doing has been unveiled.  worries about cyber-crime and sabotage have prompted many employers to consider monitoring employees. the developers behind the system claim it is a break-through in the way data is monitored and stored. but privacy advocates are concerned by the invasive nature of such software.  the system is a joint venture between security firm 3ami and storage specialists bridgehead software. they have joined forces to create a system which can monitor computer activity  store it and retrieve disputed files within minutes. more and more firms are finding themselves in deep water as a result of data misuse")

except Exception as e:
    raise e

'''

[2023-11-25 22:16:39,375:  INFO: common: yaml file:config\config.yaml loaded successfully]
[2023-11-25 22:16:39,380:  INFO: common: yaml file:params.yaml loaded successfully]
[2023-11-25 22:16:39,383:  INFO: common: created directory at : artifacts]
[2023-11-25 22:16:39,387:  INFO: common: created directory at : artifacts/model_evaluation]


Text: 
software watching while you work software that can not only monitor every keystroke and action performed at a pc but also be used as legally binding evidence of wrong-doing has been unveiled.  worries about cyber-crime and sabotage have prompted many employers to consider monitoring employees. the developers behind the system claim it is a break-through in the way data is monitored and stored. but privacy advocates are concerned by the invasive nature of such software.  the system is a joint venture between security firm 3ami and storage specialists bridgehead software. they have joined forces to create a system which can monitor computer activity  store it and retrieve disputed files within minutes. more and more firms are finding themselves in deep water as a result of data misuse

Text Category:
[{'label': 'tech', 'score': 0.9918599724769592}]


In [81]:
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config= model_evaluation_config)
    model_evaluation.evaluate()

except Exception as e:
    raise e

[2023-11-23 22:21:28,917:  INFO: common: yaml file:config\config.yaml loaded successfully]
[2023-11-23 22:21:28,922:  INFO: common: yaml file:params.yaml loaded successfully]
[2023-11-23 22:21:28,925:  INFO: common: created directory at : artifacts]
[2023-11-23 22:21:28,932:  INFO: common: created directory at : artifacts/model_evaluation]


Predictions : [4 2 1 3 1 2 3 0 1 3 2 1 0 1 1 4 2 4 4 1 4 1 0 0 2 3 0 2 4 0 3 0 4 3 2 4 4
 1 3 1 0 1 0 3 2 1 2 2 1 4 4 1 0 3 3 1 1 2 0 0 1 3 1 2 0 3 0 4 2 3 0 3 4 0
 3 4 3 0 4 1 3 4 0 3 3 3 0 2 2 2 1 1 1 4 0 4 1 4 0 2 0 4 4 4 1 2 4 4 0 2 0
 0 2 1 0 1 0 3 3 2 2 1 4 3 3 2 3 0 3 1 0 2 3 1 3 2 4 0 1 0 3 3 0 0 3 0 1 4
 2]
Labels : tensor([0, 2, 0, 1, 3, 1, 1, 2, 0, 3, 1, 0, 3, 0, 3, 0, 2, 4, 3, 1, 0, 1, 2, 0,
        1, 4, 3, 1, 2, 3, 2, 0, 2, 3, 2, 3, 0, 3, 0, 3, 3, 0, 3, 1, 0, 1, 2, 1,
        3, 2, 2, 1, 1, 4, 3, 1, 4, 4, 0, 4, 4, 1, 3, 1, 0, 2, 3, 4, 0, 1, 1, 0,
        0, 1, 1, 4, 4, 0, 4, 2, 2, 4, 1, 0, 1, 1, 4, 0, 2, 1, 3, 1, 4, 1, 2, 2,
        0, 3, 0, 2, 0, 2, 0, 4, 2, 0, 3, 0, 3, 0, 1, 3, 1, 2, 2, 4, 0, 4, 1, 3,
        4, 0, 0, 3, 0, 4, 0, 4, 1, 2, 4, 3, 3, 4, 4, 2, 3, 2, 3, 3, 0, 4, 3, 4,
        3, 4, 2, 4, 1])
Test Accuracy:  0.228


In [83]:
torch.cuda.empty_cache()

In [84]:
from transformers import pipeline
