In [1]:
import os

In [2]:
%pwd

'c:\\Users\\Vincent\\Desktop\\text-classification-Trials\\notebook'

In [3]:
# Move from the notebook folder up to the project root so the `src` package
# imported by later cells can be found in the working directory.
# Guarded so that re-running this cell does not climb one more level each time:
# once `src` is visible from the current directory, the cell is a no-op.
if not os.path.isdir("src"):
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\Vincent\\Desktop\\text-classification-Trials'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable bundle of paths used by the model-evaluation stage.

    Attributes:
        root_dir: Working directory of the evaluation stage.
        model_data_path: Location the fine-tuned model is loaded from.
        tokenizer_data_path: Location the tokenizer is loaded from.
    """

    root_dir: Path
    model_data_path: Path
    tokenizer_data_path: Path

In [6]:
from src.constants import *
from src.utils.common import read_yaml, create_directories

In [7]:
## Update the configuration manager in src config

class ConfigurationManager:
    """Loads the project YAML configuration and builds per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH):
        """Read the YAML configuration and prepare the shared output root.

        Args:
            config_filepath: Path of the YAML file handed to ``read_yaml``;
                defaults to ``CONFIG_FILE_PATH``.
        """
        self.config = read_yaml(config_filepath)

        # The shared output root is set up before any stage-specific directory.
        output_root = self.config.output_root
        create_directories([output_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the configuration object for the model-evaluation stage.

        Passes the stage's ``root_dir`` to ``create_directories`` as a side
        effect. Values are forwarded exactly as read from the
        ``model_evaluation`` section; no conversion to ``Path`` happens here.

        Returns:
            A ``ModelEvaluationConfig`` holding ``root_dir``,
            ``model_data_path`` and ``tokenizer_data_path``.
        """
        section = self.config.model_evaluation
        create_directories([section.root_dir])

        return ModelEvaluationConfig(
            root_dir=section.root_dir,
            model_data_path=section.model_data_path,
            tokenizer_data_path=section.tokenizer_data_path,
        )

In [8]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import pandas as pd
from transformers import TFAutoModelForSequenceClassification
import os
import sys
from src.utils.common import load_tokenizer
from src.exception import CustomException
from src import logger

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
## 5. Update the components

class ModelEvaluation:
    """Runs the fine-tuned sequence-classification model on sample texts.

    The tokenizer and model locations come from the ``ModelEvaluationConfig``
    passed at construction time.
    """

    # Label names indexed by the argmax of the model's softmax output.
    # NOTE(review): the order is assumed to match the label encoding used
    # during training — confirm against the training pipeline.
    DEFAULT_LABELS = ('entertainment', 'science/tech', 'business', 'health')

    # Texts classified when ``evaluate_model`` is called without arguments.
    DEFAULT_TEXTS = (
        "Pop star to start fashion company",
        "Revolutionary methods for discovering new materials",
        "Rebranded bank will target global growth",
        "A new sustainable vaccination against Ebola developed.",
    )

    def __init__(self, config:ModelEvaluationConfig):
        """Store the evaluation configuration.

        Args:
            config: Paths of the tokenizer and fine-tuned model to load.
        """
        self.config=config

    @staticmethod
    def _log_prediction(tokenizer, model, text, labels):
        """Classify one text and log the text, its probabilities and its label."""
        logger.info("Text: {}".format(text))
        inputs = tokenizer(text, return_tensors="tf")
        output = model(inputs)
        pred_prob = tf.nn.softmax(output.logits, axis=-1)
        logger.info("{}".format(pred_prob))
        # np.argmax without an axis returns a flat index; this equals the class
        # index as long as one text produces one row of probabilities
        # (assumed shape (1, num_labels) — TODO confirm).
        logger.info("Predicted Label: {}".format(labels[np.argmax(pred_prob)]))

    def evaluate_model(self, texts=None, labels=None):
        '''
        Load the tokenizer and fine-tuned model, then classify each text and
        log the text, the softmax probabilities and the predicted label.

        Args:
            texts: Iterable of strings to classify (a single string is also
                accepted). Defaults to ``DEFAULT_TEXTS``.
            labels: Sequence of label names indexed by predicted class.
                Defaults to ``DEFAULT_LABELS``.

        Raises:
            CustomException: wraps any exception raised while loading the
                tokenizer/model or while predicting.
        '''
        if texts is None:
            texts = self.DEFAULT_TEXTS
        elif isinstance(texts, str):
            # A bare string would otherwise be iterated character by character.
            texts = [texts]
        if labels is None:
            labels = self.DEFAULT_LABELS

        try:
            tokenizer = load_tokenizer(self.config.tokenizer_data_path)

            model = TFAutoModelForSequenceClassification.from_pretrained(self.config.model_data_path)

            # Tokenizer and model are loaded once and reused for every text.
            for text in texts:
                self._log_prediction(tokenizer, model, text, labels)

        except Exception as e:
            raise CustomException(e,sys)

In [10]:
## 6. Update the pipeline

# Build the evaluation configuration, then run the evaluation end to end.
# Errors are left to propagate: evaluate_model already wraps failures in
# CustomException, so a catch-and-re-raise wrapper here would add nothing.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()
model_evaluation = ModelEvaluation(config=model_evaluation_config)
model_evaluation.evaluate_model()

[2024-07-15 11:41:17,192: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-07-15 11:41:17,192: INFO: common: created directory at: output]
[2024-07-15 11:41:17,192: INFO: common: created directory at: output/model_evaluation]


All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at output/finetuned_model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


[2024-07-15 11:41:19,179: INFO: 1191191246: Text: Pop star to start fashion company]
[2024-07-15 11:41:19,357: INFO: 1191191246: [[0.8871288  0.03015455 0.06201571 0.02070092]]]
[2024-07-15 11:41:19,357: INFO: 1191191246: Labels: entertainment]
[2024-07-15 11:41:19,357: INFO: 1191191246: Text: Revolutionary methods for discovering new materials]
[2024-07-15 11:41:19,458: INFO: 1191191246: [[0.09031415 0.36010823 0.19604594 0.35353172]]]
[2024-07-15 11:41:19,458: INFO: 1191191246: Labels: science/tech]
[2024-07-15 11:41:19,458: INFO: 1191191246: Text: Rebranded bank will target global growth]
[2024-07-15 11:41:19,557: INFO: 1191191246: [[0.00365722 0.01373794 0.97854096 0.00406382]]]
[2024-07-15 11:41:19,557: INFO: 1191191246: Labels: business]
[2024-07-15 11:41:19,557: INFO: 1191191246: Text: A new sustainable vaccination against Ebola developed.]
[2024-07-15 11:41:19,674: INFO: 1191191246: [[0.00118971 0.00141545 0.00420589 0.993189  ]]]
[2024-07-15 11:41:19,674: INFO: 1191191246: Lab