In [1]:
import os

In [2]:
%pwd

'c:\\Users\\Vincent\\Desktop\\Text-Data-Translation-Trials\\notebook'

In [3]:
# Move from the notebook folder up to the project root so that the `src`
# package is importable and relative config paths resolve.
# Guarded on the presence of `src` so that re-running this cell does not keep
# climbing one directory further on every execution.
if not os.path.isdir("src"):
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\Vincent\\Desktop\\Text-Data-Translation-Trials'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable bundle of the locations used by the model-evaluation stage.

    The fields are annotated as ``Path``, but the dataclass itself performs no
    conversion or validation: each field holds whatever value it is given.
    """
    # Working directory of the evaluation stage.
    root_dir: Path
    # Location of the serialized trained model.
    model_data_path: Path
    # Location of the saved test array holding the reference sentence pairs.
    test_array_path: Path
    # Location of the saved, already-encoded model inputs for the test set.
    X_test_array_path: Path
    # Location of the saved English tokenizer used to decode predictions.
    eng_tokenizer_data_path: Path
    # Destination of the CSV file with actual vs. predicted translations.
    results_data_path: Path

In [6]:
from src.constants import *
from src.utils.common import read_yaml, create_directories

In [7]:
## Update the configuration manager in src config

class ConfigurationManager:
    """Loads the project configuration file and builds per-stage config objects."""

    def __init__(self, config_filepath = CONFIG_FILE_PATH):
        """Read the configuration and make sure the output root directory exists.

        Args:
            config_filepath: Location of the YAML configuration file; defaults
                to ``CONFIG_FILE_PATH``.
        """
        self.config = read_yaml(config_filepath)

        output_dirs = [self.config.output_root]
        create_directories(output_dirs)

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the configuration for the model-evaluation stage.

        Creates the stage's ``root_dir`` as a side effect, then copies the
        values of the ``model_evaluation`` section into a
        ``ModelEvaluationConfig`` without transforming them.

        Returns:
            ModelEvaluationConfig: The populated, frozen configuration object.
        """
        section = self.config.model_evaluation

        create_directories([section.root_dir])

        # Field names are identical in the config section and the dataclass,
        # so the values can be copied over by name.
        field_names = (
            "root_dir",
            "model_data_path",
            "test_array_path",
            "X_test_array_path",
            "eng_tokenizer_data_path",
            "results_data_path",
        )
        values = {name: getattr(section, name) for name in field_names}

        return ModelEvaluationConfig(**values)

In [8]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import pandas as pd
from transformers import TFAutoModelForSequenceClassification
import os
import sys
from src.utils.common import load_tokenizer, load_object, get_word
from src.exception import CustomException
from src import logger

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
## 5. Update the components

class ModelEvaluation:
    """Runs the trained translation model on the test set and saves the predictions."""

    def __init__(self, config:ModelEvaluationConfig):
        """Store the evaluation configuration.

        Args:
            config: Locations of the model, test arrays, tokenizer and
                results file.
        """
        self.config = config

    @staticmethod
    def _decode_sequence(token_ids, tokenizer) -> str:
        """Convert one predicted sequence of token ids into a sentence string.

        A token is replaced by an empty string when ``get_word`` returns
        ``None`` for it, or when it maps to the same word as the token
        immediately before it. The pieces are then joined with single spaces,
        so blanked tokens still contribute a separator.

        Args:
            token_ids: Iterable of predicted token ids for one sentence.
            tokenizer: Tokenizer passed through to ``get_word``.

        Returns:
            str: The space-joined decoded sentence.
        """
        words = []
        previous_word = None
        for position, token_id in enumerate(token_ids):
            word = get_word(token_id, tokenizer)
            # The first token has no predecessor, so it can never be a repeat.
            is_repeat = position > 0 and word == previous_word
            words.append('' if word is None or is_repeat else word)
            # Remember the raw lookup result (possibly None) so the previous
            # token does not have to be looked up again.
            previous_word = word
        return ' '.join(words)

    def evaluate_model(self):
        '''
        Evaluate the trained model on the unseen test dataset.

        Loads the model, the test arrays and the English tokenizer from the
        configured locations, predicts a token sequence for every test input,
        decodes the predictions to text and writes them next to the reference
        sentences to ``results_data_path`` as CSV.

        Returns:
            pandas.DataFrame: One row per test sample with the columns
            ``actual_fra``, ``actual_eng`` and ``predicted_eng``.

        Raises:
            CustomException: Wraps any exception raised during evaluation.
        '''
        try:
            logger.info("Loading model, tokenizer and test dataset")
            model = load_object(self.config.model_data_path)

            test = np.load(self.config.test_array_path)

            X_test = np.load(self.config.X_test_array_path)

            eng_tokenizer = load_tokenizer(self.config.eng_tokenizer_data_path)

            logger.info("Testing Translation Prediction")
            # Pick the highest-scoring entry along the last axis of the model
            # output for every position of every sample.
            preds = np.argmax(model.predict(X_test), axis=-1)

            preds_text = [
                self._decode_sequence(sequence, eng_tokenizer)
                for sequence in preds
            ]

            # Column 0 of the test array is used as the English reference and
            # column 1 as the French source.
            pred_fra2eng = pd.DataFrame({'actual_fra' : test[:,1], 'actual_eng' : test[:,0], 'predicted_eng' : preds_text})

            pred_fra2eng.to_csv(self.config.results_data_path,index=False,header=True)

            # display() returns None, so it must not be passed to the logger;
            # show the preview on its own and log a meaningful message.
            # NOTE(review): `display` is only injected by IPython/Jupyter and is
            # not imported here - confirm before moving this class into a module.
            display(pred_fra2eng.head(10))
            logger.info(f"Saved {len(pred_fra2eng)} predictions to: {self.config.results_data_path}")

            logger.info("Model evaluation is completed")

            return pred_fra2eng

        except Exception as e:
            raise CustomException(e,sys) from e

In [10]:
## 6. Update the pipeline

# Errors are left to propagate: evaluate_model already wraps any failure in a
# CustomException, so an extra handler that only re-raises adds nothing.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()
model_evaluation = ModelEvaluation(config=model_evaluation_config)
# Keep the returned frame so later cells can inspect it; binding it to a name
# also keeps the cell from echoing the whole frame as its output.
evaluation_results = model_evaluation.evaluate_model()

[2024-07-16 19:59:28,047: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-07-16 19:59:28,050: INFO: common: created directory at: output]
[2024-07-16 19:59:28,051: INFO: common: created directory at: output/model_evaluation]
[2024-07-16 19:59:28,052: INFO: 3121909552: Loading model, tokenizer and test dataset]
[2024-07-16 19:59:35,707: INFO: 3121909552: Testing Translation Prediction]


Unnamed: 0,actual_fra,actual_eng,predicted_eng
0,je suis contente de lentendre,im glad to hear it,im glad to hear it
1,cest mort,its dead,its
2,ne tarrête pas de grimper,keep climbing,keep climbing
3,je vous respecte,i respect you,ill will you
4,quest ce que tu as fait ensuite,what did you do then,what you
5,je vous donnerai un indice,ill give you a hint,ill give you a hint
6,sontelles déjà arrivées,have they arrived yet,have they just come
7,elle sait tout,she knows everything,she knows everything
8,jespère que cest vrai,i hope its true,i hope its true
9,ils mont dépouillée,they robbed me blind,they robbed me blind


[2024-07-16 20:00:45,228: INFO: 3121909552: None]
