In [1]:
import os
# Switch the working directory to the parent of the current one so the relative
# "artifacts/..." paths used in later cells resolve (presumably the project
# root — confirm). The path is relative, so re-running this cell moves up one
# more level each time: run it exactly once per kernel session.
os.chdir("../")

In [2]:
# MLflow tracking configuration, read by the mlflow client from the environment.
# The URI and username are not secret, so they are set inline.
os.environ["MLFLOW_TRACKING_URI"] = "https://dagshub.com/princevkurien/Bird_species_classfication.mlflow"
os.environ["MLFLOW_TRACKING_USERNAME"] = "princevkurien"

# SECURITY: never store the access token in the notebook. Use the value already
# exported in the environment if there is one; otherwise prompt for it without
# echoing. NOTE: a token was previously hardcoded in this cell, so treat that
# token as leaked and revoke/rotate it.
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    from getpass import getpass

    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass("MLflow tracking password / access token: ")

In [4]:
import tensorflow as tf 

In [6]:
# Load the model file written by the training stage into a module-level `model`.
# NOTE(review): none of the visible later cells read this global (the
# Evaluation class loads its own copy through Evaluation.load_model), so this
# looks like a leftover sanity check — confirm before relying on it.
model = tf.keras.models.load_model("artifacts/training/model.h5")

In [7]:

from pydantic import BaseModel , FilePath , FileUrl , DirectoryPath , AnyUrl
from pydantic.dataclasses import dataclass
from pathlib import Path

class EvaluationConfig(BaseModel):
    """Validated settings for the model-evaluation stage.

    ``FilePath`` / ``DirectoryPath`` are pydantic path types that require the
    path to exist when the model is constructed; ``best_model_path`` is a plain
    ``Path`` and carries no such requirement.
    """
    # Model file that Evaluation copies into `evaluation_model_dir` and evaluates.
    path_of_model: FilePath
    # Directory that receives the timestamped copy of the model.
    evaluation_model_dir: DirectoryPath
    # Directory handed to `flow_from_directory` to build the evaluation data.
    test_data: DirectoryPath
    # Path passed to `get_best_model_s3` (presumably the local download target — confirm).
    best_model_path : Path
    # Full params mapping, logged with `mlflow.log_params`.
    all_params: dict
    # URI passed to `mlflow.set_registry_uri`.
    mlflow_uri: str
    # Directory where the `<timestamp>_score.json` file is written.
    score_dir : DirectoryPath
    # Image size; every element except the last is used as the generator's
    # `target_size` (presumably [height, width, channels] — confirm).
    params_image_size: list
    # Batch size for the evaluation data generator.
    params_batch_size: int

In [None]:
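# Reference copy of the `evaluation` section of configs/config.yaml (not executed):
#   root_dir:  artifacts/evaluation
#   path_of_model: artifacts/training/model.h5
#   evaluation_model_dir : artifacts/evaluation/model_dir 
#   test_data: artifacts/data_ingestion/test
#   best_model_path : artifacts/evaluation/model_dir/best_model.h5
#   mlflow_uri: "https://dagshub.com/princevkurien/Bird_species_classfication.mlflow"
# NOTE: the code below also reads `score_dir` from this section, which is missing
# from the listing above (the run log shows it as artifacts/evaluation/scores).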

In [8]:
from BirdClassifier.constants import *
from BirdClassifier.utils import create_directories, read_yaml

In [9]:
class ConfigurationManager:
    """Reads the YAML config and params files and builds stage configuration objects."""

    def __init__(
        self, config_file_path=CONFIG_FILE_PATH, param_file_path=PARAMS_FILE_PATH
    ) -> None:
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_file_path: Path of the main config YAML file.
            param_file_path: Path of the params YAML file.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(param_file_path)
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_validation_config(self) -> EvaluationConfig:
        """Create the evaluation output directories and return the stage config.

        Returns:
            An ``EvaluationConfig`` populated from the ``evaluation`` section of
            the config file plus ``IMAGE_SIZE`` and ``BATCH_SIZE`` from params.
        """
        # Named `section` rather than `eval` to avoid shadowing the builtin.
        section = self.config.evaluation
        hyper_params = self.params

        # Both directories are created up front, before the config is built.
        output_dirs = [section.evaluation_model_dir, section.score_dir]
        create_directories(output_dirs)

        return EvaluationConfig(
            path_of_model=section.path_of_model,
            test_data=section.test_data,
            score_dir=section.score_dir,
            evaluation_model_dir=section.evaluation_model_dir,
            best_model_path=section.best_model_path,
            mlflow_uri=section.mlflow_uri,
            all_params=hyper_params,
            params_image_size=hyper_params.IMAGE_SIZE,
            params_batch_size=hyper_params.BATCH_SIZE,
        )
        

In [10]:
from BirdClassifier.utils import save_json , s3_download_model , upload_file , get_best_model_s3 , load_json
import tensorflow as tf
from pathlib import Path
import mlflow
import mlflow.keras
from urllib.parse import urlparse
import time
import shutil

In [13]:


class Evaluation:
    """Evaluate the newly trained model, compare it with the stored best model,
    update the stored models accordingly and log the run to MLflow.

    Entry point is :meth:`evaluation`; the other methods are steps it drives.
    """

    def __init__(self, config: EvaluationConfig):
        """Store the config and fix a single timestamp for this evaluation run.

        Args:
            config: Evaluation settings (paths, params, MLflow URI).
        """
        self.config = config
        # One timestamp per instance: it names the model copy, the score file
        # and the uploaded objects, so all three can be matched up afterwards.
        self.timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")

    def _valid_generator(self):
        """Build the evaluation data iterator and store it on ``self.valid_generator``.

        Images are rescaled by 1/255 and resized to ``params_image_size`` minus
        its last element (presumably the channel dimension — confirm).
        """
        image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
            rescale=1./255,
        )
        self.valid_generator = image_generator.flow_from_directory(
            directory=self.config.test_data,
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear",
        )

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        """Load and return the Keras model stored at ``path``."""
        return tf.keras.models.load_model(path)

    @property
    def _get_trained_model_path(self):
        """Copy the trained model into the evaluation directory and return the copy's path.

        NOTE: this is a property with a side effect — every access performs the
        file copy. It is kept as a property so existing attribute-style access
        keeps working; read it once and reuse the result.
        """
        model_copy_name = f'model_{self.timestamp}.h5'
        model_copy_path = os.path.join(self.config.evaluation_model_dir, model_copy_name)
        shutil.copy(src=self.config.path_of_model, dst=model_copy_path)
        return model_copy_path

    def evaluation(self):
        """Run the whole evaluation stage.

        Copies the trained model, fetches the current best model (if any),
        loads both, builds the data iterator and hands over to
        :meth:`log_into_mlflow`, which scores, compares and logs.
        """
        self.train_model_path = self._get_trained_model_path
        # Presumably returns a local path to the best model fetched from S3, or
        # None when there is none — confirm against get_best_model_s3.
        self.best_model_path = get_best_model_s3(self.config.best_model_path)
        self.best_model = None
        if self.best_model_path is not None:
            self.best_model = self.load_model(self.best_model_path)
        self.model = self.load_model(self.train_model_path)
        self._valid_generator()
        self.log_into_mlflow()

    def _evaluate_model(self, model):
        """Evaluate ``model`` on ``self.valid_generator``.

        Returns:
            ``{"loss": ..., "accuracy": ...}`` taken from the first two values
            returned by ``model.evaluate`` (assumes the model was compiled with
            accuracy as its first metric — confirm).
        """
        scores = model.evaluate(self.valid_generator)
        return {"loss": scores[0], "accuracy": scores[1]}

    def update_best_model(self):
        """Upload the trained model, promoting it to ``Best_model`` when it wins.

        Reads ``self.result`` and must therefore run after
        :meth:`log_into_mlflow` has populated it.

        * No stored best model yet, or the trained model is more accurate: the
          trained model is uploaded as ``"Best_model"``; the previous best (when
          there is one) is uploaded under ``"<timestamp>_model"``.
        * Otherwise the trained model is uploaded under ``"<timestamp>_model"``.
        """
        incumbent_score = self.result.s3_best_model_score
        candidate_score = self.result.local_model_score

        # With no incumbent the candidate wins by default. Skipping the upload
        # in that case (the old behaviour) meant a best model could never be
        # bootstrapped, so the comparison below could never become reachable.
        candidate_wins = (
            incumbent_score is None
            or candidate_score.accuracy > incumbent_score.accuracy
        )

        if candidate_wins:
            self.best_model = self.model
            upload_file(file_name=self.train_model_path, object_name="Best_model")
            # Only archive a previous best if one was actually fetched.
            if self.best_model_path is not None:
                upload_file(file_name=self.best_model_path, object_name=f"{self.timestamp}_model")
        else:
            upload_file(file_name=self.train_model_path, object_name=f"{self.timestamp}_model")

    def log_into_mlflow(self):
        """Score the models, persist the scores, update stored models and log to MLflow.

        Inside a single MLflow run this logs all params, evaluates the trained
        model (and the best model when present), writes the scores to
        ``<score_dir>/<timestamp>_score.json``, calls :meth:`update_best_model`,
        logs the trained model's loss/accuracy and finally logs the model
        itself — registering it as ``"VGG16Model"`` unless the tracking store
        is a local file store.
        """
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)

            self.scores_ = dict()
            self.scores_["local_model_score"] = self._evaluate_model(model=self.model)
            self.scores_["s3_best_model_score"] = None
            if self.best_model is not None:
                self.scores_["s3_best_model_score"] = self._evaluate_model(model=self.best_model)

            score_file_name = f'{self.timestamp}_score.json'
            score_file_path = os.path.join(self.config.score_dir, score_file_name)
            save_json(path=Path(score_file_path), data=self.scores_)
            # Reload what was just written: the rest of the class reads the
            # scores with attribute access (result.local_model_score.accuracy),
            # which relies on the object load_json returns.
            self.result = load_json(path=Path(score_file_path))
            self.update_best_model()

            local_model_result = self.result.local_model_score
            mlflow.log_metrics(
                {"loss": local_model_result.loss, "accuracy": local_model_result.accuracy}
            )

            # Model registry does not work with file store
            if tracking_url_type_store != "file":
                # Register the model
                # There are other ways to use the Model Registry, which depends on the use case,
                # please refer to the doc for more information:
                # https://mlflow.org/docs/latest/model-registry.html#api-workflow
                mlflow.keras.log_model(self.model, "model", registered_model_name="VGG16Model")
            else:
                mlflow.keras.log_model(self.model, "model")


In [14]:
# Run the evaluation stage end to end: load configuration, build the
# evaluation config, then evaluate and log the trained model.
# Errors are deliberately left unhandled so the cell fails loudly with the
# original traceback; a handler that only re-raises adds nothing.
config = ConfigurationManager()
val_config = config.get_validation_config()
evaluation = Evaluation(val_config)
evaluation.evaluation()

2022-09-30 15:40:00.652 | INFO     | BirdClassifier.utils.common:read_yaml:30 - yaml file: configs/config.yaml loaded successfully
2022-09-30 15:40:00.653 | INFO     | BirdClassifier.utils.common:read_yaml:30 - yaml file: params.yaml loaded successfully
2022-09-30 15:40:00.654 | INFO     | BirdClassifier.utils.common:create_directories:49 - created directory at: artifacts
2022-09-30 15:40:00.655 | INFO     | BirdClassifier.utils.common:create_directories:49 - created directory at: artifacts/evaluation/model_dir
2022-09-30 15:40:00.655 | INFO     | BirdClassifier.utils.common:create_directories:49 - created directory at: artifacts/evaluation/scores
ERROR:root:list index out of range


Found 2000 images belonging to 400 classes.


2022-09-30 15:40:12.462 | INFO     | BirdClassifier.utils.common:save_json:63 - json file saved at: artifacts/evaluation/scores/2022-09-30-15-40-00_score.json
2022-09-30 15:40:12.463 | INFO     | BirdClassifier.utils.common:load_json:79 - json file loaded succesfully from: artifacts/evaluation/scores/2022-09-30-15-40-00_score.json


INFO:tensorflow:Assets written to: /tmp/tmpnbxi9x3d/model/data/model/assets


INFO:tensorflow:Assets written to: /tmp/tmpnbxi9x3d/model/data/model/assets
Successfully registered model ''.
2022/09/30 15:41:54 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: VGG16Model, version 2
Created version '2' of model 'VGG16Model'.
