In [1]:
import os

In [2]:
%pwd

'd:\\forgery detection model\\image_forgery_detection_model\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'd:\\forgery detection model\\image_forgery_detection_model'

In [5]:
import dagshub
dagshub.init(repo_owner='sharonjolly', repo_name='image_forgery_detection_model', mlflow=True)

# import mlflow
# with mlflow.start_run():
#   mlflow.log_param('parameter name', 'value')
#   mlflow.log_metric('metric name', 1)

In [19]:

from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class EvaluationConfig:
    model_path: Path
    model: str
    load_data: Path
    mlflow_uri: str
    params: dict

In [20]:
import dagshub
dagshub.init(repo_owner='sharonjolly',repo_name='image_forgery_detection_model',mlflow=True)

[2025-08-11 10:28:54,638: INFO: _client: HTTP Request: GET https://dagshub.com/api/v1/repos/sharonjolly/image_forgery_detection_model "HTTP/1.1 200 OK"]


[2025-08-11 10:28:54,650: INFO: helpers: Initialized MLflow to track repo "sharonjolly/image_forgery_detection_model"]


[2025-08-11 10:28:54,655: INFO: helpers: Repository sharonjolly/image_forgery_detection_model initialized!]


In [21]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories, save_json

In [22]:
class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
    
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        
        
    def get_model_evaluation_config(self) -> EvaluationConfig:
        config = self.config.model_evaluation
        params = self.params.trainer  # <-- fixed

        model_evaluation_config = EvaluationConfig(
            model_path=config.model_path,
            model=config.model,
            load_data=config.load_data,
            mlflow_uri=config.mlflow_uri,
            params=params
        )

        return model_evaluation_config


In [33]:

from cnnClassifier import logger
import joblib
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import Sequence
import mlflow
import mlflow.keras
from urllib.parse import urlparse
from pathlib import Path
from cnnClassifier.utils.common import save_json
import os
import tempfile
from mlflow.exceptions import RestException
from mlflow.tracking import MlflowClient

In [36]:
class ModelEvaluation:
    def __init__(self, config: EvaluationConfig):
        self.config = config
        self.model = None
        self.X_test = None
        self.y_test = None
        self.score = None

    def load_data(self):
        """Loads test data from joblib files specified in config."""
        logger.info(f"Fetching test dataset from: {self.config.load_data}")
        try:
            x_path = Path(self.config.load_data) / 'X_90.joblib'
            y_path = Path(self.config.load_data) / 'y.joblib'
            X = joblib.load(x_path)
            y = joblib.load(y_path)
            logger.info(f"Data successfully loaded — X: {X.shape}, y: {y.shape}")
            return X, y
        except Exception as e:
            logger.error(f"Unable to load dataset: {e}")
            raise

    def split_data(self, X, y):
        """Splits data into training and testing sets."""
        logger.info("Separating dataset into training and testing subsets")
        try:
            from sklearn.model_selection import train_test_split
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=42, stratify=y
            )
            logger.info(f"Test subset prepared — X_test: {X_test.shape}, y_test: {y_test.shape}")
            return X_test, y_test
        except Exception as e:
            logger.error(f"Failed during dataset splitting: {e}")
            raise

    def preprocess_data(self, X_test, y_test):
        """Reshapes test data for CNN input."""
        logger.info("Reshaping test data for model compatibility")
        try:
            X_test = X_test.reshape(X_test.shape[0], 128, 128, 3)
            y_test = y_test.reshape(y_test.shape[0], 2)
            logger.info(f"Data reshaped — X_test: {X_test.shape}, y_test: {y_test.shape}")
            self.X_test, self.y_test = X_test, y_test
        except Exception as e:
            logger.error(f"Error while reshaping data: {e}")
            raise

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        """Loads the trained model."""
        logger.info(f"Retrieving trained model from path: {path}")
        try:
            return tf.keras.models.load_model(path)
        except Exception as e:
            logger.error(f"Failed to load model: {e}")
            raise

    def get_test_generator(self):
        """Returns a Sequence generator for test data."""
        class TestGenerator(Sequence):
            def __init__(self, X, y, batch_size, **kwargs):
                super().__init__(**kwargs)
                self.X = X
                self.y = y
                self.batch_size = batch_size
                self.indexes = np.arange(len(self.X))

            def __len__(self):
                return int(np.floor(len(self.X) / self.batch_size))

            def __getitem__(self, index):
                indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
                X = [self.X[k] for k in indexes]
                y = [self.y[k] for k in indexes]
                return np.array(X), np.array(y)

        return TestGenerator(self.X_test, self.y_test, self.config.params['batch_size'])

    def evaluation(self):
        """Evaluates the model and saves scores."""
        logger.info("Starting model evaluation process")
        try:
            X, y = self.load_data()
            X_test, y_test = self.split_data(X, y)
            self.preprocess_data(X_test, y_test)

            model_path = Path(self.config.model_path) / self.config.model
            self.model = self.load_model(model_path)

            test_generator = self.get_test_generator()

            logger.info("Evaluating model performance on test dataset")
            self.score = self.model.evaluate(
                test_generator,
                batch_size=self.config.params['batch_size'],
                return_dict=True
            )
            logger.info(f"Evaluation completed — Scores: {self.score}")

            self.save_score()
        except Exception as e:
            logger.error(f"Evaluation failed: {e}")
            raise

    def save_score(self):
        """Saves evaluation scores to a JSON file."""
        logger.info("Saving evaluation results to JSON file")
        try:
            f1_score = self.score.get('f1_score', 0.0)
            if isinstance(f1_score, tf.Tensor):
                f1_score = np.mean(f1_score.numpy())
            elif isinstance(f1_score, np.ndarray):
                f1_score = np.mean(f1_score)

            scores = {
                "loss": float(self.score.get('loss', 0.0)),
                "accuracy": float(self.score.get('accuracy', 0.0)),
                "precision": float(self.score.get('precision', 0.0)),
                "recall": float(self.score.get('recall', 0.0)),
                "f1_score": float(f1_score)
            }
            save_json(path=Path("scores.json"), data=scores)
            logger.info("Results successfully saved to scores.json")
        except Exception as e:
            logger.error(f"Failed to save results: {e}")
            raise

    def log_into_mlflow(self):
        """Logs parameters, metrics, and model to MLflow."""
        logger.info("Initiating MLflow logging sequence")
        try:
            mlflow.set_tracking_uri(self.config.mlflow_uri)
            tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
            logger.info(f"MLflow tracking URI set to: {self.config.mlflow_uri}")

            with mlflow.start_run():
                mlflow.log_params(self.config.params)

                f1_score = self.score.get('f1_score', 0.0)
                if isinstance(f1_score, tf.Tensor):
                    f1_score = np.mean(f1_score.numpy())
                elif isinstance(f1_score, np.ndarray):
                    f1_score = np.mean(f1_score)

                mlflow.log_metrics({
                    "loss": float(self.score.get('loss', 0.0)),
                    "accuracy": float(self.score.get('accuracy', 0.0)),
                    "precision": float(self.score.get('precision', 0.0)),
                    "recall": float(self.score.get('recall', 0.0)),
                    "f1_score": float(f1_score)
                })

                with tempfile.TemporaryDirectory() as tmpdirname:
                    temp_model_path = os.path.join(tmpdirname, "model.keras")
                    logger.info(f"Saving model temporarily at: {temp_model_path}")
                    self.model.save(temp_model_path)
                    if not os.path.exists(temp_model_path):
                        raise FileNotFoundError(f"Model file not found at {temp_model_path}")
                    logger.info(f"Model saved — size: {os.path.getsize(temp_model_path)} bytes")

                    logger.info("Uploading model artifact to MLflow")
                    mlflow.log_artifact(temp_model_path, artifact_path="model")
                    logger.info("Model artifact uploaded successfully")

                if tracking_url_type_store != "file":
                    logger.info("Attempting model registration in MLflow registry")
                    client = MlflowClient()
                    run_id = mlflow.active_run().info.run_id

                    try:
                        client.get_registered_model("image_forgery_detection_model")
                    except RestException:
                        logger.info("Model not found in registry — creating new entry")
                        client.create_registered_model("image_forgery_detection_model")

                    try:
                        source = mlflow.get_artifact_uri("model")
                        logger.info(f"Model source URI: {source}")
                        result = client.create_model_version(
                            name="image_forgery_detection_model",
                            source=source,
                            run_id=run_id
                        )
                        logger.info(f"Model registered successfully — version {result.version}")
                    except RestException as e:
                        logger.error(f"MLflow registry error: {e.__class__.__name__} - {str(e)}")
                        if hasattr(e, "message"):
                            logger.error(f"Message: {e.message}")
                        if hasattr(e, "error_code"):
                            logger.error(f"Error code: {e.error_code}")
                        raise
                    except Exception as e:
                        logger.error(f"Unexpected model registry error: {e}")
                        raise

        except Exception as e:
            logger.error(f"MLflow logging failed: {e}")
            raise


In [37]:

try:
        config = ConfigurationManager()
        eval_config = config.get_model_evaluation_config()
        evaluation = ModelEvaluation(eval_config)
        evaluation.evaluation()
        evaluation.log_into_mlflow()
except Exception as e:
        logger.error(f"Pipeline failed: {e}")
        raise

[2025-08-11 14:03:14,197: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-08-11 14:03:14,206: INFO: common: yaml file: params.yaml loaded successfully]
[2025-08-11 14:03:14,211: INFO: 986613851: Starting model evaluation process]
[2025-08-11 14:03:14,212: INFO: 986613851: Fetching test dataset from: artifacts/data_preprocessing/pickle]


[2025-08-11 14:03:16,920: INFO: 986613851: Data successfully loaded — X: (9501, 49152), y: (9501, 2)]
[2025-08-11 14:03:16,922: INFO: 986613851: Separating dataset into training and testing subsets]
[2025-08-11 14:03:17,623: INFO: 986613851: Test subset prepared — X_test: (1901, 49152), y_test: (1901, 2)]
[2025-08-11 14:03:17,793: INFO: 986613851: Reshaping test data for model compatibility]
[2025-08-11 14:03:17,795: INFO: 986613851: Data reshaped — X_test: (1901, 128, 128, 3), y_test: (1901, 2)]
[2025-08-11 14:03:17,796: INFO: 986613851: Retrieving trained model from path: artifacts\model_trainer\model\model.keras]
[2025-08-11 14:03:18,229: INFO: 986613851: Evaluating model performance on test dataset]
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 51ms/step - accuracy: 0.9100 - f1_score: 0.8701 - loss: 0.2331 - precision: 0.9100 - recall: 0.9100
[2025-08-11 14:03:22,917: INFO: 986613851: Evaluation completed — Scores: {'accuracy': 0.9099576473236084, 'f1_score': <tf.

2025/08/11 14:04:05 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: image_forgery_detection_model, version 2


[2025-08-11 14:04:05,799: INFO: 986613851: Model registered successfully — version 2]
