In [1]:

import os

In [2]:

%pwd

'd:\\forgery detection model\\image_forgery_detection_model\\research'

In [3]:

os.chdir("../")

In [4]:

%pwd

'd:\\forgery detection model\\image_forgery_detection_model'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    load_data: Path
    save_model: Path
    params: dict


In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    load_data: Path
    save_model: Path
    params: dict


In [7]:

from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml,create_directories

In [8]:

class ConfigurationManager:
    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH):
    
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        
        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        
        config = self.config.model_trainer
        params = self.params.trainer

        create_directories([config.root_dir])
        model_trainer_config = ModelTrainerConfig(
            root_dir=Path(config.root_dir),
            load_data=Path(config.load_data),
            save_model=Path(config.save_model),
            params=params
        )

        return model_trainer_config

In [9]:

import joblib
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import Sequence
from tensorflow.keras.metrics import Precision, Recall
from keras.optimizers import Adam
from cnnClassifier import logger
import gc

In [10]:
class ModelTrainer(Sequence):
    def __init__(self, config: ModelTrainerConfig, **kwargs):
        """
        Constructor: Sets up ModelTrainer with given configuration settings.
        """
        super().__init__(**kwargs)
        logger.info("ModelTrainer instance created")
        self.config = config
        self.params = config.params
        self.model = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        self.train_indexes = None
        self.val_indexes = None

    def __len__(self):
        """
        Calculates total training batches per epoch.
        """
        return int(np.floor(len(self.X_train) / self.params['batch_size']))

    def __getitem__(self, index):
        """
        Fetches a batch of training data.
        """
        indexes = self.train_indexes[index * self.params['batch_size']:(index + 1) * self.params['batch_size']]
        X = [self.X_train[k] for k in indexes]
        y = [self.y_train[k] for k in indexes]
        return np.array(X), np.array(y)

    def on_epoch_end(self):
        """
        Reshuffles the training set indices after each epoch.
        """
        self.train_indexes = np.arange(len(self.X_train))
        np.random.shuffle(self.train_indexes)

    def get_validation_data(self):
        """
        Returns a generator that yields validation data in batches.
        """
        class ValidationGenerator(Sequence):
            def __init__(self, X, y, batch_size, **kwargs):
                super().__init__(**kwargs)
                self.X = X
                self.y = y
                self.batch_size = batch_size
                self.indexes = np.arange(len(self.X))

            def __len__(self):
                return int(np.floor(len(self.X) / self.batch_size))

            def __getitem__(self, index):
                indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
                X = [self.X[k] for k in indexes]
                y = [self.y[k] for k in indexes]
                return np.array(X), np.array(y)

        return ValidationGenerator(self.X_test, self.y_test, self.params['batch_size'])

    def load_data(self):
        """
        Loads dataset arrays from serialized joblib files.
        """
        logger.info(f"Attempting to load dataset from {self.config.load_data}")
        try:
            x_path = self.config.load_data / 'X_90.joblib'
            y_path = self.config.load_data / 'y.joblib'
            X = joblib.load(x_path)
            y = joblib.load(y_path)
            logger.info(f"Dataset loaded successfully: X={X.shape}, y={y.shape}")
            return X, y
        except Exception as e:
            logger.error(f"Failed to load dataset: {e}")
            raise

    def split_data(self, X, y):
        """
        Divides the dataset into training and testing subsets.
        """
        logger.info("Partitioning dataset into training and testing sets")
        try:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=42, stratify=y
            )
            logger.info(f"Train: X={X_train.shape}, y={y_train.shape} | Test: X={X_test.shape}, y={y_test.shape}")
            return X_train, X_test, y_train, y_test
        except Exception as e:
            logger.error(f"Data split failed: {e}")
            raise

    def preprocess_data(self, X_train, X_test, y_train, y_test):
        """
        Prepares data shapes for CNN model and sets training indices.
        """
        logger.info("Starting preprocessing of dataset")
        try:
            X_train = X_train.reshape(X_train.shape[0], 128, 128, 3)
            X_test = X_test.reshape(X_test.shape[0], 128, 128, 3)
            y_train = y_train.reshape(y_train.shape[0], 2)
            y_test = y_test.reshape(y_test.shape[0], 2)
            logger.info(f"Reshaped: X_train={X_train.shape}, X_test={X_test.shape}")
            logger.info(f"Reshaped: y_train={y_train.shape}, y_test={y_test.shape}")

            self.X_train, self.X_test = X_train, X_test
            self.y_train, self.y_test = y_train, y_test
            self.train_indexes = np.arange(len(self.X_train))
            np.random.shuffle(self.train_indexes)
            logger.info("Data preprocessing finished")
        except Exception as e:
            logger.error(f"Preprocessing error: {e}")
            raise

    def build_model(self):
        """
        Constructs the CNN architecture for training.
        """
        logger.info("Constructing CNN model architecture")
        try:
            model = tf.keras.Sequential([
                tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
                tf.keras.layers.MaxPooling2D((2, 2)),
                tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
                tf.keras.layers.MaxPooling2D((2, 2)),
                tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
                tf.keras.layers.Flatten(),
                tf.keras.layers.Dense(64, activation='relu'),
                tf.keras.layers.Dense(2, activation='softmax')
            ])
            logger.info("CNN model successfully built")
            return model
        except Exception as e:
            logger.error(f"Model creation failed: {e}")
            raise

    def train(self):
        """
        Executes the model training process and saves the best weights.
        """
        logger.info("Initiating training sequence")
        try:
            self.model = self.build_model()
            optimizer = self.params['optimizer']
            metrics = [metric.lower() if metric == 'accuracy' else getattr(tf.keras.metrics, metric)() 
                       for metric in self.params['metrics']]
            self.model.compile(optimizer=optimizer, 
                              loss='categorical_crossentropy', 
                              metrics=metrics)
            self.model.summary()

            cal1 = tf.keras.callbacks.EarlyStopping(
                monitor='val_loss', 
                patience=self.params['patience'], 
                restore_best_weights=True
            )
            cal2 = tf.keras.callbacks.ModelCheckpoint(
                str(self.config.save_model / 'model.keras'), 
                monitor='val_loss', 
                save_best_only=True
            )
            
            history = self.model.fit(
                self,
                epochs=self.params['epochs'],
                validation_data=self.get_validation_data(),
                callbacks=[cal1, cal2],
                verbose=1
            )
            logger.info("Model training successfully completed")
            return history.history
        except Exception as e:
            logger.error(f"Training process failed: {e}")
            raise

    def run_training_pipeline(self):
        """
        Executes the complete training workflow from loading to training.
        """
        logger.info("Launching full training workflow")
        try:
            X, y = self.load_data()
            X_train, X_test, y_train, y_test = self.split_data(X, y)
            del X, y
            gc.collect()
            logger.info("Intermediate dataset cleared from memory")

            self.preprocess_data(X_train, X_test, y_train, y_test)
            history = self.train()
            logger.info("Training workflow executed successfully")
            return history
        except Exception as e:
            logger.error(f"Pipeline execution failed: {e}")
            raise


In [11]:
try:
    config_manager = ConfigurationManager()
    model_trainer_config = config_manager.get_model_trainer_config()
    trainer = ModelTrainer(config=model_trainer_config)
    history = trainer.run_training_pipeline()
except Exception as e:
        logger.error(f"Pipeline failed: {e}")
        raise

[2025-08-09 13:19:55,571: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-08-09 13:19:55,581: INFO: common: yaml file: params.yaml loaded successfully]
[2025-08-09 13:19:55,585: INFO: common: created directory at: artifacts]
[2025-08-09 13:19:55,585: INFO: common: created directory at: artifacts/model_trainer]
[2025-08-09 13:19:55,585: INFO: 1075094852: ModelTrainer instance created]
[2025-08-09 13:19:55,590: INFO: 1075094852: Launching full training workflow]
[2025-08-09 13:19:55,590: INFO: 1075094852: Attempting to load dataset from artifacts\data_preprocessing\pickle]
[2025-08-09 13:19:58,498: INFO: 1075094852: Dataset loaded successfully: X=(9501, 49152), y=(9501, 2)]
[2025-08-09 13:19:58,501: INFO: 1075094852: Partitioning dataset into training and testing sets]
[2025-08-09 13:19:59,570: INFO: 1075094852: Train: X=(7600, 49152), y=(7600, 2) | Test: X=(1901, 49152), y=(1901, 2)]
[2025-08-09 13:19:59,938: INFO: 1075094852: Intermediate dataset cleared from mem

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/15
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 179ms/step - accuracy: 0.8364 - f1_score: 0.7519 - loss: 0.3227 - precision: 0.8364 - recall: 0.8364 - val_accuracy: 0.8681 - val_f1_score: 0.8188 - val_loss: 0.3397 - val_precision: 0.8681 - val_recall: 0.8681
Epoch 2/15
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 176ms/step - accuracy: 0.8646 - f1_score: 0.8188 - loss: 0.2830 - precision: 0.8646 - recall: 0.8646 - val_accuracy: 0.8644 - val_f1_score: 0.8285 - val_loss: 0.2877 - val_precision: 0.8644 - val_recall: 0.8644
Epoch 3/15
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 175ms/step - accuracy: 0.8689 - f1_score: 0.8238 - loss: 0.2768 - precision: 0.8689 - recall: 0.8689 - val_accuracy: 0.8745 - val_f1_score: 0.8346 - val_loss: 0.2689 - val_precision: 0.8745 - val_recall: 0.8745
Epoch 4/15
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 174ms/step - accuracy: 0.8820 - f1_score: 0.8407 - loss