In [1]:
import os
# Move the working directory one level up so that the relative paths used
# by later cells resolve from there.
# NOTE(review): this is a *relative* chdir, so re-running this cell without
# restarting the kernel moves up one more level each time.
os.chdir('../')
!pwd

/home/lok/Documents/ML_Projects/rice_leaf_disease


In [2]:
from pathlib import Path
from dataclasses import dataclass

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings for the training stage.

    Instances are assembled by ``ConfigurationManager.get_training_config``
    from the ``training`` section of the config file and the ``model_params``
    section of the params file.
    """
    # Working directory of the training stage.
    root_dir: Path
    # Destination passed to ``model.save`` once training finishes.
    trained_model_path: Path
    # Dataset folder: ``<data_ingestion.unzip_dir>/rice_leaf_diseases``.
    training_data: Path
    # Number of epochs handed to ``model.fit``.
    params_epochs: int
    # ``model_params.input_shape`` -- presumably [height, width, channels];
    # TODO confirm against params.yaml.
    params_input_shape: list
    # ``model_params.augmentation`` flag.
    params_augmentation: bool
    # ``model_params.optimizer`` -- optimizer identifier given to ``compile``.
    params_optimizer: str
    # ``model_params.loss_function`` -- loss identifier from the params file.
    params_loss: str
    # ``model_params.metrics`` -- metric identifiers given to ``compile``.
    params_metrics: list
    
@dataclass(frozen=True)
class PrepareCallbackConfig:
    """Immutable settings for the training callbacks.

    Instances are assembled by
    ``ConfigurationManager.get_prepare_callback_config`` from the
    ``prepare_callbacks`` section of the config file.
    """
    # Working directory of the callback-preparation stage.
    root_dir: Path
    # Parent folder under which each run gets its own TensorBoard log folder.
    tensorboard_root_log_dir: Path
    # File path handed to the ModelCheckpoint callback.
    checkpoint_model_filepath: Path
    

In [3]:
from rice_leaf_disease.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from rice_leaf_disease.utils.common import create_directories, read_yaml


class ConfigurationManager:
    """Turn the project's YAML files into typed, per-stage config objects.

    The config and params files are read once in ``__init__``; each
    ``get_*_config`` method then creates the directories its stage needs and
    returns the matching frozen dataclass.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the pipeline config YAML.
            params_filepath: Path of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_prepare_callback_config(self) -> PrepareCallbackConfig:
        """Build the callback settings from the ``prepare_callbacks`` section.

        Creates the checkpoint file's parent directory and the TensorBoard
        root log directory as a side effect.

        Returns:
            PrepareCallbackConfig: paths used by the training callbacks.
        """
        config = self.config.prepare_callbacks
        checkpoint_filepath = Path(config.checkpoint_model_filepath)
        tensorboard_dir = Path(config.tensorboard_root_log_dir)

        # The checkpoint setting is a *file* path; only its folder is created.
        create_directories([checkpoint_filepath.parent, tensorboard_dir])

        return PrepareCallbackConfig(
            root_dir=Path(config.root_dir),
            tensorboard_root_log_dir=tensorboard_dir,
            checkpoint_model_filepath=checkpoint_filepath,
        )

    def get_training_config(self) -> TrainingConfig:
        """Build the training settings from the config and params files.

        Creates the training root directory as a side effect.

        Returns:
            TrainingConfig: paths and hyper-parameters for the training stage.
        """
        training = self.config.training
        model_params = self.params.model_params

        # The dataset lives in a fixed sub-folder of the unzip directory.
        training_data = Path(self.config.data_ingestion.unzip_dir) / 'rice_leaf_diseases'

        create_directories([Path(training.root_dir)])

        return TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            training_data=training_data,
            params_epochs=model_params.epochs,
            params_input_shape=model_params.input_shape,
            params_augmentation=model_params.augmentation,
            params_optimizer=model_params.optimizer,
            params_loss=model_params.loss_function,
            params_metrics=model_params.metrics,
        )
        

In [4]:
import time
import tensorflow as tf

class PrepareCallback:
    """Create the Keras callbacks described by a ``PrepareCallbackConfig``."""

    def __init__(self, config: PrepareCallbackConfig):
        self.config = config

    # The two accessors below are declared with ``@property``: they are read
    # like attributes (no call parentheses), and each read constructs a fresh
    # callback object.

    @property
    def _create_tb_callbacks(self):
        """
        TensorBoard callback that logs into a new, timestamped sub-folder of
        the configured root log directory.
        """
        timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        run_dir_name = f"tb_logs_at_{timestamp}"
        run_log_dir = os.path.join(self.config.tensorboard_root_log_dir, run_dir_name)
        return tf.keras.callbacks.TensorBoard(log_dir=run_log_dir)

    @property
    def _create_checkpoint_callbacks(self):
        """
        ModelCheckpoint callback writing to the configured file path; with
        ``save_best_only`` the file is only overwritten when the monitored
        quantity improves.
        """
        checkpoint_options = {
            "filepath": self.config.checkpoint_model_filepath,
            "save_best_only": True,
        }
        return tf.keras.callbacks.ModelCheckpoint(**checkpoint_options)

    def get_tb_checkpoint_callback(self):
        """Return ``[tensorboard_callback, checkpoint_callback]``."""
        tensorboard_cb = self._create_tb_callbacks
        checkpoint_cb = self._create_checkpoint_callbacks
        return [tensorboard_cb, checkpoint_cb]

2024-12-28 00:39:13.756276: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


[2024-12-28 00:39:19,274: INFO : utils : NumExpr defaulting to 8 threads.]


In [10]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, Sequential

import numpy as np
import cv2
from PIL import Image

In [11]:
class Training:
    """Train the rice-leaf-disease classifier described by a ``TrainingConfig``.

    Typical use is ``Training(config).train(callbacks)``. Calling
    ``get_model()`` first makes ``train`` continue from the saved base model
    instead of building the built-in fallback CNN.
    """

    # Used by ``get_model`` when no explicit path is given.
    DEFAULT_BASE_MODEL_PATH = Path('artifacts/prepare_model/model.keras')

    # (name used in log output, dataset sub-folder, integer class label)
    _CLASSES = (
        ("Bacteria", "Bacterial leaf blight", 0),
        ("Brown spot", "Brown spot", 1),
        ("Smut", "Leaf smut", 2),
    )

    def __init__(self, config: TrainingConfig):
        self.config = config
        self.model = None  # Set by get_model() or lazily by train()

    def _compile(self, model):
        """Compile ``model`` with the configured optimizer and metrics.

        The loss is fixed to sparse categorical cross-entropy because
        ``get_and_prepare_data`` produces integer class labels.
        """
        model.compile(
            optimizer=self.config.params_optimizer,
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            # Plain list: the config value may be a list-like wrapper type.
            metrics=list(self.config.params_metrics),
        )

    def _build_default_model(self):
        """Build and compile the fallback CNN sized from ``params_input_shape``."""
        input_shape = tuple(int(dim) for dim in self.config.params_input_shape)
        model = Sequential([
            # An explicit Input layer replaces ``input_shape=`` on the first
            # Conv2D, which newer Keras versions warn about.
            layers.Input(shape=input_shape),
            layers.Conv2D(16, 3, padding='same', activation='relu'),
            layers.MaxPooling2D(),
            layers.Dropout(0.25),  # Dropout after first Conv2D layer

            layers.Conv2D(32, 3, padding='same', activation='relu'),
            layers.MaxPooling2D(),
            layers.Dropout(0.25),  # Dropout after second Conv2D layer

            layers.Conv2D(64, 3, padding='same', activation='relu'),
            layers.MaxPooling2D(),

            layers.Flatten(),
            layers.Dense(128, activation='relu'),

            # One output unit per class.
            layers.Dense(len(self._CLASSES), activation='softmax'),
        ])
        self._compile(model)
        return model

    def get_model(self, model_path=None):
        """
        Load a saved model into ``self.model`` and compile it if necessary.

        Args:
            model_path: Optional path of the saved model; defaults to
                ``DEFAULT_BASE_MODEL_PATH``.

        Raises:
            FileNotFoundError: if the model file does not exist.
        """
        if model_path is None:
            model_path = self.DEFAULT_BASE_MODEL_PATH
        model_path = Path(model_path)
        if not model_path.exists():
            raise FileNotFoundError(f"Model file not found at {model_path}")

        self.model = tf.keras.models.load_model(model_path)

        # getattr: depending on the Keras version an uncompiled model may not
        # expose an ``optimizer`` attribute at all.
        if not getattr(self.model, "optimizer", None):
            self._compile(self.model)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Persist ``model`` to ``path`` via ``model.save``."""
        model.save(path)

    def get_and_prepare_data(self):
        """
        Load the images, resize them to the configured input size and split
        them into scaled train / validation arrays.

        Returns:
            tuple: ``(X_train_scaled, X_test_scaled, y_train, y_test)`` with
            pixel values divided by 255 and integer class labels.

        Raises:
            ValueError: if no image could be loaded.
        """
        data_path = Path(self.config.training_data)
        # The configured shape starts with (height, width); cv2.resize expects
        # its size argument as (width, height).
        height, width = (int(dim) for dim in self.config.params_input_shape[:2])

        image_paths = {}
        for display_name, dir_name, _ in self._CLASSES:
            # Sorted: glob order is filesystem dependent, which would make the
            # seeded split below differ between machines.
            image_paths[dir_name] = sorted(data_path.glob(f"{dir_name}/*"))
            print(f"{display_name} samples: {len(image_paths[dir_name])}")

        X, y = [], []

        for _, dir_name, label in self._CLASSES:
            for image in image_paths[dir_name]:
                # cv2.imread returns None for unreadable files and yields
                # BGR channel order; inference code must load images the
                # same way.
                img = cv2.imread(str(image))
                if img is None:
                    print(f"Failed to load image: {image}")
                    continue
                X.append(cv2.resize(img, (width, height)))
                y.append(label)

        print(f"Loaded {len(X)} images.")

        X = np.array(X)
        y = np.array(y)

        # Ensure that the arrays are not empty
        if X.size == 0 or y.size == 0:
            raise ValueError("No data found to train the model.")

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
        X_train_scaled = X_train / 255.0
        X_test_scaled = X_test / 255.0
        return X_train_scaled, X_test_scaled, y_train, y_test

    def train(self, callback_list: list):
        """
        Fit the model and save it to ``config.trained_model_path``.

        A model loaded beforehand with ``get_model`` is trained as-is (its
        input shape must then match ``params_input_shape``); otherwise the
        fallback CNN is built. Calling ``train`` again continues training
        the same model.

        Args:
            callback_list: Keras callbacks forwarded to ``model.fit``.
        """
        # Build the fallback network only when nothing was loaded, so a model
        # obtained through get_model() is not discarded.
        if self.model is None:
            self.model = self._build_default_model()

        # Get the data
        X_train_scaled, X_test_scaled, y_train, y_test = self.get_and_prepare_data()

        # Train the model using fit
        self.model.fit(
            X_train_scaled, y_train,
            validation_data=(X_test_scaled, y_test),
            epochs=self.config.params_epochs,
            callbacks=callback_list
        )

        # Save the trained model
        self.save_model(self.config.trained_model_path, self.model)

In [12]:
try:
    # Read the YAML settings once; both stage configs come from this manager.
    config = ConfigurationManager()

    # Callbacks: TensorBoard logging plus model checkpointing.
    prepare_callbacks_config = config.get_prepare_callback_config()
    prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)
    callback_list = prepare_callbacks.get_tb_checkpoint_callback()

    # Training stage: load the data, fit the model, save the result.
    training_config = config.get_training_config()
    training = Training(config=training_config)

    training.train(callback_list)

except Exception as e:
    print(f"Error during training: {e}")
    # Bare ``raise`` re-raises the active exception without adding this
    # line to its traceback.
    raise

[2024-12-28 00:44:33,146: INFO : common : YAML file: config/config.yaml loaded successfully!]
[2024-12-28 00:44:33,159: INFO : common : YAML file: params.yaml loaded successfully!]
[2024-12-28 00:44:33,162: INFO : common : created directory at: artifacts]
[2024-12-28 00:44:33,165: INFO : common : created directory at: artifacts/prepare_callbacks/checkpoint_dir]
[2024-12-28 00:44:33,167: INFO : common : created directory at: artifacts/prepare_callbacks/tensorboard_log_dir]
[2024-12-28 00:44:33,169: INFO : common : created directory at: artifacts/training]
Bacteria samples: 40
Brown spot samples: 40
Smut samples: 40


  super().__init__(


Loaded 120 images.
Epoch 1/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1s/step - accuracy: 0.4015 - loss: 4.0206 - val_accuracy: 0.3667 - val_loss: 1.1342
Epoch 2/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - accuracy: 0.3330 - loss: 2.0592 - val_accuracy: 0.3667 - val_loss: 1.0915
Epoch 3/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - accuracy: 0.3608 - loss: 1.1083 - val_accuracy: 0.2667 - val_loss: 1.1018
Epoch 4/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - accuracy: 0.3575 - loss: 1.1320 - val_accuracy: 0.2667 - val_loss: 1.1007
Epoch 5/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.3731 - loss: 1.1022 - val_accuracy: 0.2667 - val_loss: 1.0992
Epoch 6/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2s/step - accuracy: 0.3653 - loss: 1.0965 - val_accuracy: 0.2667 - val_loss: 1.0993
Epoch 7/30
[1m3/3[0m [32m━━━━━━━━━━━