In [1]:
import os

In [2]:
%pwd

'c:\\Users\\admin\\Desktop\\JI\\Project\\capstone\\milk_adulteration_detection\\research'

In [3]:
os.chdir('../')

In [4]:
%pwd

'c:\\Users\\admin\\Desktop\\JI\\Project\\capstone\\milk_adulteration_detection'

In [18]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingConfig:
    root_dir: Path
    trained_model_path: Path
    updated_base_model_path: Path
    training_data: Path
    params_threshold: float
    


@dataclass(frozen=True)
class PrepareCallbacksConfig:
    root_dir: Path
    tensorboard_root_log_dir: Path
    checkpoint_model_filepath: Path

In [6]:
from detection.constants import *
from detection.utils.common import read_yaml, create_directories

In [22]:
class ConfigurationManager:
    def __init__(
        self, 
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])


    
    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
        config = self.config.prepare_callbacks
        model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)
        create_directories([
            Path(model_ckpt_dir),
            Path(config.tensorboard_root_log_dir)
        ])

        prepare_callback_config = PrepareCallbacksConfig(
            root_dir=Path(config.root_dir),
            tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(config.checkpoint_model_filepath)
        )

        return prepare_callback_config
    




    def get_training_config(self) -> TrainingConfig:
        training = self.config.training
        prepare_base_model = self.config.prepare_base_model
        params = self.params
        training_data = os.path.join(self.config.data_ingestion.root_dir, "glucose")
        create_directories([
            Path(training.root_dir)
        ])

        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            updated_base_model_path=Path(prepare_base_model.updated_base_model_path),
            training_data=Path(training_data),
            params_threshold=params.THRESHOLD
            
        )

        return training_config

In [11]:
import time
from tensorboardX import SummaryWriter
import shutil
import joblib
from sklearn.metrics import accuracy_score

In [12]:
class PrepareCallback:
    def __init__(self, config: PrepareCallbacksConfig):
        self.config = config

    @property
    def _create_tb_callbacks(self):
        timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        tb_running_log_dir = os.path.join(self.config.tensorboard_root_log_dir, f"tb_logs_at_{timestamp}")
        
        # Remove the directory if it already exists
        if os.path.exists(tb_running_log_dir):
            shutil.rmtree(tb_running_log_dir)

        # Create a SummaryWriter for TensorBoard logging
        self.writer = SummaryWriter(log_dir=tb_running_log_dir)

        def tb_callback(env):
            for i in range(len(env.models)):
                self.writer.add_scalar(f'error_{i}', env.evaluation_result_list[i][1], env.iteration)
                self.writer.add_scalar(f'logloss_{i}', env.evaluation_result_list[i][2], env.iteration)

        return tb_callback

    @property
    def _create_ckpt_callbacks(self):
        checkpoint_path = self.config.checkpoint_model_filepath
        
        def ckpt_callback(env):
            joblib.dump(env.model, checkpoint_path)

        return ckpt_callback

    def get_tb_ckpt_callbacks(self):
        return [
            self._create_tb_callbacks,
            self._create_ckpt_callbacks
        ]

In [40]:
import os
import urllib.request as request
import xgboost as xgb
import time
import pandas as pd
import cv2
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [43]:
class Training:
    def __init__(self, config: TrainingConfig):
        self.config = config
    
    def get_base_model(self):
        self.model = joblib.load(
            self.config.updated_base_model_path
        )
    
    #function to load all images as data

    def load_data(self, file_path, threshold  ):

        file_path = self.config.training_data
        threshold = self.config.params_threshold

        folders = ['0', '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '1', '1.5', '2', '2.5','3', '4', '5', '6', '7', '8']

        #Define dataframe in which the image is saved

        # Define column names
        columns = ['Red Channel', 'Green Channel', 'Blue Channel', 'Hue', 'Saturation', 'Value', 'Lightness', 'channel a',  'channel b', 'Target']

        # Create an empty DataFrame with columns
        df = pd.DataFrame(columns=columns)

        # Iterate through folders
        for folder in folders:
            folder_path = str(file_path) + folder

            # Iterate through files in the current folder
            for filename in os.listdir(folder_path):
                # Check if the file has an image extension
                if filename.lower().endswith(('.jpg', '.png', '.jpeg')):
                    # Construct the full path to the image
                    image_path = os.path.join(folder_path, filename)

                    # Read the image (using OpenCV)
                    img = cv2.imread(image_path)

                    # Split the image into channels
                    r, g, b = cv2.split(img)
                    # Calculate the average of each channel
                    average_r = r.mean()
                    average_g = g.mean()
                    average_b = b.mean()


                    # Convert RGB image to HSV
                    hsv_img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)

                    # Split the HSV image into channels
                    h, s, v = cv2.split(hsv_img)
                    # Calculate the average of each channel
                    average_h = h.mean()
                    average_s = s.mean()
                    average_v = v.mean()

                    # Convert RGB image to LAB
                    img_lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)

                    # Split LAB image into components
                    L, a, b = cv2.split(img_lab)
                    # Calculate the average of each channel
                    average_L = L.mean()
                    average_a = a.mean()
                    average_b = b.mean()

                    if float(folder) < threshold:
                        # Append rows one by one
                        new_row = {'Red Channel': average_r, 'Green Channel': average_g, 'Blue Channel': average_b, 'Hue': average_h, 'Saturation': average_s, 'Value': average_v, 'Lightness': average_L, 'channel a': average_a,  'channel b': average_b, 'Target': 0}
                        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
                    else:
                        new_row = {'Red Channel': average_r, 'Green Channel': average_g, 'Blue Channel': average_b, 'Hue': average_h, 'Saturation': average_s, 'Value': average_v, 'Lightness': average_L, 'channel a': average_a,  'channel b': average_b, 'Target': 1}
                        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

        df['Target'] = df['Target'].astype(int)
        return df

    
    @staticmethod
    def save_model(self, path: Path, model: xgb):
        joblib.dump( model, path)

    def train(self, df ):

        df_shuffled = df.sample(frac=1, random_state=42)

        # 'Target' column is the target variable
        y = df_shuffled['Target']  # Target variable
        X = df_shuffled.drop(columns=['Target'])  # Features

        # Split the shuffled data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        self.model.fit(X_train, y_train)
        # Predict on the test data
        y_pred = self.model.predict(X_test)

        # Calculate accuracy
        accuracy = accuracy_score(y_test, y_pred)

        print('Accuracy is ', accuracy)




        self.save_model( self,
            path=self.config.trained_model_path,
            model=self.model
        )



In [44]:
try:
    config = ConfigurationManager()
    prepare_callbacks_config = config.get_prepare_callback_config()
    prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)
    trainingConfig = config.get_training_config()

    
    file_path = trainingConfig.training_data
    threshold = trainingConfig.params_threshold

    training_config = config.get_training_config()
    training = Training(config=training_config)
    training.get_base_model()
    df = training.load_data(file_path , threshold)
    training.train(df)
    
except Exception as e:
    raise e

[2024-06-09 02:42:35,469: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-06-09 02:42:35,485: INFO: common: yaml file: params.yaml loaded successfully]
[2024-06-09 02:42:35,502: INFO: common: created directory at: artifacts]
[2024-06-09 02:42:35,507: INFO: common: created directory at: artifacts\prepare_callbacks\checkpoint_dir]
[2024-06-09 02:42:35,511: INFO: common: created directory at: artifacts\prepare_callbacks\xgbboard_log_dir]
[2024-06-09 02:42:35,514: INFO: common: created directory at: artifacts\training]


[2024-06-09 02:42:35,517: INFO: common: created directory at: artifacts\training]


  df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)


Accuracy is  0.8797814207650273
