In [1]:
import os

In [2]:
%pwd

'/mnt/e/bgRemoveApp/bg_removal/research'

In [3]:
# Step from the notebook's folder up to its parent so that the relative paths
# used by later cells (e.g. config/config.yaml, artifacts/...) resolve.
# NOTE: relative chdir is not idempotent -- re-running this cell moves up again.
os.chdir("../")

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """Read-only bundle of settings handed to the training stage.

    The first four fields are filesystem locations; the ``params_*`` fields
    carry training hyper-parameters.
    """

    # --- filesystem locations ---
    root_dir: Path
    trained_model_path: Path
    base_model_path: Path
    training_data: Path

    # --- hyper-parameters ---
    params_epochs: int
    params_batch_size: int
    params_is_augmentation: bool
    params_image_size: list
        

@dataclass(frozen=True)
class PrepareCallBackConfig:
    """Read-only bundle of filesystem locations used when building callbacks."""

    root_dir: Path
    tensorboard_root_log_dir: Path   # parent folder for per-run TensorBoard logs
    checkpoint_model_filepath: Path  # file the checkpoint callback writes to
    csv_filePath: Path               # file the CSV logger writes to

In [5]:
from bgremove.constants import *
from bgremove.utils.common import read_yaml, create_directories
import tensorflow as tf

2023-08-30 10:48:33.791223: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


[2023-08-30 10:48:35,256: INFO: utils: Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.]
[2023-08-30 10:48:35,257: INFO: utils: NumExpr defaulting to 8 threads.]


In [6]:
class ConfigurationManager:
    """Reads the config and params YAML files and builds typed stage configs."""

    def __init__(
        self,
        config_filePath = CONFIG_FILE_PATH,
        params_filePath = PARAMS_FILE_PATH):
        """Parse both YAML files up front and keep the results on the instance."""
        self.config = read_yaml(config_filePath)
        self.params = read_yaml(params_filePath)

    def get_call_backs_config(self)->PrepareCallBackConfig:
        """Create the checkpoint/TensorBoard folders and return the callback config."""
        section = self.config.prepare_callbacks

        # The checkpoint entry is a file path; only its parent folder is created.
        checkpoint_parent = Path(os.path.dirname(section.checkpoint_model_filepath))
        tensorboard_dir = Path(section.tensorboard_root_log_dir)
        create_directories([checkpoint_parent, tensorboard_dir])

        return PrepareCallBackConfig(
            root_dir=Path(section.root_dir),
            tensorboard_root_log_dir=Path(section.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(section.checkpoint_model_filepath),
            csv_filePath=Path(section.csv_path),
        )

    def get_training_config(self)->TrainingConfig:
        """Create the training folder and return the training config.

        Combines paths from the ``training``, ``prepare_base_model`` and
        ``data_ingestion`` config sections with values from the params file.
        """
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyper_params = self.params
        data_dir = self.config.data_ingestion.unzip_dir

        create_directories([Path(training_section.root_dir)])

        return TrainingConfig(
            root_dir=Path(training_section.root_dir),
            trained_model_path=Path(training_section.trained_model_path),
            base_model_path=Path(base_model_section.base_model_path),
            training_data=Path(data_dir),
            params_epochs=hyper_params.EPOCHS,
            params_batch_size=hyper_params.BATCH_SIZE,
            params_is_augmentation=hyper_params.AUGMENTATION,
            params_image_size=hyper_params.IMAGE_SIZE,
        )
        

In [7]:
import time

In [8]:
# Notebook-level copy of the params file. Later cells read this global directly
# (e.g. params.VERBOSE, params.HEIGHT), so this cell must run before them.
params = read_yaml(PARAMS_FILE_PATH)

[2023-08-30 10:48:35,616: INFO: common: yaml file: params.yaml loaded successfully]


In [9]:

class PrepareCallBacks:
    """Builds the Keras callbacks used for a training run.

    Output locations come from the ``PrepareCallBackConfig``; tuning values
    (monitor, patience, verbosity, ...) are read from the notebook-level
    ``params`` object. Each property builds a *new* callback object on every
    access.
    """

    def __init__(self, config:PrepareCallBackConfig):
        self.config= config

    @property
    def _create_tb_callbacks(self):
        """TensorBoard callback logging to a fresh, timestamped sub-folder."""
        timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        tb_running_log_dir = os.path.join(
            self.config.tensorboard_root_log_dir,
            f"tb_logs_at_{timestamp}"
        )
        return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)

    @property
    def _create_ckpt_callbacks(self):
        """Checkpoint callback writing to ``config.checkpoint_model_filepath``."""
        return tf.keras.callbacks.ModelCheckpoint(
            filepath=str(self.config.checkpoint_model_filepath),
            # Rank checkpoints by the same quantity that drives LR reduction
            # and early stopping; otherwise Keras falls back to its own
            # default monitor and "best" can mean different things.
            monitor=params.MONITOR,
            verbose=params.VERBOSE,
            save_best_only=params.SAVE_BEST_ONLY)

    @property
    def _reduce_on_paleatue(self):
        """Callback that lowers the learning rate when the monitored value stalls."""
        return tf.keras.callbacks.ReduceLROnPlateau(
            monitor=params.MONITOR,
            factor=params.FACTOR,
            patience=params.PATIENCE_REDUCE_LEARNING,
            min_lr=params.MIN_LR,
            verbose=params.VERBOSE)

    @property
    def _csv_logger(self):
        """Callback that writes per-epoch results to ``config.csv_filePath``."""
        # Pass a plain string, matching how the checkpoint path is handed over.
        return tf.keras.callbacks.CSVLogger(str(self.config.csv_filePath))

    @property
    def _early_stopping(self):
        """Callback that stops training once the monitored value stops improving."""
        return tf.keras.callbacks.EarlyStopping(
            monitor=params.MONITOR,
            patience=params.PATIENCE_EARLY_STOPPING,
            restore_best_weights=params.RESTORE_BEST_WEIGHTS)

    def get_callbacks(self):
        """Return freshly built callbacks in the order they are passed to ``fit``."""
        return [
            self._create_tb_callbacks,
            self._create_ckpt_callbacks,
            self._reduce_on_paleatue,
            self._csv_logger,
            self._early_stopping
        ]

In [10]:
import tensorflow as tf
import cv2
from glob import glob
import numpy as np

In [11]:
# Target image height and width used by the image/mask loaders defined below.
H, W = params.HEIGHT, params.WIDTH
print(H, W)

512 512


In [12]:
class Training:
    """Loads the base model, builds ``tf.data`` input pipelines and runs training.

    Image height/width are taken from the notebook-level ``H`` and ``W``;
    everything else comes from the ``TrainingConfig`` passed to ``__init__``.
    """

    def __init__(self, config:TrainingConfig):
        self.config = config

    def get_base_model(self):
        """Load the model stored at ``config.base_model_path`` into ``self.model``."""
        self.model = tf.keras.models.load_model(
            self.config.base_model_path
        )

    def load_data(self, path):
        """Collect sorted image/mask file paths for the train and validation splits.

        Args:
            path: dataset root containing the ``train`` and ``validation`` folders.

        Returns:
            ``((train_x, train_y), (valid_x, valid_y))`` as lists of path strings.
            Images and masks are paired purely by their position after sorting.
        """
        train_x = sorted(glob(os.path.join(path, "train", "blurred_image", "*.jpg")))
        train_y = sorted(glob(os.path.join(path, "train", "mask", "*.png")))

        valid_x = sorted(glob(os.path.join(path, "validation", "P3M-500-NP", "original_image", "*.jpg")))
        valid_y = sorted(glob(os.path.join(path, "validation", "P3M-500-NP", "mask", "*.png")))

        print(f"traiing list {len(train_x)}, and trainy {len(train_y)}, validation is {len(valid_x)}& {len(valid_y)}")

        return (train_x, train_y), (valid_x, valid_y)

    def _load_resized(self, path, flag):
        """Read ``path`` with OpenCV, resize to (W, H) and divide by 255 as float32.

        Args:
            path: file path as ``bytes`` (as delivered by ``tf.numpy_function``).
            flag: OpenCV ``imread`` flag selecting colour or grayscale decoding.

        Raises:
            ValueError: if OpenCV cannot read the file (``imread`` returned None).
        """
        path = path.decode()
        pixels = cv2.imread(path, flag)
        if pixels is None:
            # Without this guard the failure surfaces as an opaque cv2.resize error.
            raise ValueError(f"Could not read image file: {path}")
        pixels = cv2.resize(pixels, (W, H))
        pixels = pixels / 255.0
        return pixels.astype(np.float32)

    def read_image(self, path):
        """Load a colour image as a float32 array of shape (H, W, 3)."""
        return self._load_resized(path, cv2.IMREAD_COLOR)

    def read_mask(self, path):
        """Load a grayscale mask as a float32 array of shape (H, W, 1)."""
        mask = self._load_resized(path, cv2.IMREAD_GRAYSCALE)
        return np.expand_dims(mask, axis=-1)

    def tf_parse(self, x, y):
        """Map an (image path, mask path) pair to decoded tensors with static shapes."""
        def _parse(x, y):
            x = self.read_image(x)
            y = self.read_mask(y)
            return x, y

        x, y = tf.numpy_function(_parse, [x, y], [tf.float32, tf.float32])
        # numpy_function loses shape information; restore it for downstream layers.
        x.set_shape([H, W, 3])
        y.set_shape([H, W, 1])
        return x, y

    def tf_dataset(self, X, Y, batch, shuffle=False):
        """Build a batched, prefetched dataset from parallel path lists.

        Args:
            X: image file paths.
            Y: mask file paths, aligned index-by-index with ``X``.
            batch: batch size.
            shuffle: when True, reshuffle the (path, path) pairs every epoch
                before decoding. Defaults to False (original behaviour).

        Raises:
            ValueError: if ``X`` is empty or ``X`` and ``Y`` differ in length.
        """
        if len(X) != len(Y):
            raise ValueError(
                f"Image/mask count mismatch: {len(X)} images vs {len(Y)} masks"
            )
        if len(X) == 0:
            raise ValueError("No image files found; check the dataset path")

        ds = tf.data.Dataset.from_tensor_slices((X, Y))
        if shuffle:
            # Shuffle the cheap path strings, not the decoded images.
            ds = ds.shuffle(buffer_size=len(X), reshuffle_each_iteration=True)
        ds = (
            ds.map(self.tf_parse, num_parallel_calls=tf.data.AUTOTUNE)
            .batch(batch)
            .prefetch(10)
        )
        print(f"the whole df {ds}")
        return ds

    def loading_data_set(self, path, batch_size):
        """Return ``(train_dataset, valid_dataset)`` built from the files under ``path``."""
        (train_x, train_y), (valid_x, valid_y) = self.load_data(path)

        print(f"Train: {len(train_x)} - {len(train_y)}")
        print(f"Valid: {len(valid_x)} - {len(valid_y)}")

        print(batch_size)

        # Keras ignores fit(shuffle=...) for tf.data inputs, so the training
        # set must be shuffled here; validation order is left untouched.
        train_dataset = self.tf_dataset(train_x, train_y, batch=batch_size, shuffle=True)
        valid_dataset = self.tf_dataset(valid_x, valid_y, batch=batch_size)

        return (train_dataset, valid_dataset)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path``."""
        model.save(path)

    def train(self, trained_ds, valid_data, callback_list):
        """Fit ``self.model`` and save it to ``config.trained_model_path``.

        ``get_base_model`` must have been called first so ``self.model`` exists.
        The epoch count comes from ``config.params_epochs`` rather than a
        notebook global, so the instance is fully described by its config.
        """
        self.model.fit(
            trained_ds,
            epochs=self.config.params_epochs,
            validation_data= valid_data,
            callbacks= callback_list
        )

        self.save_model(
            path = self.config.trained_model_path,
            model= self.model
        )

        

In [14]:
# Re-read the main config file at notebook level to inspect the data directory
# recorded under data_ingestion.unzip_dir.
path_config = read_yaml(CONFIG_FILE_PATH)
print(path_config.data_ingestion.unzip_dir)

[2023-08-30 10:48:58,605: INFO: common: yaml file: config/config.yaml loaded successfully]
artifacts/data_ingestion/data


In [None]:
# End-to-end run: build configs and callbacks, load the base model, build the
# datasets and train. Exceptions are left to propagate unchanged (the former
# `except Exception as e: raise e` wrapper did nothing but add a traceback frame).
config = ConfigurationManager()
prepare_callbacks_config = config.get_call_backs_config()
prepare_callbacks = PrepareCallBacks(config=prepare_callbacks_config)
callback_list = prepare_callbacks.get_callbacks()

training_config = config.get_training_config()
training = Training(config=training_config)
training.get_base_model()

# Take the data directory and batch size from the training config (same
# config/params files) instead of notebook globals defined in other cells.
train_ds, valid_ds = training.loading_data_set(
    training_config.training_data,
    training_config.params_batch_size,
)
print(train_ds)
training.train(train_ds, valid_ds, callback_list)

[2023-08-30 10:49:06,164: INFO: common: yaml file: config/config.yaml loaded successfully]
[2023-08-30 10:49:06,171: INFO: common: yaml file: params.yaml loaded successfully]
[2023-08-30 10:49:06,178: INFO: common: created directory at: artifacts/prepare_callbacks/checkpoint_dir]
[2023-08-30 10:49:06,183: INFO: common: created directory at: artifacts/prepare_callbacks/tensorboard_log_dir]
[2023-08-30 10:49:06,188: INFO: common: created directory at: artifacts/training]
traiing list 9421, and trainy 9421, validation is 500& 500
Train: 9421 - 9421
Valid: 500 - 500
1
the whole df <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 512, 512, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 512, 512, 1), dtype=tf.float32, name=None))>
the whole df <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 512, 512, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 512, 512, 1), dtype=tf.float32, name=None))>
<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 512, 512,

2023-08-30 10:49:11.691889: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [9421]
	 [[{{node Placeholder/_1}}]]


In [None]:
%pwd

In [None]:
%pwd
# Debug aid: show the data directory recorded in the config file.
print(path_config.data_ingestion.unzip_dir)

In [None]:
# Debug aid: show the batch size read from the params file.
print(params.BATCH_SIZE)

In [None]:
# Sanity check: are the training images discoverable from the current working
# directory? Glob once and show the count plus a small sample rather than
# dumping every matching path into the cell output.
train_x = sorted(glob(os.path.join(str(path_config.data_ingestion.unzip_dir), "train", "blurred_image", "*.jpg")))
print(len(train_x))
print(train_x[:5])