In [8]:
import os 
os.chdir("/home/om/Desktop/dl-project")

%pwd

'/home/om/Desktop/dl-project'

In [9]:
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of paths and hyper-parameters for the training stage.

    Instances are built by ``ConfigurationManager.get_training_config`` and
    consumed by ``Training``.
    """

    # Populated from ``artifacts_root`` in the main config file.
    root_dir: Path
    # Destination for the trained model (``train_model.model_path`` in the config).
    trained_model_path: Path
    # Batch size handed to ``image_dataset_from_directory``.
    params_batch_size: int
    # Number of epochs passed to ``model.fit``.
    params_epochs: int
    # Image size list; only the first two entries are used as the resize target.
    params_image_size: list
    # Populated from the AUGMENTATION parameter; not read by ``Training`` in this notebook.
    params_is_augmentation: bool
    # Model file that ``Training.get_base_model`` loads before fitting.
    updated_model_path: Path
    # Directory passed to ``image_dataset_from_directory`` as the dataset root.
    training_data_path: Path
    

In [10]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories
import tensorflow as tf

In [11]:
class ConfigurationManager:
    """Read the project YAML files and turn them into typed config objects.

    Args:
        config_filepath: Path of the main configuration YAML.
        params_filepath: Path of the hyper-parameter YAML.

    Side effects:
        Creates the ``artifacts_root`` directory named in the main config.
    """

    # Sub-folder of the unzipped archive that is used as the training data root.
    DATASET_SUBDIR = "kidney-CT_Scan/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone"

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the ``TrainingConfig`` for the training stage.

        Returns:
            A frozen ``TrainingConfig`` whose paths come from the main config
            and whose hyper-parameters come from the ``prepare_base_model``
            section of the params file.
        """
        training = self.config.train_model
        prepare_base_model = self.config.prepare_base_model
        # The training hyper-parameters live under `prepare_base_model` in the
        # params file (not under a dedicated training section).
        params = self.params.prepare_base_model
        training_data_path = Path(self.config.data_ingestion.unzip_dir) / self.DATASET_SUBDIR

        return TrainingConfig(
            root_dir=Path(self.config.artifacts_root),
            trained_model_path=Path(training.model_path),
            params_batch_size=params.BATCH_SIZE,
            params_epochs=params.EPOCHS,
            params_image_size=params.IMAGE_SIZE,
            params_is_augmentation=params.AUGMENTATION,
            updated_model_path=Path(prepare_base_model.updated_model_path),
            training_data_path=training_data_path,
        )

In [12]:
import os
from zipfile import ZipFile
import urllib.request as request
import tensorflow as tf
from cnnClassifier import logger

In [13]:
import tensorflow as tf
import os
from dataclasses import dataclass

class Training:
    """Fit the prepared base model on a folder-structured image dataset.

    Args:
        config: ``TrainingConfig`` carrying the dataset/model paths and the
            batch-size, epoch and image-size hyper-parameters.
    """

    # Share of the images held out for validation, and the seed handed to both
    # loader calls so the "training" and "validation" subsets come from the
    # same shuffled split.
    VALIDATION_SPLIT = 0.2
    SPLIT_SEED = 42

    def __init__(self, config: TrainingConfig):
        self.config = config

    def get_base_model(self):
        """Load and return the Keras model stored at ``config.updated_model_path``."""
        return tf.keras.models.load_model(self.config.updated_model_path)

    def _load_subset(self, subset):
        """Build the dataset for one split (``"training"`` or ``"validation"``)."""
        from tensorflow.keras.preprocessing import image_dataset_from_directory

        return image_dataset_from_directory(
            Path(self.config.training_data_path),
            validation_split=self.VALIDATION_SPLIT,
            subset=subset,
            seed=self.SPLIT_SEED,
            # Only height and width are needed; any channel entry is dropped.
            image_size=self.config.params_image_size[:2],
            batch_size=self.config.params_batch_size,
            label_mode="categorical",
        )

    def get_data_generators(self):
        """Create the training and validation datasets from the image folders.

        Returns:
            ``(train_ds, val_ds)``, both with prefetching enabled.
        """
        train_ds = self._load_subset("training")
        val_ds = self._load_subset("validation")

        # Prefetch so input loading can overlap with model execution.
        autotune = tf.data.AUTOTUNE
        return (
            train_ds.prefetch(buffer_size=autotune),
            val_ds.prefetch(buffer_size=autotune),
        )

    def save_model(self):
        """Write ``self.model`` to ``config.trained_model_path``.

        The config object exposes the destination as ``trained_model_path``;
        it has no ``train_model`` attribute.
        """
        target = Path(self.config.trained_model_path)
        # Make sure the output folder exists before Keras tries to write to it.
        target.parent.mkdir(parents=True, exist_ok=True)
        self.model.save(target)
        logger.info("Training completed and model saved")

    def train(self):
        """Load the data and base model, fit for ``config.params_epochs`` epochs, then save."""
        logger.info("Training started")

        train_ds, val_ds = self.get_data_generators()

        # Sanity check: pull one batch and show the image tensor shape.
        sample_batch = next(iter(train_ds))
        print("Train batch shape:", sample_batch[0].shape)

        self.model = self.get_base_model()

        # NOTE(review): compile() is not called here, so the loaded model is
        # expected to already carry its optimizer/loss/metrics - confirm that
        # the stage producing `updated_model_path` saves a compiled model.
        self.model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=self.config.params_epochs,
        )

        self.save_model()


In [14]:
# Driver cell: build the configuration and run the training stage end to end.
try:
    config_manager = ConfigurationManager()
    training_config = config_manager.get_training_config()
    training = Training(training_config)
    training.train()
except Exception as e:
    # Record the failure (with traceback) in the project log, then let it
    # propagate. A bare `raise` re-raises the active exception as-is.
    # KeyboardInterrupt is not an `Exception` subclass, so manually stopping
    # the cell is not logged here.
    logger.exception(f"Exception occurred during training: {e}")
    raise

2025-07-07 10:49:49,282 - INFO - common - Created directory: artifacts
2025-07-07 10:49:49,286 - INFO - 217551913 - Training started
Found 12446 files belonging to 4 classes.
Using 9957 files for training.
Found 12446 files belonging to 4 classes.
Using 2489 files for validation.


2025-07-07 10:49:50.423264: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [9957]
	 [[{{node Placeholder/_0}}]]
2025-07-07 10:49:50.424143: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [9957]
	 [[{{node Placeholder/_4}}]]


Train batch shape: (32, 224, 224, 3)
Epoch 1/50


KeyboardInterrupt: 

In [None]:
# import pandas as pd
# df = pd.read_csv("/home/om/Desktop/dl-project/artifacts/data_ingestion/kidneyData.csv")

In [None]:
# df['target'].value_counts()

target
1    5077
0    3709
3    2283
2    1377
Name: count, dtype: int64

In [None]:
import pandas as pd
import os

# Folder the CSV's relative image paths are anchored to. Resolved against the
# working directory (the project root set in the first cell) rather than
# spelling out one machine's absolute path.
DATA_INGESTION_DIR = os.path.abspath("artifacts/data_ingestion")


def _to_absolute_image_path(csv_path):
    """Turn a path from the CSV into an absolute path under DATA_INGESTION_DIR."""
    # Remove only a literal leading "./". `str.lstrip('./')` strips any run of
    # '.' and '/' characters, which would also mangle "../x" or ".hidden/x".
    relative = csv_path[2:] if csv_path.startswith("./") else csv_path
    # A leading "/" would make os.path.join discard the base directory.
    return os.path.join(DATA_INGESTION_DIR, relative.lstrip("/"))


df = pd.read_csv(os.path.join(DATA_INGESTION_DIR, "kidneyData.csv"))
df['path'] = df['path'].apply(_to_absolute_image_path)
print(df['path'].head())
# Every row should point at an existing file; any False count means broken paths.
print(df['path'].apply(os.path.exists).value_counts())

0    /home/om/Desktop/dl-project/artifacts/data_ing...
1    /home/om/Desktop/dl-project/artifacts/data_ing...
2    /home/om/Desktop/dl-project/artifacts/data_ing...
3    /home/om/Desktop/dl-project/artifacts/data_ing...
4    /home/om/Desktop/dl-project/artifacts/data_ing...
Name: path, dtype: object
path
True    12446
Name: count, dtype: int64


In [None]:
# Class balance of the CSV labels.
# NOTE(review): the recorded output lists 'CYST' (1 row) separately from
# 'Cyst' (3708 rows) - this looks like a label-casing inconsistency in the
# CSV; confirm and normalise before using this column as a target.
df['Class'].value_counts()

Class
Normal    5077
Cyst      3708
Tumor     2283
Stone     1377
CYST         1
Name: count, dtype: int64