In [1]:
import os

In [2]:
%pwd

'/home/tejas/MLProject/Thyroid-Disease-Prediction/research'

In [3]:
# Move from the research/ folder up to the project root so that relative
# paths used later (e.g. config/config.yaml) resolve from there.
# The guard makes this cell idempotent: re-running it no longer climbs one
# more directory level on every execution.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'/home/tejas/MLProject/Thyroid-Disease-Prediction'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """
    Immutable bundle of settings consumed by the model training step.

    Attributes:
        root_dir: Output directory of the training step; the TensorBoard logs
            and the saved model are written underneath it.
        train_data_path: Location of the CSV file read as training data.
        model_name: Name joined onto ``root_dir`` to form the model save path.
        parameters: Keyword arguments unpacked into the model constructor.
        target_column: Name of the label column in the training data.
    """

    # Filesystem locations
    root_dir: Path
    train_data_path: Path

    # Model identity and hyper-parameters
    model_name: str
    parameters: dict

    # Label column used when building the training dataset
    target_column: str

In [6]:

from ThyroidProject.constants import *
from ThyroidProject.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Reads the project's YAML files and builds step-specific config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """
        Load the three YAML files and create the top-level artifacts directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the model parameters YAML.
            schema_filepath: Path of the data schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Every step writes below artifacts_root, so make sure it exists.
        artifacts_dir = self.config.artifacts_root
        create_directories([artifacts_dir])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """
        Assemble the configuration for the model training step.

        Combines the ``model_trainer`` section of the config file, the
        ``GradientBoostedTreesLearner`` section of the params file and the
        ``TARGET_COLUMN`` entry of the schema file. The step's root directory
        is created as a side effect.

        Returns:
            ModelTrainerConfig: Settings for the model training step.
        """
        trainer_section = self.config.model_trainer
        gbt_params = self.params.GradientBoostedTreesLearner
        target_spec = self.schema.TARGET_COLUMN

        # Make sure the step's output directory exists before it is used.
        output_dir = trainer_section.root_dir
        create_directories([output_dir])

        return ModelTrainerConfig(
            root_dir=output_dir,
            train_data_path=trainer_section.train_data_path,
            model_name=trainer_section.model_name,
            parameters=gbt_params,
            target_column=target_spec.name,
        )

In [8]:
import pandas as pd
import os
from ThyroidProject import logger
import tensorflow_decision_forests as tfdf

2024-01-29 15:47:15.782972: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-29 15:47:15.784676: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-29 15:47:15.811587: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-29 15:47:15.811644: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-29 15:47:15.812456: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [9]:
class ModelTrainer:
    """Trains a TF-DF gradient boosted trees model from a ModelTrainerConfig."""

    def __init__(self, config: ModelTrainerConfig):
        # Keep the whole config object; train() reads its fields when it runs.
        self.config = config

    def train(self):
        """
        Fit the model on the training CSV and persist the results.

        Reads the training data, converts it to a TensorFlow dataset with the
        configured label column, fits a gradient boosted trees model with the
        configured parameters, logs the training logs, exports them for
        TensorBoard and saves the model under the configured root directory.
        """
        cfg = self.config

        # Load the training frame and wrap it as a tf.data dataset.
        frame = pd.read_csv(cfg.train_data_path)
        dataset = tfdf.keras.pd_dataframe_to_tf_dataset(
            frame,
            label=cfg.target_column,
            max_num_classes=3,
        )
        print(dataset)

        # Hyper-parameters come straight from the params file.
        gbt_model = tfdf.keras.GradientBoostedTreesModel(**cfg.parameters)
        gbt_model.fit(dataset)

        # Report the per-tree training logs and export them for TensorBoard.
        model_inspector = gbt_model.make_inspector()
        logger.info(
            f"The results of the model building are: {model_inspector.training_logs()}")
        tensorboard_dir = os.path.join(cfg.root_dir, "tensorboard_logs")
        model_inspector.export_to_tensorboard(tensorboard_dir)

        # Save the trained model so it can be loaded later for serving.
        model_dir = os.path.join(cfg.root_dir, cfg.model_name)
        gbt_model.save(model_dir)

In [10]:
# Run the model training step end to end: load configuration, build the
# trainer, and fit/save the model.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    # Use a separate name for the trainer so `model_trainer_config` keeps
    # referring to the configuration object rather than being rebound to a
    # ModelTrainer instance.
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.train()
except Exception:
    # Record the failure with its traceback in the project log, then re-raise
    # with a bare `raise` so the original traceback is propagated unchanged.
    logger.exception("Model training step failed")
    raise

[2024-01-29 15:47:17,575:INFO:common:yaml file: config/config.yaml loaded successfully]
[2024-01-29 15:47:17,577:INFO:common:yaml file: params.yaml loaded successfully]
[2024-01-29 15:47:17,579:INFO:common:yaml file: schema.yaml loaded successfully]
[2024-01-29 15:47:17,580:INFO:common:created directory at :artifacts]
[2024-01-29 15:47:17,580:INFO:common:created directory at :artifacts/model_trainer]


<_PrefetchDataset element_spec=({'age': TensorSpec(shape=(None,), dtype=tf.int64, name=None), 'sex': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'on_thyroxine': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'query_on_thyroxine': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'on_antihyroid_meds': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'sick': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'pregnant': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'thyroid_surgery': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'I131_treatment': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'query_hypothyroid': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'query_hyperthyroid': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'lithium': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'goitre': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'tumor': TensorSpec(shape=(None,), dtype=tf.string, name=N

2024-01-29 15:47:19.244084: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-29 15:47:19.244518: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Training dataset read in 0:00:02.399096. Found 5756 examples.
[2024-01-29 15:47:21,779:INFO:tf_logging:Training dataset read in 0:00:02.399096. Found 5756 examples.]
Training model...
[2024-01-29 15:47:21,779:INFO:tf_logging:Training model...]
Model trained in 0:00:00.712851
[2024-01-29 15:47:22,493:INFO:tf_logging:Model trained in 0:00:00.712851]
Compiling model...
[2024-01-29 15:47:22,494:INFO:tf_logging:Compiling model...]


[INFO 24-01-29 15:47:22.4845 IST kernel.cc:1233] Loading model from path /tmp/tmp5s066u49/model/ with prefix d0dcd81170ef4e9e
[INFO 24-01-29 15:47:22.4895 IST decision_forest.cc:660] Model loaded with 111 root(s), 4117 node(s), and 14 input feature(s).
[INFO 24-01-29 15:47:22.4896 IST abstract_model.cc:1344] Engine "GradientBoostedTreesGeneric" built
[INFO 24-01-29 15:47:22.4896 IST kernel.cc:1061] Use fast generic engine


Model compiled.
[2024-01-29 15:47:23,071:INFO:tf_logging:Model compiled.]
[2024-01-29 15:47:23,115:INFO:523508393:The results of the model building are: [TrainLog(num_trees=1, evaluation=Evaluation(num_examples=None, accuracy=0.9787610769271851, loss=0.8328334093093872, rmse=None, ndcg=None, aucs=None, auuc=None, qini=None)), TrainLog(num_trees=2, evaluation=Evaluation(num_examples=None, accuracy=0.9787610769271851, loss=0.6510120034217834, rmse=None, ndcg=None, aucs=None, auuc=None, qini=None)), TrainLog(num_trees=3, evaluation=Evaluation(num_examples=None, accuracy=0.9787610769271851, loss=0.5196457505226135, rmse=None, ndcg=None, aucs=None, auuc=None, qini=None)), TrainLog(num_trees=4, evaluation=Evaluation(num_examples=None, accuracy=0.9805309772491455, loss=0.4200752377510071, rmse=None, ndcg=None, aucs=None, auuc=None, qini=None)), TrainLog(num_trees=5, evaluation=Evaluation(num_examples=None, accuracy=0.9805309772491455, loss=0.3441726565361023, rmse=None, ndcg=None, aucs=None, 