#### Initial Setup

In [1]:
import os

In [2]:
%pwd

'E:\\RajaRajeshwari\\MyFolders\\Projects\\mlops_water_potability_prediction\\notebooks'

In [3]:
os.chdir("../")

In [4]:
%pwd

'E:\\RajaRajeshwari\\MyFolders\\Projects\\mlops_water_potability_prediction'

#### Entity

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    model_file_name: str
    iterations: int
    learning_rate: float
    random_seed: int
    custom_loss: list
    target_column: str

#### Configuration

In [6]:
from src.mlops_water_potability_prediction_project.constants import *
from src.mlops_water_potability_prediction_project.utilities.helpers import read_yaml, create_directories

class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH, schema_filepath=SCHEMA_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories(directories_path_list=[self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer
        params = self.params.CatBoost
        schema = self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        print(config)

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            model_file_name=config.model_file_name,
            iterations=params.iterations,
            learning_rate=params.learning_rate,
            random_seed=params.random_seed,
            custom_loss=['Accuracy', 'AUC'],
            target_column=schema.name
        )

        return model_trainer_config

#### Component

In [9]:
import joblib
import os
import pandas as pd
from catboost import CatBoostClassifier
from src.mlops_water_potability_prediction_project import logger


class ModelTrainer:
    def __init__(self, config: ModelTrainerConfig):
        self.config = config

    def train(self):
        try:
            train_df = pd.read_csv(self.config.train_data_path)
            test_df = pd.read_csv(self.config.test_data_path)

            X_train = train_df.drop([self.config.target_column], axis=1)
            X_test = test_df.drop([self.config.target_column], axis=1)
            y_train = train_df[[self.config.target_column]]
            y_test = test_df[[self.config.target_column]]

            classifier = CatBoostClassifier(iterations=self.config.iterations, random_seed=self.config.random_seed, learning_rate=self.config.learning_rate, custom_loss=self.config.custom_loss)
            classifier.fit(X_train, y_train, verbose=True, plot=True)

            joblib.dump(classifier, os.path.join(self.config.root_dir, self.config.model_file_name))
            
            logger.info("Trained and saved the model")
        except Exception as e:
            raise e

#### Pipeline

In [10]:
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer_config = ModelTrainer(config=model_trainer_config)
    model_trainer_config.train()
except Exception as e:
    raise e

[2024-01-29 12:12:51,039]: INFO: helpers: YAML file: config\config.yaml loaded successfully]
[2024-01-29 12:12:51,041]: INFO: helpers: YAML file: params.yaml loaded successfully]
[2024-01-29 12:12:51,042]: INFO: helpers: YAML file: schema.yaml loaded successfully]
[2024-01-29 12:12:51,043]: INFO: helpers: Created directory at: artifacts]
[2024-01-29 12:12:51,044]: INFO: helpers: Created directory at: artifacts/model_trainer]
{'root_dir': 'artifacts/model_trainer', 'train_data_path': 'artifacts/data_transformation/train_set.csv', 'test_data_path': 'artifacts/data_transformation/test_set.csv', 'model_file_name': 'model.joblib'}


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

0:	learn: 0.6656813	total: 1.76ms	remaining: 86.4ms
1:	learn: 0.6304005	total: 3.37ms	remaining: 80.9ms
2:	learn: 0.6147720	total: 5.08ms	remaining: 79.6ms
3:	learn: 0.5986872	total: 6.64ms	remaining: 76.4ms
4:	learn: 0.5825654	total: 8.41ms	remaining: 75.7ms
5:	learn: 0.5677987	total: 9.95ms	remaining: 72.9ms
6:	learn: 0.5603531	total: 11.5ms	remaining: 70.4ms
7:	learn: 0.5561712	total: 12.8ms	remaining: 67.3ms
8:	learn: 0.5479152	total: 14.2ms	remaining: 64.9ms
9:	learn: 0.5427341	total: 15.8ms	remaining: 63.1ms
10:	learn: 0.5364166	total: 17.3ms	remaining: 61.4ms
11:	learn: 0.5278448	total: 18.9ms	remaining: 59.7ms
12:	learn: 0.5240916	total: 20.3ms	remaining: 57.7ms
13:	learn: 0.5201039	total: 21.7ms	remaining: 55.9ms
14:	learn: 0.5137773	total: 23.4ms	remaining: 54.5ms
15:	learn: 0.5075113	total: 24.8ms	remaining: 52.6ms
16:	learn: 0.5006836	total: 26.3ms	remaining: 51ms
17:	learn: 0.4921582	total: 27.7ms	remaining: 49.2ms
18:	learn: 0.4870537	total: 29.2ms	remaining: 47.6ms
19:	l