In [1]:
import os

In [2]:
%pwd

'c:\\development\\Machine-Learning\\end-to-end-ml-project\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'c:\\development\\Machine-Learning\\end-to-end-ml-project'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelTrainingConfig:
    root_dir : Path
    X_train_data_path: Path
    X_test_data_path: Path
    y_train_data_path: Path
    y_test_data_path: Path
    model_name : str
    alpha: float
    n_estimators: int
    max_depth: int
    learning_rate: float
    

In [6]:
from ml_project.constants import *
from ml_project.utils.common import read_yaml , create_directories

In [22]:
class ConfigurationManager:
    def __init__(self, config_filepath= CONFIG_FILE_PATH , params_filepath=PARAMS_FILE_PATH ,schema_filepath=SCHEMA_FILE_PATH ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)
        
        create_directories([self.config.model_training.root_dir])
        
        
    def get_model_training_config(self) -> ModelTrainingConfig:
        config = self.config.model_training
        params = self.params.LGBM
        schema = self.schema.TARGET
        
        model_training_config = ModelTrainingConfig(
            root_dir= Path(config.root_dir),
            X_train_data_path= Path(config.X_train_data_path),
            X_test_data_path= Path(config.X_test_data_path),
            y_train_data_path= Path(config.y_train_data_path),
            y_test_data_path= Path(config.y_test_data_path),
            model_name= config.model_name,
            alpha= params.alpha,
            n_estimators= params.n_estimators,
            max_depth= params.max_depth,
            learning_rate= params.learning_rate
        ) 
        
        return model_training_config

In [8]:
import pandas as pd
import os 
from ml_project import logger
from lightgbm import LGBMClassifier
import joblib

[2024-02-25 13:32:15,224: INFO: utils:  Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.]
[2024-02-25 13:32:15,224: INFO: utils:  NumExpr defaulting to 8 threads.]


In [18]:
class ModelTrainer:
    def __init__(self , config: ModelTrainingConfig) -> None:
        self.config = config

    def train(self):
        X_train = pd.read_csv(self.config.X_train_data_path)
        X_test = pd.read_csv(self.config.X_test_data_path)
        y_train = pd.read_csv(self.config.y_train_data_path)
        y_test = pd.read_csv(self.config.y_test_data_path)
    
        
        clf = LGBMClassifier(alpha=self.config.alpha,
                             n_estimators=self.config.n_estimators,
                             max_depth=self.config.max_depth,
                             learning_rate=self.config.learning_rate)
        
        clf.fit(X_train, y_train)
        
        joblib.dump(clf, os.path.join(self.config.root_dir, f"{self.config.model_name}.joblib"))
        

In [23]:
try:
    config = ConfigurationManager()
    model_training_config = config.get_model_training_config()
    model_training_config = ModelTrainer(config=model_training_config)
    model_training_config.train()
except Exception as e:
    raise e    
    
    

[2024-02-25 13:56:45,127: INFO: common:  yaml file: config\config.yaml loaded successfully]
[2024-02-25 13:56:45,128: INFO: common:  yaml file: params.yaml loaded successfully]
[2024-02-25 13:56:45,130: INFO: common:  yaml file: schema.yaml loaded successfully]
[2024-02-25 13:56:45,131: INFO: common:  created directory at: artifacts/model_training]
[LightGBM] [Info] Number of positive: 4198, number of negative: 31970
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001303 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1128
[LightGBM] [Info] Number of data points in the train set: 36168, number of used features: 31
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.116069 -> initscore=-2.030190
[LightGBM] [Info] Start training from score -2.030190


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
