In [1]:
import os

In [2]:
%pwd

'D:\\Python\\MLProjectsPW\\EEG_EYE\\research'

In [3]:
# Move from the notebook folder (research/) up to the project root so that
# relative paths such as config/config.yaml and artifacts/ resolve.
# Guarded on the current folder name so that re-running this cell, or starting
# the kernel at the project root, does not climb one directory too far.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'D:\\Python\\MLProjectsPW\\EEG_EYE'

In [5]:
from dataclasses import dataclass
from pathlib import Path

In [6]:
@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings bundle for the model-training stage.

    Instances are frozen: attributes cannot be reassigned after construction.
    Fields are declared in constructor (positional) order.
    """

    # --- locations ---------------------------------------------------------
    # Directory owned by the training stage; the fitted model is saved under it.
    root_dir: Path
    # CSV file holding the training split.
    train_data_path: Path
    # CSV file holding the test split.
    test_data_path: Path

    # --- KNN hyper-parameter search space (one list of candidates each) ----
    n_neighbors: list
    algorithm: list
    leaf_size: list
    metric: list

    # --- output / schema ---------------------------------------------------
    # File name of the persisted model, joined onto ``root_dir``.
    model_name: str
    # Name of the label column in the training data.
    target_column: str

In [7]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml , create_directories

In [8]:
class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config objects.

    On construction the config, params and schema files are parsed and the
    top-level artifacts directory is created.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Parse the three YAML files and create the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the hyper-parameter YAML.
            schema_filepath: Path of the data-schema YAML.
        """
        # Files are read in config -> params -> schema order.
        yaml_sources = (config_filepath, params_filepath, schema_filepath)
        loaded = [read_yaml(source) for source in yaml_sources]
        self.config, self.params, self.schema = loaded

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the training-stage settings and create its output directory.

        Returns:
            ModelTrainerConfig: populated from the ``model_trainer`` section of
            the config file, the ``KNeighborsClassifier`` section of the params
            file and the ``TARGET_COLUMN`` entry of the schema file.
        """
        trainer_section = self.config.model_trainer
        knn_section = self.params.KNeighborsClassifier
        target_spec = self.schema.TARGET_COLUMN

        create_directories([trainer_section.root_dir])

        # Field names are identical in the YAML sections and on the dataclass,
        # so they can be copied across by name.
        stage_fields = ("root_dir", "train_data_path", "test_data_path", "model_name")
        grid_fields = ("n_neighbors", "algorithm", "leaf_size", "metric")

        settings = {name: getattr(trainer_section, name) for name in stage_fields}
        settings.update({name: getattr(knn_section, name) for name in grid_fields})

        return ModelTrainerConfig(target_column=target_spec.name, **settings)

In [9]:
import pandas as pd
import os
from mlProject import logger
from mlProject.utils.common import save_bin
from pycaret.classification import *

In [12]:
class ModelTrainer:
    """Fits, tunes and persists a PyCaret KNN classifier for the training stage."""

    def __init__(self, config: ModelTrainerConfig):
        """Store the stage settings.

        Args:
            config: Paths, hyper-parameter search space, model file name and
                target column used by ``train``.
        """
        self.config = config

    def train(self):
        """Train a KNN model, tune it over the configured grid and save it.

        Steps:
            1. Load the training CSV.
            2. Initialise the PyCaret experiment on it (PyCaret makes its own
               internal train/hold-out split of this frame).
            3. Create a baseline ``knn`` model and tune it with the configured
               search space.
            4. Persist the resulting estimator to ``root_dir / model_name``
               through the project's ``save_bin`` helper.

        The test CSV is intentionally not read here: nothing in training used
        it, so loading it was only wasted I/O.

        Returns:
            None. The model is written to disk as a side effect.
        """
        train_data = pd.read_csv(self.config.train_data_path)

        # init setup -- fixed session_id keeps the run reproducible.
        # The returned experiment object is not needed; setup() configures the
        # module-level experiment that create_model/tune_model operate on.
        setup(train_data, target=self.config.target_column, session_id=123)

        # create baseline model
        knn = create_model('knn')

        # define search space
        search_space = {
            "n_neighbors": self.config.n_neighbors,
            "algorithm": self.config.algorithm,
            "leaf_size": self.config.leaf_size,
            "metric": self.config.metric,
        }

        # tune model -- PyCaret may return the untuned baseline when tuning
        # does not improve on it.
        tuned_knn = tune_model(knn, custom_grid=search_space)

        # save the model object under the stage directory
        model_path = Path(self.config.root_dir) / self.config.model_name
        save_bin(tuned_knn, model_path)

In [13]:
# Run the model-training stage end to end: load settings, build the trainer,
# then fit, tune and persist the model.
# Each object gets its own name (the settings and the trainer used to share
# `model_trainer_config`). The previous `except Exception as e: raise e`
# wrapper was dropped because it only re-raised; errors still propagate.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

[2023-09-02 16:30:03,706: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-09-02 16:30:03,711: INFO: common: yaml file: params.yaml loaded successfully]
[2023-09-02 16:30:03,715: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-09-02 16:30:03,715: INFO: common: created directory at :artifacts]
[2023-09-02 16:30:03,715: INFO: common: created directory at :artifacts/model_trainer]


Unnamed: 0,Description,Value
0,Session id,123
1,Target,eyeDetection
2,Target type,Binary
3,Original data shape,"(11235, 15)"
4,Transformed data shape,"(11235, 15)"
5,Transformed train set shape,"(7864, 15)"
6,Transformed test set shape,"(3371, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9492,0.9875,0.9371,0.948,0.9425,0.897,0.897
1,0.9327,0.9863,0.9145,0.9331,0.9237,0.8635,0.8636
2,0.9479,0.9862,0.943,0.9403,0.9417,0.8946,0.8946
3,0.9543,0.9916,0.943,0.9539,0.9484,0.9073,0.9074
4,0.9504,0.9864,0.9314,0.956,0.9436,0.8993,0.8995
5,0.9618,0.991,0.9571,0.9571,0.9571,0.9227,0.9227
6,0.9478,0.9893,0.9314,0.9504,0.9408,0.8942,0.8943
7,0.9389,0.9869,0.9286,0.9339,0.9312,0.8763,0.8763
8,0.9453,0.9847,0.9457,0.9324,0.939,0.8894,0.8895
9,0.9466,0.9873,0.9171,0.9611,0.9386,0.8913,0.8921


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9492,0.9875,0.9371,0.948,0.9425,0.897,0.897
1,0.9327,0.9863,0.9145,0.9331,0.9237,0.8635,0.8636
2,0.9479,0.9862,0.943,0.9403,0.9417,0.8946,0.8946
3,0.9543,0.9916,0.943,0.9539,0.9484,0.9073,0.9074
4,0.9504,0.9864,0.9314,0.956,0.9436,0.8993,0.8995
5,0.9618,0.991,0.9571,0.9571,0.9571,0.9227,0.9227
6,0.9478,0.9893,0.9314,0.9504,0.9408,0.8942,0.8943
7,0.9389,0.9869,0.9286,0.9339,0.9312,0.8763,0.8763
8,0.9453,0.9847,0.9457,0.9324,0.939,0.8894,0.8895
9,0.9466,0.9873,0.9171,0.9611,0.9386,0.8913,0.8921


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
[2023-09-02 16:30:11,094: INFO: common: binary file saved at : artifacts\model_trainer\best_pipeline.pkl]
