In [2]:
import os

In [3]:

%pwd

'c:\\Users\\Omar\\Desktop\\Omar_Files\\Python_Analysis\\EndToEndMLProjectGenderClassification\\research'

In [4]:
# Step up from the notebook's folder to its parent directory so that the
# relative paths used further down (config files, artifacts) resolve.
# NOTE: this is not idempotent - every re-run of the cell moves up one more level.
os.chdir(os.pardir)

In [27]:
%pwd

'c:\\Users\\Omar\\Desktop\\Omar_Files\\Python_Analysis\\EndToEndMLProjectGenderClassification'

In [28]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of settings consumed by the model-training step."""

    # Filesystem locations: output directory and the serialized training array.
    root_dir: Path
    train_data_arr_path: Path

    # File name under which the fitted model is stored inside ``root_dir``.
    model_name: str

    # Hyperparameters forwarded to the estimator.
    n_estimators: int
    min_samples_split: int
    min_samples_leaf: int
    random_state: int

    # Name of the target column as declared in the schema.
    target_column: str

In [29]:
from EndToEndMLProjectGenderClassification.constants import *
from EndToEndMLProjectGenderClassification.utils.common import read_yaml,create_directories

In [30]:
class ConfigurationManager:
    """Reads the project's YAML settings and builds typed config objects.

    The config, params and schema documents are each loaded once with
    ``read_yaml`` at construction time, and the artifacts root directory
    named in the config is created.
    """

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH,
                 schema_filepath=SCHEMA_FILE_PATH) -> None:
        """Load the three YAML documents and create the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyperparameter YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the ``TrainingConfig`` for the training stage.

        Reads the ``training`` section of the config, the
        ``GradientBoostingClassifier`` section of the params and
        ``TARGET_COLUMN`` from the schema, and creates the training
        output directory.

        Returns:
            A ``TrainingConfig`` whose path fields are ``pathlib.Path``
            objects, matching the dataclass annotations.
        """
        config = self.config.training
        params = self.params.GradientBoostingClassifier
        schema = self.schema

        create_directories([config.root_dir])

        return TrainingConfig(
            # The YAML values are not Path objects; convert them so the
            # instance actually honours the declared ``Path`` field types.
            root_dir=Path(config.root_dir),
            train_data_arr_path=Path(config.train_data_arr_path),
            model_name=config.model_name,
            n_estimators=params.n_estimators,
            min_samples_split=params.min_samples_split,
            min_samples_leaf=params.min_samples_leaf,
            random_state=params.random_state,
            target_column=schema.TARGET_COLUMN,
        )

In [31]:
import os
import urllib.request as request
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
import pickle,joblib
import pandas as pd
import numpy as np
from EndToEndMLProjectGenderClassification import logger

In [32]:

class Training:
    """Fits a gradient-boosting classifier and persists it with joblib."""

    def __init__(self, config: TrainingConfig):
        """Keep the training settings for use by ``initiate_Training``."""
        self.config = config

    def initiate_Training(self):
        """Load the training array, fit the classifier and save it.

        The array is read from ``config.train_data_arr_path``. Its last
        column is used as the label vector and every preceding column as
        a feature. The fitted estimator is written to ``config.model_name``
        inside ``config.root_dir``.
        """
        cfg = self.config

        with open(cfg.train_data_arr_path, 'rb') as array_file:
            dataset = np.load(array_file)

        # Last column holds the labels; everything before it is a feature.
        features = dataset[:, :-1]
        labels = dataset[:, -1]

        hyperparameters = {
            "n_estimators": cfg.n_estimators,
            "min_samples_split": cfg.min_samples_split,
            "min_samples_leaf": cfg.min_samples_leaf,
            "random_state": cfg.random_state,
        }
        classifier = GradientBoostingClassifier(**hyperparameters)
        classifier.fit(features, labels)

        model_path = os.path.join(cfg.root_dir, cfg.model_name)
        joblib.dump(classifier, model_path)



In [33]:

# Run the training stage end to end: load the configuration, build the
# training settings, then fit and persist the model.
try:
    config = ConfigurationManager()
    training_config = config.get_training_config()
    training = Training(config=training_config)
    training.initiate_Training()
except Exception as e:
    # Record the failure (with traceback) through the project logger, then
    # re-raise with a bare ``raise`` so the original traceback is preserved.
    logger.exception(e)
    raise

[2024-09-10 20:54:33,597: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-09-10 20:54:33,600: INFO: common: yaml file: params.yaml loaded successfully]
[2024-09-10 20:54:33,605: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-09-10 20:54:33,607: INFO: common: created directory at: artifacts]
[2024-09-10 20:54:33,609: INFO: common: created directory at: artifacts/training]


In [34]:
import joblib
from pathlib import Path
# Reload the persisted model from disk to check that the saved artifact is usable.
model_path = Path("artifacts/training/model.joblib")
model = joblib.load(model_path)

In [35]:
# Predict the class for one hand-written sample of seven feature values
# (presumably in the same column order as the training array - TODO confirm).
sample_row = [[3000, .25, 200, 1, 2, 1, 4]]
model.predict(sample_row)


array([1.])

In [36]:
# Per-class probabilities for the same hand-written sample.
sample_row = [[3000, .25, 200, 1, 2, 1, 4]]
model.predict_proba(sample_row)

array([[0.39863408, 0.60136592]])

In [37]:
# Highest class probability for the sample, rounded to two decimals.
sample_proba = model.predict_proba([[3000, .25, 200, 1, 2, 1, 4]])
np.round(sample_proba.max(), 2)

np.float64(0.6)