In [2]:
import os

In [3]:

%pwd

'c:\\Users\\Omar\\Desktop\\Omar_Files\\Python_Analysis\\EndToEndMLProjectGenderClassification\\research'

In [4]:
# The notebook is launched from research/; step up to the project root exactly
# once so that re-running this cell does not keep climbing the directory tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [164]:
%pwd


'c:\\Users\\Omar\\Desktop\\Omar_Files\\Python_Analysis\\EndToEndMLProjectGenderClassification'

In [165]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings for the training stage.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Directory the training stage creates and writes the model file into.
    root_dir: Path
    # File holding the training array (opened in binary mode and read with np.load).
    train_data_arr_path: Path
    # File holding the test array.
    test_data_arr_path: Path
    # File name of the saved model; joined onto root_dir when the model is dumped.
    best_model: str
    # Destination of the JSON summary (winning model name and parameters).
    best_model_params: Path
    # Target column identifier, taken from the schema's TARGET_COLUMN entry.
    target_column: str

In [166]:
from EndToEndMLProjectGenderClassification.constants import *
from EndToEndMLProjectGenderClassification.utils.common import read_yaml,create_directories,save_json

In [167]:
class ConfigurationManager:
    """Reads the project's YAML config and schema and builds per-stage configs."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ) -> None:
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: YAML file with the pipeline configuration.
            schema_filepath: YAML file with the data schema.
        """
        self.config = read_yaml(config_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Build the training-stage configuration.

        Creates the stage's root directory as a side effect.

        Returns:
            A TrainingConfig populated from the ``training`` section of the
            config file plus the schema's ``TARGET_COLUMN`` entry.
        """
        training_section = self.config.training
        schema_section = self.schema

        create_directories([training_section.root_dir])

        return TrainingConfig(
            root_dir=training_section.root_dir,
            train_data_arr_path=training_section.train_data_arr_path,
            test_data_arr_path=training_section.test_data_arr_path,
            best_model=training_section.best_model,
            best_model_params=training_section.best_model_params,
            target_column=schema_section.TARGET_COLUMN,
        )

In [168]:
import os
import urllib.request as request
import pickle,joblib
import pandas as pd
import numpy as np
from EndToEndMLProjectGenderClassification import logger
from sklearn.metrics import accuracy_score,f1_score,recall_score,precision_score,classification_report
from sklearn.ensemble import (
    RandomForestClassifier,GradientBoostingClassifier,AdaBoostClassifier,BaggingClassifier
)

from sklearn.tree import DecisionTreeClassifier

import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.svm import SVC
import json
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from EndToEndMLProjectGenderClassification.hyperpatameters.params import models

In [169]:

class Training:
    """Runs a randomized hyper-parameter search per candidate model and saves the winner."""

    def __init__(self, config: TrainingConfig):
        """Store the training configuration.

        Args:
            config: Paths and file names read by ``initiate_Training``.
        """
        self.config = config

    @staticmethod
    def _load_features_and_target(array_path):
        """Load an array from ``array_path`` and split off its last column.

        Returns:
            A ``(features, target)`` pair: every column but the last, and the
            last column.
        """
        with open(array_path, 'rb') as f:
            data = np.load(f)
        return data[:, :-1], data[:, -1]

    def _persist(self, model, model_name, params):
        """Write the fitted model and the JSON summary to the configured paths."""
        joblib.dump(model, os.path.join(self.config.root_dir, self.config.best_model))
        results = {"best_model_name": model_name, "best_params": params}
        save_json(path=Path(self.config.best_model_params), data=results)

    def initiate_Training(self):
        """Search every candidate in ``models`` and keep the best-scoring one.

        For each ``(name, estimator)`` pair in ``models`` a RandomizedSearchCV
        (3 sampled settings, 3-fold CV, accuracy scoring) is fitted on the
        training array, using the parameter space found under
        ``models -> <name>`` in ``model.yaml`` (resolved against the current
        working directory). Whenever a search beats the best cross-validated
        score seen so far, its estimator and a JSON summary are written out,
        so the files on disk always describe the best model found so far.

        The test array referenced by the config is not read here; nothing in
        this stage evaluates on it.

        Raises:
            Any exception raised while fitting a candidate, since the search
            is configured with ``error_score='raise'``.
        """
        x_train, y_train = self._load_features_and_target(
            self.config.train_data_arr_path
        )

        params_config = read_yaml(Path("model.yaml"))

        best_model = None
        best_score = -float("inf")

        for model_name, model in models.items():
            print(f"Running RandomizedSearchCV for {model_name}...")

            random_search = RandomizedSearchCV(
                estimator=model,
                param_distributions=params_config['models'][model_name],
                n_iter=3,
                scoring='accuracy',
                cv=3,
                verbose=2,
                random_state=42,
                error_score='raise',
            )
            random_search.fit(x_train, y_train)

            if random_search.best_score_ > best_score:
                best_score = random_search.best_score_
                # With the default refit=True, best_estimator_ has already been
                # refitted on the full training data, so no extra fit is needed.
                best_model = random_search.best_estimator_
                # NOTE(review): the summary nests "best_params" twice
                # ({"best_params": {"best_params": {...}}}); kept as-is because
                # readers of the JSON file may rely on that layout.
                best_params = {"best_params": random_search.best_params_}
                # Persist only when the leader changes instead of on every
                # iteration; the files still reflect the best model so far.
                self._persist(best_model, model_name, best_params)

        if best_model is None:
            logger.warning("No model was selected during training; nothing was saved.")

In [170]:

# Run the training stage: build the configuration, then search and save the best model.
try:
    config = ConfigurationManager()
    training_config = config.get_training_config()
    training = Training(config=training_config)
    training.initiate_Training()
except Exception as e:
    # Record the failure (with traceback) through the project logger, then
    # re-raise the same exception so the cell still fails visibly.
    logger.exception(e)
    raise

[2024-10-30 21:02:15,811: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-10-30 21:02:15,816: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-10-30 21:02:15,818: INFO: common: created directory at: artifacts]
[2024-10-30 21:02:15,820: INFO: common: created directory at: artifacts/training]
[2024-10-30 21:02:15,854: INFO: common: yaml file: model.yaml loaded successfully]
Running RandomizedSearchCV for RandomForestClassifier...
Fitting 3 folds for each of 3 candidates, totalling 9 fits
[CV] END min_samples_leaf=1, min_samples_split=10, n_estimators=300; total time=   3.3s
[CV] END min_samples_leaf=1, min_samples_split=10, n_estimators=300; total time=   3.5s
[CV] END min_samples_leaf=1, min_samples_split=10, n_estimators=300; total time=   3.9s
[CV] END min_samples_leaf=2, min_samples_split=5, n_estimators=200; total time=   2.1s
[CV] END min_samples_leaf=2, min_samples_split=5, n_estimators=200; total time=   3.7s
[CV] END min_samples_leaf=2, min_