In [1]:
import os 

In [2]:
%pwd

'c:\\code\\ML\\breast_cancer\\research'

In [3]:
# Step up from research/ (the notebook's start directory per the %pwd output)
# to its parent. Guarded on the directory name so that re-running this cell
# does not climb one level higher on every execution.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\code\\ML\\breast_cancer'

In [5]:
from xgboost import XGBClassifier

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier , RandomForestClassifier
from xgboost import XGBClassifier

In [26]:
from dataclasses import dataclass , field
import numpy as np
from typing import Dict , List , Any

#entity

@dataclass
class ModelBuilding:
    """Settings for the model-building stage.

    Attributes:
        cv: value handed to the randomized searches as ``cv``.
        random_state: seed handed to the estimators built by the component.
        n_iter: value handed to the randomized searches as ``n_iter``.
        n_jobs: value handed to the randomized searches as ``n_jobs``.
        model_save_path: output path without extension; the component
            appends ``".pkl"`` before saving the model.
    """

    cv: int
    random_state: int
    n_iter: int
    n_jobs: int
    model_save_path: str
    



In [9]:
#config

In [None]:
from breast_cancer.utils.common import read_yaml , create_directories
from breast_cancer.constants import *
class ConfigurationManager:
    """Reads the config and params YAML files and builds stage entities from them."""

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: path of the config YAML; defaults to CONFIG_FILE_PATH.
            params_filepath: path of the params YAML; defaults to PARAMS_FILE_PATH.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def model_building_config(self) -> "ModelBuilding":
        """Build the ModelBuilding entity from the ``model_building`` sections.

        Search settings (cv, random_state, n_iter, n_jobs) come from the params
        file; ``model_save_path`` comes from the config file.

        Returns:
            ModelBuilding: the populated entity.
        """
        stage_params = self.params.model_building
        stage_paths = self.config.model_building

        # NOTE: no directory is created for model_save_path here; the component
        # appends ".pkl" to it, so it is used as a file stem, not a directory.
        return ModelBuilding(
            cv=stage_params.cv,
            random_state=stage_params.random_state,
            n_iter=stage_params.n_iter,
            n_jobs=stage_params.n_jobs,
            model_save_path=stage_paths.model_save_path,
        )

In [28]:
#components

In [29]:
from breast_cancer import logger
from breast_cancer.pipeline.stage04_data_transformation import data_transformation_pipeline
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score
import joblib

class ModelBuildingComponent:
    """Tune a random forest and a gradient-boosting classifier, combine them
    in a soft-voting ensemble, report test accuracy and save the ensemble."""

    def __init__(self, config: ModelBuilding):
        """Store the stage configuration.

        Args:
            config: ModelBuilding entity holding cv, random_state, n_iter,
                n_jobs and model_save_path.
        """
        self.config = config

    def load_data(self):
        """Run the data-transformation pipeline and return the result of its ``main()``.

        ``model()`` unpacks that result as ``(x_train, x_test, y_train, y_test)``.
        """
        data = data_transformation_pipeline()
        return data.main()

    def _tune(self, estimator, param_distributions, x_train, y_train):
        """Run a randomized hyper-parameter search and return the best estimator.

        The search is seeded with ``config.random_state`` so that the sampled
        parameter combinations (and therefore the selected model) are the same
        on every run.
        """
        search = RandomizedSearchCV(
            estimator,
            param_distributions,
            n_iter=self.config.n_iter,
            cv=self.config.cv,
            n_jobs=self.config.n_jobs,
            random_state=self.config.random_state,
        )
        search.fit(x_train, y_train)
        return search.best_estimator_

    def model(self):
        """Train, evaluate and persist the voting ensemble.

        Returns:
            The fitted VotingClassifier.

        Side effects:
            Prints the test accuracy and writes the ensemble with joblib to
            ``config.model_save_path + ".pkl"``, creating the parent directory
            if it does not exist yet.
        """
        x_train, x_test, y_train, y_test = self.load_data()

        rf = RandomForestClassifier(random_state=self.config.random_state)
        gb = GradientBoostingClassifier(random_state=self.config.random_state)

        rf_params = {
            'n_estimators': [50, 100, 150],
            'max_depth': [3, 5, 10, None]
        }

        gb_params = {
            'n_estimators': [50, 100, 150],
            'learning_rate': [0.01, 0.1, 0.2],
            'max_depth': [3, 5, 7]
        }

        # Random search for the random forest.
        rf_best = self._tune(rf, rf_params, x_train, y_train)

        # Random search for gradient boosting.
        gb_best = self._tune(gb, gb_params, x_train, y_train)

        # Soft-voting ensemble of the two tuned estimators.
        ensemble = VotingClassifier(estimators=[
            ("rf", rf_best),
            ("gb", gb_best)
        ], voting="soft")

        ensemble.fit(x_train, y_train)

        # Evaluate on the held-out split.
        y_pred = ensemble.predict(x_test)
        acc = accuracy_score(y_true=y_test, y_pred=y_pred)

        print(f"Ensemble Test Accuracy: {acc:.4f}")

        # Make sure the target directory exists instead of relying on another
        # stage having created it. `os` is imported in the notebook's first cell.
        model_file = self.config.model_save_path + ".pkl"
        parent_dir = os.path.dirname(model_file)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        joblib.dump(ensemble, model_file)

        return ensemble
    


In [30]:
#pipeline

In [32]:
# Run the model-building stage end to end. There is no try/except here because
# nothing in this cell can handle a failure; errors propagate with their
# original traceback.
config = ConfigurationManager()
model_building = config.model_building_config()
model_building_comp = ModelBuildingComponent(config=model_building)
# Keep the fitted ensemble for later cells (also stops the cell echoing its repr).
ensemble = model_building_comp.model()

[2025-05-14 21:30:04,523: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-05-14 21:30:04,532: INFO: common: yaml file: params.yaml loaded successfully]
[2025-05-14 21:30:04,538: INFO: common: created directory at: artifacts]
[2025-05-14 21:30:04,543: INFO: common: created directory at: artifacts/models/ensemble]
[2025-05-14 21:30:04,553: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-05-14 21:30:04,569: INFO: common: yaml file: params.yaml loaded successfully]
[2025-05-14 21:30:04,575: INFO: common: created directory at: artifacts]
[2025-05-14 21:30:04,577: INFO: common: created directory at: artifacts/models/scaler]
[2025-05-14 21:30:04,605: INFO: data_transformation: train test split]
[2025-05-14 21:30:04,616: INFO: data_transformation: Standard Scaling]
[2025-05-14 21:30:04,643: INFO: data_transformation: synthetic minority over sampling technique]


  y = df['diagnosis'].replace({'M':1,'B':0})


Ensemble Test Accuracy: 0.9649
