In [21]:
import os

In [22]:
%pwd

'c:\\Users\\quamr\\OneDrive\\Desktop\\project\\gemstonePricePrediction'

In [23]:
os.chdir('c:\\Users\\quamr\\OneDrive\\Desktop\\project\\gemstonePricePrediction')

In [24]:
%pwd

'c:\\Users\\quamr\\OneDrive\\Desktop\\project\\gemstonePricePrediction'

In [25]:
@dataclass
class ModelTrainerConfig:
    root_dir: str
    trained_model_file_path: str
    train_array_path: str
    test_array_path: str


In [26]:
from src.gemstonePricePrediction.constants import *
from src.gemstonePricePrediction.utils.common import read_yaml,create_directories,save_object,evaluate_models,print_evaluated_results,model_metrics


In [27]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:

        config = self.config.model_trainer
        params = self.params
        schema = self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            trained_model_file_path=config.trained_model_file_path,
            target_column=schema.name,
            params=params
        )

        return model_trainer_config

        

In [None]:
import os
import sys
import logging
import numpy as np

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, VotingRegressor
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

from xgboost import XGBRegressor
from catboost import CatBoostRegressor



In [None]:
class ModelTrainer:
    def __init__(self, config: ModelTrainerConfig):
        self.config = config

    def initiate_model_training(self):

        try:
            logging.info("Loading transformed train and test arrays")

            train_array = np.load(self.config.train_array_path)
            test_array = np.load(self.config.test_array_path)

            xtrain, ytrain, xtest, ytest = (
                train_array[:, :-1],
                train_array[:, -1],
                test_array[:, :-1],
                test_array[:, -1]
            )

            logging.info("Arrays loaded successfully")

            # ----- REST OF YOUR EXISTING TRAINING CODE -----
            # (models, tuning, voting regressor etc)

            models = {
                "Linear Regression": LinearRegression(**self.config.params["LinearRegression"]),
                "Lasso": Lasso(**self.config.params["Lasso"]),
                "Ridge": Ridge(**self.config.params["Ridge"]),
                "K-Neighbors Regressor": KNeighborsRegressor(),
                "Decision Tree": DecisionTreeRegressor(**self.config.params["DecisionTreeRegressor"]),
                "Random Forest Regressor": RandomForestRegressor(**self.config.params["RandomForestRegressor"]),
                "XGBRegressor": XGBRegressor(**self.config.params["XGBRegressor"]),
                "CatBoosting Regressor": CatBoostRegressor(verbose=False),
                "GradientBoosting Regressor": GradientBoostingRegressor(**self.config.params["GradientBoostingRegressor"]),
                "AdaBoost Regressor": AdaBoostRegressor(**self.config.params["AdaBoostRegressor"])
            }

            logging.info("Evaluating multiple models")

            model_report: dict = evaluate_models(xtrain, ytrain, xtest, ytest, models)

            logging.info(f'Model Report : {model_report}')

            best_model_score = max(model_report.values())

            best_model_name = list(model_report.keys())[
                list(model_report.values()).index(best_model_score)
            ]

            best_model = models[best_model_name]

            if best_model_score < 0.6:
                logging.info('Best model has r2 Score less than 60%')
                raise CustomException('No Best Model Found', sys.exc_info())

            logging.info(
                f'Best Model Found , Model Name : {best_model_name} , R2 Score : {best_model_score}'
            )

            logging.info('Hyperparameter tuning started for CatBoost')

            cbr = CatBoostRegressor(verbose=False)

            param_dist = {
                "depth": self.config.params["CatBoostRegressor"]["depth"],
                "learning_rate": self.config.params["CatBoostRegressor"]["learning_rate"],
                "iterations": self.config.params["CatBoostRegressor"]["iterations"]
            }

            rscv = RandomizedSearchCV(
                cbr,
                param_dist,
                scoring='r2',
                cv=5,
                n_jobs=-1
            )

            rscv.fit(xtrain, ytrain)

            best_cbr = rscv.best_estimator_
            logging.info('Hyperparameter tuning complete for CatBoost')

            logging.info('Hyperparameter tuning started for KNN')

            knn = KNeighborsRegressor()

            param_grid = {
                "n_neighbors": self.config.params["KNeighborsRegressor"]["n_neighbors"]
            }

            grid = GridSearchCV(
                knn,
                param_grid,
                cv=5,
                scoring='r2',
                n_jobs=-1
            )

            grid.fit(xtrain, ytrain)

            best_knn = grid.best_estimator_
            logging.info('Hyperparameter tuning Complete for KNN')

            logging.info('Voting Regressor model training started')

            er = VotingRegressor([
                ('cbr', best_cbr),
                ('xgb', XGBRegressor()),
                ('knn', best_knn)
            ], weights=[3, 2, 1])

            er.fit(xtrain, ytrain)

            print_evaluated_results(xtrain, ytrain, xtest, ytest, er)

            logging.info('Voting Regressor Training Completed')

            os.makedirs(
                os.path.dirname(self.config.trained_model_file_path),
                exist_ok=True
            )

            save_object(
                file_path=self.config.trained_model_file_path,
                obj=er
            )

            logging.info('Model pickle file saved')

            ytest_pred = er.predict(xtest)

            mae, rmse, r2 = model_metrics(ytest, ytest_pred)

            logging.info(f'Test MAE : {mae}')
            logging.info(f'Test RMSE : {rmse}')
            logging.info(f'Test R2 Score : {r2}')

            logging.info('Final Model Training Completed')

            return mae, rmse, r2

        except Exception as e:
            logging.error('Exception occurred at Model Training')
            raise CustomException(e, sys.exc_info())


In [30]:
config = ConfigurationManager()
# Step 2: Model Training (NO ARGUMENTS ðŸ˜„)
trainer = ModelTrainer(config.get_model_trainer_config())
mae, rmse, r2 = trainer.initiate_model_training()

print("MAE:", mae)
print("RMSE:", rmse)
print("R2 Score:", r2)

[2026-02-11 16:10:36,705: INFO: common: yaml file loaded successfully from: C:\Users\quamr\OneDrive\Desktop\project\gemstonePricePrediction\config\config.yaml]


[2026-02-11 16:10:36,718: INFO: common: yaml file loaded successfully from: C:\Users\quamr\OneDrive\Desktop\project\gemstonePricePrediction\params.yaml]
[2026-02-11 16:10:36,726: INFO: common: yaml file loaded successfully from: C:\Users\quamr\OneDrive\Desktop\project\gemstonePricePrediction\schema.yaml]
[2026-02-11 16:10:36,730: INFO: common: created directory at: artifacts]
[2026-02-11 16:10:36,732: INFO: common: created directory at: artifacts/model_trainer]


TypeError: ModelTrainerConfig.__init__() got an unexpected keyword argument 'target_column'