In [1]:
import os
%pwd

'd:\\Machine_Learning\\Titanic_Pipeline_Project\\research'

In [2]:
os.chdir("../")
%pwd

'd:\\Machine_Learning\\Titanic_Pipeline_Project'

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir:Path
    data_path:Path

In [4]:
from titanic.constants import *
from titanic.utils.common import read_yaml, create_directories

In [5]:
class ConfigurationManager:
    def __init__(
            self,
            config_file_path = CONFIG_FILE_PATH,
            params_file_path = PARAMS_FILE_PATH
            ):
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

        create_directories([self.config.artifacts_root])
    
    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer 

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir= config.root_dir,
            data_path= config.data_path
        )

        return model_trainer_config


In [6]:
from sklearn.metrics import accuracy_score
from titanic.logging import logger
import pandas as pd

from sklearn.tree import DecisionTreeClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import (
    AdaBoostClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
)
# from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.preprocessing import StandardScaler
# from sklearn.tree import train_test_split

from sklearn.model_selection import train_test_split

from titanic.utils.common import save_object

class ModelTrainer:
    def __init__(self, config:ModelTrainerConfig):
        self.config = config

    def fetch_transformed_data(self):
        df = pd.read_csv(self.config.data_path)
        logger.info("Data has been fetched successfully")
        return df
    
    def scaling_data(self):
        df = self.fetch_transformed_data()

        x = df.drop(columns='Survived', axis=1)
        y = df['Survived']

        sd = StandardScaler()
        x = sd.fit_transform(x)


        logger.info("Data has been scaled successfully")
        return x,y

    def initiate_model_training(self):
        x, y = self.scaling_data()

        models = {
                "Random Forest": RandomForestClassifier(),
                "Decision Tree": DecisionTreeClassifier(),
                "Gradient Boosting": GradientBoostingClassifier(),
                "K-Neighbors Regressor": KNeighborsClassifier(),
                "CatBoosting Regressor": CatBoostClassifier(verbose=False),
                "AdaBoost Regressor": AdaBoostClassifier()
                # "XGBRegressor": XGBClassifier(),
                # "Linear Regression": LinearRegression(),
            }
        # For Hyper Parameter tuning 
        # model_report:dict = evaluate_models(x_train = x_train, y_train = y_train,x_test=x_test, y_test=y_test, models=models, param = params)
        accuracy_dict = {}
        for model_name, model in models.items():
            model.fit(x, y)
            logger.info(f"Model {model_name} has been trained successfully")
            y_pred = model.predict(x)
            accuracy = accuracy_score(y, y_pred)
            accuracy_dict[accuracy] = (model, model_name, accuracy)
        
        
        # best_model = list(models.values())[accuracy_list.index(max(accuracy_list))]
        # best_model = max(list(accuracy_dict.values()))
        best_model = accuracy_dict[max(accuracy_dict)][0]
        
        model_path= Path(os.path.join(self.config.root_dir,"model.pkl"))

        save_object(path = model_path,obj =best_model)
        logger.info(f"Model Name: {accuracy_dict[max(accuracy_dict)][1]} has been saved successfully with accuracy: {accuracy_dict[max(accuracy_dict)][2]}")

        logger.info("Model Training Complete")

In [7]:
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer = ModelTrainer(config= model_trainer_config)
    model_trainer.initiate_model_training()

except Exception as e:
    raise e

[2023-12-30 13:22:46,773: INFO: common: yaml file config\config.yaml loaded successfully]
[2023-12-30 13:22:46,775: INFO: common: yaml file params.yaml loaded successfully]
[2023-12-30 13:22:46,777: INFO: common: created directory at: artifacts]
[2023-12-30 13:22:46,778: INFO: common: created directory at: artifacts/model_trainer]
[2023-12-30 13:22:46,781: INFO: 2966315374: Data has been fetched successfully]
[2023-12-30 13:22:46,784: INFO: 2966315374: Data has been scaled successfully]
[2023-12-30 13:22:46,913: INFO: 2966315374: Model Random Forest has been trained successfully]
[2023-12-30 13:22:46,929: INFO: 2966315374: Model Decision Tree has been trained successfully]
[2023-12-30 13:22:47,004: INFO: 2966315374: Model Gradient Boosting has been trained successfully]
[2023-12-30 13:22:47,004: INFO: 2966315374: Model K-Neighbors Regressor has been trained successfully]
[2023-12-30 13:22:48,431: INFO: 2966315374: Model CatBoosting Regressor has been trained successfully]
[2023-12-30 1