In [2]:
import os
# Move from the notebook's folder up to the project root so the `src.*`
# imports used by later cells resolve. The guard makes the cell idempotent:
# without it, every re-run climbs one more directory.
if not os.path.isdir("src"):
    os.chdir("../")

In [3]:
# Sanity check: show the current working directory after the chdir above.
%pwd

'c:\\Users\\karthikeya\\Insurance_Premium_Prediction'

In [21]:
from dataclasses import dataclass

from sklearn.ensemble import (
    AdaBoostRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor
)
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, median_absolute_error, mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
import joblib as jl
from src.logger import logger

from src.utils import evaluate_models

In [17]:
@dataclass
class ModelTrainerConfig:
    """Configuration for the model-training step.

    Attributes:
        trained_model_file_path: Destination path of the serialized model.
    """

    # The type annotation is required: without it ``@dataclass`` does not treat
    # the name as a field, so it would be missing from ``__init__``/``repr``/``==``
    # and could not be overridden per instance through the constructor.
    trained_model_file_path: str = os.path.join("artifacts", "model.joblib")

In [27]:
class ModelTrainer:
    """Evaluate a fixed set of regressors, persist the best one and score it."""

    # Lowest score (as reported by ``evaluate_models``) the best model must
    # reach before it is accepted and written to disk.
    MIN_ACCEPTABLE_SCORE = 0.6

    def __init__(self):
        self.model_trainer_config = ModelTrainerConfig()

    def initiate_model_training(self, train_array, test_array):
        """Pick the best-scoring regressor, save it and return its test R^2.

        Args:
            train_array: 2-D array whose last column is the target and whose
                remaining columns are the features.
            test_array: 2-D array with the same column layout as ``train_array``.

        Returns:
            The R^2 score of the selected model on the test split.

        Raises:
            ValueError: If ``evaluate_models`` returns an empty report.
            Exception: If the best score is below ``MIN_ACCEPTABLE_SCORE``.
        """
        logger.info("splitting train and test input data")

        X_train, y_train = train_array[:, :-1], train_array[:, -1]
        X_test, y_test = test_array[:, :-1], test_array[:, -1]

        models = {
            "RandomForest": RandomForestRegressor(),
            "DecisionTree": DecisionTreeRegressor(),
            "GradientBoosting": GradientBoostingRegressor(),
            "LinearRegression": LinearRegression(),
            "XGBRegressor": XGBRegressor(),
            "AdaBoostRegressor": AdaBoostRegressor(),
        }

        model_report: dict = evaluate_models(
            X_train=X_train,
            y_train=y_train,
            X_test=X_test,
            y_test=y_test,
            models=models,
        )

        if not model_report:
            # max() on an empty mapping would raise a bare ValueError anyway;
            # fail with a message that names the actual cause.
            raise ValueError("evaluate_models returned no scores")

        # Key with the highest score; on ties the first entry wins, which
        # matches the previous values -> index -> keys lookup.
        best_model_name = max(model_report, key=model_report.get)
        best_model_score = model_report[best_model_name]

        best_model = models[best_model_name]

        if best_model_score < self.MIN_ACCEPTABLE_SCORE:
            raise Exception("No best model found")

        logger.info("Best found model on both training and testing dataset")
        logger.info(f"Selected model: {best_model_name} (score={best_model_score})")

        # Ensure the target directory exists so the dump cannot fail on a
        # fresh checkout.
        model_path = self.model_trainer_config.trained_model_file_path
        model_dir = os.path.dirname(model_path)
        if model_dir:
            os.makedirs(model_dir, exist_ok=True)
        jl.dump(best_model, model_path)

        # NOTE(review): predict() relies on evaluate_models having fitted the
        # estimator instances in `models` in place -- confirm in src.utils.
        predicted = best_model.predict(X_test)

        return r2_score(y_test, predicted)

In [28]:
from src.components.data_ingestion import DataIngestion, DataIngestionConfig
from src.components.data_transformation import DataTransformation, DataTransformationConfig

In [29]:
# End-to-end run: data ingestion -> data transformation -> model training.
# The first value returned by initiate_data_ingestion() is not needed here
# and is discarded.
# NOTE(review): assigning to `_` shadows IPython's "last output" variable.
obj = DataIngestion()
_, train_data, test_data = obj.initiate_data_ingestion()

data_transformation = DataTransformation()

# Only the transformed train/test arrays are used; the third return value is
# discarded (presumably the preprocessor object or its path -- confirm
# against DataTransformation).
train_arr, test_arr,_ = data_transformation.initiate_data_transformation(train_data, test_data)

model_trainer = ModelTrainer()

# Last expression of the cell, so the returned R^2 score is shown as the
# cell output.
model_trainer.initiate_model_training(train_arr, test_arr)

[2024-11-19 19:38:37,742, INFO, data_ingestion, Entered the data ingestion method ]
[2024-11-19 19:38:37,743, INFO, data_ingestion, Establishing Connection with SQLite databse ]
[2024-11-19 19:38:37,745, INFO, utils, Successfully connected to the SQLite database. ]
[2024-11-19 19:38:42,483, INFO, data_ingestion, Successfuly read the raw data as dataframe ]
[2024-11-19 19:38:42,485, INFO, utils, Disconnected from the SQLite database. ]
[2024-11-19 19:38:42,485, INFO, data_ingestion, Disconnected from SQLite database ]
[2024-11-19 19:38:42,486, INFO, data_ingestion, Train Test Split Initiated ]
[2024-11-19 19:38:53,373, INFO, data_ingestion, Data ingestion is complete ]
[2024-11-19 19:38:56,017, INFO, data_transformation, Reading train and test data completed ]
[2024-11-19 19:38:56,018, INFO, data_transformation, Obtaining preprocessing object ]
[2024-11-19 19:38:56,018, INFO, data_transformation, Numerical columns standard scaling completed ]
[2024-11-19 19:38:56,019, INFO, data_transfo

0.8245757070520024