In [1]:
import os

In [2]:
%pwd

'c:\\Users\\KUNAL MEHTA\\Desktop\\Data Science Training\\Projects\\Auto-Insurance-Risk-Profiling\\research'

In [3]:
# The notebook lives in research/; step up to the project root so the
# project-relative paths used by later cells resolve. Guarded so that
# re-running this cell does not keep climbing the directory tree.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
%pwd

'c:\\Users\\KUNAL MEHTA\\Desktop\\Data Science Training\\Projects\\Auto-Insurance-Risk-Profiling'

In [36]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ClassModelTrainerConfig:
    """Immutable settings for training the ``claim`` classifier.

    Instances are built by
    ``ConfigurationManager.get_class_model_trainer_config`` from the
    ``class_model_trainer`` config section and the
    ``GradientBoostingClassifier`` params section, and are consumed by
    ``ClassModelTrainer``.
    """
    # Directory the fitted model file is written into.
    root_dir: Path
    # Training CSV; it must contain the ``claim`` target column.
    train_data_class_path: Path
    # Test-set file path (not used for fitting).
    test_data_path: Path
    # File name of the serialized model, joined onto ``root_dir``.
    model_class_name: str
    # The remaining fields are forwarded unchanged to
    # ``GradientBoostingClassifier`` as keyword arguments of the same name.
    n_estimators: int
    learning_rate: float
    max_depth: int
    min_samples_leaf: int
    max_features: float

@dataclass(frozen=True)
class RegModelTrainerConfig:
    """Immutable settings for training the ``log_amount`` regressor.

    Instances are built by
    ``ConfigurationManager.get_reg_model_trainer_config`` from the
    ``reg_model_trainer`` config section and the
    ``GradientBoostingRegressor`` params section, and are consumed by
    ``RegModelTrainer``.
    """
    # Directory the fitted model file is written into.
    root_dir: Path
    # Training CSV; it must contain the ``log_amount`` target column.
    train_data_reg_path: Path
    # Test-set file path (not used for fitting).
    test_data_path: Path
    # File name of the serialized model, joined onto ``root_dir``.
    model_reg_name: str
    # The remaining fields are forwarded unchanged to
    # ``GradientBoostingRegressor`` as keyword arguments of the same name.
    learning_rate: float
    max_depth: int
    max_features: float
    min_samples_leaf: int
    n_estimators: int

In [7]:
from AutoInsurance.constants import *
from AutoInsurance.utils.common import read_yaml, create_directories

In [37]:
class ConfigurationManager:
    """Load the project's YAML files and build the model-trainer configs.

    The three YAML files (config, params, schema) are read once in
    ``__init__``; each ``get_*_config`` method then picks the section it needs,
    makes sure the output directory exists, and returns a frozen dataclass.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read the YAML files and create the top-level artifacts directory.

        Args:
            config_filepath: Path of the YAML file holding paths and names.
            params_filepath: Path of the YAML file holding hyperparameters.
            schema_filepath: Path of the YAML file holding the data schema.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        # Loaded for parity with the other files; not read by the getters below.
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_class_model_trainer_config(self) -> ClassModelTrainerConfig:
        """Build the classifier trainer config and create its output directory.

        Returns:
            ClassModelTrainerConfig: paths from the ``class_model_trainer``
            config section (coerced to ``Path`` to match the dataclass
            annotations) plus hyperparameters from the
            ``GradientBoostingClassifier`` params section.
        """
        config = self.config.class_model_trainer
        params = self.params.GradientBoostingClassifier

        create_directories([config.root_dir])

        return ClassModelTrainerConfig(
            # The dataclass declares these three fields as Path; convert the
            # raw YAML values so the object matches its own annotations.
            root_dir=Path(config.root_dir),
            train_data_class_path=Path(config.train_data_class_path),
            test_data_path=Path(config.test_data_path),
            model_class_name=config.model_class_name,
            n_estimators=params.n_estimators,
            learning_rate=params.learning_rate,
            max_depth=params.max_depth,
            min_samples_leaf=params.min_samples_leaf,
            max_features=params.max_features,
        )

    def get_reg_model_trainer_config(self) -> RegModelTrainerConfig:
        """Build the regressor trainer config and create its output directory.

        Returns:
            RegModelTrainerConfig: paths from the ``reg_model_trainer`` config
            section (coerced to ``Path`` to match the dataclass annotations)
            plus hyperparameters from the ``GradientBoostingRegressor`` params
            section.
        """
        config = self.config.reg_model_trainer
        params = self.params.GradientBoostingRegressor

        create_directories([config.root_dir])

        return RegModelTrainerConfig(
            # Same Path coercion as for the classifier config.
            root_dir=Path(config.root_dir),
            train_data_reg_path=Path(config.train_data_reg_path),
            test_data_path=Path(config.test_data_path),
            model_reg_name=config.model_reg_name,
            n_estimators=params.n_estimators,
            learning_rate=params.learning_rate,
            max_depth=params.max_depth,
            min_samples_leaf=params.min_samples_leaf,
            max_features=params.max_features,
        )

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as datetime
import seaborn as sns
import math
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve, auc, mean_squared_error, r2_score, classification_report
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.cluster import KMeans
import scipy.stats as stats
import joblib


In [38]:
class ClassModelTrainer:
    """Fit a ``GradientBoostingClassifier`` on the ``claim`` target and save it."""

    def __init__(self, config: ClassModelTrainerConfig):
        """Store the trainer configuration.

        Args:
            config: Paths, output file name and hyperparameters for the run.
        """
        self.config = config

    def train_model(self):
        """Train the classifier and persist it with ``joblib``.

        Reads the training CSV at ``config.train_data_class_path``, uses the
        ``claim`` column as target and every other column as a feature, fits
        the model, and writes it to ``config.root_dir / config.model_class_name``.

        The test CSV is deliberately not loaded here: it plays no part in
        fitting, so reading it only cost I/O and could fail the run for an
        unrelated reason.
        """
        target_col = 'claim'

        train_data = pd.read_csv(self.config.train_data_class_path)
        train_x = train_data.drop(target_col, axis = 1)
        train_y = train_data[target_col]

        # NOTE(review): no random_state is passed, so results may differ
        # between runs when max_features makes the split search randomized.
        model = GradientBoostingClassifier(
            n_estimators= self.config.n_estimators,
            learning_rate= self.config.learning_rate,
            max_depth= self.config.max_depth,
            min_samples_leaf= self.config.min_samples_leaf,
            max_features= self.config.max_features,
        )

        model.fit(train_x, train_y)

        model_path = os.path.join(self.config.root_dir, self.config.model_class_name)
        joblib.dump(model, model_path)


class RegModelTrainer:
    """Fit a ``GradientBoostingRegressor`` on the ``log_amount`` target and save it."""

    def __init__(self, config: RegModelTrainerConfig):
        """Store the trainer configuration.

        Args:
            config: Paths, output file name and hyperparameters for the run.
        """
        self.config = config

    def train_model(self):
        """Train the regressor and persist it with ``joblib``.

        Reads the training CSV at ``config.train_data_reg_path``, uses the
        ``log_amount`` column as target and every other column as a feature,
        fits the model, and writes it to
        ``config.root_dir / config.model_reg_name``.

        The test CSV is deliberately not loaded here: it plays no part in
        fitting, so reading it only cost I/O and could fail the run for an
        unrelated reason.
        """
        target_col = 'log_amount'

        train_data = pd.read_csv(self.config.train_data_reg_path)
        train_x = train_data.drop(target_col, axis = 1)
        train_y = train_data[target_col]

        # NOTE(review): no random_state is passed, so results may differ
        # between runs when max_features makes the split search randomized.
        model = GradientBoostingRegressor(
            n_estimators= self.config.n_estimators,
            learning_rate= self.config.learning_rate,
            max_depth= self.config.max_depth,
            min_samples_leaf= self.config.min_samples_leaf,
            max_features= self.config.max_features,
        )

        model.fit(train_x, train_y)

        model_path = os.path.join(self.config.root_dir, self.config.model_reg_name)
        joblib.dump(model, model_path)


In [39]:
# Build each trainer config from the YAML files, then fit and persist the
# corresponding model. Any exception propagates to the notebook unchanged.
config = ConfigurationManager()

# Classifier: the config and the trainer get distinct names so neither is
# overwritten by the other.
class_model_trainer_config = config.get_class_model_trainer_config()
class_model_trainer = ClassModelTrainer(config = class_model_trainer_config)
class_model_trainer.train_model()

# Regressor: same pattern as the classifier.
reg_model_trainer_config = config.get_reg_model_trainer_config()
reg_model_trainer = RegModelTrainer(config = reg_model_trainer_config)
reg_model_trainer.train_model()

[2024-05-22 01:30:08,129: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-05-22 01:30:08,133: INFO: common: yaml file: params.yaml loaded successfully]
[2024-05-22 01:30:08,138: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-05-22 01:30:08,141: INFO: common: created directory at: artifacts]
[2024-05-22 01:30:08,142: INFO: common: created directory at: artifacts/model_trainer]
[2024-05-22 01:30:12,195: INFO: common: created directory at: artifacts/model_trainer]
