In [1]:
import os
# Step one directory up from the notebook's folder; presumably this makes
# project-relative paths resolve from the repository root (the output of
# %pwd below shows where the kernel ended up).
# NOTE(review): this relative chdir is not idempotent -- re-running the cell
# without restarting the kernel moves up one more level each time.
os.chdir('../')
%pwd

'/home/utpal108/dev/Python/PW_Skills_Projects/Home-Loan-Approval-Prediction'

In [2]:
# Config Entity
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable (frozen) settings consumed by the model-training step."""
    # Filesystem location where the selected model is written via save_object.
    best_model_path: Path
    

In [3]:
from homeLoan.constants import *
from homeLoan.utils import create_directories, read_yaml

In [4]:
# Configuration Manager
class ConfigurationManager:
    """Read the project's YAML settings and hand out typed config entities.

    Both files are parsed once at construction time with ``read_yaml`` and the
    parsed objects are kept on the instance as ``config`` and ``params``.
    """

    def __init__(self, config_filepath = CONFIG_FILE_PATH, params_filepath = PARAMS_FILE_PATH):
        """Parse the YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root is taken from the freshly parsed config.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_training_config(self) -> ModelTrainingConfig:
        """Build the ``ModelTrainingConfig`` from the ``model_training`` section.

        The section's ``root_dir`` is passed to ``create_directories`` before
        the entity is built.

        Returns:
            ModelTrainingConfig: entity whose ``best_model_path`` is the
            configured value wrapped in ``Path``.
        """
        training_section = self.config.model_training
        create_directories([training_section.root_dir])

        return ModelTrainingConfig(
            best_model_path = Path(training_section.best_model_path),
        )


In [5]:
from homeLoan.utils import save_object, evaluate_model
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

In [6]:
# Component
class ModelTraining:
    """Train a set of candidate classifiers and persist the best one."""

    def __init__(self, config: ModelTrainingConfig):
        """Keep the training configuration (provides ``best_model_path``)."""
        self.config = config

    def initiate_model_training(self, train_arr, test_arr, random_state=None):
        """Evaluate the candidate classifiers and save the best-scoring one.

        Args:
            train_arr: 2-D array indexed as ``arr[:, :-1]`` / ``arr[:, -1]``;
                the last column is used as the target, the rest as features.
            test_arr: Array with the same layout as ``train_arr``.
            random_state: Optional seed forwarded to the tree-based models
                (``DecisionTreeClassifier`` and ``RandomForestClassifier``)
                so that model selection is reproducible. The default ``None``
                keeps the previous, unseeded behaviour.

        Raises:
            ValueError: If ``evaluate_model`` reports no best model.
            Exception: Anything raised by ``evaluate_model`` or ``save_object``
                propagates unchanged.
        """
        # Last column is the label; everything before it is a feature.
        X_train, y_train = train_arr[:, :-1], train_arr[:, -1]
        X_test, y_test = test_arr[:, :-1], test_arr[:, -1]

        # This is a classification problem, so only classifiers are candidates.
        models = {
            'SVC': SVC(),
            'DecisionTreeClassifier': DecisionTreeClassifier(random_state=random_state),
            'KNeighborsClassifier': KNeighborsClassifier(),
            'RandomForestClassifier': RandomForestClassifier(random_state=random_state),
            'GaussianNB': GaussianNB()
        }

        # evaluate_model returns the full report plus a mapping that holds the
        # winning model's name (first key) -- see the printed output.
        model_report, best_model_score = evaluate_model(models, X_train, X_test, y_train, y_test)
        print(model_report)
        print(best_model_score)

        if not best_model_score:
            raise ValueError("evaluate_model did not report a best model")

        best_model_name = next(iter(best_model_score))
        # NOTE(review): this saves the instance held in `models`; it is assumed
        # that evaluate_model fits the estimators in place -- confirm.
        best_model = models[best_model_name]

        # Save the best model
        save_object(self.config.best_model_path, best_model)


In [7]:
from homeLoan.components.data_preprocessing import DataPreprocessing
from homeLoan.config import ConfigurationManager

In [8]:
# Pipeline
# NOTE: when the cells run in order, ConfigurationManager here is the class
# imported from homeLoan.config in the previous cell, not the one defined
# earlier in this notebook (that one has no get_data_preprocessing_config).
# Errors are left to propagate: wrapping the steps in a try/except that only
# re-raises adds nothing.
config = ConfigurationManager()
data_preprocessing_config = config.get_data_preprocessing_config()
model_training_config = config.get_model_training_config()

# Preprocessing yields the train/test arrays consumed by the training step.
data_preprocessing = DataPreprocessing(config=data_preprocessing_config)
train_arr, test_arr = data_preprocessing.initiate_data_preprocessing()

model_training = ModelTraining(config=model_training_config)
model_training.initiate_model_training(train_arr=train_arr, test_arr=test_arr)

2023-12-02 21:48:26,719 : homeLoan.logger - INFO - YAML file: config/config.yaml loaded successfully
2023-12-02 21:48:26,721 : homeLoan.logger - INFO - YAML file: params.yaml loaded successfully
2023-12-02 21:48:26,722 : homeLoan.logger - INFO - created directory at: artifacts
2023-12-02 21:48:26,723 : homeLoan.logger - INFO - created directory at: artifacts/preprocessor
2023-12-02 21:48:26,724 : homeLoan.logger - INFO - created directory at: artifacts/model_training


{'SVC': 0.7881773399014779, 'DecisionTreeClassifier': 0.75, 'KNeighborsClassifier': 0.7978142076502732, 'RandomForestClassifier': 0.8379888268156425, 'GaussianNB': 0.840909090909091}
{'GaussianNB': 0.840909090909091}
