In [2]:
import os
# Step up one directory (presumably to the project root, since later cells
# resolve paths such as "artifacts" relative to the working directory) exactly
# once per kernel session. A bare os.chdir("../") climbs one more level every
# time this cell is re-run, which silently breaks those relative paths.
# After a kernel restart the sentinel is gone, so the move happens again.
if "_PROJECT_ROOT" not in globals():
    os.chdir("../")
    _PROJECT_ROOT = os.getcwd()

In [3]:
from LoanApproval import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from LoanApproval.utils import read_yam_file, create_directories

In [4]:
from pathlib import Path

In [14]:
class TrainingConfigurationManager:
    """Load the project configuration and expose the settings of the training stage.

    On construction the configuration file is read with ``read_yam_file``, the
    location of the preprocessed dataset is derived from the
    ``data_preprocessing`` section, and the ``artifacts_root`` directory is
    created through ``create_directories``.
    """

    def __init__(self,config_file_path = CONFIG_FILE_PATH):
        self.config = read_yam_file(config_file_path)

        # The training stage consumes the file written by the preprocessing stage.
        preprocessing = self.config['data_preprocessing']
        self.dataset_load = os.path.join(preprocessing['root_dir'], preprocessing['local_file'])

        create_directories([self.config['artifacts_root']])

    def get_training_config(self):
        """Create the training directory and return the training settings.

        Returns:
            dict: ``root_dir``, ``dataset_load``, ``local_model_file``,
            ``test_data`` and ``score_save``, each wrapped in ``pathlib.Path``.
        """
        section = self.config['training']
        create_directories([section['root_dir']])

        settings = {
            'root_dir': Path(section['root_dir']),
            'dataset_load': Path(self.dataset_load),
        }
        # The remaining entries are copied from the 'training' section as-is.
        for key in ('local_model_file', 'test_data', 'score_save'):
            settings[key] = Path(section[key])

        return settings

In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import joblib
from sklearn.metrics import accuracy_score, f1_score
from src.LoanApproval.utils import save_reports

In [7]:
import mlflow

In [18]:
class Training:
    """Split the dataset, fit a logistic-regression model and score it.

    The public methods are pipeline stages that must run in this order:
    ``splitting_data()`` -> ``training()`` -> ``evaluate()``. Calling a stage
    before its prerequisite raises an ``AttributeError`` that names the stage
    that has to run first.

    Args:
        config: Mapping that provides ``dataset_load`` (CSV to read),
            ``root_dir`` (directory receiving every artifact) and the file
            names ``test_data``, ``local_model_file`` and ``score_save``.
    """

    def __init__(self, config):
        self.config = config
        # Filled in by the stages below; None means "stage has not run yet".
        self.train = None
        self.test = None
        self.lr_model = None

    def _artifact_path(self, key):
        """Return the path of the artifact named by ``config[key]`` inside ``root_dir``."""
        return os.path.join(self.config['root_dir'], self.config[key])

    def _require(self, attribute, producer):
        """Return ``self.<attribute>`` or raise if the stage ``producer`` has not run."""
        value = getattr(self, attribute, None)
        if value is None:
            # AttributeError keeps the exception type an out-of-order call
            # raised before these guards existed.
            raise AttributeError(
                f"'{attribute}' is not available yet: "
                f"call Training.{producer}() first."
            )
        return value

    def splitting_data(self):
        """Read the dataset, make an 80/20 train/test split and save the test part.

        The split uses a fixed ``random_state`` so it is reproducible. The
        held-out rows are written as CSV (without the index) to
        ``root_dir/test_data``.
        """
        df = pd.read_csv(self.config['dataset_load'])
        self.train, self.test = train_test_split(df, test_size=0.2, random_state=33)

        self.test.to_csv(self._artifact_path('test_data'), index=False)

    def training(self):
        """Fit ``LogisticRegression`` on the training split and persist the model.

        Every column except ``Loan_Status`` is used as a feature. The fitted
        model is kept on ``self.lr_model`` and dumped with joblib to
        ``root_dir/local_model_file``.

        Raises:
            AttributeError: If ``splitting_data()`` has not been called.
        """
        train = self._require('train', 'splitting_data')
        x = train.drop('Loan_Status', axis=1)
        y = train['Loan_Status']

        self.lr_model = LogisticRegression()
        self.lr_model.fit(x, y)

        # saving model
        joblib.dump(self.lr_model, self._artifact_path('local_model_file'))

    def evaluate(self):
        """Score the fitted model on the held-out split and save the report.

        Accuracy and F1 are computed on the test rows and handed to
        ``save_reports`` together with the path ``root_dir/score_save``.

        Returns:
            dict: ``{'test_accuracy': ..., 'f1_score': ...}`` as passed to
            ``save_reports``.

        Raises:
            AttributeError: If ``splitting_data()`` or ``training()`` has not
                been called.
        """
        test = self._require('test', 'splitting_data')
        model = self._require('lr_model', 'training')

        x_test = test.drop("Loan_Status", axis=1)
        y_test = test['Loan_Status']

        y_pred = model.predict(x_test)
        # NOTE(review): f1_score defaults to binary averaging with positive
        # label 1, so this assumes Loan_Status is encoded as 0/1 - confirm
        # against the preprocessing stage.
        scores = {
            'test_accuracy': accuracy_score(y_test, y_pred),
            'f1_score': f1_score(y_test, y_pred)
        }

        save_reports(scores, self._artifact_path('score_save'))
        return scores

In [19]:
# Resolve the training settings from the project configuration.
config = TrainingConfigurationManager()
training_config = config.get_training_config()

# Run the stages in their required order: split -> fit -> evaluate.
training = Training(training_config)
for stage in (training.splitting_data, training.training):
    stage()
training.evaluate()


[2023-03-29 17:47:21,094: INFO: utils]: yaml file configs\config.yaml load  successfully
[2023-03-29 17:47:21,097: INFO: utils]: created directory at artifacts
[2023-03-29 17:47:21,099: INFO: utils]: created directory at artifacts/training
[2023-03-29 17:47:21,163: INFO: utils]: reports are saved
