In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

# Resolve the project root (the parent of the notebook's working directory)
# and expose its `src` folder so the project modules imported below resolve.
project_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
src_path = os.path.join(project_path, "src")
# Guard the append so re-running this cell does not keep stacking duplicate
# entries onto sys.path.
# NOTE(review): the entry is appended (lowest priority), so an installed
# package sharing a name with a project module (e.g. `evaluate`) would be
# imported instead of the local one -- confirm this ordering is intended.
if src_path not in sys.path:
    sys.path.append(src_path)

from load_data import load_data
from preprocessing import PreprocessingPipeline
from train import TrainModel
from evaluate import Evaluation

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the raw loan CSV, given relative to the notebook's working directory.
DATA_PATH = '../data/loan_data_set.csv'

# load_data hands back two objects, unpacked here as the train and test splits.
train, test = load_data(DATA_PATH)

In [3]:
# Show the training split returned by load_data (rendered as the cell output).
train

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
83,LP001273,Male,Yes,0,Graduate,No,6000,2250.0,265.0,360.0,,Semiurban,N
90,LP001316,Male,Yes,0,Graduate,No,2958,2900.0,131.0,360.0,1.0,Semiurban,Y
227,LP001758,Male,Yes,2,Graduate,No,6250,1695.0,210.0,360.0,1.0,Semiurban,Y
482,LP002537,Male,Yes,0,Graduate,No,2083,3150.0,128.0,360.0,1.0,Semiurban,Y
464,LP002493,Male,No,0,Graduate,No,4166,0.0,98.0,360.0,0.0,Semiurban,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,LP001245,Male,Yes,2,Not Graduate,Yes,1875,1875.0,97.0,360.0,1.0,Semiurban,Y
106,LP001369,Male,Yes,2,Graduate,No,11417,1126.0,225.0,360.0,1.0,Urban,Y
270,LP001888,Female,No,0,Graduate,No,3237,0.0,30.0,360.0,1.0,Urban,Y
435,LP002393,Female,,,Graduate,No,10047,0.0,,240.0,1.0,Semiurban,Y


In [4]:
# Show the test split returned by load_data (rendered as the cell output).
test

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
350,LP002139,Male,Yes,0,Graduate,No,9083,0.0,228.0,360.0,1.0,Semiurban,Y
377,LP002223,Male,Yes,0,Graduate,No,4310,0.0,130.0,360.0,,Semiurban,Y
163,LP001570,Male,Yes,2,Graduate,No,4167,1447.0,158.0,360.0,1.0,Rural,Y
609,LP002978,Female,No,0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
132,LP001478,Male,No,0,Graduate,No,2718,0.0,70.0,360.0,1.0,Semiurban,Y
...,...,...,...,...,...,...,...,...,...,...,...,...,...
231,LP001768,Male,Yes,0,Graduate,,3716,0.0,42.0,180.0,1.0,Rural,Y
312,LP002006,Female,No,0,Graduate,No,2507,0.0,56.0,360.0,1.0,Rural,Y
248,LP001824,Male,Yes,1,Graduate,No,2882,1843.0,123.0,480.0,1.0,Semiurban,Y
11,LP001027,Male,Yes,2,Graduate,,2500,1840.0,109.0,360.0,1.0,Urban,Y


In [5]:
# Instantiate the project's preprocessing pipeline; no arguments are passed.
pipeline = PreprocessingPipeline()

In [6]:
# Run preprocessing over both splits.
# NOTE: this rebinds `train` and `test`, so the raw frames are no longer
# reachable afterwards, and re-running this cell on its own would feed the
# already-processed frames back into the pipeline. Re-run from the load cell.
train, test = pipeline.preprocess_data(train_data=train, test_data=test)

Index(['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Gender',
       'Married', 'Dependents', 'Education', 'Self_Employed', 'Credit_History',
       'Property_Area', 'Loan_Amount_Term'],
      dtype='object')


In [7]:
# Build the trainer around the preprocessed training frame.
trainer = TrainModel(train)

In [8]:
# Run the hyperparameter search; the output logs one line per trial with its
# score and sampled parameters.
# NOTE(review): the log format looks like an Optuna study -- confirm in train.py.
trainer.optimize()

[I 2025-03-09 23:39:53,117] A new study created in memory with name: no-name-1d7b32cb-5117-4f6b-adc5-c9509a3d114d
[I 2025-03-09 23:39:54,932] Trial 0 finished with value: 0.8332500482899693 and parameters: {'n_estimators': 265, 'max_depth': 32, 'min_samples_split': 3, 'min_samples_leaf': 7}. Best is trial 0 with value: 0.8332500482899693.
[I 2025-03-09 23:39:55,485] Trial 1 finished with value: 0.8269057029902125 and parameters: {'n_estimators': 80, 'max_depth': 26, 'min_samples_split': 15, 'min_samples_leaf': 7}. Best is trial 0 with value: 0.8332500482899693.
[I 2025-03-09 23:39:56,387] Trial 2 finished with value: 0.8280679058349975 and parameters: {'n_estimators': 130, 'max_depth': 47, 'min_samples_split': 19, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.8332500482899693.
[I 2025-03-09 23:39:57,339] Trial 3 finished with value: 0.8372001298054963 and parameters: {'n_estimators': 138, 'max_depth': 38, 'min_samples_split': 3, 'min_samples_leaf': 4}. Best is trial 3 with valu

In [9]:
# Train and keep the returned model object for saving below.
# NOTE(review): presumably uses the best parameters found by optimize() -- confirm.
model = trainer.train()

In [10]:
# Persist the trained model; the cell output reports the destination path.
trainer.save_model(model)

Model saved to ../models/model.pkl


In [11]:
# Artefact locations, given relative to the notebook's working directory.
MODEL_PATH = '../models/model.pkl'
METRICS_PATH = '../metrics/metrics.csv'

# Hand the saved-model path, the preprocessed test split and the metrics
# destination to the evaluator.
evaluation = Evaluation(
    model_path=MODEL_PATH,
    test_data=test,
    output_path=METRICS_PATH,
)

In [12]:
# Run the evaluation configured above.
# NOTE(review): presumably writes metrics to the configured output_path -- confirm in evaluate.py.
evaluation.evaluate()