# In [7]:

import pandas as pd
import pickle
# Read the loan records from the CSV file into a DataFrame.
data_path = 'C:/Users/jvr/Desktop/project/loan_data.csv'
loan_data = pd.read_csv(filepath_or_buffer=data_path)


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Selecting features and target.
# 'Verdict' is the label to predict; it and 'Loan ID' are removed from the
# feature matrix so neither is fed to the model.
X = loan_data.drop(columns=['Loan ID', 'Verdict'])
y = loan_data['Verdict']

# Identifying categorical and numerical columns.
# The features are partitioned into "numeric" vs. "everything else" so that
# every column lands in exactly one group. Listing specific dtypes
# (e.g. only float64/int64 and object/bool) would leave int32, float32,
# category or string columns in neither group, and the ColumnTransformer
# built further down drops any column that is not assigned to a transformer.
# NOTE(review): non-numeric here also covers datetime columns, should any be
# parsed in the future — those would need their own handling.
numerical_cols = X.select_dtypes(include='number').columns
categorical_cols = X.select_dtypes(exclude='number').columns

# Per-group preprocessing steps: numeric features are standardised and
# categorical features are one-hot encoded (categories not seen during
# fitting are ignored at transform time instead of raising an error).
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
numerical_transformer = StandardScaler()

# Route each column group to its transformer.
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols),
])

# Full model: preprocessing followed by a gradient boosting classifier
# (fixed random_state so repeated runs give the same model).
model = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', GradientBoostingClassifier(random_state=0)),
])

# Splitting the data into train and test sets: 20% is held out for testing,
# with a fixed random_state so the split is reproducible.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# class proportions of the test set need to match the full dataset.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Training the model (preprocessing is fitted on the training split only,
# because it is part of the pipeline).
model.fit(X_train, y_train)

# Predicting and evaluating the model on the held-out test split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Print the results explicitly. A bare `accuracy, report` expression is
# discarded when this runs as a script, and in a notebook it shows the report
# as a raw string with escaped newlines instead of a table.
print("Accuracy:", accuracy)
print(report)

# Output of the cell above (recorded run):
#
# accuracy = 0.995575221238938
#
#               precision    recall  f1-score   support
#
#           No       1.00      0.99      0.99       805
#          Yes       1.00      1.00      1.00      1229
#
#     accuracy                           1.00      2034
#    macro avg       1.00      1.00      1.00      2034
# weighted avg       1.00      1.00      1.00      2034