In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.impute import SimpleImputer

# Read the raw loan records from disk.
data = pd.read_csv('loan_data.csv')

# The target is the 'loan_status' column; every other column is a feature.
y = data['loan_status']
X = data.drop(columns='loan_status')

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Columns handled by the numeric branch of the preprocessor.
numeric_features = [
    'person_age',
    'person_income',
    'person_emp_exp',
    'loan_amnt',
    'loan_int_rate',
    'loan_percent_income',
    'cb_person_cred_hist_length',
    'credit_score',
]

# Columns handled by the categorical branch of the preprocessor.
categorical_features = [
    'person_gender',
    'person_education',
    'person_home_ownership',
    'loan_intent',
    'previous_loan_defaults_on_file',
]

# Numeric branch: fill gaps with the column median, then standardize.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill gaps with the literal 'missing', then one-hot encode.
# handle_unknown='ignore' lets categories unseen during fit pass through transform.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group to its own branch.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Full model: preprocessing followed by a depth-limited decision tree.
tree = DecisionTreeClassifier(
    criterion='gini',
    max_depth=5,
    min_samples_split=10,
    random_state=42,
)
clf = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', tree),
])

# Fit on the training split and predict the held-out rows.
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Report held-out performance.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.9106666666666666

Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.97      0.94      6990
           1       0.86      0.72      0.78      2010

    accuracy                           0.91      9000
   macro avg       0.89      0.84      0.86      9000
weighted avg       0.91      0.91      0.91      9000


Confusion Matrix:
 [[6755  235]
 [ 569 1441]]


In [5]:
import joblib

# Persist the fitted pipeline (preprocessing + classifier) so a later cell can
# reload it; the call returns the list of file names it wrote.
joblib.dump(value=clf, filename='loan_decision_tree_model.joblib')


['loan_decision_tree_model.joblib']

In [7]:
import joblib
import numpy as np
import pandas as pd

# Load the saved model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code; only
# load model files you created yourself or otherwise trust.
clf = joblib.load('loan_decision_tree_model.joblib')

# Prompt shown for each model input feature. Questions are asked in this order.
features_options = {
    'person_age': "Enter your age (numeric value): ",
    'person_gender': "Select your gender (options: female, male): ",
    'person_education': "Select your education level (options: Master, Bachelor, High School, Associate): ",
    'person_income': "Enter your annual income (numeric value): ",
    'person_emp_exp': "Enter your years of employment experience (numeric value): ",
    'person_home_ownership': "Select your home ownership status (options: RENT, OWN, MORTGAGE): ",
    'loan_amnt': "Enter the loan amount you are requesting (numeric value): ",
    'loan_intent': "Select your loan intent (options: PERSONAL, EDUCATION, MEDICAL, VENTURE): ",
    'loan_int_rate': "Enter the loan interest rate (numeric value): ",
    'loan_percent_income': "Enter the loan percent of your income (numeric value): ",
    'cb_person_cred_hist_length': "Enter your credit history length in months (numeric value): ",
    'credit_score': "Enter your credit score (numeric value): ",
    'previous_loan_defaults_on_file': "Select if you have previous loan defaults (options: Yes, No): "
}

# Accepted answers for each categorical feature. Any feature that is not a key
# here is treated as numeric.
# NOTE(review): confirm these lists cover every category present in the
# training data; values outside them are currently rejected at the prompt.
categorical_options = {
    'person_gender': ['female', 'male'],
    'person_education': ['Master', 'Bachelor', 'High School', 'Associate'],
    'person_home_ownership': ['RENT', 'OWN', 'MORTGAGE'],
    'loan_intent': ['PERSONAL', 'EDUCATION', 'MEDICAL', 'VENTURE'],
    'previous_loan_defaults_on_file': ['Yes', 'No'],
}


def _quote_join(options):
    """Format options as "'A' or 'B'" (two items) or "'A', 'B', or 'C'" (three or more)."""
    quoted = [f"'{option}'" for option in options]
    if len(quoted) <= 2:
        return " or ".join(quoted)
    return ", ".join(quoted[:-1]) + f", or {quoted[-1]}"


def _ask_feature(feature, prompt):
    """Prompt for one feature until the answer is valid or the user skips.

    Parameters:
        feature: Feature name; decides whether the answer is validated against
            ``categorical_options`` or parsed as a number.
        prompt: Text passed to ``input()``.

    Returns:
        ``np.nan`` if the user typed 'skip' (any case), the chosen option string
        for a categorical feature, or a finite ``float`` for a numeric feature.
    """
    while True:
        # Surrounding whitespace is never meaningful for these answers.
        answer = input(prompt).strip()

        if answer.lower() == 'skip':
            return np.nan  # Use NaN to indicate missing values

        allowed = categorical_options.get(feature)
        if allowed is not None:
            if answer in allowed:
                return answer
            print(f"Invalid input! Please enter {_quote_join(allowed)}.")
            continue  # ask the same question again

        try:
            value = float(answer)
        except ValueError:
            value = None
        # float() also parses 'inf' and 'nan'; neither is a usable measurement,
        # and 'skip' is the supported way to leave a value missing.
        if value is not None and np.isfinite(value):
            return value
        print(f"Invalid numeric input for {feature}. Please enter a numeric value.")


# Collect user input for each feature
print("Please provide the following information. Type 'skip' if you prefer not to answer.")

# Every feature ends up with a value (possibly NaN), so the DataFrame below
# always contains all the columns listed in features_options.
user_data = {
    feature: _ask_feature(feature, prompt)
    for feature, prompt in features_options.items()
}

# Convert the user data to a single-row DataFrame for compatibility with the model
user_df = pd.DataFrame([user_data])

# A skipped categorical answer leaves an all-NaN column that pandas infers as
# float64; keeping categorical columns as object avoids handing purely numeric
# data to the string-constant imputer when every categorical answer is skipped.
user_df = user_df.astype({feature: object for feature in categorical_options})

# Make a prediction
prediction = clf.predict(user_df)[0]

# Output the prediction
# assumes loan_status == 1 encodes an approved loan — TODO confirm against the
# dataset's documentation
if prediction == 1:
    print("The model suggests that this user should be granted a loan.")
else:
    print("The model suggests that this user should not be granted a loan.")


Please provide the following information. Type 'skip' if you prefer not to answer.
The model suggests that this user should be granted a loan.
