In [27]:
import json
import warnings
from pathlib import Path

import joblib
import pandas as pd
# Silence every Python warning raised from here on.
# NOTE(review): this is a blanket filter, so deprecation and data-quality
# warnings from the libraries used below are hidden as well.
warnings.filterwarnings("ignore") 

In [None]:
# Configuration
# Base folder of the project; the prediction CSV is read from its "raw/" sub-folder.
# Replace the placeholder with your own folder and end it with a path separator,
# as in the example below.
loc = "Your folder path"

# e.g. /Users/yourname/Documents/VSCode/End-End Logistic Regression- Loan Approval Classification/

## Load New Dataset


In [None]:
# Read the new applications to score.
# The path is joined with pathlib instead of string concatenation so that it
# resolves correctly whether or not `loc` ends with a path separator
# (plain `loc + "raw/..."` yields ".../projectraw/..." when the slash is missing).
data_new = pd.read_csv(Path(loc) / "raw" / "loan_data_for_prediction.csv")

## Load saved features, model, Box-Cox Lambda Values and Feature Scaler

In [30]:
# NOTE: joblib files are pickle-based, so only load artifacts from a trusted source.

# Persisted model
model = joblib.load("final_model_lr.joblib")

# Persisted feature scaler
scaler = joblib.load("scaler.joblib")

# Feature names stored as JSON
with open("feature_list.json", "r") as feature_file:
    feature_list = json.loads(feature_file.read())

## Data Preparation

In [31]:
# Convert categorical variables to numerical using one-hot encoding and an ordinal mapping
data = data_new.copy()  # work on a copy so the raw input frame is not modified here

# One-hot encode the nominal columns, keeping EVERY level present in the new data.
# drop_first=True is deliberately not used at prediction time: it drops whichever
# level sorts first in *this* dataset, which is not necessarily the baseline level
# that was dropped when the model was trained. If a batch lacks the baseline level,
# a real dummy column would be removed and later zero-filled, silently encoding
# those rows as the baseline category. Surplus dummy columns are harmless because
# the frame is later restricted to the columns named in `feature_list`.
nominal_columns = ['person_gender', 'previous_loan_defaults_on_file', 'person_home_ownership', 'loan_intent']
data = pd.get_dummies(data, columns=nominal_columns, drop_first=False)

# Use an ordinal mapping for the education variable
education_mapping = {'High School': 0, 'Associate': 1, 'Bachelor': 2, 'Master': 3, 'Doctorate': 4}
education_codes = data['person_education'].map(education_mapping)

# .map() turns unknown labels into NaN without complaint; fail loudly instead so a
# new or misspelled education level cannot slip into the model as a missing value.
unmapped_mask = education_codes.isna() & data['person_education'].notna()
if unmapped_mask.any():
    unknown_levels = sorted(map(str, data.loc[unmapped_mask, 'person_education'].unique()))
    raise ValueError(f"Unmapped person_education values: {unknown_levels}")

data['person_education'] = education_codes
data = data.astype(float)

In [32]:
# Make sure every column named in feature_list is present in the frame.
# A column absent from the encoded data is created and filled with 0.
absent_columns = [name for name in feature_list if name not in data.columns]
for name in absent_columns:
    data[name] = 0

In [33]:
# Restrict the frame to the columns listed in feature_list, in that order
data = data.loc[:, feature_list]

In [34]:
# Apply feature scaling
# Only transform() is called: the scaler loaded from disk is applied as-is and
# is not re-fitted on the new data.
data_scaled = scaler.transform(data)

## Make Predictions on new dataset

In [35]:
# Predict
# Run the loaded model on the scaled features; the result is attached to the
# input frame as "loan_status" in the next cell.
loan_approval_prediction = model.predict(data_scaled)

In [36]:
# Add the predictions to the main (un-encoded) input frame as a new
# "loan_status" column. This modifies data_new in place.
data_new["loan_status"] = loan_approval_prediction


In [37]:
# Display the input rows together with the loan_status column added above
data_new

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,23.0,male,Associate,24013.0,0,MORTGAGE,3000.0,MEDICAL,11.01,0.12,4.0,604,No,1.0
1,32.0,male,Associate,104085.0,8,MORTGAGE,4875.0,VENTURE,10.43,0.05,8.0,679,Yes,0.0
2,23.0,male,Associate,46149.0,0,RENT,8000.0,EDUCATION,14.18,0.17,2.0,468,Yes,0.0
3,49.0,male,Doctorate,115237.0,28,MORTGAGE,7500.0,DEBTCONSOLIDATION,6.03,0.07,13.0,581,No,0.0
4,32.0,male,Bachelor,59983.0,7,RENT,3500.0,HOMEIMPROVEMENT,10.65,0.06,6.0,650,No,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,29.0,male,High School,45479.0,4,RENT,12000.0,HOMEIMPROVEMENT,9.63,0.26,7.0,656,No,1.0
19996,31.0,female,Associate,68272.0,10,MORTGAGE,6049.0,MEDICAL,12.33,0.09,9.0,563,No,0.0
19997,34.0,male,Associate,41675.0,12,RENT,8000.0,PERSONAL,7.90,0.19,7.0,618,Yes,0.0
19998,28.0,male,Master,96968.0,5,MORTGAGE,16000.0,PERSONAL,5.99,0.17,7.0,651,Yes,0.0


## Final Thoughts on Model Usage in Production

This prediction step demonstrates **how the logistic regression model we built earlier can be deployed in a real-world banking environment** to streamline the loan approval process.

### How the Model Helps:
- **Automated Decision-Making**: By predicting loan approval likelihood for new applicants, banks can make **instant decisions** on straightforward cases.
- **Faster Processing**: Reduces turnaround time significantly, leading to a **better customer experience**.
- **Resource Optimization**: Allows **loan officers to focus on edge cases or high-risk applications** that require deeper investigation.
- **Scalability**: Once deployed, the model can handle **thousands of applications per day**, far beyond what manual teams can achieve.

### Human + Machine Collaboration:
- While this model performs well, **certain critical or high-value loan applications** may still require manual verification and expert judgment.
- The ideal approach is a **hybrid system**:  
  - Use the model for **standard, low-risk cases**, and  
  - **Flag ambiguous or high-risk cases** for manual review.
