In [3]:
## Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Read the loan records from disk into a DataFrame
loan = pd.read_csv('loan.csv')
# Preview expression: its value is discarded here because it is not the
# last statement of the cell.
loan.head()

## Feature Engineering
# 1. Debt-to-Income Ratio (DTI): requested loan amount divided by the
#    applicant's income, computed row by row.
loan['Debt_to_Income_Ratio'] = loan['loan_amnt'].div(loan['person_income'])

# 2. Employment Stability Score: employment experience divided by the
#    applicant's age, computed row by row.
loan['Employment_Stability'] = loan['person_emp_exp'].div(loan['person_age'])

# 3. Credit Score Category (Binned Feature)
def categorize_credit_score(score):
    """Bin a numeric credit score into an ordinal category code.

    Returns:
        3 (Excellent) when the score is at least 750,
        2 (Good) when it is at least 650 but below 750,
        1 (Fair) when it is at least 550 but below 650,
        0 (Poor) otherwise.

    A score that fails every comparison (for example NaN) also falls
    through to 0.
    """
    # Floors are checked from the highest tier down; the first floor the
    # score reaches decides the category.
    tier_floors = ((3, 750), (2, 650), (1, 550))
    for category, floor in tier_floors:
        if score >= floor:
            return category
    return 0

# Apply the credit-score binning to every row (values 0-3).
loan['Credit_Score_Category'] = loan['credit_score'].apply(categorize_credit_score)

# 4. Loan Affordability Index
# Income divided by the loan amount grown by one period of interest.
# loan_int_rate is stored in percentage points (e.g. 16.02 means 16.02%),
# so it is converted to a fraction before being added to 1; using the raw
# value would scale the loan by roughly 17x instead of roughly 1.16x.
loan['Loan_Affordability_Index'] = loan['person_income'] / (
    loan['loan_amnt'] * (1 + loan['loan_int_rate'] / 100)
)

# 5. Loan Intent Risk Score (Categorical Encoding)
# Hand-assigned risk weight per loan purpose; keys are lowercase.
loan_intent_mapping = {
    "home_improvement": 0.2,
    "education": 0.3,
    "medical": 0.5,
    "personal": 0.6,
    "business": 0.8
}

# Series.map matches keys exactly, and the loan_intent column holds
# uppercase labels (e.g. "PERSONAL", "MEDICAL"), so a direct lookup against
# the lowercase keys would miss every row and leave only the default.
# Both sides are therefore normalised: lowercased, with whitespace,
# underscores and hyphens removed, so that "home_improvement",
# "HOMEIMPROVEMENT" and "Home Improvement" all resolve to the same key.
_intent_lookup = {
    key.replace('_', ''): risk for key, risk in loan_intent_mapping.items()
}
_intent_normalized = (
    loan['loan_intent']
    .astype(str)
    .str.lower()
    .str.replace(r'[\s_-]+', '', regex=True)
)

# NOTE(review): categories with no entry in loan_intent_mapping still get
# the 0.5 default -- confirm the mapping covers every purpose in the data.
loan['Loan_Intent_Risk'] = _intent_normalized.map(_intent_lookup).fillna(0.5)  # Default risk if category is unknown

# Display the first few rows to check the new features
print(loan.head())


   person_age person_gender person_education  person_income  person_emp_exp  \
0        22.0        female           Master        71948.0               0   
1        21.0        female      High School        12282.0               0   
2        25.0        female      High School        12438.0               3   
3        23.0        female         Bachelor        79753.0               0   
4        24.0          male           Master        66135.0               1   

  person_home_ownership  loan_amnt loan_intent  loan_int_rate  \
0                  RENT    35000.0    PERSONAL          16.02   
1                   OWN     1000.0   EDUCATION          11.14   
2              MORTGAGE     5500.0     MEDICAL          12.87   
3                  RENT    35000.0     MEDICAL          15.23   
4                  RENT    35000.0     MEDICAL          14.27   

   loan_percent_income  cb_person_cred_hist_length  credit_score  \
0                 0.49                         3.0           561  