In [13]:
import pandas as pd
import numpy as np

df = pd.read_csv("C:/Users/PMLS/OneDrive/Desktop/Loan Approval Dataset/loan_approval_dataset.csv")
df.head()

Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,1,2,Graduate,No,9600000,29900000,12,778,2400000,17600000,22700000,8000000,Approved
1,2,0,Not Graduate,Yes,4100000,12200000,8,417,2700000,2200000,8800000,3300000,Rejected
2,3,3,Graduate,No,9100000,29700000,20,506,7100000,4500000,33300000,12800000,Rejected
3,4,3,Graduate,No,8200000,30700000,8,467,18200000,3300000,23300000,7900000,Rejected
4,5,5,Not Graduate,Yes,9800000,24200000,20,382,12400000,8200000,29400000,5000000,Rejected


In [14]:
df.isnull().sum()

loan_id                      0
 no_of_dependents            0
 education                   0
 self_employed               0
 income_annum                0
 loan_amount                 0
 loan_term                   0
 cibil_score                 0
 residential_assets_value    0
 commercial_assets_value     0
 luxury_assets_value         0
 bank_asset_value            0
 loan_status                 0
dtype: int64

In [15]:
df_encoded = df.copy()

#Encoding categorical features
df_encoded.columns = df_encoded.columns.str.strip().str.lower()

df_encoded['education'] = ( df_encoded['education'].astype(str).str.strip().str.lower().map({'graduate': 1, 'not graduate': 0}))

df_encoded['self_employed'] = (df_encoded['self_employed'].astype(str).str.strip().str.lower().map({'yes': 1, 'no': 0}))

# Sum all assets into one
df_encoded['total_assets_value'] = (
    df_encoded["residential_assets_value"] +
    df_encoded["commercial_assets_value"] +
    df_encoded["luxury_assets_value"] +
    df_encoded["bank_asset_value"]
)

df_encoded['dependents'] = df_encoded['no_of_dependents']
df_encoded['annual_income'] = df_encoded['income_annum']
df_encoded['loan_amount'] = df_encoded['loan_amount']
df_encoded['loan_period'] = df_encoded['loan_term']
df_encoded['credit_score'] = df_encoded['cibil_score']
df_encoded['loan_status'] = df_encoded['loan_status']

features = ['dependents','education','self_employed','annual_income','loan_amount','loan_period','credit_score','total_assets_value','loan_status']

df_model = df_encoded[features]

df_model.head()


Unnamed: 0,dependents,education,self_employed,annual_income,loan_amount,loan_period,credit_score,total_assets_value,loan_status
0,2,1,0,9600000,29900000,12,778,50700000,Approved
1,0,0,1,4100000,12200000,8,417,17000000,Rejected
2,3,1,0,9100000,29700000,20,506,57700000,Rejected
3,3,1,0,8200000,30700000,8,467,52700000,Rejected
4,5,0,1,9800000,24200000,20,382,55000000,Rejected


In [30]:
from sklearn.model_selection import train_test_split

X = df_model.drop('loan_status', axis=1)
y = ( df_model['loan_status'].str.strip().map({'Approved': 1, 'Rejected': 0}))

X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=0.2, 
    random_state=42
)


In [31]:
from sklearn.linear_model import LogisticRegression

lg = LogisticRegression(class_weight='balanced', solver='liblinear', random_state=42)
lg.fit(X_train, y_train)

y_pred = lg.predict(X_test)


In [32]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))



Confusion Matrix:
[[158 160]
 [ 95 441]]

Classification Report:
              precision    recall  f1-score   support

           0       0.62      0.50      0.55       318
           1       0.73      0.82      0.78       536

    accuracy                           0.70       854
   macro avg       0.68      0.66      0.66       854
weighted avg       0.69      0.70      0.69       854



In [33]:
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier(
    class_weight='balanced',  
    max_depth=5,              
    min_samples_split=10,     
    random_state=42
)
dt_model.fit(X_train, y_train)

y_pred = dt_model.predict(X_test)
y_pred_proba = dt_model.predict_proba(X_test)[:, 1]


print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Confusion Matrix:
[[312   6]
 [ 21 515]]

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.98      0.96       318
           1       0.99      0.96      0.97       536

    accuracy                           0.97       854
   macro avg       0.96      0.97      0.97       854
weighted avg       0.97      0.97      0.97       854



In [38]:
import pandas as pd

def predict_loan_status():
    print("Enter loan application details:")
    
    dependents = int(input("Number of dependents: "))
    education = input("Education (Graduate/Not Graduate): ").strip().lower()
    self_employed = input("Self Employed? (Yes/No): ").strip().lower()
    annual_income = float(input("Annual Income: "))
    loan_amount = float(input("Loan Amount: "))
    loan_period = int(input("Loan Period (months): "))
    credit_score = float(input("Credit Score: "))
    total_assets_value = float(input("Total Assets Value: "))
    
    
    education_encoded = 1 if education == "graduate" else 0
    self_employed_encoded = 1 if self_employed == "yes" else 0
    
    input_data = pd.DataFrame([{
        'dependents': dependents,
        'education': education_encoded,
        'self_employed': self_employed_encoded,
        'annual_income': annual_income,
        'loan_amount': loan_amount,
        'loan_period': loan_period,
        'credit_score': credit_score,
        'total_assets_value': total_assets_value
    }])
    
   
    prediction = dt_model.predict(input_data)[0]  # 0 = Rejected, 1 = Approved
    result_label = "Approved" if prediction == 1 else "Rejected"
    
    print(f"\nLoan Status Prediction: {result_label}")


predict_loan_status()


Enter loan application details:


Number of dependents:  3
Education (Graduate/Not Graduate):  Graduate
Self Employed? (Yes/No):  No
Annual Income:  5630000
Loan Amount:  345000
Loan Period (months):  6
Credit Score:  650
Total Assets Value:  3465788



Loan Status Prediction: Approved
