In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Fetch a fresh copy of the raw loan-training CSV so that the cells below build
# X and y from unmodified data (this re-load works around the upstream NaN error).
url = "https://raw.githubusercontent.com/dphi-official/Datasets/master/Loan_Data/loan_train.csv"
data_fixed = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows as the cell output.
data_fixed.head(n=5)


Unnamed: 0.1,Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,0,LP002305,Female,No,0,Graduate,No,4547,0.0,115.0,360.0,1.0,Semiurban,1
1,1,LP001715,Male,Yes,3+,Not Graduate,Yes,5703,0.0,130.0,360.0,1.0,Rural,1
2,2,LP002086,Female,Yes,0,Graduate,No,4333,2451.0,110.0,360.0,1.0,Urban,0
3,3,LP001136,Male,Yes,0,Not Graduate,Yes,4695,0.0,96.0,,1.0,Urban,1
4,4,LP002529,Male,Yes,2,Graduate,No,6700,1750.0,230.0,300.0,1.0,Semiurban,1


In [19]:
# Drop every row that has a missing value in any column. The explicit .copy()
# yields an independent frame, so the column assignments below never write
# into a view of the raw data (no SettingWithCopyWarning).
data_fixed = data_fixed.dropna().copy()

# Encode the two-valued categorical columns as 0/1 integers.
binary_encodings = {
    'Gender': {'Male': 1, 'Female': 0},
    'Married': {'Yes': 1, 'No': 0},
    'Education': {'Graduate': 1, 'Not Graduate': 0},
    'Self_Employed': {'Yes': 1, 'No': 0},
}
for column, encoding in binary_encodings.items():
    # Re-running this cell must be harmless: a column that is already numeric
    # was encoded by a previous run, and mapping it again would turn every
    # value into NaN because 0/1 are not keys of the encoding dict.
    if pd.api.types.is_numeric_dtype(data_fixed[column]):
        continue
    encoded = data_fixed[column].map(encoding)
    # Missing values were dropped above, so a NaN here can only come from a
    # category that the encoding does not know about. Fail loudly instead of
    # silently passing NaN features to the model.
    unmapped = encoded.isna()
    if unmapped.any():
        unexpected = data_fixed.loc[unmapped, column].unique().tolist()
        raise ValueError(f"Unexpected values in column {column!r}: {unexpected}")
    data_fixed[column] = encoded

# Loan_Status is already numeric (0 or 1) in this dataset, so it is used as-is.
# If it were 'Y'/'N' it would need .map({'Y': 1, 'N': 0}) first.

features = [
    'Gender', 'Married', 'Education',
    'ApplicantIncome', 'LoanAmount',
    'Credit_History'
]

X = data_fixed[features]
y = data_fixed['Loan_Status']

# Safety net: if the target still contains NaN, drop those rows from both X and y
# so the two stay aligned on the same index.
if y.isna().any():
    print("Warning: NaN values still present in y after re-processing. Dropping rows with NaN in y.")
    clean_indices = y.dropna().index
    X = X.loc[clean_indices]
    y = y.loc[clean_indices]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

print("Accuracy:", model.score(X_test, y_test))


Accuracy: 0.7662337662337663


In [20]:
def calculate_risk(input_data):
    """Return the risk score of the first row of ``input_data``.

    The global ``model`` is asked for class probabilities and only the first
    row of the result is used; any further rows in ``input_data`` are ignored.
    The risk is one minus the probability in column index 1 (presumably the
    ``Loan_Status == 1`` class -- confirm against ``model.classes_``).

    Parameters
    ----------
    input_data : table accepted by ``model.predict_proba``
        Expected to hold at least one row.

    Returns
    -------
    The value ``1 - p`` where ``p`` is the column-1 probability of the first row.
    """
    first_row_probabilities = model.predict_proba(input_data)[0]
    return 1 - first_row_probabilities[1]

In [21]:
import numpy as np

# Possible adjustments to the base rate: decrease, keep the same, increase.
actions = [-1, 0, 1]

# Q-value table: one row per discretised risk state (50 of them) and one
# column per entry in `actions`, all starting at zero.
q_table = np.zeros(shape=(50, len(actions)), dtype=float)

def get_state(risk, n_states=50):
    """Discretise a risk score into a state index in ``[0, n_states - 1]``.

    The score is scaled by ``n_states`` and truncated to an integer, then
    clamped on both sides. Clamping the upper end maps ``risk == 1.0`` to the
    last state; clamping the lower end stops a negative score from producing a
    negative index, which would silently wrap around when used to index a table.

    Parameters
    ----------
    risk : float
        Risk score, normally in ``[0, 1]``.
    n_states : int, optional
        Number of discrete states. Defaults to 50.

    Returns
    -------
    int
        State index between 0 and ``n_states - 1`` inclusive.
    """
    bucket = int(risk * n_states)
    return max(0, min(bucket, n_states - 1))

def reward_function(interest, risk):
    """Score an interest rate offered at a given risk level.

    The reward is the interest weighted by ``1 - risk``, minus a loss term of
    ``risk * 100``.

    Parameters
    ----------
    interest : number
        Interest rate being evaluated.
    risk : number
        Risk score used both to discount the interest and to size the loss.

    Returns
    -------
    ``interest * (1 - risk) - risk * 100``
    """
    return interest * (1 - risk) - risk * 100

In [22]:
# Q-learning hyperparameters.
N_EPISODES = 500      # full passes over the training rows
LEARNING_RATE = 0.1   # step size of each Q-value update
DISCOUNT = 0.9        # weight of the bootstrapped max-Q term

# Neither the classifier nor X_train changes while the Q-table is trained, so
# the risk of each training row is identical in every episode. Score each row
# exactly once here instead of calling the model again in all N_EPISODES passes.
train_risks = [
    calculate_risk(X_train.iloc[row:row + 1])
    for row in range(len(X_train))
]

# NOTE(review): the action is always the current argmax (no exploration step),
# and q_table is updated in place, so re-running this cell continues training
# from the existing values rather than starting over.
for episode in range(N_EPISODES):
    for risk in train_risks:
        state = get_state(risk)

        # Greedy choice among the adjustments for this risk state.
        action_idx = np.argmax(q_table[state])
        action = actions[action_idx]

        # Base rate grows linearly with risk; the chosen action shifts it.
        base_rate = 5 + risk * 10
        new_rate = base_rate + action

        reward = reward_function(new_rate, risk)

        # The bootstrapped term uses the same state's best Q-value.
        q_table[state, action_idx] += LEARNING_RATE * (
            reward + DISCOUNT * np.max(q_table[state]) - q_table[state, action_idx]
        )

In [None]:
def recommend_interest(user_input):
    """Recommend an interest rate for a single applicant.

    The applicant is scored with ``calculate_risk``, the risk is discretised
    with ``get_state``, and the adjustment with the highest value in
    ``q_table`` for that state is added to the risk-based base rate
    ``5 + risk * 10``.

    Parameters
    ----------
    user_input : dict-like
        One applicant record, normally a mapping from each name in the global
        ``features`` list to its numeric value. Key order does not matter and
        extra keys are ignored when every feature is present.

    Returns
    -------
    tuple
        ``(final_rate, risk)``, each rounded to two decimals.
    """
    input_df = pd.DataFrame([user_input])

    # The classifier was fitted on columns in the order given by `features`.
    # A dict with the same keys in another order would otherwise be rejected
    # (or mis-scored) by the model, so select the features in training order.
    # Inputs that do not carry all feature names are passed through unchanged.
    if set(features) <= set(input_df.columns):
        input_df = input_df[features]

    risk = calculate_risk(input_df)
    state = get_state(risk)

    # Pick the adjustment with the highest learned value for this risk state.
    action_idx = np.argmax(q_table[state])
    adjustment = actions[action_idx]

    base_rate = 5 + risk * 10
    final_rate = base_rate + adjustment

    return round(final_rate, 2), round(risk, 2)

In [None]:
# Example applicant, using the same numeric feature values the model was trained on.
user = dict(
    Gender=1,
    Married=1,
    Education=1,
    ApplicantIncome=5000,
    LoanAmount=200,
    Credit_History=1,
)

# Score the applicant and report both the risk and the recommended rate.
rate, risk = recommend_interest(user)

print("Risk:", risk)
print("Interest Rate:", rate)

Risk: 0.32
Interest Rate: 7.2


In [30]:
# Second example: same profile as above but with ApplicantIncome 10000 and
# LoanAmount 100. The (rate, risk) tuple is shown as the cell output.
recommend_interest({
    'Gender': 1,
    'Married': 1,
    'Education': 1,
    'ApplicantIncome': 10000,
    'LoanAmount': 100,
    'Credit_History': 1,
})


(np.float64(7.5), np.float64(0.15))