<a href="https://colab.research.google.com/github/rohanpagadala/Smart-Loan-Recovery-System-with-Machine-Learning/blob/main/Smart_Loan_Recovery_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
# Location of the raw loan-recovery extract. The "/content" prefix is an
# absolute path (presumably the Colab session directory, given the Colab
# badge at the top of the notebook) -- adjust when running elsewhere.
data_path = "/content/loan_recovery.csv"
df = pd.read_csv(data_path)

# Summary statistics of the numeric columns, shown as this cell's output.
df.describe()

Unnamed: 0,Age,Monthly_Income,Num_Dependents,Loan_Amount,Loan_Tenure,Interest_Rate,Collateral_Value,Outstanding_Loan_Amount,Monthly_EMI,Num_Missed_Payments,Days_Past_Due,Collection_Attempts
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,43.116,134829.92,1.476,1024907.0,46.104,11.19282,603224.0,562726.0,15861.53602,1.912,70.678,3.0
std,12.733217,68969.356746,1.145447,590755.6,18.23706,3.775209,745713.1,472358.1,18709.231315,2.110252,60.211038,2.807805
min,21.0,15207.0,0.0,54138.0,12.0,5.02,0.0,15712.83,261.88,0.0,0.0,0.0
25%,32.0,76343.25,0.0,462984.8,36.0,7.9075,0.0,182207.2,4039.0975,1.0,4.0,1.0
50%,44.0,134929.5,1.0,997124.0,48.0,10.915,232768.4,413324.0,9330.17,2.0,66.5,2.0
75%,53.0,193086.25,3.0,1557952.0,60.0,14.5775,1111106.0,832478.7,20439.485,3.0,122.25,4.0
max,64.0,249746.0,3.0,1995325.0,72.0,17.97,2744395.0,1932396.0,127849.23,12.0,180.0,10.0


In [4]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Numeric borrower / loan columns fed to the clustering step (and reused
# later as the classifier's inputs).
features = [
    'Age',
    'Monthly_Income',
    'Loan_Amount',
    'Loan_Tenure',
    'Interest_Rate',
    'Collateral_Value',
    'Outstanding_Loan_Amount',
    'Monthly_EMI',
    'Num_Missed_Payments',
    'Days_Past_Due',
]

# Standardise the columns before clustering: their raw magnitudes differ by
# orders of magnitude (e.g. Age vs. Loan_Amount in the describe() output).
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df[features])

# Fixed seed and an explicit n_init keep the segmentation reproducible.
kmeans = KMeans(
    n_clusters=4,
    random_state=42,
    n_init=10,
)
df['Borrower_Segment'] = kmeans.fit_predict(df_scaled)

In [5]:
# Human-readable label for each KMeans cluster id.
# NOTE(review): cluster ids carry no inherent meaning; these names presuppose
# a specific interpretation of clusters 0-3 that nothing in this cell checks.
# Re-verify against the cluster profiles if the features, seed or library
# version change.
segment_names = {
    0: 'Moderate Income, High Loan Burden',
    1: 'High Income, Low Default Risk',
    2: 'Moderate Income, Medium Risk',
    3: 'High Loan, Higher Default Risk',
}
df['Segment_Name'] = df['Borrower_Segment'].map(segment_names)

In [6]:
# Segments whose members are labelled high risk (flag = 1); every other
# segment -- including a missing segment name -- gets 0.
high_risk_segments = [
    'High Loan, Higher Default Risk',
    'Moderate Income, High Loan Burden',
]

# Vectorised membership test instead of a per-row Python lambda; the result
# is the same 0/1 integer column.
df['High_Risk_Flag'] = df['Segment_Name'].isin(high_risk_segments).astype('int64')

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Inputs are the clustering features; the target is the segment-derived
# high-risk flag.
X, y = df[features], df['High_Risk_Flag']

# 80/20 split, stratified on the target so both parts keep the same
# high-risk proportion; seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Column 1 of predict_proba is taken as the probability of the high-risk
# class (flag == 1).
risk_scores = rf_model.predict_proba(X_test)[:, 1]

In [8]:
# Borrower-level context columns pulled back in from the full frame.
borrower_context = df[[
    'Borrower_ID',
    'Segment_Name',
    'Recovery_Status',
    'Collection_Method',
    'Collection_Attempts',
    'Legal_Action_Taken',
]]

# Start from the held-out features, attach the model's risk score and a hard
# 0/1 prediction, then align the context columns on the shared row index.
# NOTE(review): the cutoff is strict (> 0.5), so a score of exactly 0.5 is
# flagged 0 here, while assign_recovery_strategy treats 0.50 as part of the
# settlement tier -- confirm which boundary is intended.
df_test = (
    X_test
    .assign(Risk_Score=risk_scores)
    .assign(Predicted_High_Risk=lambda scored: (scored['Risk_Score'] > 0.5).astype(int))
    .merge(borrower_context, left_index=True, right_index=True)
)

In [9]:
def assign_recovery_strategy(risk_score, high_threshold=0.75, medium_threshold=0.50):
    """Map a risk score to the recovery strategy for that risk tier.

    Parameters
    ----------
    risk_score : float
        Score to classify (the notebook passes the model's predicted
        probability of the high-risk class).
    high_threshold : float, default 0.75
        Scores strictly above this value get the most aggressive strategy.
    medium_threshold : float, default 0.50
        Scores from this value (inclusive) up to ``high_threshold``
        (inclusive) get the settlement strategy.

    Returns
    -------
    str
        One of the three strategy descriptions.

    Raises
    ------
    ValueError
        If ``medium_threshold`` is greater than ``high_threshold``.

    Notes
    -----
    A NaN score fails both comparisons and therefore falls through to the
    lowest tier ("Automated reminders & monitoring").
    """
    if medium_threshold > high_threshold:
        raise ValueError(
            "medium_threshold must not exceed high_threshold "
            f"(got {medium_threshold} > {high_threshold})"
        )

    if risk_score > high_threshold:
        return "Immediate legal notices & aggressive recovery attempts"
    # Reaching this point means the score is not above high_threshold, so
    # only the lower bound of the middle tier needs checking.
    if risk_score >= medium_threshold:
        return "Settlement offers & repayment plans"
    return "Automated reminders & monitoring"

# Translate each held-out borrower's risk score into a recovery strategy.
df_test['Recovery_Strategy'] = df_test['Risk_Score'].map(assign_recovery_strategy)

In [11]:
# Columns that summarise the outcome per borrower: identity, model output,
# segment, observed recovery status and the recommended strategy.
summary_cols = [
    'Borrower_ID',
    'Risk_Score',
    'Predicted_High_Risk',
    'Segment_Name',
    'Recovery_Status',
    'Recovery_Strategy',
]

# Show only the first ten rows rather than the whole test set.
df_test.loc[:, summary_cols].head(10)

Unnamed: 0,Borrower_ID,Risk_Score,Predicted_High_Risk,Segment_Name,Recovery_Status,Recovery_Strategy
436,BRW_437,0.42,0,"High Loan, Higher Default Risk",Partially Recovered,Automated reminders & monitoring
448,BRW_449,0.64,1,"Moderate Income, High Loan Burden",Fully Recovered,Settlement offers & repayment plans
88,BRW_89,0.76,1,"High Loan, Higher Default Risk",Fully Recovered,Immediate legal notices & aggressive recovery ...
145,BRW_146,0.32,0,"High Income, Low Default Risk",Fully Recovered,Automated reminders & monitoring
344,BRW_345,0.09,0,"High Income, Low Default Risk",Fully Recovered,Automated reminders & monitoring
239,BRW_240,0.97,1,"Moderate Income, High Loan Burden",Partially Recovered,Immediate legal notices & aggressive recovery ...
279,BRW_280,0.12,0,"High Income, Low Default Risk",Partially Recovered,Automated reminders & monitoring
445,BRW_446,0.91,1,"High Loan, Higher Default Risk",Fully Recovered,Immediate legal notices & aggressive recovery ...
320,BRW_321,1.0,1,"Moderate Income, High Loan Burden",Partially Recovered,Immediate legal notices & aggressive recovery ...
121,BRW_122,0.63,1,"Moderate Income, High Loan Burden",Fully Recovered,Settlement offers & repayment plans


In [12]:
import plotly.express as px

# Scatter of every borrower: income on x, loan amount on y, coloured by the
# named segment. Marker size is also driven by Loan_Amount, so it repeats
# the y-axis information.
fig = px.scatter(
    df,
    x='Monthly_Income',
    y='Loan_Amount',
    color='Segment_Name',
    size='Loan_Amount',
    title='Borrower Segments by Monthly Income and Loan Amount',
    labels={
        'Monthly_Income': 'Income ($)',
        'Loan_Amount': 'Loan ($)',
    },
    color_discrete_sequence=px.colors.qualitative.Vivid,
)

fig.show()