# Credit Risk Modeling and Decision Framework for Consumer Lending

Objective:
Build a credit risk model to predict probability of default for a digital lending platform and use it for underwriting decisions.

In [None]:
# Import the data libraries; the message below only prints once both imports
# have succeeded.
import pandas as pd
import numpy as np

print("Environment ready")


Environment ready


In [None]:
import pandas as pd

# Names for the 20 applicant attributes followed by the outcome label. They are
# passed to read_csv explicitly, in file column order.
columns = (
    "status duration credit_history purpose credit_amount "
    "savings employment installment_rate personal_status "
    "other_debtors residence_since property age "
    "other_installments housing existing_credits "
    "job num_dependents telephone foreign_worker target"
).split()

# The data file is space-delimited.
df = pd.read_csv("../data/german_credit.data", sep=" ", names=columns)

df.head()


Unnamed: 0,status,duration,credit_history,purpose,credit_amount,savings,employment,installment_rate,personal_status,other_debtors,...,property,age,other_installments,housing,existing_credits,job,num_dependents,telephone,foreign_worker,target
0,A11,6,A34,A43,1169,A65,A75,4,A93,A101,...,A121,67,A143,A152,2,A173,1,A192,A201,1
1,A12,48,A32,A43,5951,A61,A73,2,A92,A101,...,A121,22,A143,A152,1,A173,1,A191,A201,2
2,A14,12,A34,A46,2096,A61,A74,2,A93,A101,...,A121,49,A143,A152,1,A172,2,A191,A201,1
3,A11,42,A32,A42,7882,A61,A74,2,A93,A103,...,A122,45,A143,A153,1,A173,2,A191,A201,1
4,A11,24,A33,A40,4870,A61,A73,3,A93,A101,...,A124,53,A143,A153,2,A173,2,A191,A201,2


In [None]:
# Binary modelling label: 1 where the recorded outcome code is 2, otherwise 0.
df["default"] = df["target"].eq(2).astype(int)

# Share of each class in the full dataset.
df["default"].value_counts(normalize=True)


default
0    0.7
1    0.3
Name: proportion, dtype: float64

Target Definition:
A customer is labeled as default if the credit outcome is bad (target = 2), which is consistent with standard credit risk modeling practice.


In [None]:
# (rows, columns) of the frame, including the derived "default" column.
df.shape


(1000, 22)

In [None]:
# Count missing values in each column.
df.isnull().sum()


status                0
duration              0
credit_history        0
purpose               0
credit_amount         0
savings               0
employment            0
installment_rate      0
personal_status       0
other_debtors         0
residence_since       0
property              0
age                   0
other_installments    0
housing               0
existing_credits      0
job                   0
num_dependents        0
telephone             0
foreign_worker        0
target                0
default               0
dtype: int64

In [None]:
# Only numeric columns are used as predictors; the A-coded categorical columns
# are left out of this model.
features = ["duration", "credit_amount", "installment_rate", "age", "existing_credits"]

# Design matrix and binary label.
X, y = df[features], df["default"]

X.head()


Unnamed: 0,duration,credit_amount,installment_rate,age,existing_credits
0,6,1169,4,67,2
1,48,5951,2,22,1
2,12,2096,2,49,1
3,42,7882,2,45,1
4,24,4870,3,53,2


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# 80/20 hold-out split, stratified on the label so both parts keep the same
# default rate; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# fit() returns the estimator itself, so construction and fitting are chained.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Column 1 of predict_proba holds the probability of class 1 (default).
preds = model.predict_proba(X_test)[:, 1]

# Discrimination on the hold-out set.
roc_auc_score(y_test, preds)


0.6830952380952381

Model Choice:
Logistic regression was selected for its interpretability and stability, which are critical requirements in lending and credit risk decisioning.


In [None]:
import pandas as pd

# The model was fitted on unscaled inputs, so each raw coefficient is the
# log-odds change per ONE UNIT of its feature. Units differ widely
# (credit_amount is in the thousands, installment_rate is a single digit), so
# raw magnitudes cannot be compared across features. Multiplying by the
# feature's training-set standard deviation gives the log-odds change per
# one-standard-deviation move, which is comparable.
feature_importance = pd.DataFrame({
    "feature": features,
    "coefficient": model.coef_[0],
    "coefficient_per_std": model.coef_[0] * X_train[features].std().to_numpy(),
}).sort_values(by="coefficient", ascending=False)

feature_importance


Unnamed: 0,feature,coefficient
2,installment_rate,0.184851
0,duration,0.028195
1,credit_amount,5.2e-05
3,age,-0.012359
4,existing_credits,-0.19229


In [None]:
# Scored hold-out frame: the test features plus the true label and the
# predicted default probability. assign() returns a new frame, leaving X_test
# untouched.
df_test = X_test.assign(default=y_test.values, risk_score=preds)

df_test.head()


Unnamed: 0,duration,credit_amount,installment_rate,age,existing_credits,default,risk_score
30,18,1913,3,36,1,0,0.277379
128,12,1860,4,34,2,0,0.247472
289,24,1024,4,48,1,1,0.310462
216,18,3104,3,31,1,0,0.302832
966,27,2520,4,23,2,1,0.373088


In [None]:
# Band labels, ordered from lowest to highest predicted risk.
band_labels = ["Very Low", "Low", "Medium", "High", "Very High"]

# Quantile edges are computed from the hold-out scores themselves, so each band
# receives an equal share of the rows.
df_test["risk_band"] = pd.qcut(
    df_test["risk_score"], q=len(band_labels), labels=band_labels
)

df_test["risk_band"].value_counts()


risk_band
Very Low     40
Low          40
Medium       40
High         40
Very High    40
Name: count, dtype: int64

In [None]:
# Observed default rate per risk band. risk_band is categorical, so `observed`
# is passed explicitly: this keeps the current behaviour (every band is
# reported) and avoids the pandas FutureWarning about the changing default.
df_test.groupby("risk_band", observed=False)["default"].mean()


  df_test.groupby("risk_band")["default"].mean()


risk_band
Very Low     0.150
Low          0.225
Medium       0.250
High         0.350
Very High    0.525
Name: default, dtype: float64

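Risk Band Analysis:
Customers are segmented into five equal-sized risk bands based on predicted default probability.
Observed default rates rise monotonically from 15.0% in the Very Low band to 52.5% in the Very High band, which supports the model's ranking ability. Each band contains only 40 test customers, so the individual band rates are noisy estimates.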


In [None]:
def underwriting_decision(risk_band):
    """Map a risk band label to an underwriting outcome.

    "Very Low" and "Low" are approved, "Medium" is approved with conditions,
    and every other value (including unrecognised labels) is rejected.
    """
    if risk_band in ("Very Low", "Low"):
        return "Approve"
    if risk_band == "Medium":
        return "Approve with Conditions"
    # Anything else falls through to the conservative outcome.
    return "Reject"

# Apply the underwriting policy to each row's risk band.
df_test["decision"] = df_test["risk_band"].apply(underwriting_decision)

# Number of hold-out customers per decision.
df_test["decision"].value_counts()


decision
Approve                    80
Reject                     80
Approve with Conditions    40
Name: count, dtype: int64

Decision Logic:
The underwriting model informs a policy-driven decision engine, enabling risk-based approvals rather than binary accept or reject outcomes.

Early Warning Objective:
Identify customers who are likely to default in the near term after loan disbursal, enabling proactive collections and customer intervention.

In [None]:
# Three 0/1 stress flags derived from columns already present in df_test.
df_test["high_installment_burden"] = df_test["installment_rate"].ge(3).astype(int)
df_test["low_age_risk"] = df_test["age"].lt(30).astype(int)

# The cut-off is the median credit amount of the hold-out rows themselves.
df_test["high_credit_amount"] = (
    df_test["credit_amount"].gt(df_test["credit_amount"].median()).astype(int)
)

df_test.head()


Unnamed: 0,duration,credit_amount,installment_rate,age,existing_credits,default,risk_score,risk_band,decision,high_installment_burden,low_age_risk,high_credit_amount
30,18,1913,3,36,1,0,0.277379,Medium,Approve with Conditions,1,0,0
128,12,1860,4,34,2,0,0.247472,Low,Approve,1,0,0
289,24,1024,4,48,1,1,0.310462,High,Reject,1,0,0
216,18,3104,3,31,1,0,0.302832,Medium,Approve with Conditions,1,0,1
966,27,2520,4,23,2,1,0.373088,High,Reject,1,1,1


In [None]:
# Early-warning score = number of stress flags raised for the customer (0 to 3).
df_test["early_warning_score"] = df_test[
    ["high_installment_burden", "low_age_risk", "high_credit_amount"]
].sum(axis=1)

df_test["early_warning_score"].value_counts()


1    83
2    78
3    25
0    14
Name: count, dtype: int64

In [None]:
def early_warning_action(score):
    """Translate an early-warning score into a follow-up action.

    A score of 2 or more triggers a collection alert, a score of exactly 1
    sends a reminder, and any other score results in no action.
    """
    if score >= 2:
        return "Trigger Collection Alert"
    return "Send Reminder" if score == 1 else "No Action"

# Map each customer's early-warning score to its follow-up action.
df_test["early_warning_action"] = df_test["early_warning_score"].apply(early_warning_action)

# Number of hold-out customers per action.
df_test["early_warning_action"].value_counts()


Trigger Collection Alert    103
Send Reminder                83
No Action                    14
Name: count, dtype: int64

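Early Warning Logic:
Post-disbursal risk signals are meant to be monitored continuously to identify financial stress before actual default occurs, allowing timely interventions that reduce overall credit loss. In this notebook the signals are approximated by three static flags computed from application-time attributes (installment rate of 3 or more, age under 30, and credit amount above the test-set median) rather than by post-disbursal repayment behaviour.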

Model Monitoring:
Key metrics to monitor in production include default rate by risk band, score distribution stability, and feature drift over time to ensure model reliability. The summary below records the score distribution on the test set.

In [None]:
# Summary statistics of the predicted default probabilities on the hold-out set.
df_test["risk_score"].describe()


count    200.000000
mean       0.312445
std        0.118312
min        0.110695
25%        0.233708
50%        0.284930
75%        0.375329
max        0.672541
Name: risk_score, dtype: float64

# Conclusion
This project demonstrates an end-to-end credit risk system covering underwriting, risk-based decisioning, early warning signals, and monitoring considerations for a digital lending platform.
