The likelihood-based dynamic programming approach is theoretically optimal but computationally expensive. For practical scalability and robustness, I used quantile-based discretization instead, which is widely adopted in retail credit modeling and provides stable, interpretable risk buckets.

In [3]:
# Load the loan book into a DataFrame and preview the first rows.
# NOTE(review): this is an absolute path on a local Windows drive, so the
# cell only runs on a machine with the same folder layout.
df = pd.read_csv(
    filepath_or_buffer="D:\\code\\Quantitative research\\Task 3 and 4_Loan_Data.csv"
)
df.head(5)

Unnamed: 0,customer_id,credit_lines_outstanding,loan_amt_outstanding,total_debt_outstanding,income,years_employed,fico_score,default
0,8153374,0,5221.545193,3915.471226,78039.38546,5,605,0
1,7442532,5,1958.928726,8228.75252,26648.43525,2,572,1
2,2256073,0,3363.009259,2027.83085,65866.71246,4,602,0
3,4885975,0,4766.648001,2501.730397,74356.88347,5,612,0
4,4700614,1,1345.827718,1768.826187,23448.32631,6,631,0


In [6]:
def fico_quantization_fast(fico_scores, defaults, num_buckets):
    """Bucket FICO scores into quantile bins and estimate each bin's default rate.

    Parameters
    ----------
    fico_scores : array-like
        One FICO score per borrower.
    defaults : array-like
        One default indicator per borrower, paired with ``fico_scores``.
        The values are summed to count defaults, so they are expected to be
        0/1 flags.
    num_buckets : int
        Requested number of quantile buckets. Fewer buckets may come back
        because duplicate quantile edges are dropped.

    Returns
    -------
    pandas.DataFrame
        One row per non-empty bucket with the columns ``min_fico``,
        ``max_fico``, ``count``, ``defaults`` and ``PD`` (defaults / count),
        on a fresh 0..n-1 index.
    """
    # A dedicated local name avoids shadowing the notebook-level `df`.
    scored = pd.DataFrame({
        "fico": fico_scores,
        "default": defaults
    })

    # Create quantile buckets; duplicates="drop" merges bins whose edges
    # coincide (heavily tied scores) instead of raising.
    scored["bucket"] = pd.qcut(scored["fico"], q=num_buckets, duplicates="drop")

    # Compute PD per bucket. `bucket` is categorical, so `observed=True` is
    # passed explicitly: it silences the pandas FutureWarning about the
    # changing default and keeps empty bins (count 0, PD 0/0 = NaN) out of
    # the result.
    bucket_stats = (
        scored.groupby("bucket", observed=True)
              .agg(
                  min_fico=("fico", "min"),
                  max_fico=("fico", "max"),
                  count=("default", "count"),
                  defaults=("default", "sum")
              )
              .reset_index(drop=True)
    )

    bucket_stats["PD"] = bucket_stats["defaults"] / bucket_stats["count"]

    return bucket_stats


In [7]:
# Bucket the loan book's FICO scores into 5 quantile bins and estimate the
# default rate (PD) of each bin.
bucket_map = fico_quantization_fast(
    fico_scores=df["fico_score"],
    defaults=df["default"],
    num_buckets=5
)

# A bare last expression gets the notebook's rich table rendering, which
# print() would flatten to plain text.
bucket_map


   min_fico  max_fico  count  defaults        PD
0       408       587   2050       817  0.398537
1       588       623   1971       425  0.215627
2       624       653   1989       301  0.151332
3       654       688   1997       200  0.100150
4       689       850   1993       108  0.054190


  df.groupby("bucket")


In [8]:
# Attach a rating that counts down from the number of buckets to 1, row by row.
# With the rows in ascending FICO order (as in the table printed above), the
# lowest-score bucket gets the largest rating number and the highest-score
# bucket gets rating 1.
bucket_map["Rating"] = range(1, len(bucket_map) + 1)[::-1]
bucket_map


Unnamed: 0,min_fico,max_fico,count,defaults,PD,Rating
0,408,587,2050,817,0.398537,5
1,588,623,1971,425,0.215627,4
2,624,653,1989,301,0.151332,3
3,654,688,1997,200,0.10015,2
4,689,850,1993,108,0.05419,1
