# Task 4: Bucket FICO Scores

Optimal quantization of FICO scores using log-likelihood maximization.

## Load Data

In [None]:

import pandas as pd
import numpy as np

# Read the loan data set and keep handles on the two columns used below:
# the FICO score of each loan and its default indicator.
df = pd.read_csv("/mnt/data/Task 3 and 4_Loan_Data.csv")
fico, default = df['fico_score'], df['default']

# Preview the first rows (presumably shown as the notebook cell output).
df.head()


## Bucket Log-Likelihood Function

In [None]:

def bucket_log_likelihood(k, n):
    """Return the binomial log-likelihood of a bucket with k hits out of n.

    The hit rate is estimated as p = k / n and the value returned is
    k * log(p) + (n - k) * log(1 - p). A bucket where nothing or everything
    is a hit (k == 0 or k == n) contributes 0, which also sidesteps log(0).
    """
    is_pure = k == 0 or k == n
    if is_pure:
        return 0

    rate = k / n
    hit_term = k * np.log(rate)
    miss_term = (n - k) * np.log(1 - rate)
    return hit_term + miss_term


## Optimal Bucketing via Dynamic Programming

In [None]:

def _segment_log_likelihood(k, n):
    """Vectorised binomial log-likelihood for buckets with k defaults in n loans.

    Element-wise: 0 where k == 0 or k == n, otherwise
    k * log(p) + (n - k) * log(1 - p) with p = k / n.
    """
    k, n = np.broadcast_arrays(np.asarray(k, dtype=float),
                               np.asarray(n, dtype=float))
    ll = np.zeros(k.shape)
    # Only mixed buckets contribute; masking also avoids evaluating log(0).
    mixed = (k != 0) & (k != n)
    k_mixed = k[mixed]
    n_mixed = n[mixed]
    p = k_mixed / n_mixed
    ll[mixed] = k_mixed * np.log(p) + (n_mixed - k_mixed) * np.log(1 - p)
    return ll


def optimal_fico_buckets(fico, default, num_buckets):
    """Find the FICO bucket boundaries that maximise the total log-likelihood.

    The observations are aggregated per distinct FICO score, so all loans
    sharing a score always fall into the same bucket. A dynamic program over
    the sorted distinct scores then picks the contiguous partition into
    ``num_buckets`` buckets with the highest summed binomial log-likelihood.

    Args:
        fico: FICO score of each loan (list, array or Series).
        default: default indicator of each loan, paired with ``fico``.
        num_buckets: number of buckets to create (at least 1 and at most the
            number of distinct FICO scores).

    Returns:
        Ascending list of ``num_buckets`` values: the lowest FICO score of
        each bucket. The first entry is the smallest score in the data.

    Raises:
        ValueError: if there is no data, ``num_buckets`` is below 1, or
            ``num_buckets`` exceeds the number of distinct FICO scores.
    """
    if num_buckets < 1:
        raise ValueError("num_buckets must be at least 1")

    data = pd.DataFrame({'fico': fico, 'default': default})
    if len(data) == 0:
        raise ValueError("no observations to bucket")

    # One row per distinct score (groupby sorts the keys ascending): number of
    # defaults and number of loans at that score.
    per_score = data.groupby('fico')['default'].agg(['sum', 'count'])
    scores = per_score.index.to_numpy()
    m = len(scores)
    if num_buckets > m:
        raise ValueError(
            f"num_buckets ({num_buckets}) exceeds the number of distinct "
            f"FICO scores ({m})"
        )

    # Prefix sums with a leading zero: cum_x[i] totals scores[0 .. i-1], so
    # any segment total is a single subtraction instead of a re-summed slice.
    cum_k = np.concatenate(([0.0], np.cumsum(per_score['sum'].to_numpy(dtype=float))))
    cum_n = np.concatenate(([0.0], np.cumsum(per_score['count'].to_numpy(dtype=float))))

    # dp[i, b]: best log-likelihood for scores[0 .. i] split into b + 1 buckets.
    # split[i, b]: index of the last score of the previous bucket in that optimum.
    dp = np.full((m, num_buckets), -np.inf)
    split = np.zeros((m, num_buckets), dtype=int)
    dp[:, 0] = _segment_log_likelihood(cum_k[1:], cum_n[1:])

    for b in range(1, num_buckets):
        for i in range(b, m):
            # Candidate split points j = b-1 .. i-1; the last bucket is
            # scores[j+1 .. i], evaluated for all j at once.
            seg_k = cum_k[i + 1] - cum_k[b:i + 1]
            seg_n = cum_n[i + 1] - cum_n[b:i + 1]
            candidates = dp[b - 1:i, b - 1] + _segment_log_likelihood(seg_k, seg_n)
            best = int(np.argmax(candidates))  # first maximum wins on ties
            dp[i, b] = candidates[best]
            split[i, b] = b - 1 + best

    # Walk the split table backwards from the last score to recover the
    # lower bound of every bucket, highest bucket first.
    boundaries = []
    i = m - 1
    for b in range(num_buckets - 1, 0, -1):
        j = split[i, b]
        boundaries.append(scores[j + 1])
        i = j
    boundaries.append(scores[0])

    return sorted(boundaries)


## Generate FICO Rating Buckets

In [None]:

# Number of rating buckets to split the FICO range into.
NUM_BUCKETS = 5
boundaries = optimal_fico_buckets(fico, default, NUM_BUCKETS)

# Report each boundary as the lowest FICO score of its rating bucket.
for i, b in enumerate(boundaries):
    print(f"Rating {i+1}: FICO ≥ {int(b)}")


## FICO to Rating Mapping Function

In [None]:

def fico_to_rating(fico_score, boundaries):
    """Map a FICO score to its 1-based rating bucket.

    ``boundaries`` holds the lower bound of each bucket; rating ``i`` covers
    scores from ``boundaries[i-1]`` up to (but excluding) the next boundary,
    and the last rating has no upper limit. The rating is therefore the
    number of boundaries at or below the score.

    Args:
        fico_score: the score to rate.
        boundaries: bucket lower bounds, one per rating.

    Returns:
        Rating between 1 and ``len(boundaries)``. Scores below the first
        boundary (or an empty ``boundaries``) are assigned rating 1.
    """
    reached = sum(1 for b in boundaries if fico_score >= b)
    # Scores below the lowest observed boundary still belong to the lowest rating.
    return max(1, reached)

# Example: rate a single FICO score of 720 against the module-level
# `boundaries` list (presumably displayed as the notebook cell output).
fico_to_rating(720, boundaries)
