In [1]:
import pandas as pd
import numpy as np

# Step 1: Load the dataset
file_path = 'Task 3 and 4_Loan_Data.csv'
loan_data = pd.read_csv(file_path)

# Step 2: Focus on needed columns
fico_data = loan_data[['fico_score', 'default']].copy()

# Step 3: Number of buckets
n_buckets = 10

# Step 4: Quantile Buckets
# pd.qcut will divide the fico_score into equal-sized groups
fico_data['bucket'], bin_edges = pd.qcut(fico_data['fico_score'], q=n_buckets, retbins=True, labels=False, duplicates='drop')

# Step 5: Rating
# Lower Rating => Better credit score
fico_data['rating'] = n_buckets - fico_data['bucket'] - 1

# Step 6: Generated bucket boundaries and sample data
print("Bucket Boundaries (FICO ranges):")
for i in range(len(bin_edges)-1):
    print(f"Rating {n_buckets - i - 1}: FICO {bin_edges[i]:.2f} to {bin_edges[i+1]:.2f}")

print("\nSample of the rating map:")
print(fico_data.head(10))

# Step 7: Merge full data with new rating
loan_data = loan_data.merge(fico_data[['fico_score', 'rating']], on='fico_score')

# Step 8: Optional — Check average default rate by rating
rating_default_summary = loan_data.groupby('rating')['default'].mean().sort_index()
print("\nAverage default rate by rating:")
print(rating_default_summary)


Bucket Boundaries (FICO ranges):
Rating 9: FICO 408.00 to 560.00
Rating 8: FICO 560.00 to 587.00
Rating 7: FICO 587.00 to 607.00
Rating 6: FICO 607.00 to 623.00
Rating 5: FICO 623.00 to 638.00
Rating 4: FICO 638.00 to 653.00
Rating 3: FICO 653.00 to 670.00
Rating 2: FICO 670.00 to 688.00
Rating 1: FICO 688.00 to 714.00
Rating 0: FICO 714.00 to 850.00

Sample of the rating map:
   fico_score  default  bucket  rating
0         605        0       2       7
1         572        1       1       8
2         602        0       2       7
3         612        0       3       6
4         631        0       4       5
5         697        0       8       1
6         722        0       9       0
7         545        1       0       9
8         676        0       7       2
9         447        0       0       9

Average default rate by rating:
rating
0    0.040273
1    0.070672
2    0.096683
3    0.105404
4    0.127311
5    0.172438
6    0.180895
7    0.246199
8    0.297843
9    0.429086
Name: defau