In [33]:
import pandas as pd
import numpy as np

# Give full path (make sure to include .xlsx at the end)
file_path = r"C:\Users\rajat\OneDrive\Desktop\Inputs scorecard\Customer_Final_Scores.xlsx"

# Load into dataframe
df = pd.read_excel(file_path)

# See first 5 rows
print(df.head())

# See column names
print(df.columns)


   Behavior_Spending_Score  Income_INR  Total_Credit_Limit  \
0                       73     2309818             1996830   
1                       26     2431785             2664150   
2                       66      488998             1618560   
3                       73     2043400              515757   
4                       73     1279111             2655214   

   Loan_Tenure_Months  Months_Since_Most_Recent_Delinquency  \
0                  48                                    56   
1                  24                                     6   
2                  72                                    21   
3                  24                                    54   
4                  24                                    32   

   Credit_Utilization_Ratio  Credit_Card_Utilization  Employment_Years  \
0                      0.15                     0.49                 0   
1                      0.53                     0.41                 8   
2                      0.2

In [34]:
# Default_y = 0 → Non-Default (Good customer)
# Default_y = 1 → Default (Bad customer)
goods = (df["Default_y"] == 0).sum()
bads = (df["Default_y"] == 1).sum()

print(f"Goods: {goods}, Bads: {bads}")

Goods: 70, Bads: 30


In [35]:
# STEP 3: Calculate Base Odds
# ------------------------------------------------
# Odds = Goods : Bads ratio
BaseOdds = goods / bads
print(f"Base Odds = {BaseOdds:.2f}:1")

Base Odds = 2.33:1


In [36]:
# STEP 4: Choose PDO and Base Score
# ------------------------------------------------
PDO = 20         # Points to Double Odds (business decision)
BaseScore = 600  # Reference score point

In [38]:
# STEP 5: Calculate Factor and Offset
# ------------------------------------------------
# Formula:
#   Factor = PDO / ln(2)
#   Offset = BaseScore - Factor * ln(BaseOdds)
import math

#Factor → “how much score moves when risk changes”
#Offset → “starting point” to make sure your reference borrower gets the score you want
Factor = PDO / math.log(2)          # 0.693
Offset = BaseScore - Factor * math.log(BaseOdds)       # ≈ 0.847

print(f"Factor = {Factor:.2f}, Offset = {Offset:.2f}")

Factor = 28.85, Offset = 575.55


In [40]:
# STEP 6: Create deciles on Final_Score
# ------------------------------------------------
# Splits data into 10 buckets from lowest score to highest score
df["score_decile"] = pd.qcut(df["Final_Score"], 10, labels=False, duplicates="drop")

# Count how many customers in each decile
print(df["score_decile"].value_counts().sort_index())

# Show first 10 rows
print(df[["Final_Score", "score_decile"]].head(10))


score_decile
0    10
1    10
2    11
3     9
4    10
5    10
6    10
7    10
8    10
9    10
Name: count, dtype: int64
   Final_Score  score_decile
0          218             0
1          836             7
2          487             2
3          722             4
4          888             8
5          508             2
6          255             0
7          900             8
8          778             6
9          704             4


In [41]:
# STEP 7: Calculate stats per decile
# ------------------------------------------------
# For each decile: total count, bads, goods, avg Final_Score
decile_stats = df.groupby("score_decile").agg(
    total=("Final_Score","size"),                     # Count of customers in the decile
    bads=("Default_y","sum"),                         # Sum of Default_y (1 = default). Gives # of bads
    goods=("Default_y", lambda x: (x==0).sum()),     # Count of non-defaults
    avg_score=("Final_Score","mean")                # Average Final_Score in the decile
).reset_index()

print(decile_stats)


   score_decile  total  bads  goods   avg_score
0             0     10     8      2  219.300000
1             1     10     8      2  410.900000
2             2     11     7      4  524.636364
3             3      9     2      7  617.666667
4             4     10     1      9  694.700000
5             5     10     2      8  749.900000
6             6     10     1      9  801.900000
7             7     10     0     10  843.500000
8             8     10     0     10  880.300000
9             9     10     1      9  960.900000


In [42]:
# STEP 8: Calculate bad rate and odds 
# ------------------------------------------------
decile_stats["p_bad"] = decile_stats["bads"] / decile_stats["total"]       # p_bad = proportion of bads in that decile
decile_stats["odds"] = (1 - decile_stats["p_bad"]) / decile_stats["p_bad"] # Odds = ratio of goods to bads

print(decile_stats[["score_decile","bads","total","p_bad","odds"]])

   score_decile  bads  total     p_bad      odds
0             0     8     10  0.800000  0.250000
1             1     8     10  0.800000  0.250000
2             2     7     11  0.636364  0.571429
3             3     2      9  0.222222  3.500000
4             4     1     10  0.100000  9.000000
5             5     2     10  0.200000  4.000000
6             6     1     10  0.100000  9.000000
7             7     0     10  0.000000       inf
8             8     0     10  0.000000       inf
9             9     1     10  0.100000  9.000000


In [43]:
#decile_stats["odds"] = decile_stats["odds"].replace([np.inf], 10)
# STEP 9: Convert odds into calibrated scores
# ------------------------------------------------
# Formula:
#   Score = Offset + Factor * ln(Odds)
decile_stats["calibrated_score"] = decile_stats["odds"].apply(                    # lambda o: - defines an inline fun
    lambda o: round(Offset + Factor * math.log(o),2) if o > 0 else None
)

print(decile_stats[["score_decile", "odds", "calibrated_score"]])


   score_decile      odds  calibrated_score
0             0  0.250000            535.55
1             1  0.250000            535.55
2             2  0.571429            559.41
3             3  3.500000            611.70
4             4  9.000000            638.95
5             5  4.000000            615.55
6             6  9.000000            638.95
7             7       inf               inf
8             8       inf               inf
9             9  9.000000            638.95


In [44]:
# STEP 10: Save results into Excel
# ------------------------------------------------
output_file = r"C:\Users\rajat\OneDrive\Desktop\Inputs scorecard\Customer_Final_Scores_with_Calibration.xlsx"
with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
    df.to_excel(writer, sheet_name="RawData", index=False)
    decile_stats.to_excel(writer, sheet_name="DecileCalibration", index=False)

print(f"Calibration completed. Results saved to {output_file}")

Calibration completed. Results saved to C:\Users\rajat\OneDrive\Desktop\Inputs scorecard\Customer_Final_Scores_with_Calibration.xlsx
