<h4 style="margin:0; font-size:16px">scorecard & stability</h4>
<ul style="margin-top:0; font-size:15px">
<li>population stability index (PSI)</li>
<li>characteristic stability index (CSI)</li>
</ul>

<h4 style="margin:0; font-size:16px">classification</h4>
<ul style="margin-top:0; font-size:15px">
<li>confusion matrix (crosstab or swapset)</li>
<li>accuracy (or error rate)</li>
<li>precision and recall</li>
<li>balanced accuracy and F1 score</li>
<li>receiver operating characteristic (ROC) and area under curve (AUC)</li>
<li>Gini coefficient</li>
<li>Kolmogorov-Smirnov statistic (KS)</li>
</ul>

<h4 style="margin:0; font-size:16px">goodness-of-fit / model selection</h4>
<ul style="margin-top:0; font-size:15px">
<li>r² and adjusted r² score</li>
<li>log-likelihood</li>
<li>Akaike information criterion (AIC) and Bayesian information criterion (BIC)</li>
</ul>

<h4 style="margin:0; font-size:16px">regression error</h4>
<ul style="margin-top:0; font-size:15px">
<li>residuals and mean absolute error (MAE)</li>
<li>mean squared error (MSE) and root mean squared error (RMSE)</li>
</ul>

<h4 style="margin:0; font-size:16px">clustering & segmentation</h4>
<ul style="margin-top:0; font-size:15px">
<li>elbow method</li>
<li>silhouette score</li>
</ul>

<h4 style="margin:0; font-size:16px">model explainability</h4>
<ul style="margin-top:0; font-size:15px">
<li>SHAP (SHapley Additive exPlanations)</li>
<li>LIME (local interpretable model-agnostic explanations)</li>
</ul>


In [1]:
import numpy as np
import pandas as pd
# Load the score table; later cells read its `score_dev` and `score_prod` columns.
df = pd.read_csv(filepath_or_buffer='scores.csv')
# Show the full frame so the raw inputs are visible before binning.
display(df)

Unnamed: 0,id,score_dev,score_prod
0,1,480,520
1,2,510,505
2,3,620,640
3,4,710,690
4,5,550,560
5,6,720,730
6,7,400,450
7,8,670,600
8,9,580,590
9,10,800,770


In [3]:
# Build equal-frequency bucket edges from the expected (score_dev) population.
# N_BINS buckets need N_BINS + 1 percentile edges, evenly spaced from 0% to 100%.
N_BINS = 5
# np.unique collapses repeated edges: heavily tied scores can yield identical
# percentiles, and pd.cut rejects non-unique bin edges. When that happens the
# result has fewer than N_BINS buckets.
bins = np.unique(np.percentile(df['score_dev'], np.linspace(0, 100, N_BINS + 1)))
print(bins)

[400. 504. 568. 640. 712. 800.]


In [68]:
# pd.cut builds right-closed intervals (a, b]; include_lowest=True also closes the
# first interval on the left so the minimum development score is not dropped.
# duplicates='drop' merges repeated edges instead of raising if the percentiles tie.
predicted = pd.cut(df['score_dev'], bins=bins, include_lowest=True, duplicates='drop')
# The edges come from score_dev, so production scores may fall outside them.
# pd.cut would turn those into NaN and value_counts would then leave them out of
# the proportions, hiding exactly the drift PSI is meant to detect. Clipping to
# the outer edges assigns such scores to the first / last bucket instead.
actual = pd.cut(
    df['score_prod'].clip(lower=bins[0], upper=bins[-1]),
    bins=bins,
    include_lowest=True,
    duplicates='drop',
)

In [69]:
# By default value_counts orders groups by descending count; sort=False turns that
# off so the two series are not reordered by their (different) bucket counts.
# normalize=True returns proportions rather than raw counts.
predicted_prop, actual_prop = (
    binned.value_counts(normalize=True, sort=False)
    for binned in (predicted, actual)
)
print(predicted_prop, actual_prop, sep='\n')

score_dev
(399.999, 504.0]    0.2
(504.0, 568.0]      0.2
(568.0, 640.0]      0.2
(640.0, 712.0]      0.2
(712.0, 800.0]      0.2
Name: proportion, dtype: float64
score_prod
(399.999, 504.0]    0.1
(504.0, 568.0]      0.3
(568.0, 640.0]      0.3
(640.0, 712.0]      0.1
(712.0, 800.0]      0.2
Name: proportion, dtype: float64


In [70]:
# Swap exact-zero proportions for a tiny positive value: an empty bucket would
# otherwise cause a division by zero (or log of zero) in the PSI calculation.
predicted_prop = predicted_prop.mask(predicted_prop == 0, 1e-6)
actual_prop = actual_prop.mask(actual_prop == 0, 1e-6)
print(predicted_prop, actual_prop, sep='\n')

score_dev
(399.999, 504.0]    0.2
(504.0, 568.0]      0.2
(568.0, 640.0]      0.2
(640.0, 712.0]      0.2
(712.0, 800.0]      0.2
Name: proportion, dtype: float64
score_prod
(399.999, 504.0]    0.1
(504.0, 568.0]      0.3
(568.0, 640.0]      0.3
(640.0, 712.0]      0.1
(712.0, 800.0]      0.2
Name: proportion, dtype: float64


In [76]:
# Assemble a per-bucket table. Starting from actual_prop keeps its index (the score
# intervals, named after score_prod); predicted_prop is aligned to that index, and
# reset_index then turns the intervals into an ordinary column.
buckets = (
    actual_prop
    .to_frame(name='Actual Prop.')
    .assign(**{'Predicted Prop.': predicted_prop})
    .reset_index(drop=False)
)
display(buckets)

Unnamed: 0,score_prod,Actual Prop.,Predicted Prop.
0,"(399.999, 504.0]",0.1,0.2
1,"(504.0, 568.0]",0.3,0.2
2,"(568.0, 640.0]",0.3,0.2
3,"(640.0, 712.0]",0.1,0.2
4,"(712.0, 800.0]",0.2,0.2


In [80]:
# non-vectorized solution: one PSI term per bucket, (actual - expected) * ln(actual / expected).
# zip pairs the two series by position, so both must list the buckets in the same order.
psi = [
    (actual_share - expected_share) * np.log(actual_share / expected_share)
    for actual_share, expected_share in zip(actual_prop, predicted_prop)
]
buckets['PSI'] = psi
display(buckets)
# total PSI is the sum of the per-bucket terms
print(sum(psi))

Unnamed: 0,score_prod,Actual Prop.,Predicted Prop.,PSI
0,"(399.999, 504.0]",0.1,0.2,0.069315
1,"(504.0, 568.0]",0.3,0.2,0.040547
2,"(568.0, 640.0]",0.3,0.2,0.040547
3,"(640.0, 712.0]",0.1,0.2,0.069315
4,"(712.0, 800.0]",0.2,0.2,0.0


0.21972245773362187


In [81]:
# vectorized solution: the same per-bucket terms computed element-wise on the two
# series (aligned on their interval index) and summed in one expression.
psi = ((actual_prop - predicted_prop) * np.log(actual_prop / predicted_prop)).sum()
print(psi)

0.21972245773362187
