In [27]:
import pandas as pd
import numpy as np
# Turn off pandas' chained-assignment (SettingWithCopy) check for the whole
# notebook; the pandas default for this option is 'warn'.
pd.options.mode.chained_assignment = None  # default='warn'

#### Train data

In [2]:
# Development (train) samples, one pickled frame per segment.
# NOTE: unpickling can execute arbitrary code stored in the file - only load trusted artifacts.
non_pension_dev, pension_dev, street_dev = (
    pd.read_pickle(path) for path in ('file1', 'file2', 'file3')
)

#### Test data

In [3]:
# For every segment the `val` and `oot` samples are pickled frames, while the
# `nov` sample is read from an Excel file.
# NOTE: unpickling can execute arbitrary code stored in the file - only load trusted artifacts.
non_pension_val, non_pension_oot = (pd.read_pickle(path) for path in ('file5', 'file6'))
non_pension_nov = pd.read_excel('file7')

pension_val, pension_oot = (pd.read_pickle(path) for path in ('file8', 'file9'))
pension_nov = pd.read_excel('file10')

street_val, street_oot = (pd.read_pickle(path) for path in ('file11', 'file12'))
street_nov = pd.read_excel('file13')

In [6]:
# Give the three `nov` frames the column names used below: the score column
# becomes 'final_pd' and the contract reference becomes 'CONTRACT_REF_NO_raw'.
non_pension_nov, pension_nov, street_nov = (
    nov_frame.rename(columns={'CONTRACT_REF_NO': 'CONTRACT_REF_NO_raw', 'pred_prob_final': 'final_pd'})
    for nov_frame in (non_pension_nov, pension_nov, street_nov)
)

#### Bins

In [7]:
# 21 equally spaced edges 0.00, 0.05, ..., 1.00 -> 20 bins of width 0.05
bins = [pct / 100 for pct in range(0, 101, 5)]

#### Calculations

In [28]:
def count_bins(df_name, colname, bin_edges=None):
    """Return the share of rows of ``df_name`` that fall into each score bin.

    Parameters
    ----------
    df_name : pandas.DataFrame
        Frame holding the score column. It is not modified.
    colname : str
        Name of the column to bin.
    bin_edges : sequence of float, optional
        Bin edges handed to ``pd.cut``. When omitted, the notebook-level
        ``bins`` list is used.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``bins`` (interval label) and ``proportion`` (bin count
        divided by the number of rows that landed in any bin), with one row
        per bin - bins without rows are kept with proportion 0.
    """
    if bin_edges is None:
        bin_edges = bins  # notebook-level default edges

    # include_lowest=True closes the first interval on the left, so a score
    # equal to the lowest edge (e.g. exactly 0.0) is counted instead of being
    # turned into NaN and silently dropped.
    binned = pd.DataFrame({'bins': pd.cut(df_name[colname], bin_edges, include_lowest=True)})

    # observed=False keeps empty bins as zero-count rows whatever the default
    # of the installed pandas version is, so every result has the same bins.
    counts = binned.groupby('bins', observed=False).size().reset_index(name='size')
    counts['proportion'] = counts['size'] / counts['size'].sum()

    return counts[['bins', 'proportion']]

In [29]:
# The score column has the same name in the development and the test frames.
dev_colname = test_colname = 'final_pd'

In [30]:
# Binned score distribution of the dev sample, one frame per segment.
non_pension_dev_bins, pension_dev_bins, street_dev_bins = (
    count_bins(segment_df, dev_colname)
    for segment_df in (non_pension_dev, pension_dev, street_dev)
)

In [31]:
# Binned score distribution of the val sample, one frame per segment.
non_pension_val_bins, pension_val_bins, street_val_bins = (
    count_bins(segment_df, test_colname)
    for segment_df in (non_pension_val, pension_val, street_val)
)

In [32]:
# Binned score distribution of the oot sample, one frame per segment.
non_pension_oot_bins, pension_oot_bins, street_oot_bins = (
    count_bins(segment_df, test_colname)
    for segment_df in (non_pension_oot, pension_oot, street_oot)
)

In [33]:
# Binned score distribution of the nov sample, one frame per segment.
non_pension_nov_bins, pension_nov_bins, street_nov_bins = (
    count_bins(segment_df, test_colname)
    for segment_df in (non_pension_nov, pension_nov, street_nov)
)

#### Calculate PSI

In [58]:
def calculate_psi(df1, df2, eps=0.000000000000001):
    """Compute the population stability index between two binned distributions.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Baseline distribution with columns ``bins`` and ``proportion``.
    df2 : pandas.DataFrame
        Comparison distribution with the same two columns.
    eps : float, optional
        Small constant added to both proportions inside the logarithm so a
        zero proportion does not produce ``log(0)`` or a division by zero.

    Returns
    -------
    tuple
        ``(table, psi)`` where ``table`` has one row per bin with the columns
        ``bins``, ``proportion_x`` (from ``df1``), ``proportion_y`` (from
        ``df2``), ``difference``, ``ln`` and ``psi``, and ``psi`` is the sum
        of the per-bin ``psi`` column.
    """
    # Outer merge keeps bins that appear in only one of the two frames; a bin
    # missing on one side holds no observations there, i.e. proportion 0.
    # (A left merge would drop bins only present in df2 and leave NaN for bins
    # missing from df2, which .sum() then silently skips.)
    merged = pd.merge(df1, df2, on='bins', how='outer')
    proportion_cols = ['proportion_x', 'proportion_y']
    merged[proportion_cols] = merged[proportion_cols].fillna(0.0)

    merged['difference'] = merged['proportion_y'] - merged['proportion_x']
    merged['ln'] = np.log((merged['proportion_y'] + eps) / (merged['proportion_x'] + eps))
    merged['psi'] = merged['difference'] * merged['ln']
    return merged, merged['psi'].sum()

#### Non-Pension

In [59]:
# PSI of the non-pension val bins against the non-pension dev bins; keeps both
# the per-bin table and the summed value.
non_pension_val_psi_df, non_pension_val_psi_value = calculate_psi(non_pension_dev_bins, non_pension_val_bins)
print('Non-Pension Val PSI:', non_pension_val_psi_value)

Non-Pension Val PSI: 8.568995606377356e-05


In [60]:
# PSI of the non-pension oot bins against the non-pension dev bins.
non_pension_oot_psi_df, non_pension_oot_psi_value = calculate_psi(non_pension_dev_bins, non_pension_oot_bins)
# This table was previously stored only as `non_pension_dev_psi_df`, a name the
# Nov comparison reassigns; the alias is kept so code that still reads the old
# name behaves as before.
non_pension_dev_psi_df = non_pension_oot_psi_df
print('Non-Pension OOT PSI:', non_pension_oot_psi_value)

Non-Pension OOT PSI: 0.0035574971751615747


In [61]:
# PSI of the non-pension nov bins against the non-pension dev bins.
non_pension_nov_psi_df, non_pension_nov_psi_value = calculate_psi(non_pension_dev_bins, non_pension_nov_bins)
# This table was previously stored only as `non_pension_dev_psi_df`; the alias
# is kept so code that still reads the old name behaves as before.
non_pension_dev_psi_df = non_pension_nov_psi_df
print('Non-Pension Nov PSI:', non_pension_nov_psi_value)

Non-Pension Nov PSI: 0.00572710858330607


#### Pension

In [62]:
# PSI of the pension val bins against the pension dev bins; keeps both the
# per-bin table and the summed value.
pension_val_psi_df, pension_val_psi_value = calculate_psi(pension_dev_bins, pension_val_bins)
print('Pension Val PSI:', pension_val_psi_value)

Pension Val PSI: 0.00012065788631246394


In [63]:
# PSI of the pension oot bins against the pension dev bins.
pension_oot_psi_df, pension_oot_psi_value = calculate_psi(pension_dev_bins, pension_oot_bins)
# This table was previously stored only as `pension_dev_psi_df`, a name the Nov
# comparison reassigns; the alias is kept so code that still reads the old
# name behaves as before.
pension_dev_psi_df = pension_oot_psi_df
print('Pension OOT PSI:', pension_oot_psi_value)

Pension OOT PSI: 0.13729037062979468


In [64]:
# PSI of the pension nov bins against the pension dev bins.
pension_nov_psi_df, pension_nov_psi_value = calculate_psi(pension_dev_bins, pension_nov_bins)
# This table was previously stored only as `pension_dev_psi_df`; the alias is
# kept so code that still reads the old name behaves as before.
pension_dev_psi_df = pension_nov_psi_df
print('Pension Nov PSI:', pension_nov_psi_value)

Pension Nov PSI: 0.08082303668332005


#### Street

In [65]:
# PSI of the street val bins against the street dev bins; keeps both the
# per-bin table and the summed value.
street_val_psi_df, street_val_psi_value = calculate_psi(street_dev_bins, street_val_bins)
print('Street Val PSI:', street_val_psi_value)

Street Val PSI: 0.0003048411586008483


In [66]:
# PSI of the street oot bins against the street dev bins.
street_oot_psi_df, street_oot_psi_value = calculate_psi(street_dev_bins, street_oot_bins)
# This table was previously stored only as `street_dev_psi_df`, a name the Nov
# comparison reassigns; the alias is kept so code that still reads the old
# name behaves as before.
street_dev_psi_df = street_oot_psi_df
print('Street OOT PSI:', street_oot_psi_value)

Street OOT PSI: 0.027961015460458323


In [67]:
# PSI of the street nov bins against the street dev bins.
street_nov_psi_df, street_nov_psi_value = calculate_psi(street_dev_bins, street_nov_bins)
# This table was previously stored only as `street_dev_psi_df`; the alias is
# kept so code that still reads the old name behaves as before.
street_dev_psi_df = street_nov_psi_df
print('Street Nov PSI:', street_nov_psi_value)

Street Nov PSI: 0.018230595215955492
