In [1]:
import pandas
import numpy as np

## Set up for dataset
features = ['checking account balance', 'duration', 'credit history',
            'purpose', 'amount', 'savings', 'employment', 'installment',
            'marital status', 'other debtors', 'residence time',
            'property', 'age', 'other installments', 'housing', 'credits',
            'job', 'persons', 'phone', 'foreign']
target = 'repaid'
df = pandas.read_csv('../../data/credit/german.data', sep=' ',
                     names=features+[target])
import matplotlib.pyplot as plt
numerical_features = ['duration', 'age', 'residence time', 'installment', 'amount', 'duration', 'persons', 'credits']
quantitative_features = list(filter(lambda x: x not in numerical_features, features))
X = pandas.get_dummies(df, columns=quantitative_features, drop_first=True)
encoded_features = list(filter(lambda x: x != target, X.columns))


## Test function
def test_decision_maker(X_test, y_test, interest_rate, decision_maker):
    n_test_examples = len(X_test)
    utility = 0

    ## Example test function - this is only an unbiased test if the data has not been seen in training
    for t in range(n_test_examples):
        action = decision_maker.get_best_action(X_test.iloc[t])
        good_loan = y_test.iloc[t] # assume the labels are correct
        duration = X_test['duration'].iloc[t]
        amount = X_test['amount'].iloc[t]
        # If we don't grant the loan then nothing happens
        if (action==1):
            if (good_loan != 1):
                utility -= amount
            else:
                utility += amount*(pow(1 + interest_rate, duration) - 1)
    return utility

In [2]:
import nicolabk_kaiie_banker
for epsilon in [10, 25, 35, 50, 75, 100, None]:
    decision_maker = nicolabk_kaiie_banker.Nicolabk_Kaiie_Banker(epsilon=epsilon)

    interest_rate = 0.005

    ### Do a number of preliminary tests by splitting the data in parts
    from sklearn.model_selection import train_test_split
    n_tests = 10
    utility = []
    for iter in range(n_tests):
        X_train, X_test, y_train, y_test = train_test_split(X[encoded_features], X[target], test_size=0.2)
        decision_maker.set_interest_rate(interest_rate)
        decision_maker.fit(X_train, y_train)
        utility.append(test_decision_maker(X_test, y_test, interest_rate, decision_maker))
        print(iter, end='\r')
    print(f'Epsilon: {epsilon}')
    print(f'Average utility: {sum(utility) / n_tests}')
    print(f'Standard deviation: {np.std(utility)}')

Epsilon: 10
Average utility: -2215.370966283067
Standard deviation: 5568.807401860468
Epsilon: 25
Average utility: -3281.643498126649
Standard deviation: 4132.390670722679
Epsilon: 35
Average utility: 278.2419609073815
Standard deviation: 3608.7779627832415
Epsilon: 50
Average utility: 173.02695050351855
Standard deviation: 3984.4679901697573
Epsilon: 75
Average utility: 828.2276953686147
Standard deviation: 6057.425424490574
Epsilon: 100
Average utility: 3641.2979438633324
Standard deviation: 3125.4588890761315
Epsilon: None
Average utility: 4076.377041480875
Standard deviation: 4237.9681620813135
