In [225]:
import os
import numpy as np 
import pandas as pd 
from subprocess import check_output

In [226]:
sub_path = "Submissions"
# os.listdir() returns entries in arbitrary order and also lists anything that
# is not a submission (hidden files, checkpoint folders, ...). Keep only the
# CSV files and sort them so the is_iceberg_<n> numbering is the same on
# every run and stray files cannot break read_csv.
all_files = sorted(f for f in os.listdir(sub_path) if f.lower().endswith(".csv"))

# Read every submission (the first CSV column becomes the index) and place
# them side by side: one prediction column per model, aligned on that index.
outs = [pd.read_csv(os.path.join(sub_path, f), index_col=0) for f in all_files]
concat_sub = pd.concat(outs, axis=1)
cols = ["is_iceberg_" + str(i) for i in range(len(concat_sub.columns))]
concat_sub.columns = cols
# Turn the index back into a regular column (new frame instead of inplace, so
# re-running the cell always starts from the freshly concatenated data).
concat_sub = concat_sub.reset_index()
concat_sub.head()

Unnamed: 0,id,is_iceberg_0,is_iceberg_1,is_iceberg_2,is_iceberg_3,is_iceberg_4,is_iceberg_5
0,5941774d,0.02138313,0.010011,0.003657,0.016455,0.22013,0.023473
1,4023181e,0.6183937,0.404991,0.020802,0.986631,0.014379,0.375685
2,b20200e4,9.098931e-07,0.007189,0.001704,0.001559,4.1e-05,0.006849
3,e7f018bb,0.9738217,0.997395,0.999148,0.999509,0.992729,0.998656
4,4371c8c3,0.4257437,0.067517,0.001759,0.997644,0.074289,0.035506


In [227]:
# Column count at this point: the `id` column plus one column per model.
# It is used below as the exclusive end of `iloc[:, 1:model_amount]`, which
# therefore selects exactly the model prediction columns.
model_amount = concat_sub.shape[1]

In [228]:
# Check how correlated the models' predictions are with each other.
# Only numeric columns are passed to corr(): the string `id` column cannot be
# correlated, and recent pandas versions raise on it instead of skipping it.
concat_sub.select_dtypes(include=[np.number]).corr()

Unnamed: 0,is_iceberg_0,is_iceberg_1,is_iceberg_2,is_iceberg_3,is_iceberg_4,is_iceberg_5
is_iceberg_0,1.0,0.703407,0.630337,0.474636,0.601516,0.70471
is_iceberg_1,0.703407,1.0,0.834851,0.532091,0.843965,0.842419
is_iceberg_2,0.630337,0.834851,1.0,0.435538,0.842089,0.811806
is_iceberg_3,0.474636,0.532091,0.435538,1.0,0.409355,0.642066
is_iceberg_4,0.601516,0.843965,0.842089,0.409355,1.0,0.786657
is_iceberg_5,0.70471,0.842419,0.811806,0.642066,0.786657,1.0


In [229]:
# Row-wise summary statistics over the model prediction columns; these are
# the inputs to the stacking rules in the cells below.
model_preds = concat_sub.iloc[:, 1:model_amount]
concat_sub['is_iceberg_max'] = model_preds.max(axis=1)
concat_sub['is_iceberg_min'] = model_preds.min(axis=1)
concat_sub['is_iceberg_mean'] = model_preds.mean(axis=1)
concat_sub['is_iceberg_median'] = model_preds.median(axis=1)

In [230]:
# Consensus cutoffs, kept in one place so they are easy to tweak.
# Later cells compare every model against them:
#   all models > cutoff_lo -> use the max prediction
#   all models < cutoff_hi -> use the min prediction
cutoff_lo, cutoff_hi = 0.5, 0.5

In [231]:
# load the model with best base performance; its `is_iceberg` column is used
# further down as the `is_iceberg_base` fallback prediction
sub_base = pd.read_csv('Submissions/subvgg16mobile.csv')

In [232]:
# Write the two plain blends as submission files. `is_iceberg` is overwritten
# on each pass, so after this cell it holds the median blend.
for stat_col, out_name in (('is_iceberg_mean', 'mean.csv'),
                           ('is_iceberg_median', 'median.csv')):
    concat_sub['is_iceberg'] = concat_sub[stat_col]
    concat_sub[['id', 'is_iceberg']].to_csv(out_name,
                                            index=False, float_format='%.6f')


In [233]:
# Attach the best single model's prediction to every row. The lookup is keyed
# on `id` instead of relying on both frames having the same row order, so a
# differently ordered (or re-sorted) frame cannot silently misalign the values.
base_by_id = sub_base.set_index('id')['is_iceberg']
concat_sub['is_iceberg_base'] = concat_sub['id'].map(base_by_id)

# MinMax + BestBase stacking:
#   every model above cutoff_lo -> most confident "iceberg" value (max)
#   every model below cutoff_hi -> most confident "ship" value (min)
#   models disagree             -> fall back to the best single model
model_preds = concat_sub.iloc[:, 1:model_amount]
all_above = np.all(model_preds > cutoff_lo, axis=1)
all_below = np.all(model_preds < cutoff_hi, axis=1)
concat_sub['is_iceberg'] = np.where(all_above,
                                    concat_sub['is_iceberg_max'],
                                    np.where(all_below,
                                             concat_sub['is_iceberg_min'],
                                             concat_sub['is_iceberg_base']))
concat_sub[['id', 'is_iceberg']].to_csv('stack_minmax_bestbase.csv',
                                        index=False, float_format='%.6f')

In [234]:
# Bounds applied to the final prediction so it is never exactly 0 or 1.
clip_min, clip_max = 0.0001, 0.9999

# MinMax + Median stacking: unanimous rows take the max / min prediction,
# every other row takes the median of the models.
model_preds = concat_sub.iloc[:, 1:model_amount]
all_above = np.all(model_preds > cutoff_lo, axis=1)
all_below = np.all(model_preds < cutoff_hi, axis=1)
stacked = np.where(all_above,
                   concat_sub['is_iceberg_max'],
                   np.where(all_below,
                            concat_sub['is_iceberg_min'],
                            concat_sub['is_iceberg_median']))

concat_sub['is_iceberg'] = np.clip(stacked, clip_min, clip_max)

concat_sub[['id', 'is_iceberg']].to_csv('stack_minmax_median.csv',
                                        index=False, float_format='%.6f')

In [235]:
# Same min/max/median rule as above, but with much stricter consensus
# thresholds (every model > 0.9725 or every model < 0.025).
model_preds = concat_sub.iloc[:, 1:model_amount]
very_high = np.all(model_preds > 0.9725, axis=1)
very_low = np.all(model_preds < 0.025, axis=1)
blah = np.where(very_high,
                concat_sub['is_iceberg_max'],
                np.where(very_low,
                         concat_sub['is_iceberg_min'],
                         concat_sub['is_iceberg_median']))

# Keep the rows where the strict rule produced something other than the
# median. Note that the value written out is the `is_iceberg` column as it
# currently stands in concat_sub, not `blah` itself.
replaced_median = concat_sub["is_iceberg_median"] != blah
pseudo_data = concat_sub[replaced_median]
pseudo_data[['id', 'is_iceberg']].to_csv('pseudo_data.csv',
                                         index=False, float_format='%.6f')

In [266]:
cutoff_lo = 0.5
cutoff_hi = 0.5
alt_sub = concat_sub.copy(deep=True)

# Alternative stack, computed column-wise instead of row by row
# (DataFrame.set_value, used by the earlier loop, no longer exists in pandas):
#   1. every model above cutoff_lo -> max prediction, clipped
#   2. every model below cutoff_hi -> min prediction, clipped
#   3. median "on the fence" (between 0.4 and 0.6) while the base model is
#      more decisive (> 0.6 or < 0.4) -> base prediction limited to [0.2, 0.8]
#   4. anything else -> median prediction, clipped
# Consensus is checked against the actual number of model columns rather than
# a hard-coded 6, so the rule stays correct when submissions are added/removed.
model_preds = alt_sub.iloc[:, 1:model_amount]
n_models = model_preds.shape[1]

# A prediction votes "low" only if it did not already vote "high"
# (this mirrors an if / elif on each value).
votes_high = model_preds > cutoff_lo
votes_low = ~votes_high & (model_preds < cutoff_hi)
all_high = votes_high.sum(axis=1) == n_models
all_low = ~all_high & (votes_low.sum(axis=1) == n_models)

median_pred = alt_sub['is_iceberg_median']
base_pred = alt_sub['is_iceberg_base']
guessing = (~all_high & ~all_low
            & (median_pred > 0.4) & (median_pred < 0.6)
            & ((base_pred > 0.6) | (base_pred < 0.4)))

# np.select applies the first matching rule per row, in the order listed.
alt_sub['is_iceberg'] = np.select(
    [all_high.values, all_low.values, guessing.values],
    [np.clip(alt_sub['is_iceberg_max'].values, clip_min, clip_max),
     np.clip(alt_sub['is_iceberg_min'].values, clip_min, clip_max),
     np.clip(base_pred.values, 0.2, 0.8)],
    default=np.clip(median_pred.values, clip_min, clip_max))

count_high = int(all_high.sum())
count_low = int(all_low.sum())
count_guessing = int(guessing.sum())
# Counters of a disabled experiment (near-unanimous rows, one dissenting
# model); kept at zero so the printed summary keeps its shape.
count_high_2 = 0
count_low_2 = 0

alt_sub[['id', 'is_iceberg']].to_csv('stack_minmax_median_alt.csv',
                                     index=False, float_format='%.6f')

print("high:", count_high)
print("high 2:", count_high_2)
print("low:", count_low)
print("low 2:", count_low_2)
print("count guessing:", count_guessing)


high: 1326
high 2: 0
low: 2445
low 2: 0
count guessing: 421


In [267]:
# 50 / 50: high = 1326, low = 2445
# 525 / 475: high = 1295, low = 2397
# 475 / 525: high = 1352, low = 2397
# 55 / 45: high = 1277, low = 2355


In [268]:
# 3772 not using min or max values
# 653 where median is between 0.4 and 0.6 (virtually guessing)
# 422 where median is as above, and best model is more confident

# Show the rows where the median sits on the fence while the base model is
# more decisive in either direction.
median_on_fence = (alt_sub["is_iceberg_median"] > 0.4) & (alt_sub["is_iceberg_median"] < 0.6)
base_decisive = (alt_sub["is_iceberg_base"] > 0.6) | (alt_sub["is_iceberg_base"] < 0.4)
alt_sub[median_on_fence & base_decisive]

Unnamed: 0,id,is_iceberg_0,is_iceberg_1,is_iceberg_2,is_iceberg_3,is_iceberg_4,is_iceberg_5,is_iceberg_max,is_iceberg_min,is_iceberg_mean,is_iceberg_median,is_iceberg,is_iceberg_base
44,0bbf1a3f,1.531911e-03,0.179410,0.354160,0.867033,0.567576,0.895743,0.895743,1.531911e-03,0.477575,0.460868,0.800000,0.895743
75,69bf1e8c,6.045442e-01,0.094348,0.032105,0.997431,0.230090,0.664971,0.997431,3.210463e-02,0.437248,0.417317,0.664971,0.664971
78,dbd27d22,8.685240e-01,0.240179,0.067472,0.718577,0.252752,0.624461,0.868524,6.747215e-02,0.461994,0.438606,0.624461,0.624461
101,4a311bb0,1.520169e-02,0.262944,0.050775,0.877703,0.587658,0.842179,0.877703,1.520169e-02,0.439410,0.425301,0.800000,0.842179
104,f8a119f5,7.373833e-01,0.147547,0.028810,0.997243,0.069369,0.757454,0.997243,2.880984e-02,0.456301,0.442465,0.757454,0.757454
113,f7fb320d,8.656891e-01,0.400228,0.090834,0.806971,0.019349,0.676068,0.865689,1.934936e-02,0.476523,0.538148,0.676068,0.676068
117,4978666a,1.448098e-01,0.834338,0.076645,0.782662,0.073831,0.745419,0.834338,7.383075e-02,0.442951,0.445114,0.745419,0.745419
130,1c0a891e,5.365017e-01,0.190317,0.405982,0.724180,0.032057,0.687909,0.724180,3.205734e-02,0.429491,0.471242,0.687909,0.687909
151,8bbd8909,8.988781e-05,0.225953,0.136369,0.977400,0.714475,0.916842,0.977400,8.988781e-05,0.495188,0.470214,0.800000,0.916842
208,f5b54fb6,7.081897e-01,0.094619,0.212959,0.299930,0.593259,0.611758,0.708190,9.461888e-02,0.420119,0.446594,0.611758,0.611758


In [194]:
# Rows whose `is_iceberg` value differs between concat_sub and alt_sub.
prediction_differs = concat_sub["is_iceberg"] != alt_sub["is_iceberg"]
concat_sub[prediction_differs]


Unnamed: 0,id,is_iceberg_0,is_iceberg_1,is_iceberg_2,is_iceberg_3,is_iceberg_4,is_iceberg_5,is_iceberg_6,is_iceberg_7,is_iceberg_max,is_iceberg_min,is_iceberg_mean,is_iceberg_median,is_iceberg,is_iceberg_base
0,5941774d,0.049185,0.018919,2.138313e-02,1.001100e-02,0.003657,1.645529e-02,0.220130,2.347304e-02,0.220130,3.657331e-03,0.045402,0.020151,0.003657,2.347304e-02
1,4023181e,0.403480,0.390338,6.183937e-01,4.049912e-01,0.020802,9.866307e-01,0.014379,3.756849e-01,0.986631,1.437879e-02,0.401837,0.396909,0.396909,3.756849e-01
2,b20200e4,0.002890,0.001632,9.098931e-07,7.188838e-03,0.001704,1.559171e-03,0.000041,6.848715e-03,0.007189,9.098931e-07,0.002733,0.001668,0.000100,6.848715e-03
3,e7f018bb,0.993543,0.998026,9.738217e-01,9.973954e-01,0.999148,9.995090e-01,0.992729,9.986560e-01,0.999509,9.738217e-01,0.994104,0.997711,0.999509,9.986560e-01
5,a8d9b1fd,0.275026,0.110968,4.091924e-05,1.395310e-01,0.036811,8.380730e-01,0.082405,5.532939e-01,0.838073,4.091924e-05,0.254519,0.125249,0.125249,5.532939e-01
6,29e7727e,0.066558,0.049727,1.594431e-01,1.177437e-03,0.061324,1.044588e-01,0.034815,3.812961e-02,0.159443,1.177437e-03,0.064454,0.055525,0.001177,3.812961e-02
7,92a51ffb,0.997030,0.999123,9.893829e-01,9.997948e-01,0.999978,9.997451e-01,0.994780,9.985010e-01,0.999978,9.893829e-01,0.997292,0.998812,0.999900,9.985010e-01
8,c769ac97,0.015027,0.000324,4.145949e-13,2.410319e-04,0.001725,9.538788e-07,0.087789,4.076076e-04,0.087789,4.145949e-13,0.013189,0.000366,0.000100,4.076076e-04
9,aee0547d,0.013312,0.000232,1.269119e-16,1.919658e-04,0.004978,1.497171e-05,0.074415,2.713778e-04,0.074415,1.269119e-16,0.011677,0.000252,0.000100,2.713778e-04
10,565b28ac,0.000616,0.000096,8.310469e-14,1.849478e-04,0.001926,9.474445e-11,0.001578,6.782145e-06,0.001926,8.310469e-14,0.000551,0.000140,0.000100,6.782145e-06


In [65]:
# Min/max/median rule evaluated with the current cutoffs, without writing it
# back into concat_sub.
model_preds = concat_sub.iloc[:, 1:model_amount]
all_above = np.all(model_preds > cutoff_lo, axis=1)
all_below = np.all(model_preds < cutoff_hi, axis=1)
blah2 = np.where(all_above,
                 concat_sub['is_iceberg_max'],
                 np.where(all_below,
                          concat_sub['is_iceberg_min'],
                          concat_sub['is_iceberg_median']))


In [94]:
model_preds = concat_sub.iloc[:, 1:model_amount]

# Number of rows on which every model is above each "iceberg" threshold.
for threshold in (0.8, 0.7, 0.6):
    blah2 = np.all(model_preds > threshold, axis=1)
    print(blah2[blah2 == True].shape)
blah_iceberg = np.all(model_preds > 0.5, axis=1)
print(blah_iceberg[blah_iceberg == True].shape)

# Number of rows on which every model is below each "ship" threshold.
for threshold in (0.2, 0.3, 0.4):
    blah2 = np.all(model_preds < threshold, axis=1)
    print(blah2[blah2 == True].shape)
blah_ship = np.all(model_preds < 0.5, axis=1)
print(blah_ship[blah_ship == True].shape)

# Split of the rows at the 0.5 / 0.5 cutoffs: unanimous iceberg, unanimous
# ship, and the remainder where the models disagree.
len_all = len(concat_sub)
len_iceberg_processed = len(blah_iceberg[blah_iceberg == True])
len_ship_processed = len(blah_ship[blah_ship == True])
len_unprocessed = len_all - len_iceberg_processed - len_ship_processed
for row_count in (len_all, len_iceberg_processed, len_ship_processed, len_unprocessed):
    print(row_count)


# Things to try:
# - Straight (0.6, 0.4) and (0.5, 0.5) consensus thresholds
# - Calculate the difference between the median value and the min / max value
# - If thresholds / consensus is "good enough", use diff b/w median and min / max
# - Maybe see if there's consensus between the top 2 / 3 models... if so, use diff b/w median and min / max

# - clipping (0.001, 0.999)
# - different thresholds (0.55, 0.45)

(937,)
(1098,)
(1220,)
(1326,)
(1828,)
(2068,)
(2278,)
(2445,)
8424
1326
2445
4653


In [67]:
# Find values that are on the fence...

def test(val):
    if val < 0.5:
        return -1
    elif val > 0.5:
        return -1
    
    return val 

# Run test() on each row's current `is_iceberg` value, then keep only the
# entries that came back non-negative (i.e. were not mapped to -1).
fence_flags = concat_sub.apply(lambda row: test(row["is_iceberg"]), axis=1)
blah = fence_flags[fence_flags >= 0]

In [68]:
# Print how many entries survived the filter, then display them.
print(blah.shape)
blah

(0,)


Series([], dtype: int64)

In [69]:
# Preview the stacked frame; show only the first rows instead of dumping the
# entire DataFrame into the notebook output.
concat_sub.head(10)

Unnamed: 0,id,is_iceberg_0,is_iceberg_1,is_iceberg_2,is_iceberg_3,is_iceberg_4,is_iceberg_5,is_iceberg_max,is_iceberg_min,is_iceberg_mean,is_iceberg_median,is_iceberg_base,is_iceberg
0,5941774d,2.138313e-02,1.001100e-02,0.003657,1.645529e-02,0.220130,2.347304e-02,0.220130,3.657331e-03,0.049185,0.018919,2.347304e-02,3.657331e-03
1,4023181e,6.183937e-01,4.049912e-01,0.020802,9.866307e-01,0.014379,3.756849e-01,0.986631,1.437879e-02,0.403480,0.390338,3.756849e-01,3.903381e-01
2,b20200e4,9.098931e-07,7.188838e-03,0.001704,1.559171e-03,0.000041,6.848715e-03,0.007189,9.098931e-07,0.002890,0.001632,6.848715e-03,9.098931e-07
3,e7f018bb,9.738217e-01,9.973954e-01,0.999148,9.995090e-01,0.992729,9.986560e-01,0.999509,9.738217e-01,0.993543,0.998026,9.986560e-01,9.995090e-01
4,4371c8c3,4.257437e-01,6.751659e-02,0.001759,9.976440e-01,0.074289,3.550633e-02,0.997644,1.758835e-03,0.267076,0.070903,3.550633e-02,7.090292e-02
5,a8d9b1fd,4.091924e-05,1.395310e-01,0.036811,8.380730e-01,0.082405,5.532939e-01,0.838073,4.091924e-05,0.275026,0.110968,5.532939e-01,1.109680e-01
6,29e7727e,1.594431e-01,1.177437e-03,0.061324,1.044588e-01,0.034815,3.812961e-02,0.159443,1.177437e-03,0.066558,0.049727,3.812961e-02,1.177437e-03
7,92a51ffb,9.893829e-01,9.997948e-01,0.999978,9.997451e-01,0.994780,9.985010e-01,0.999978,9.893829e-01,0.997030,0.999123,9.985010e-01,9.999777e-01
8,c769ac97,4.145949e-13,2.410319e-04,0.001725,9.538788e-07,0.087789,4.076076e-04,0.087789,4.145949e-13,0.015027,0.000324,4.076076e-04,4.145949e-13
9,aee0547d,1.269119e-16,1.919658e-04,0.004978,1.497171e-05,0.074415,2.713778e-04,0.074415,1.269119e-16,0.013312,0.000232,2.713778e-04,1.269119e-16


In [121]:
# Understanding log loss

from sklearn.metrics import log_loss

# Each call below is log_loss(true_labels, predicted_probabilities) on small
# hand-written vectors. The first two cases both use two records (only the
# confidence changes); the last two use seven records, the final one with a
# single 0.5 prediction.
print("Log loss: Volume of data")
print("========================")
print("Tiny data, extremely confident and accurate:", log_loss([1, 0], [1, 0]))
print("Slightly more data, confident and accurate:", log_loss([1, 0], [0.999, 0.001]))
print("Even more data, confident and accurate:", log_loss([1, 0, 1, 0, 0, 1, 0], [0.999, 0.001, 0.999, 0.001, 0.001, 0.999, 0.001]))
print("All accurate except one 50/50:", log_loss([1, 0, 1, 0, 0, 1, 0], [0.999, 0.001, 0.999, 0.001, 0.5, 0.999, 0.001]))

# The same 14 true labels are used in every call of this section; only the
# predicted probabilities change (exact, clipped, some 0.5 guesses, and one
# confidently wrong prediction at index 4).
print("\nLog loss: Some examples")
print("=========================")
print("14 records, perfect:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0]))
print("14 records, very slight clipping:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [0.9999, 0.0001, 0.9999, 0.0001, 0.0001, 0.9999, 0.0001, 0.9999, 0.0001, 0.9999, 0.0001, 0.0001, 0.9999, 0.0001]))
print("14 records, slight clipping:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [0.999, 0.001, 0.999, 0.001, 0.001, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.001, 0.999, 0.001]))
print("14 records, 2 50/50 guesses:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [0.999, 0.001, 0.999, 0.001, 0.5, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.5, 0.999, 0.001]))
print("14 records, all 50/50 guesses:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]))
print("14 records, 1 very wrong:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [0.999, 0.001, 0.999, 0.001, 0.999, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.001, 0.999, 0.001]))
print("14 records, 1 perfectly wrong:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [0.999, 0.001, 0.999, 0.001, 1, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.001, 0.999, 0.001]))

# Every prediction in this section is on the correct side of 0.5; the calls
# differ only in how confident the predictions are (1/0 down to 0.5), plus a
# final case with a single weaker prediction (0.3 for a 0 label).
print("\nLog loss: Effect of right answers")
print("===================================")
print("14 records, perfect:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0]))
print("14 records, very slight clipping:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [0.9999, 0.0001, 0.9999, 0.0001, 0.0001, 0.9999, 0.0001, 0.9999, 0.0001, 0.9999, 0.0001, 0.0001, 0.9999, 0.0001]))
print("14 records, 95 percent sure:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [0.95, 0.05, 0.95, 0.05, 0.05, 0.95, 0.05, 0.95, 0.05, 0.95, 0.05, 0.05, 0.95, 0.05]))
print("14 records, 90 percent sure:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [0.9, 0.1, 0.9, 0.1, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.1, 0.9, 0.1]))
print("14 records, 80 percent sure:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [0.8, 0.2, 0.8, 0.2, 0.2, 0.8, 0.2, 0.8, 0.2, 0.8, 0.2, 0.2, 0.8, 0.2]))
print("14 records, 50 percent sure:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]))
print("14 records, 1 weak accurate:", log_loss([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0], [0.999, 0.001, 0.999, 0.001, 0.3, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.001, 0.999, 0.001]))

# Here the true labels are the inverse of those used in the sections above
# while the predicted probabilities are the same, so the predictions land on
# the wrong side. The captions are reused from the "right answers" section:
# "perfect" / "N percent sure" describe the confidence, not the correctness.
print("\nLog loss: Effect of wrong answers")
print("===================================")
print("14 records, perfect:", log_loss([0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1], [1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0]))
print("14 records, very slight clipping:", log_loss([0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1], [0.9999, 0.0001, 0.9999, 0.0001, 0.0001, 0.9999, 0.0001, 0.9999, 0.0001, 0.9999, 0.0001, 0.0001, 0.9999, 0.0001]))
print("14 records, 95 percent sure:", log_loss([0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1], [0.95, 0.05, 0.95, 0.05, 0.05, 0.95, 0.05, 0.95, 0.05, 0.95, 0.05, 0.05, 0.95, 0.05]))
print("14 records, 90 percent sure:", log_loss([0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1], [0.9, 0.1, 0.9, 0.1, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.1, 0.9, 0.1]))
print("14 records, 80 percent sure:", log_loss([0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1], [0.8, 0.2, 0.8, 0.2, 0.2, 0.8, 0.2, 0.8, 0.2, 0.8, 0.2, 0.2, 0.8, 0.2]))
print("14 records, 50 percent sure:", log_loss([0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1], [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]))
print("14 records, 1 weak accurate:", log_loss([0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1], [0.999, 0.001, 0.999, 0.001, 0.3, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.001, 0.999, 0.001]))

# Compares the cost of a single wrong prediction for data sets of different
# sizes and with different amounts of clipping.
print("\nLog loss: How many wrongs make a right")
print("========================================")

# perf_pred is a hard 0/1 copy of perf_data with one flipped entry
# (index 4 predicts 1 for a 0 label).
perf_data = [1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0]
perf_pred = [1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0]
print("Perfect, " + str(len(perf_data)) + " records, but 1 wrong:", log_loss(perf_data, perf_pred))

# Predictions at 0.9999 / 0.0001; the entry at index 3 is on the wrong side.
vs_data = [1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0]
vs_pred = [0.9999, 0.0001, 0.9999, 0.9999, 0.0001, 0.9999, 0.0001, 0.9999, 0.0001, 0.9999, 0.0001, 0.0001, 0.9999, 0.0001, 0.9999, 0.0001, 0.9999, 0.0001, 0.0001, 0.9999, 0.0001]
print("Very slight clipping, " + str(len(vs_data)) + " records, 1 wrong:", log_loss(vs_data, vs_pred))

# Predictions at 0.999 / 0.001; the entry at index 4 is on the wrong side.
s_data = [1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0]
s_pred = [0.999, 0.001, 0.999, 0.001, 0.999, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.001, 0.999, 0.001, 0.001]
print("Slight clipping, " + str(len(s_data)) + " records, 1 wrong:", log_loss(s_data, s_pred))

# The hard 0/1 predictions from perf_pred again, this time clipped to the two
# ranges used above, scored against the full perf_data labels.
vs_pred_2 = np.clip(perf_pred, 0.0001, 0.9999)
print("Very slight clipping, " + str(len(vs_pred_2)) + " records, 1 wrong:", log_loss(perf_data, vs_pred_2))

s_pred_2 = np.clip(perf_pred, 0.001, 0.999)
print("Slight clipping, " + str(len(s_pred_2)) + " records, 1 wrong:", log_loss(perf_data, s_pred_2))


Log loss: Volume of data
Tiny data, extremely confident and accurate: 9.99200722163e-16
Slightly more data, confident and accurate: 0.00100050033358
Even more data, confident and accurate: 0.00100050033358
All accurate except one 50/50: 0.0998785975088

Log loss: Some examples
14 records, perfect: 9.99200722163e-16
14 records, very slight clipping: 0.000100005000333
14 records, slight clipping: 0.00100050033358
14 records, 2 50/50 guesses: 0.0998785975088
14 records, all 50/50 guesses: 0.69314718056
14 records, 1 very wrong: 0.49434012738
14 records, 1 perfectly wrong: 2.46804160691

Log loss: Effect of right answers
14 records, perfect: 9.99200722163e-16
14 records, very slight clipping: 0.000100005000333
14 records, 95 percent sure: 0.0512932943876
14 records, 90 percent sure: 0.105360515658
14 records, 80 percent sure: 0.223143551314
14 records, 50 percent sure: 0.69314718056
14 records, 1 weak accurate: 0.026405817734

Log loss: Effect of wrong answers
14 records, perfect: 34.53911