In [5]:
import pandas as pd
import numpy as np

In [6]:
def f2_score(y_true, y_pred, eps=1e-7) -> float:
    """Mean per-sample F2 score for multi-label indicator matrices.

    Written as a faster stand-in for
    ``fbeta_score(y_true, y_pred, beta=2, average='samples')``; the small
    ``eps`` terms in the denominators mean the value can differ from an exact
    F-beta by a negligible amount.

    Args:
        y_true: 2-D DataFrame or array-like of 0/1 (or boolean) ground-truth
            indicators, one row per sample and one column per label.
        y_pred: Binarised predictions with the same shape and the same
            row/column order as ``y_true``.
        eps: Small constant added to denominators to avoid division by zero.

    Returns:
        The F2 score averaged over rows.

    Note:
        Inputs are converted to plain arrays, so DataFrame index and column
        labels are ignored: rows and columns are matched by position.
    """
    # np.asarray replaces DataFrame.as_matrix(), which was removed in
    # pandas 1.0; it also lets plain nested lists be passed in.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    tp = (y_true * y_pred).sum(axis=1)
    n_true = y_true.sum(axis=1)
    # Rows without any true label would give 0/0 = NaN and poison the mean;
    # define their recall as 0. Rows with labels are computed exactly as before.
    r = np.divide(tp, n_true, out=np.zeros_like(tp), where=n_true > 0)
    p = tp / (y_pred.sum(axis=1) + eps)
    beta2 = 4  # beta ** 2 for beta = 2
    f2 = (1 + beta2) * p * r / (beta2 * p + r + eps)
    return f2.mean()

In [7]:
# Load the stored model outputs for the evaluation run: one row per image and
# one column per label (see the preview below).
valid_pred = pd.read_hdf(
    '../../_data/20170718-003045-b2-albu-eval.h5',
)
valid_pred.head(3)

Unnamed: 0_level_0,agriculture,artisinal_mine,bare_ground,blooming,blow_down,clear,cloudy,conventional_mine,cultivation,habitation,haze,partly_cloudy,primary,road,selective_logging,slash_burn,water
b'image_name',Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
train_0,0.0029,0.00137,0.001533,0.001599,0.001442,0.733193,0.001396,0.001365,0.002094,0.001792,0.828538,0.001803,0.998538,0.001924,0.001486,0.001427,0.003176
train_1,0.893066,0.002227,0.009441,0.001569,0.001566,0.9981,0.00137,0.001451,0.642272,0.023163,0.001569,0.001671,0.998555,0.704373,0.002417,0.009954,0.88321
train_2,0.002361,0.001377,0.001498,0.001468,0.001442,0.99831,0.001365,0.001356,0.001742,0.001737,0.001508,0.001479,0.998572,0.001655,0.001452,0.001411,0.00193


In [8]:
# Load the label table, using the first CSV column as the index; the preview
# below shows one 0/1 indicator column per label.
train_flat = pd.read_csv(
    '../../_data/train_flat.csv',
    index_col=0,
)
train_flat.head(3)

Unnamed: 0_level_0,agriculture,artisinal_mine,bare_ground,blooming,blow_down,clear,cloudy,conventional_mine,cultivation,habitation,haze,partly_cloudy,primary,road,selective_logging,slash_burn,water
image_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
train_0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0
train_1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1
train_2,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0


In [22]:
# Oracle analysis: for every label, measure how much the overall F2 score would
# rise if that label's predictions were replaced by the ground truth -- for all
# rows ("Loss"), only for rows predicted negative ("FN"), and only for rows
# predicted positive ("FP"). Larger values mean the label costs more score.
threshold = 0.2
original_score = f2_score(train_flat, valid_pred > threshold)


def _f2_gain_with_truth(cls, rows=None):
    """Return the F2 change vs. ``original_score`` after patching one label.

    Copies ``valid_pred``, overwrites column ``cls`` with the values from
    ``train_flat`` (all rows when ``rows`` is None, otherwise only the rows
    selected by the boolean mask ``rows``), re-thresholds and re-scores.
    """
    patched = valid_pred.copy()
    if rows is None:
        patched[cls] = train_flat[cls]
    else:
        patched.loc[rows, cls] = train_flat.loc[rows, cls]
    return f2_score(train_flat, patched > threshold) - original_score


losses = []
for cls in valid_pred.columns:
    # Same rule as the binarisation above, so the FP and FN row sets partition
    # the data exactly; a score equal to the threshold counts as a predicted
    # negative instead of being dropped from both sets.
    predicted_positive = valid_pred[cls] > threshold

    loss = _f2_gain_with_truth(cls)
    fp_loss = _f2_gain_with_truth(cls, predicted_positive)
    fn_loss = _f2_gain_with_truth(cls, ~predicted_positive)

    # Fraction of rows in which the label is present.
    support = train_flat[cls].sum() / len(train_flat)
    losses.append((cls, loss, fn_loss, fp_loss, support))

# Most costly labels first.
losses.sort(key=lambda x: x[1], reverse=True)

print('{:<18} {:>6} {:>6} {:>6} {}'.format('Class', 'Loss', 'FN', 'FP', 'Support'))
for cls, loss, fn_loss, fp_loss, support in losses:
    print('{:<18} {:.4f} {:.4f} {:.4f} {:>6.1%}'.format(cls, loss, fn_loss, fp_loss, support))

Class                Loss     FN     FP Support
cultivation        0.0088 0.0046 0.0042  11.1%
agriculture        0.0084 0.0030 0.0055  30.4%
water              0.0077 0.0040 0.0037  18.3%
road               0.0061 0.0030 0.0031  19.9%
haze               0.0058 0.0031 0.0027   6.7%
clear              0.0049 0.0016 0.0034  70.2%
habitation         0.0041 0.0022 0.0019   9.0%
primary            0.0037 0.0012 0.0024  92.7%
partly_cloudy      0.0031 0.0009 0.0021  17.9%
bare_ground        0.0029 0.0020 0.0008   2.1%
cloudy             0.0026 0.0007 0.0020   5.2%
blooming           0.0019 0.0016 0.0003   0.8%
selective_logging  0.0012 0.0009 0.0003   0.8%
slash_burn         0.0008 0.0007 0.0001   0.5%
blow_down          0.0005 0.0004 0.0001   0.2%
artisinal_mine     0.0002 0.0001 0.0001   0.8%
conventional_mine  0.0002 0.0001 0.0000   0.2%
