In [5]:
import pandas as pd
import numpy as np

In [6]:
def f2_score(y_true, y_pred, eps=1e-7) -> float:
    """Mean per-row F2 score for multi-label indicator matrices.

    Intended as a faster equivalent of
    ``fbeta_score(y_true, y_pred, beta=2, average='samples')``.

    Parameters
    ----------
    y_true, y_pred : pandas.DataFrame or array-like, 2-D
        Ground-truth and predicted label indicators (0/1 or bool), one row
        per sample and one column per class. Both must have the same shape;
        rows are matched by position, not by index label.
    eps : float
        Small constant added to denominators to avoid division by zero.

    Returns
    -------
    float
        The F2 score averaged over rows. A row with no true labels scores 0
        instead of producing NaN.
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; np.asarray accepts
    # DataFrames and plain arrays alike. Casting to float also lets boolean
    # prediction masks take part in the arithmetic below.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    tp = (y_true * y_pred).sum(axis=1)
    true_count = y_true.sum(axis=1)
    # Rows without any true label would be 0/0 = NaN and poison the mean;
    # define their recall as 0. Rows that do have labels are computed
    # exactly as before.
    r = np.divide(tp, true_count, out=np.zeros_like(tp), where=true_count > 0)
    p = tp / (y_pred.sum(axis=1) + eps)
    beta2 = 4  # beta ** 2 with beta = 2
    f2 = (1 + beta2) * p * r / (beta2 * p + r + eps)
    return float(f2.mean())

In [7]:
# Read the stored prediction table (one column per class, as the preview
# below shows) and display its first three rows.
valid_pred = pd.read_hdf('../../_data/20170718-003045-b2-albu-eval.h5')
valid_pred.head(3)

Unnamed: 0_level_0,agriculture,artisinal_mine,bare_ground,blooming,blow_down,clear,cloudy,conventional_mine,cultivation,habitation,haze,partly_cloudy,primary,road,selective_logging,slash_burn,water
b'image_name',Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
train_0,0.0029,0.00137,0.001533,0.001599,0.001442,0.733193,0.001396,0.001365,0.002094,0.001792,0.828538,0.001803,0.998538,0.001924,0.001486,0.001427,0.003176
train_1,0.893066,0.002227,0.009441,0.001569,0.001566,0.9981,0.00137,0.001451,0.642272,0.023163,0.001569,0.001671,0.998555,0.704373,0.002417,0.009954,0.88321
train_2,0.002361,0.001377,0.001498,0.001468,0.001442,0.99831,0.001365,0.001356,0.001742,0.001737,0.001508,0.001479,0.998572,0.001655,0.001452,0.001411,0.00193


In [8]:
# Read the 0/1 label table, using the first CSV column as the index, and
# display its first three rows.
train_flat = pd.read_csv('../../_data/train_flat.csv', index_col=0)
train_flat.head(3)

Unnamed: 0_level_0,agriculture,artisinal_mine,bare_ground,blooming,blow_down,clear,cloudy,conventional_mine,cultivation,habitation,haze,partly_cloudy,primary,road,selective_logging,slash_burn,water
image_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
train_0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0
train_1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1
train_2,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0


In [17]:
# Per-class error analysis: for each class, measure how much the overall F2
# score would improve if that class's predictions were replaced by the
# ground truth -- entirely, only where the model predicted positive (removes
# false positives), or only where it predicted negative (removes false
# negatives).
# NOTE(review): f2_score compares rows by position, so this assumes
# train_flat and valid_pred hold the same images in the same order -- confirm.
threshold = 0.2
original_score = f2_score(train_flat, valid_pred > threshold)

losses = []
for cls in valid_pred.columns:
    # Every prediction for this class replaced by the true label.
    t = valid_pred.copy()
    t[cls] = train_flat[cls]
    loss = f2_score(train_flat, t > threshold) - original_score

    # Only predicted positives replaced: this corrects the false positives.
    t = valid_pred.copy()
    idx = t[cls] > threshold
    t.loc[idx, cls] = train_flat.loc[idx, cls]
    fp_loss = f2_score(train_flat, t > threshold) - original_score

    # Only predicted negatives replaced: this corrects the false negatives.
    # `<=` (not `<`) so the mask is the exact complement of the `> threshold`
    # binarization; a score equal to the threshold is a predicted negative.
    t = valid_pred.copy()
    idx = t[cls] <= threshold
    t.loc[idx, cls] = train_flat.loc[idx, cls]
    fn_loss = f2_score(train_flat, t > threshold) - original_score

    # Support = fraction of rows in train_flat labelled with this class.
    losses.append((cls, loss, fn_loss, fp_loss, train_flat[cls].sum() / len(train_flat)))

# Largest total recoverable score first.
losses.sort(key=lambda x: x[1], reverse=True)

# The third column holds fn_loss, so it is labelled "FN Loss".
print('{:<20} {:>7}  {:>7}  {:>7}  {}'.format('Class', 'Loss', 'FN Loss', 'FP Loss', 'Support'))
for cls, loss, fn_loss, fp_loss, support in losses:
    print('{:<20} {:.5f}  {:.5f}  {:.5f}  {:>7.2%}'.format(cls, loss, fn_loss, fp_loss, support))

Class                   Loss  TN Loss  FP Loss  Support
cultivation          0.00884  0.00464  0.00420   11.06%
agriculture          0.00844  0.00297  0.00547   30.42%
water                0.00770  0.00401  0.00369   18.31%
road                 0.00608  0.00297  0.00311   19.94%
haze                 0.00578  0.00308  0.00270    6.66%
clear                0.00492  0.00155  0.00337   70.24%
habitation           0.00413  0.00224  0.00189    9.04%
primary              0.00366  0.00123  0.00243   92.67%
partly_cloudy        0.00307  0.00093  0.00213   17.94%
bare_ground          0.00288  0.00203  0.00085    2.13%
cloudy               0.00263  0.00068  0.00196    5.16%
blooming             0.00192  0.00160  0.00033    0.82%
selective_logging    0.00122  0.00095  0.00028    0.84%
slash_burn           0.00081  0.00068  0.00013    0.52%
blow_down            0.00048  0.00042  0.00006    0.24%
artisinal_mine       0.00021  0.00013  0.00009    0.84%
conventional_mine    0.00019  0.00015  0.00004  