In [4]:
# This notebook computes and prints the results in Table 1

import numpy as np
from expected_cost import ec, utils
from expected_cost.data import get_llks_for_multi_classif_task
from sklearn.metrics import f1_score, recall_score, precision_score



In [5]:
# Select the data source: either the name of the built-in simulation
# ('gaussian_sim') or a path to a directory handled by
# get_llks_for_multi_classif_task.
#dataset = '../data/cifar10_resnet-50/'
dataset = 'gaussian_sim'

if dataset == 'gaussian_sim':
    # Simulation parameters
    N = 10000       # total number of samples
    # Within-class spread of the features, determines the difficulty of the problem.
    # NOTE(review): this value is passed below as 'feat_var', yet it was originally
    # described as a standard deviation -- confirm which one the simulator expects.
    var = 0.20
    P0 = 0.8        # Prior for class 0, 0.8 or 0.5 for the results in the paper
    K = 10          # Number of classes
    # Class 0 gets prior P0; the remaining mass is split evenly over the other K-1 classes.
    priors = np.array([P0] + [(1 - P0) / (K - 1)] * (K - 1))
else:
    # Nothing to configure up front: N, K and the priors are derived from the
    # loaded data further below.
    priors, var, N = None, None, None

# Generate or read the log posteriors

targets, _, logpost = get_llks_for_multi_classif_task(dataset, priors=priors, sim_params={'feat_var': var}, N=N, logpost=True)

if dataset != 'gaussian_sim':
    N = len(targets)
    K = logpost.shape[1]
    # minlength=K keeps one entry per class even when the highest-index classes
    # never occur in targets; without it the priors vector would be shorter than K.
    priors = np.bincount(targets, minlength=K)/N

# The CinvP costs below divide by the priors, so a class with zero prior would
# silently put inf/nan entries into that cost matrix. Fail loudly instead.
if np.any(priors <= 0):
    raise ValueError("All class priors must be strictly positive to build the CinvP cost matrix")

# Define various costs matrices
costs = {}

# Standard 0-1 cost
costs['C01'] = ec.CostMatrix.zero_one_costs(K)

# Balanced error rate. The costs are inversely proportional to the priors:
# row i of the 0-1 cost matrix is scaled by 1 / (K * priors[i]).
costm = (1 - np.eye(K))/np.atleast_2d(priors).T/K
costs['CinvP']  = ec.CostMatrix(costm)

# A 0-1 cost with the last row replaced by 100 to simulate a case in which the errors in that class
# are much more costly than the errors in other classes.
costm = 1 - np.eye(K)
costm[-1,:] *= 100
costs['Cimp']  = ec.CostMatrix(costm)

# Finally, two cost functions with abstention options with different cost.
costs['Cabs1'] = ec.CostMatrix.zero_one_costs(K, abstention_cost=0.05)
costs['Cabs2'] = ec.CostMatrix.zero_one_costs(K, abstention_cost=0.30)

In [6]:
# For every cost matrix, find the decision that minimizes the prior-weighted
# cost. This index depends only on the priors and the cost matrix, and it is
# needed both for the summary lines and for the 'Naive' row of the table, so
# it is computed once here and reused.
best_naive = {
    costn: np.argmin(np.dot(priors.T, cost.get_matrix()))
    for costn, cost in costs.items()
}

# Print the best naive decision for each cost matrix (shown 1-based)

for costn in costs:
    naive_dec = best_naive[costn] + 1
    print(f"Best naive decision for {costn:7s}: {naive_dec}")
print("")

# Table Header

sep = '  ' # Field separator for printing 
print(f"Decisions{sep:s} ", end='')
for costn in costs:
    print(f" {costn:10s}  {sep:s}", end='')
    # Cost matrices with an abstention option get an extra column
    if 'abs' in costn:
        print(f"%Abs {sep:s}", end='')
print("")

# Argmax decisions, which are the same for all cost matrices

argmax_decisions = np.argmax(logpost, axis=-1)

# Print the various ECs for each of the decision algorithms:
# one table row per algorithm, one group of columns per cost matrix.

for dec in ['Naive', 'Argmax', 'Bayes']:

    print(f"{dec:6s}   {sep:s}", end='')

    for costn, cost in costs.items():

        if dec == 'Argmax':
            decisions = argmax_decisions
        elif dec == 'Bayes':
            decisions, _ = ec.bayes_decisions(logpost, cost, score_type='log_posteriors')
        elif dec == 'Naive':
            # Same constant decision for every sample
            decisions = np.ones_like(targets) * best_naive[costn]
        else:
            print("Unknown decision algorithm")
            continue

        # Average cost without and with the 'adjusted' option of ec.average_cost
        ecval  = ec.average_cost(targets, decisions, cost, adjusted=False)
        ecvaln = ec.average_cost(targets, decisions, cost, adjusted=True)

        print(f" {ecval:5.2f}{sep:s}{ecvaln:5.2f}{sep:s}", end='')
        if 'abs' in costn:
            # Decisions equal to K (one past the last class index) are counted as abstentions
            perc_abs = np.sum(decisions == K) / len(decisions) * 100
            print(f"{perc_abs:5.0f}{sep:s}", end='')

    print('')



Best naive decision for C01    : 1
Best naive decision for CinvP  : 9
Best naive decision for Cimp   : 10
Best naive decision for Cabs1  : 11
Best naive decision for Cabs2  : 1

Decisions    C01            CinvP          Cimp           Cabs1         %Abs    Cabs2         %Abs   
Naive        0.20   1.00    0.90   1.00    0.98   1.00    0.05   1.00    100    0.20   1.00      0  
Argmax       0.06   0.32    0.28   0.31    0.36   0.37    0.06   1.29      0    0.06   0.32      0  
Bayes        0.06   0.32    0.23   0.26    0.08   0.08    0.02   0.35     25    0.06   0.28      7  
