In [1]:
import _base_path
import json
import numpy as np
import pandas as pd
from resources.data_io import load_mappings

from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from typing import Union, Iterable

In [2]:
# Notebook configuration.
# DATA is the dataset directory name interpolated into the data path below.
DATA  = 'incidents'
# LABEL is the target column; it is also used as the key into support_zones.json
# and as the suffix of the results file name (palm-{LABEL}.csv).
LABEL = 'hazard_category'

# Load Class-Mappings:

In [3]:
# Load the class names for LABEL from the dataset's splits directory.
# NOTE(review): load_mappings is a project helper; judging by the output below it
# returns a NumPy array of class-name strings -- confirm against resources.data_io.
class_map = load_mappings(f'../data/{DATA}/splits', LABEL)
class_map

array(['allergens', 'biological', 'chemical',
       'food additives and flavourings', 'food contact materials',
       'foreign bodies', 'fraud', 'migration', 'organoleptic aspects',
       'other hazard', 'packaging defect'], dtype='<U30')

In [4]:
# support_zones.json holds, per label column, a pair of class-name lists that is
# unpacked here as (high_support, low_support).
# The path is built from DATA so it stays in sync with the class-mapping path above.
with open(f'../data/{DATA}/support_zones.json', 'r', encoding='utf-8') as file:
    high_support, low_support = json.load(file)[LABEL]

In [5]:
# Inspect the first list read from support_zones.json for LABEL.
high_support

['biological']

In [6]:
# Inspect the second list read from support_zones.json for LABEL.
low_support

['foreign bodies',
 'chemical',
 'fraud',
 'other hazard',
 'packaging defect',
 'organoleptic aspects',
 'food additives and flavourings',
 'migration',
 'food contact materials']

In [7]:
# Number of samples per class in the full dataset.
# NOTE(review): this path has no '../' prefix, unlike the other data paths in
# this notebook -- confirm the working directory it is meant to resolve against.
counts = pd.read_csv('data/incidents/incidents_final.csv')[LABEL].value_counts()

# Re-running this cell must not wrap the triples a second time: if class_map was
# already expanded, recover the plain names in their original mapping order.
if len(class_map) > 0 and isinstance(class_map[0], tuple):
    class_map = [name for name, _, _ in sorted(class_map, key=lambda row: row[1])]

# One (class name, index in the loaded mapping, sample count) triple per class,
# most frequent class first; classes absent from the CSV get a count of 0.
class_map = sorted(
    ((name, idx, counts.get(name, 0)) for idx, name in enumerate(class_map)),
    key=lambda row: row[2],
    reverse=True,
)
class_map

[('biological', 1, 2579),
 ('allergens', 0, 2553),
 ('foreign bodies', 5, 946),
 ('chemical', 2, 584),
 ('fraud', 6, 538),
 ('other hazard', 9, 189),
 ('packaging defect', 10, 101),
 ('organoleptic aspects', 8, 81),
 ('food additives and flavourings', 3, 32),
 ('migration', 7, 14),
 ('food contact materials', 4, 1)]

# Load Results:

In [11]:
# Load the per-sample predictions; missing cells become '' so that the string
# operations in calculate_metrics never see NaN.
results = pd.read_csv(f'results/palm/palm-{LABEL}.csv').fillna('')

# Keep the fold id next to the gold label and the prediction columns:
# calculate_metrics filters on `cv_split` and drops it (together with `label`)
# to find the prediction columns, so it must survive this selection.
# NOTE(review): the prediction column names below must exist in the CSV -- confirm.
results = results[['cv_split', 'label', 'baseline_all', 'baseline_lim', 'conformal']]

results.head()

Unnamed: 0,cv_split,label,baseline_all,baseline_all_norm,baseline
0,0,fraud,misbranding,other hazard,misbranding
1,0,allergens,allergens,allergens,allergens
2,0,fraud,fraud,fraud,fraud
3,0,allergens,allergens,allergens,allergens
4,0,foreign bodies,foreign bodies,foreign bodies,foreign bodies


In [9]:
def calculate_metrics(classes=None, n_splits=5):
    """Score every prediction column of the global ``results`` frame per CV split.

    For each column of ``results`` other than ``cv_split`` and ``label``, and for
    each split id in ``range(n_splits)``, the gold labels and the predictions are
    turned into 0/1 indicator rows over ``classes`` and passed to scikit-learn's
    macro-averaged F1, recall and precision as well as ``accuracy_score``.

    Matching is case-insensitive on both sides. A class counts as a gold label
    when its name occurs in the label string (substring test); a class counts as
    predicted only when it equals the prediction exactly.

    Parameters
    ----------
    classes : iterable of str, optional
        Class names to evaluate on. When omitted, the names are taken from the
        global ``class_map`` at call time (first element of every triple).
    n_splits : int, default 5
        Number of cross-validation splits; split ids ``0 .. n_splits - 1`` are
        looked up in the ``cv_split`` column.

    Returns
    -------
    dict
        ``{column: {'f1' | 'recall' | 'precision' | 'accuracy': (mean, std)}}``
        where mean and standard deviation are taken over the splits.
    """
    if classes is None:
        # Resolved at call time rather than at definition time, so the default
        # always reflects the current class_map.
        classes = [name for name, _, _ in class_map]
    classes = [c.lower() for c in classes]

    metrics = {}

    for col in results.drop(columns=['cv_split', 'label']).columns:
        f1        = []
        recall    = []
        precision = []
        accuracy  = []

        for split in range(n_splits):
            fold = results.loc[results['cv_split'] == split, ['label', col]]

            # One indicator row per sample, one column per class.
            y_true = np.array(
                [[c in label.lower() for c in classes] for label in fold['label']],
                dtype=float
            )
            y_pred = np.array(
                [[c == prediction.lower() for c in classes] for prediction in fold[col]],
                dtype=float
            )

            f1.append(f1_score(y_true, y_pred, average='macro'))
            recall.append(recall_score(y_true, y_pred, average='macro'))
            precision.append(precision_score(y_true, y_pred, average='macro'))
            accuracy.append(accuracy_score(y_true, y_pred))

        metrics[col] = {
            'f1':        (np.mean(f1), np.std(f1)),
            'recall':    (np.mean(recall), np.std(recall)),
            'precision': (np.mean(precision), np.std(precision)),
            'accuracy':  (np.mean(accuracy), np.std(accuracy))
        }
    return metrics

In [None]:
# Score every prediction column three times: over the default class list, over
# the classes in high_support only, and over the classes in low_support only.
metrics_all = calculate_metrics()
metrics_high_support = calculate_metrics(high_support)
metrics_low_support = calculate_metrics(low_support)

In [11]:
def _format_metric_cells(table, model, metrics):
    """Render one group of LaTeX cells ('$mean plus-minus std$') for ``model``.

    Returns the cells joined by ' & ', or only the column separators when
    ``model`` has no entry in ``table`` so that the row keeps its column count.
    """
    if model not in table:
        return ' &' * (len(metrics) - 1)
    return ' & '.join(
        f'${table[model][metric][0]:.2f} \\pm {table[model][metric][1]:.2f}$'
        for metric in metrics
    )


def print_table(metrics:Iterable[str] = ['f1', 'accuracy']):
    """Print one LaTeX table row per model found in the global ``metrics_all``.

    Each row consists of the upper-cased model name followed by three groups of
    cells, taken in this order from the global dictionaries ``metrics_all``,
    ``metrics_high_support`` and ``metrics_low_support``. Every cell is formatted
    as mean and standard deviation with two decimals. A model that is missing
    from one of the dictionaries gets empty cells for that group.

    Parameters
    ----------
    metrics : iterable of str
        Metric names to print per group, in column order. Each name must be a
        key of the per-model dictionaries (e.g. 'f1', 'accuracy').
    """
    # Materialise once: the names are iterated three times per row and counted
    # for the empty-group fallback.
    metrics = list(metrics)

    for model in metrics_all:
        groups = [
            _format_metric_cells(table, model, metrics)
            for table in (metrics_all, metrics_high_support, metrics_low_support)
        ]
        # Groups are separated by ' &' plus a line break; the row ends with the
        # LaTeX row terminator (two backslashes).
        row = f'{model.upper()} &\n' + ' &\n'.join(groups) + ' \\\\\n'
        print(row)

In [12]:
# Emit the LaTeX rows for the default metrics (F1 and accuracy).
print_table()

BASELINE &
$0.14 \pm 0.01$ & $0.39 \pm 0.01$ &
$0.66 \pm 0.02$ & $0.81 \pm 0.01$ &
$0.12 \pm 0.01$ & $0.55 \pm 0.02$ \\

CONFORMAL &
$0.14 \pm 0.01$ & $0.39 \pm 0.01$ &
$0.67 \pm 0.02$ & $0.82 \pm 0.01$ &
$0.12 \pm 0.01$ & $0.54 \pm 0.02$ \\



# Failure analysis:

In [13]:
# Reload the raw predictions, discard the index and prompt columns and replace
# missing cells with the empty string.
results = (
    pd.read_csv(f'results/palm/palm-{LABEL}.csv')
    .drop(columns=['Unnamed: 0', 'prompt_all', 'prompt_limited', 'prompt_conformal_20%'])
    .fillna('')
)

# Every distinct value that occurs in the gold `label` column.
label_types = set(results['label'].values)
label_types

{'2-chloroethanol',
 '2-chloroethanol and ethylene oxide',
 'abnormal colour',
 'abnormal smell',
 'absence of expiry/use by dates',
 'absence of labelling',
 'addition',
 'adulteration (ema)',
 'adverse reaction',
 'aeromonas hydrophila',
 'aflatoxin',
 'alcohol content',
 'algae',
 'aliphatic hydrocarbons',
 'alkaloids',
 'allergens',
 'allergic reaction',
 'almond',
 'altered organoleptic characteristics',
 'aluminium',
 'amygdalin',
 'animal matter',
 'anthraquinone',
 'antibiotics, vet drugs',
 'appearance',
 'apple stems',
 'arsenic',
 'atropine',
 'attempt to illegally import',
 'azinphos-methyl',
 'bacillus cereus',
 'bacillus cytotoxicus',
 'bacillus spp.',
 'bad smell / off odor',
 'barley',
 'benzo(a)pyrene',
 'biocontaminants',
 'biological',
 'biotoxins (other)',
 'bone fragment',
 'botulinum toxin',
 'brazil nut',
 'breakage',
 'bromate',
 'bse',
 'bulging packaging',
 'bursting possibility of bottle due to pressure build-up by secondary fermentation',
 'campylobacter col

Baseline:

In [29]:
# Per-split scores for the `baseline` predictions. A prediction that is not one
# of the gold label values is counted as a failure and scored as the pseudo-class
# 'fail'.
f1, acc, fail = [], [], []

for split in range(5):
    fold = results[results.cv_split == split]
    gold = fold.label.values

    unknown = [p not in label_types for p in fold.baseline]
    predicted = ['fail' if miss else p for p, miss in zip(fold.baseline, unknown)]

    f1.append(f1_score(gold, predicted, average='macro'))
    acc.append(accuracy_score(gold, predicted))
    fail.append(np.mean(unknown))  # share of out-of-vocabulary predictions

print(f'F1:       {np.mean(f1):.2f} \u00b1 {np.std(f1):.2f}')
print(f'Accuracy: {np.mean(acc):.2f} \u00b1 {np.std(acc):.2f}')
print(f'Failed:   {np.mean(fail):.2f} \u00b1 {np.std(fail):.2f}')

F1:       0.26 ± 0.02
Accuracy: 0.42 ± 0.01
Failed:   0.23 ± 0.02


Conformal:

In [30]:
# Per-split scores for the `conformal` predictions. A prediction that is not one
# of the gold label values is counted as a failure and scored as the pseudo-class
# 'fail'.
f1, acc, fail = [], [], []

for split in range(5):
    fold = results[results.cv_split == split]
    gold = fold.label.values

    unknown = [p not in label_types for p in fold.conformal]
    predicted = ['fail' if miss else p for p, miss in zip(fold.conformal, unknown)]

    f1.append(f1_score(gold, predicted, average='macro'))
    acc.append(accuracy_score(gold, predicted))
    fail.append(np.mean(unknown))  # share of out-of-vocabulary predictions

print(f'F1:       {np.mean(f1):.2f} \u00b1 {np.std(f1):.2f}')
print(f'Accuracy: {np.mean(acc):.2f} \u00b1 {np.std(acc):.2f}')
print(f'Failed:   {np.mean(fail):.2f} \u00b1 {np.std(fail):.2f}')

F1:       0.26 ± 0.01
Accuracy: 0.44 ± 0.01
Failed:   0.15 ± 0.01
