In [1]:
import pandas as pd

from sklearn.metrics import cohen_kappa_score

# Load files:

In [2]:
# Ground truth: read the semicolon-separated file (first column becomes the
# index) and replace missing values with '' so every cell can be sliced.
data = pd.read_csv('ground_truth.csv', sep=';', index_col=0).fillna('')

# For each raw category string: drop the first two and last two characters,
# split the remainder on "' '" and keep only the first piece as the label.
# NOTE(review): presumably the cells look like "['a' 'b']" -- confirm in the CSV.
for true_col, raw_col in (('ProdCat_true', 'productCategory'),
                          ('HazCat_true', 'hazardCategory')):
    data[true_col] = [raw[2:-2].split("' '")[0] for raw in data[raw_col].values]

# Keep only the text and the two derived reference-label columns.
data = data[['text', 'ProdCat_true', 'HazCat_true']]

In [3]:
# Labels read from 'sample-annotators.csv'; missing values become ''.
labels_annotators = pd.read_csv(
    'sample-annotators.csv',
    sep=';',
    index_col=0,
).fillna('')

# Assigning a Series as a column aligns it with `data` on the index labels.
for source_col, target_col in (('productCategory', 'ProdCat_annotator'),
                               ('hazardCategory', 'HazCat_annotator')):
    data[target_col] = labels_annotators[source_col]

In [4]:
# Labels read from 'sample-expert.csv'; missing values become ''.
expert_frame = pd.read_csv('sample-expert.csv', sep=';', index_col=0)
labels_expert = expert_frame.fillna('')

# Destination column in `data` -> source column in the expert file.
# Assigning a Series as a column aligns it with `data` on the index labels.
expert_columns = {
    'ProdCat_expert': 'productCategory',
    'HazCat_expert': 'hazardCategory',
}
for target_col, source_col in expert_columns.items():
    data[target_col] = labels_expert[source_col]

In [5]:
# Column names in ascending alphabetical order ...
cols_sorted = sorted(data.columns)
# ... applied in reverse, so 'text' comes first, then the ProdCat_* columns,
# then the HazCat_* columns (within each group: true, expert, annotator).
data = data[list(reversed(cols_sorted))]

In [6]:
# Persist the combined table (text plus true/expert/annotator labels) as CSV.
data.to_csv('sample-combined.csv')
# Bare trailing expression: the notebook renders the frame as this cell's output.
data

Unnamed: 0,text,ProdCat_true,ProdCat_expert,ProdCat_annotator,HazCat_true,HazCat_expert,HazCat_annotator
0,Notification - Parador Plus brand Bouillon Chi...,"soups, broths, sauces and condiments",poultry meat and poultry meat products,poultry meat and poultry meat products,biological,biological,biological
1,KP Snacks is recalling Popchips Veg Vibes Sea ...,prepared dishes and snacks,prepared dishes and snacks,prepared dishes and snacks,allergens,allergens,allergens
2,"Long Island City, NY - SHRI SHIVA Foods Inc. i...",herbs and spices,herbs and spices,herbs and spices,biological,biological,biological
3,PRA No. 2011/12883 Date published 25 Oct 2011 ...,cereals and bakery products,cereals and bakery products,cereals and bakery products,allergens,fraud,allergens
4,Recall of Batches of Seven Seas Omega-3 and Im...,"dietetic foods, food supplements, fortified foods","dietetic foods, food supplements, fortified foods","dietetic foods, food supplements, fortified foods",chemical,chemical,chemical
5,Recall of a Batch of The Wild Wok Vegan Gyoza ...,prepared dishes and snacks,prepared dishes and snacks,cereals and bakery products,foreign bodies,foreign bodies,foreign bodies
6,"WASHINGTON, March 1, 2015 – Oscar’s Hickory Ho...",meat and meat products (other than poultry),meat and meat products (other than poultry),meat and meat products (other than poultry),other hazard,biological,biological
7,Page Content Cerebos (Australia) Ltd has recal...,ices and desserts,ices and desserts,ices and desserts,foreign bodies,foreign bodies,foreign bodies
8,The Co-operative Group recalls certain batch c...,non-alcoholic beverages,non-alcoholic beverages,non-alcoholic beverages,organoleptic aspects,organoleptic aspects,organoleptic aspects
9,"WASHINGTON, Aug. 3, 2018 – Star Natural Meats,...",meat and meat products (other than poultry),meat and meat products (other than poultry),meat and meat products (other than poultry),fraud,fraud,allergens


# Cohen's $\kappa$:

## Annotators and Expert vs. Ground Truth:

In [7]:
# Cohen's kappa between the annotators' labels and the ground truth,
# computed once per label type.
# The two print labels differ in trailing whitespace (the second has none);
# they are kept exactly as-is so the printed lines stay unchanged.
for label, prefix in (('Hazard Category: ', 'HazCat'),
                      ('Product Category:', 'ProdCat')):
    kappa = cohen_kappa_score(
        data[prefix + '_annotator'].values,
        data[prefix + '_true'].values,
    )
    print(label, kappa)

Hazard Category:  0.7609561752988048
Product Category: 0.6394230769230769


In [8]:
# Cohen's kappa between the expert's labels and the ground truth,
# computed once per label type.
for label, prefix in (('Hazard Category: ', 'HazCat'),
                      ('Product Category: ', 'ProdCat')):
    kappa = cohen_kappa_score(
        data[prefix + '_expert'].values,
        data[prefix + '_true'].values,
    )
    print(label, kappa)

Hazard Category:  0.6356275303643725
Product Category:  0.6038415366146459


## Annotators vs Expert:

In [9]:
# Cohen's kappa between the annotators' labels and the expert's labels,
# computed once per label type.
for label, prefix in (('Hazard Category: ', 'HazCat'),
                      ('Product Category: ', 'ProdCat')):
    kappa = cohen_kappa_score(
        data[prefix + '_annotator'].values,
        data[prefix + '_expert'].values,
    )
    print(label, kappa)

Hazard Category:  0.5873452544704264
Product Category:  0.7777777777777778
