Exploring annotator disagreements in Si-NLI

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Seed NumPy's legacy global RNG so that any later np.random call is reproducible.
np.random.seed(42)

In [None]:
# Read the SI-NLI training split (tab-separated file) into a DataFrame.
train = pd.read_csv(
	'data/SI-NLI/train.tsv',
	sep="\t",
)

In [None]:
# Share of each label, relative to the total number of rows in the train set.
train['label'].value_counts().div(len(train))

In [None]:
# Number of rows in the train set.
train.shape[0]

In [None]:
# Single pass over the train rows: gather the annotations that are present and
# remember every row in which at least one of them departs from the label.
disagreements = []  # row indices with an annotation that differs from the label
anns_labels = []  # one [label, annotations, index] entry per row
for idx, sample in train.iterrows():
	# read the three annotation columns and keep only the non-null values
	raw_votes = [sample['annotation_' + k] for k in ('1', '2', '3')]
	votes = [v for v in raw_votes if not pd.isnull(v)]
	# reduce the label to its upper-cased first character, the form the
	# annotations are compared against
	gold = sample['label'][0].upper()
	# a row with no annotations never counts as a disagreement
	if any(v != gold for v in votes):
		disagreements.append(idx)
	anns_labels.append([gold, votes, idx])

print(len(disagreements))

In [None]:
# For every row, pair the label with the annotation value given most often.
# Ties go to the value that appears first among the row's annotations, because
# max() returns the first maximal key of the insertion-ordered tally.
majority_annotations_disagreements = []
for gold, votes, idx in anns_labels:
	# tally how often each annotation value occurs in this row
	tally = {}
	for vote in votes:
		tally[vote] = tally.get(vote, 0) + 1
	# rows without any annotation have no majority
	top_vote = max(tally, key=tally.get) if tally else None
	majority_annotations_disagreements.append([gold, top_vote, idx])

len(majority_annotations_disagreements)

In [None]:
# List the rows whose majority annotation exists but differs from the label.
[
	entry
	for entry in majority_annotations_disagreements
	if entry[1] is not None and entry[0] != entry[1]
]

## Confusion matrix

In [None]:
# Confusion matrix of individual annotations against the gold label.
# Row index = gold label, column index = annotation, both in LABELS order, i.e.
# the usual "true label on rows" layout that the heatmap's axis labels assume.
# Every single annotation counts once; labels or annotations that are not in
# LABELS are ignored.
LABELS = ['E', 'N', 'C']
label_pos = {name: pos for pos, name in enumerate(LABELS)}
confusion_matrix = np.zeros((len(LABELS), len(LABELS)))
for gold, votes, _idx in anns_labels:
	if gold not in label_pos:
		continue
	for vote in votes:
		if vote in label_pos:
			confusion_matrix[label_pos[gold], label_pos[vote]] += 1

In [None]:
# Show the raw count matrix; rows and columns follow the index order used when it was filled.
confusion_matrix

In [None]:
# Sanity check: number of rows labelled 'E' that received at least one 'N' annotation.
sum(1 for gold, votes, _ in anns_labels if gold == 'E' and 'N' in votes)

In [None]:
# Heatmap of the annotation-vs-label counts, saved as a PDF and shown inline.
# seaborn puts matrix rows on the y axis and columns on the x axis; the axis labels
# below name y the correct label and x the predicted one, so `confusion_matrix`
# has to hold gold labels on rows for the figure to read correctly.
# NOTE(review): the title and file name refer to the validation set, while the matrix
# in this notebook is computed from the train split - confirm which is intended.
cm = confusion_matrix
tick_names = ['implikacija', 'nevtralno', 'kontradikcija']
fig, ax = plt.subplots(figsize=(5, 4))
# the colour scale is capped at 600 counts
sns.heatmap(cm, annot=True, xticklabels=tick_names, yticklabels=tick_names, cmap='gray_r', vmax=600, fmt='g', ax=ax)
ax.set_xlabel('Napovedana oznaka')
ax.set_ylabel('Pravilna oznaka')
ax.set_title('Si-NLI validacijska')
os.makedirs('./images', exist_ok=True)  # savefig does not create missing directories
fig.savefig('./images/sinli-cm-val.pdf', bbox_inches='tight')
plt.show()

Fraction of annotations that agree with the labels:

In [None]:
# Annotations on the diagonal (annotation equals label) divided by all counted annotations.
np.trace(confusion_matrix) / confusion_matrix.sum()

Fraction of examples where every available annotation agrees with the label:

In [None]:
# Complement of the share of rows flagged in `disagreements`; rows without any
# annotation are never flagged, so they count towards agreement here.
1 - len(disagreements) / train.shape[0]

## Export unanimous samples of the train set to TSV

In [None]:
# Export the train rows that are not listed in `disagreements` as a TSV file.
# Rows whose annotation columns are all empty are exported as well, since they
# never enter `disagreements`.
unanimous_df = train[~train.index.isin(disagreements)]
os.makedirs('data_out/SI-NLI', exist_ok=True)  # to_csv does not create missing directories
unanimous_df.to_csv('data_out/SI-NLI/train_unanimous.tsv', sep="\t", index=False)
