Direct comparison of expert and crowdsourced image annotations of the same imagery

Analytic code supporting "Observer variability in manual-visual interpretation of aerial imagery of wildlife, with implications for deep learning" - Converse et al. submitted Feb 2024

In [None]:
import csv
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from shapely.geometry import box
from sklearn.metrics import confusion_matrix
import pandas as pd

# Locations of the annotation CSVs for the two observer groups.
# Downstream cells read each file as a header row followed by rows of
# label_id, filename, bbox, class_id.
expert_labels_file = 'path/to/expertlabels.csv'  # expert annotations
crowd_labels_file = 'path/to/crowdlabels.csv'  # crowdsourced (volunteer) annotations

In [None]:
def coco_to_pascal_voc(x1, y1, w, h):
    """Convert a COCO-style box (corner plus size) to Pascal VOC corner format.

    Args:
        x1: x coordinate of the box's top-left corner.
        y1: y coordinate of the box's top-left corner.
        w: box width.
        h: box height.

    Returns:
        A four-element list ``[x1, y1, x1 + w, y1 + h]``: the top-left corner
        followed by the bottom-right corner.
    """
    x2 = x1 + w
    y2 = y1 + h
    return [x1, y1, x2, y2]

In [None]:
# Map class IDs to class names. The two maps are identical; both names are
# kept because later cells reference each of them.
class_map1 = {1: 'Crane', 2: 'Goose', 3: 'Duck'}
class_map2 = {1: 'Crane', 2: 'Goose', 3: 'Duck'}


def _read_labels(csv_path, strip_from_filename=None):
    """Read one annotation CSV into ``{filename: [(label_id, bbox, class_id), ...]}``.

    The file is expected to have a header row followed by rows of exactly four
    columns: label_id, filename, bbox, class_id. The bbox column is a bracketed,
    comma-separated ``x, y, w, h`` string; it is converted to corner format
    ``[x1, y1, x2, y2]`` with ``coco_to_pascal_voc``.

    Args:
        csv_path: path of the CSV file to read.
        strip_from_filename: optional substring removed from every filename
            (e.g. ``'.JPG'``) so names from different files can be compared.

    Returns:
        A ``defaultdict(list)`` keyed by filename. Each value is a list of
        ``(label_id, bbox, class_id)`` tuples, where label_id is the raw string,
        bbox is a list of four floats and class_id is an int.

    Raises:
        ValueError: if a non-empty row does not have exactly four columns, or a
            bbox / class_id value cannot be parsed as a number.
    """
    labels = defaultdict(list)
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields that contain line breaks are parsed correctly.
    with open(csv_path, newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip header (tolerates a completely empty file)
        for row in reader:
            if not row:
                continue  # blank line
            label_id, filename, bbox, class_id = row
            if strip_from_filename:
                filename = filename.replace(strip_from_filename, '')
            # bbox[1:-1] drops the surrounding brackets before splitting
            coords = [float(value) for value in bbox[1:-1].split(',')]
            labels[filename].append(
                (label_id, coco_to_pascal_voc(*coords), int(class_id))
            )
    return labels


# Crowdsourced labels: filenames are used exactly as they appear in the file
crowd_labels = _read_labels(crowd_labels_file)

# Expert labels: '.JPG' is removed from the filenames so they can be matched
# against the crowd filenames
expert_labels = _read_labels(expert_labels_file, strip_from_filename='.JPG')


In [None]:
# Compute the confusion matrix

# Minimum intersection-over-union (IOU) for an expert box and a crowd box to be
# treated as labels of the same object
iou_thresh = 0.5

# Class IDs of matched pairs: truth_list holds the expert class and test_list
# holds the crowd class at the same position
truth_list = []
test_list = []

# Only images that have labels from both experts and volunteers can contribute
for filename in set(crowd_labels.keys()) & set(expert_labels.keys()):

    # Build each shapely box once per image instead of once per compared pair
    crowd_boxes = [(box(*label[1]), label[2]) for label in crowd_labels[filename]]
    expert_boxes = [(box(*label[1]), label[2]) for label in expert_labels[filename]]

    # Compare every expert box with every crowd box. Every pair at or above the
    # threshold is recorded, so one box can take part in more than one match.
    for expert_box, expert_class in expert_boxes:
        for crowd_box, crowd_class in crowd_boxes:

            union_area = crowd_box.union(expert_box).area
            if union_area == 0:
                # Both boxes have zero area: IOU is undefined (0/0), so the
                # pair cannot be a match
                continue

            iou = crowd_box.intersection(expert_box).area / union_area

            if iou >= iou_thresh:
                truth_list.append(expert_class)
                test_list.append(crowd_class)

# Pin the label set so the matrix always has one row and one column per class
# in class_map1, in class-ID order, even when a class never appears among the
# matches. Rows are expert (true) classes and columns are crowd classes.
# NOTE(review): pairs whose class ID is not a key of class_map1 are left out of
# the matrix - confirm no such IDs occur in the data.
class_ids = sorted(class_map1)
conf_matrix = confusion_matrix(truth_list, test_list, labels=class_ids)

# Print the confusion matrix
print('Confusion Matrix:')
print(conf_matrix)

In [None]:
#Visualizing the confusion matrix

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Set the default font size before the figure is created so it applies to this
# figure (e.g. the colorbar ticks) on the first run, not only on a re-run
plt.rcParams.update({'font.size': 28})

# Express each cell as a percentage of all matched label pairs
total_samples = np.sum(conf_matrix)
if total_samples > 0:
    conf_matrix_percent = conf_matrix / total_samples * 100
else:
    # No matches at all: avoid dividing by zero and show 0.0% everywhere
    conf_matrix_percent = np.zeros(conf_matrix.shape)

# Create the confusion matrix visualization
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(conf_matrix, annot=False, fmt='g', cmap='Blues', ax=ax)

# The heatmap is coloured by count, so choose the text colour from the count
# too: cells in the darker half of the colour scale get white text
color_midpoint = (conf_matrix.min() + conf_matrix.max()) / 2

# Write "count (percent%)" in the centre of every cell
for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        value = conf_matrix[i, j]
        percent = conf_matrix_percent[i, j]
        text = f'{value} ({percent:.1f}%)'
        color = 'white' if value > color_midpoint else 'black'
        ax.text(j + 0.5, i + 0.5, text,
                ha='center', va='center', fontsize=20, color=color, weight='bold')

# conf_matrix comes from confusion_matrix(truth_list, test_list): rows (y-axis)
# are the expert classes and columns (x-axis) are the crowd classes
ax.set_xlabel('Crowdsourced Labels', fontsize=28)
ax.set_ylabel('Expert Labels', fontsize=28)
ax.set_title(f'IOU Threshold: {iou_thresh}', fontsize=28)

# Add class names as tick labels with increased font size.
# NOTE(review): presumably class_map1 belongs to the crowd file and class_map2
# to the expert file; the two maps are currently identical.
ax.set_xticklabels(list(class_map1.values()), fontsize=28)
ax.set_yticklabels(list(class_map2.values()), fontsize=28)

plt.show()

In [None]:
#T-test to determine significance of difference between the two groups' per-image label counts

import pandas as pd
from scipy.stats import ttest_rel

# Read the CSV files into pandas DataFrames. Filenames are forced to strings so
# the two files compare consistently. '.JPG' is removed from the expert
# filenames, exactly as the label-loading cell does; without this the expert and
# crowd filenames do not match.
group1_data = pd.read_csv(expert_labels_file, dtype={'filename': str}).assign(
    filename=lambda df: df['filename'].str.replace('.JPG', '', regex=False)
)
group2_data = pd.read_csv(crowd_labels_file, dtype={'filename': str})

# Find the images that exist in both groups (rows with a missing filename are ignored)
common_images = set(group1_data['filename'].dropna()).intersection(
    set(group2_data['filename'].dropna())
)

# Filter the data to include only the common images
group1_filtered = group1_data[group1_data['filename'].isin(common_images)]
group2_filtered = group2_data[group2_data['filename'].isin(common_images)]

# Calculate the number of bounding boxes per image for each group. Both series
# are put in the same explicit image order because ttest_rel pairs values by
# position, not by index label. Images labelled by only one group are excluded.
image_order = sorted(common_images)
group1_counts = group1_filtered.groupby('filename').size().reindex(image_order)
group2_counts = group2_filtered.groupby('filename').size().reindex(image_order)

# Perform the paired t-test (expert counts vs. crowd counts, paired by image)
t_statistic, p_value = ttest_rel(group1_counts, group2_counts)

# Print the results
print("Paired t-test results:")
print(f"Images compared: {len(image_order)}")
print(f"T-statistic: {t_statistic:.4f}")
print(f"P-value: {p_value:.4f}")