In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import Counter
from statsmodels.stats.inter_rater import fleiss_kappa

ModuleNotFoundError: No module named 'statsmodels'

In [None]:
# Load the manual annotation table from disk
file_path = 'annotationsManual_ALL.csv'
data = pd.read_csv(file_path)

# Report how many rows were read, then preview the first few
print(len(data))
data.head()

# Majority Voting to Select the Labels, with Fleiss' Kappa as the Agreement Check
Use statsmodels to calculate Fleiss' Kappa:

Fleiss' Kappa is calculated with the statsmodels library, whose `fleiss_kappa` function accepts a NumPy array of counts (one row per image, one column per category).
Interpret the results as follows:

- < 0.0: Poor agreement.
- 0.00–0.20: Slight agreement.
- 0.21–0.40: Fair agreement.
- 0.41–0.60: Moderate agreement.
- 0.61–0.80: Substantial agreement.
- 0.81–1.00: Almost perfect agreement.


In [None]:
def create_count_table(data, attribute_prefix):
    """Build an image-by-category table of annotator label counts.

    Each cell holds how many annotator columns assigned that category to that
    image. Missing labels (NaN) are not counted.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``'Image'`` column plus one column per annotator whose
        name starts with ``attribute_prefix`` (e.g. ``'Age_A1'``).
    attribute_prefix : str
        Prefix selecting the annotator columns of one attribute.

    Returns
    -------
    pandas.DataFrame
        Integer counts with one row per distinct ``Image`` value (sorted) and
        one column per observed category. Rows of ``data`` that share an
        ``Image`` value are pooled into a single row.
    """
    # Match on the prefix rather than on a substring, so derived columns such
    # as 'Final_Age' are never counted as an extra rater when this runs after
    # the majority-vote columns have been added to `data`.
    rater_columns = [col for col in data.columns if str(col).startswith(attribute_prefix)]

    # One row per distinct image keeps the index unique.
    images = pd.Index(data['Image'].dropna().unique(), name='Image').sort_values()
    if not rater_columns:
        return pd.DataFrame(index=images)

    # Long form: one (Image, label) pair per annotator, without missing labels.
    long_form = (
        data[['Image', *rater_columns]]
        .melt(id_vars='Image')
        .dropna(subset=['value'])
    )

    # Cross-tabulating counts every label once and keeps labels in their
    # original type instead of converting them to strings.
    count_table = pd.crosstab(long_form['Image'], long_form['value']).rename_axis(columns=None)

    # Images whose labels are all missing still get an all-zero row.
    return count_table.reindex(images, fill_value=0)


# One image-by-category count table per annotated attribute
gender_counts, age_counts, ethnicity_counts = (
    create_count_table(data, prefix) for prefix in ('Gender', 'Age', 'Ethnicity')
)
ethnicity_counts

In [None]:
from statsmodels.stats.inter_rater import fleiss_kappa

# Fleiss' Kappa per attribute, computed from the count tables as NumPy arrays
gender_kappa, age_kappa, ethnicity_kappa = (
    fleiss_kappa(counts.to_numpy())
    for counts in (gender_counts, age_counts, ethnicity_counts)
)

# Report the agreement scores in the order Gender, Age, Ethnicity
print(f"Fleiss' Kappa for Gender: {gender_kappa}")
print(f"Fleiss' Kappa for Age: {age_kappa}")
print(f"Fleiss' Kappa for Ethnicity: {ethnicity_kappa}")


Each category shows substantial agreement.

- These values suggest that the annotators are reasonably consistent, particularly for gender, which has the highest agreement.

In [None]:
def majority_vote_with_ambiguity(data, attribute_prefix):
    """Pick one label per row by majority vote across annotator columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with one column per annotator; the columns used are those whose
        name starts with ``attribute_prefix`` (e.g. ``'Age_A1'``).
    attribute_prefix : str
        Prefix selecting the annotator columns of one attribute.

    Returns
    -------
    pandas.Series
        Indexed like ``data``. Each value is the most frequent label of the
        row, the string ``"Ambiguous"`` when two or more labels tie for the
        highest count, or NaN when the row has no labels at all.
    """
    # Select relevant columns based on the attribute prefix (e.g., Age, Gender, Ethnicity)
    relevant_columns = [col for col in data.columns if col.startswith(attribute_prefix)]

    def _resolve(row):
        # value_counts ignores missing entries, so only real labels vote.
        label_counts = row.value_counts()

        # No annotator labelled this row: there is nothing to vote on, and
        # idxmax() would raise on the empty Series.
        if label_counts.empty:
            return np.nan

        # Ambiguous when more than one label reaches the maximum count.
        top_count = label_counts.max()
        if (label_counts == top_count).sum() > 1:
            return "Ambiguous"
        return label_counts.idxmax()

    final_labels = [_resolve(row) for _, row in data[relevant_columns].iterrows()]

    # Return the final labels as a Series aligned with the input rows
    return pd.Series(final_labels, index=data.index)

# Add one majority-vote column per attribute (ties are flagged by the helper)
final_label_sources = [
    ('Final_Age', 'Age'),
    ('Final_Gender', 'Gender'),
    ('Final_Ethnicity', 'Ethnicity'),
]
for final_column, prefix in final_label_sources:
    data[final_column] = majority_vote_with_ambiguity(data, prefix)

# Write the table, now including the Final_* columns, to CSV without the index
final_labels_file_path = 'FinalManualLabels.csv'  # Update the path as needed
data.to_csv(final_labels_file_path, index=False)

data.head()

In [None]:
# Plot 1: one bar chart per final label column (Gender, Ethnicity, Age)
fig, ax = plt.subplots(1, 3, figsize=(18, 5))

# (column to count, panel title, x-axis label), in left-to-right panel order
panels = [
    ('Final_Gender', "Final Gender Label Distribution", "Gender"),
    ('Final_Ethnicity', "Final Ethnicity Label Distribution", "Ethnicity"),
    ('Final_Age', "Final Age Label Distribution", "Age Group"),
]

for panel_ax, (label_column, panel_title, x_label) in zip(ax, panels):
    label_frequencies = data[label_column].value_counts()
    label_frequencies.plot(kind='bar', ax=panel_ax, color='black')
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel("Count")

plt.tight_layout()
plt.show()


In [None]:
# Work on a copy so the plots below cannot modify `data`
df = data.copy()

# Attribute name paired with its per-annotator columns, as a list of tuples
annotator_columns = list({
    'age': ['Age_A1', 'Age_A2', 'Age_A3'],
    'gender': ['Gender_A1', 'Gender_A2', 'Gender_A3'],
    'ethnicity': ['Ethnicity_A1', 'Ethnicity_A2', 'Ethnicity_A3'],
}.items())

# Function to plot grouped horizontal bar charts for annotations by category and annotator
def plot_grouped_bar_horizontal(attribute, annotator_cols, frame=None):
    """Plot how often each annotator used each category of one attribute.

    Draws one group of horizontal bars per category, with one bar per
    annotator column, and shows the figure. Only pandas and matplotlib are
    used, so no additional plotting import is required.

    Parameters
    ----------
    attribute : str
        Attribute name used in the title and axis label (capitalized).
    annotator_cols : list of str
        Columns holding each annotator's labels for this attribute.
    frame : pandas.DataFrame, optional
        Table to read the columns from. Defaults to the notebook-level ``df``.
    """
    source = df if frame is None else frame

    # Count frequencies of each category for each annotator, all at once
    # instead of growing a DataFrame inside the loop.
    per_annotator = {col: source[col].value_counts() for col in annotator_cols}

    # Keep categories in order of first appearance across annotators.
    category_order = list(dict.fromkeys(
        category for counts in per_annotator.values() for category in counts.index
    ))

    # Rows are categories, columns are annotators; a category an annotator
    # never used counts as 0.
    category_counts = (
        pd.DataFrame(per_annotator)
        .reindex(category_order)
        .fillna(0)
        .astype(int)
    )

    # Plot grouped horizontal bar chart
    fig, ax = plt.subplots(figsize=(8, 5))
    category_counts.plot(kind='barh', ax=ax, colormap='Set2', width=0.8)
    ax.invert_yaxis()  # draw the first category at the top
    ax.set_title(f"{attribute.capitalize()} Annotations by Annotator", fontsize=12)
    ax.set_ylabel(f"{attribute.capitalize()} Categories", fontsize=10)
    ax.set_xlabel("Frequency", fontsize=10)
    ax.legend(title="Annotators", fontsize=8)
    fig.tight_layout()
    plt.show()

# Draw one grouped horizontal bar chart per (attribute, columns) pair
for attribute_spec in annotator_columns:
    plot_grouped_bar_horizontal(*attribute_spec)


#  Ambiguous Cases

In [None]:
import os
import matplotlib.pyplot as plt
from PIL import Image

# Set base directory path
base_dir = "../datasets/AffectNet/Test"  # Replace with the actual path

# Grid dimensions; only the first n_rows * n_cols rows of `data` are considered
n_rows, n_cols = 7, 6
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 12))

# Flatten the axes array for easy iteration
axes = axes.flatten()

# Hide every panel up front. Panels that never receive an image (missing file,
# fewer rows than panels, or an empty table) then stay blank instead of
# showing an empty frame with ticks.
for panel in axes:
    panel.axis("off")

# Index of the next free panel. It advances only when an image is actually
# drawn, so missing files do not leave holes in the grid.
next_panel = 0

for _, row in data.head(len(axes)).iterrows():
    image_name = row["Image"]
    folder_name = row["FolderNames"]

    # Construct the full image path
    image_path = os.path.join(base_dir, folder_name, image_name)

    # Skip rows whose image file is not on disk
    if not os.path.exists(image_path):
        print(f"Image not found: {image_name} in folder {folder_name}")
        continue

    panel = axes[next_panel]
    next_panel += 1

    # Close the file as soon as it has been handed to imshow
    # (assumes imshow reads the pixel data immediately - confirm against the
    # installed matplotlib).
    with Image.open(image_path) as img:
        panel.imshow(img)

    # Title lists the final age, gender, and ethnicity labels
    age = row["Final_Age"]
    gender = row["Final_Gender"]
    ethnicity = row["Final_Ethnicity"]
    title = f"{age}\n{gender}\n{ethnicity}"

    # Highlight rows with any "Ambiguous" label in bold orange with a larger font
    if "Ambiguous" in (age, gender, ethnicity):
        panel.set_title(title, fontsize=12, color="orange", fontweight="bold")
    else:
        panel.set_title(title, fontsize=10, color="black")

# Add a main title for the entire figure
fig.suptitle("Final Labels After Majority Voting", fontsize=16, fontweight="bold")

plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # Adjust layout to fit the main title
plt.show()

- Ambiguous labels are highlighted in bold orange in the grid plot; they mark cases where the annotators did not reach a majority label.

In [None]:
# Summary name paired with the final-label column it is computed from
final_label_columns = [
    ("Age", 'Final_Age'),
    ("Gender", 'Final_Gender'),
    ("Ethnicity", 'Final_Ethnicity'),
]

# Boolean table: True where a final label equals "Ambiguous"
is_ambiguous = data[[column for _, column in final_label_columns]].eq("Ambiguous")

# Rows where at least one attribute is ambiguous
ambiguous_cases = data[is_ambiguous.any(axis=1)]

# Number of ambiguous rows per attribute
ambiguous_summary = {
    name: is_ambiguous[column].sum() for name, column in final_label_columns
}

print("Ambiguous Cases Summary:")
print(ambiguous_summary)

# Display ambiguous cases for review
ambiguous_cases.head()


# Annotation Bias

Because the sample is small, the ambiguity more likely stems from inherent difficulty in the images themselves than from systematic bias among the annotators. Overlapping characteristics in certain categories (e.g., age or ethnicity) make annotation inherently subjective. In the woman’s case, subjective perception of age plays the major role, while in the baby’s case, the lack of distinct ethnic features drives the disagreement.

Since we do not have access to the true labels for these pictures, the best course of action is to accept the final labels, including ambiguous ones, as the ground truth for this dataset. This approach assumes that the observed annotations and their associated majority voting outcomes, even when ambiguous, represent the most reliable consensus available given the data.