In [None]:
import pandas as pd


# Staff


In [None]:
# Read the semicolon-separated staff annotations and discard every row that
# has a missing value in any column, then preview the first rows.
data_staff = (
    pd.read_csv('data/annotations/staff_annotations.csv', sep=';')
    .dropna()
)
data_staff.head(5)


In [None]:
from sklearn.metrics import cohen_kappa_score


# Agreement between the two staff annotators, measured with Cohen's kappa
# (scikit-learn's cohen_kappa_score).
kappa = cohen_kappa_score(data_staff['nlabel_staff1'], data_staff['nlabel_staff2'])

print(f"Pairs len {len(data_staff)}")
print(kappa)

# Raw agreement: how many pairs got the same label from both annotators,
# followed by that number as a share of all pairs.
n_agreeing_pairs = int((data_staff.nlabel_staff1 == data_staff.nlabel_staff2).sum())
print(f"Pairs with agreement {n_agreeing_pairs} --> {round(n_agreeing_pairs / len(data_staff), 2)}")

# Crowdsourcing

In [None]:
# Read the semicolon-separated MTurk annotations and move the five listed
# identifier columns into a MultiIndex, then preview the first rows.
data_mturk = pd.read_csv('data/annotations/mturk_annotations.csv', sep=';')
data_mturk = data_mturk.set_index(
    ["set_id", "ver_id", "reference_yt_id", "candidate_yt_id", "sample_group"])
data_mturk.head(5)


In [None]:
def fleiss_kappa_pivot(df_hit_pivot: pd.DataFrame, n_cat: int = 4):
    """Compute Fleiss' kappa for a table of ratings, rounded to two decimals.

    Rows containing any missing value are dropped and the remaining values are
    cast to ``int64`` before they are aggregated with statsmodels'
    ``aggregate_raters`` and scored with ``fleiss_kappa``.

    Parameters
    ----------
    df_hit_pivot : pd.DataFrame
        Ratings table. Every column of the frame is passed on as ratings, so
        it must contain rating columns only (statsmodels expects subjects in
        rows and raters in columns).
    n_cat : int, default 4
        Number of categories handed to ``aggregate_raters``. With an integer,
        statsmodels assumes the ratings are already coded ``0 .. n_cat - 1``.

    Returns
    -------
    float or None
        The rounded kappa, or ``None`` when a ``ValueError`` is raised while
        casting, aggregating or scoring the ratings.
    """
    from statsmodels.stats.inter_rater import fleiss_kappa, aggregate_raters

    try:
        ratings = df_hit_pivot.dropna().astype('int64')
        # aggregate_raters also returns the category levels; only the
        # subjects-by-categories count table is needed here.
        counts, _ = aggregate_raters(ratings, n_cat)
        return round(fleiss_kappa(counts), 2)
    except ValueError:
        return None


def krippendorff_pivot(df_hit_pivot: pd.DataFrame):
    """Return Krippendorff's alpha (ordinal level) for the five worker columns.

    Only the columns ``worker_ind0`` to ``worker_ind4`` of ``df_hit_pivot`` are
    used. The selection is transposed, so each worker column becomes one row
    of the data handed to ``krippendorff.alpha``.

    Returns the alpha rounded to two decimals, or ``None`` when the
    computation raises an ``AssertionError`` or a ``ValueError``.
    """
    import krippendorff

    vote_columns = ["worker_ind%d" % idx for idx in range(5)]
    votes = df_hit_pivot[vote_columns]
    try:
        score = krippendorff.alpha(votes.T, level_of_measurement='ordinal')
        return round(score, 2)
    except (AssertionError, ValueError):
        return None
    
# Krippendorff's alpha over the worker vote columns of the MTurk annotation
# table loaded above; as the last expression it is shown as the cell output.
krippendorff_pivot(data_mturk)


# Evaluation of MTurk against Staff
Here we evaluate MTurk annotations using staff annotations as ground truth.
First, we load the file and aggregate by majority vote.

In [None]:
# Load the semicolon-separated table used to evaluate MTurk votes against the
# staff annotations.
data_mturk_staff = pd.read_csv('data/annotations/mturk_staff.csv', sep=';')

def attach_mv_labels(data):
    """Attach the MTurk majority-vote label to ``data`` as column ``mv_nlabel``.

    For every row the most frequent value among the five vote columns
    ``worker_ind0`` to ``worker_ind4`` is determined. It becomes the row's
    ``mv_nlabel`` when more than half of the workers (at least three of five)
    voted for it; otherwise ``mv_nlabel`` is NaN.

    Parameters
    ----------
    data : pd.DataFrame
        Table containing the five worker vote columns. The frame is modified
        in place: the ``mv_nlabel`` column is added to it.

    Returns
    -------
    pd.DataFrame
        The same ``data`` object, now carrying ``mv_nlabel``.
    """
    import numpy as np
    from scipy import stats

    # Restrict to the MTurk vote columns.
    vote_cols = ["worker_ind" + str(i) for i in range(5)]
    votes = data[vote_cols].to_numpy()

    # Most frequent vote per row and how often it occurs.
    mode, count = stats.mode(votes, axis=1)

    # Older SciPy keeps the reduced axis (shape (n, 1)), newer SciPy drops it
    # (shape (n,)). Flatten both so there is exactly one entry per row;
    # indexing with [0] would pick only the first row on newer SciPy and
    # broadcast its label to every row.
    mode = np.ravel(mode)
    count = np.ravel(count)

    # A label only counts as majority vote with more than half of the votes.
    data["mv_nlabel"] = np.where(count > len(vote_cols) // 2, mode, np.nan)

    return data

# Add the majority-vote label (`mv_nlabel`) derived from the worker votes.
data_mturk_staff = attach_mv_labels(data_mturk_staff)

# Left-join both staff labels onto the MTurk rows, matching on set and
# candidate video. The first column of the MTurk table is left out of the join.
data_mturk_staff = data_mturk_staff.iloc[:, 1:].merge(
    data_staff[["set_id", "candidate_yt_id", "nlabel_staff1", "nlabel_staff2"]],
    on=["set_id", "candidate_yt_id"],
    how='left',
)

# Keep only the rows for which both staff labels are present.
data_mturk_staff = data_mturk_staff.dropna(subset=["nlabel_staff1", "nlabel_staff2"])

print(f"Videos all: {len(data_mturk_staff)}")




In [None]:
# Agreement among the five MTurk workers on the rows that have staff labels.
alpha = krippendorff_pivot(data_mturk_staff[[f"worker_ind{i}" for i in range(5)]])

print(f"Pairs len {len(data_mturk_staff)}")
print(f"alpha {alpha}")

# Agreement between the two staff annotators on the same rows.
kappa = cohen_kappa_score(data_mturk_staff["nlabel_staff1"], data_mturk_staff["nlabel_staff2"])
print(f"kappa {kappa}")

# Rows for which a majority-vote label exists: absolute number first, then
# their share of all rows.
data_mturk_staff_with_mv = data_mturk_staff[data_mturk_staff["mv_nlabel"].notna()]
print(f"with MV Label {len(data_mturk_staff_with_mv)}")
print(f"with MV Label {round(len(data_mturk_staff_with_mv) / len(data_mturk_staff), 2)}")

# Agreement between the MTurk majority vote and the staff label.
# NOTE(review): `nlabel_staff` is not one of the columns merged in from
# data_staff (those are nlabel_staff1 / nlabel_staff2); presumably it comes
# from mturk_staff.csv itself - confirm the column exists and is the intended one.
kappa = cohen_kappa_score(data_mturk_staff_with_mv["mv_nlabel"], data_mturk_staff_with_mv["nlabel_staff"])

print(f"Kappa MV Label and Students: {round(kappa, 2)}")


In [None]:
# Load the semicolon-separated SHS-YT table that the summary below is built from.
data = pd.read_csv("data/SHS-YT.csv", sep=';')

def rename_sample_groups(x):
    """Replace internal sample-group names in ``x`` by their display names.

    The substrings ``re-move_favs``, ``ditto_favs`` and ``mutual_unconfident``
    are replaced, in that order, by ``DisAgrMus``, ``DisAgrTxt`` and
    ``MutUnc``. A NaN input is returned unchanged.
    """
    # NaN is the only value that does not compare equal to itself.
    if x != x:
        return x

    renamed = x
    for old_name, display_name in (
        ("re-move_favs", "DisAgrMus"),
        ("ditto_favs", "DisAgrTxt"),
        ("mutual_unconfident", "MutUnc"),
    ):
        renamed = renamed.replace(old_name, display_name)
    return renamed

# Worker vote columns (krippendorff_pivot reads these for the per-group agreement).
worker_cols = ['worker_ind0', 'worker_ind1', 'worker_ind2', 'worker_ind3', 'worker_ind4']

# Restrict the data to the vote columns plus the two grouping keys.
pivot_data = data[worker_cols + ['sample_group', 'label']]

# Count items per (sample_group, label). Rows without a label are left out of
# the counts, but rows without a sample group are kept (dropna=False) so that
# they form their own row, which is displayed as "None" at the end.
pivot_table = (
    pivot_data
    .dropna(subset=['label'])
    .groupby(['sample_group', 'label'], dropna=False)
    .size()
    .unstack(fill_value=0)
)

custom_sort_order = ['Match', 'Version', 'Other', 'No Music']

# Put the label columns into the custom order.
pivot_table = pivot_table.reindex(columns=custom_sort_order)

for sample_group in data.sample_group.unique():
    # Select the group's rows with a boolean mask. Unlike a query string built
    # from the group name, this also matches the missing (NaN) sample group
    # and does not break on names containing quotes.
    if pd.isna(sample_group):
        in_group = data.sample_group.isna()
    else:
        in_group = data.sample_group == sample_group
    group_rows = data[in_group]

    # Rows that have an expert label which differs from the MTurk label.
    reannotated = group_rows.nlabel_expert.notna() & (group_rows.nlabel_expert != group_rows.nlabel_mturk)

    pivot_table.loc[sample_group, "Agreement"] = krippendorff_pivot(group_rows)
    pivot_table.loc[sample_group, "Expert Curated"] = int(group_rows.label_expert.count())
    pivot_table.loc[sample_group, "Author Re-Annotated"] = int(group_rows.loc[reannotated, "label_expert"].count())


# Show display names for the groups; the missing sample group becomes "None".
pivot_table.index = pivot_table.index.to_series().apply(rename_sample_groups).fillna("None")
pivot_table = pivot_table.rename_axis("Group").rename_axis("Label", axis=1)
pivot_table

