In [1]:
import pandas as pd
import numpy as np

In [2]:
# Raw crowd answers: a tab-separated file, path relative to the notebook
crowd_data_path = "../../data/crowd_data.tsv"
crowd_df = pd.read_csv(crowd_data_path, sep="\t")
crowd_df.head()

Unnamed: 0,HITId,HITTypeId,Title,Reward,AssignmentId,WorkerId,AssignmentStatus,WorkTimeInSeconds,LifetimeApprovalRate,Input1ID,Input2ID,Input3ID,AnswerID,AnswerLabel,FixPosition,FixValue
0,1,7QT,Is this triple correct or incorrect?,$0.50,1,2133ICYWE97,Submitted,60,99%,wd:Q11621,wdt:P2142,792910554,1.0,CORRECT,,
1,1,7QT,Is this triple correct or incorrect?,$0.50,2,2133U7HKDLO,Submitted,40,40%,wd:Q11621,wdt:P2142,792910554,1.0,CORRECT,yes,yes
2,1,7QT,Is this triple correct or incorrect?,$0.50,3,928UJANWZ12,Submitted,50,98%,wd:Q11621,wdt:P2142,792910554,2.0,INCORRECT,,
3,1,7QT,Is this triple correct or incorrect?,$0.50,4,1726JMZQW,Submitted,80,70%,wd:Q11621,wdt:P2142,792910554,1.0,CORRECT,,
4,1,7QT,Is this triple correct or incorrect?,$0.50,5,2134U7HKDMM,Submitted,2,70%,wd:Q11621,wdt:P2142,792910554,1.0,CORRECT,,


In [3]:
def convert_percentage_to_float(percentage_string):
    """
    Convert a percentage string to a float fraction.

    For example, '99%' becomes 0.99. Whitespace around the number or the
    percent sign is ignored, so ' 99 % ' is converted as well.

    Args:
        percentage_string: The value to convert. Anything that is not a
            ``str`` (e.g. an already converted float or a missing value)
            is returned unchanged, which makes repeated application safe.

    Returns:
        The percentage divided by 100 for string input, otherwise the
        input itself.

    Raises:
        ValueError: If the string does not contain a number once the
            whitespace and percent signs are removed.
    """
    if not isinstance(percentage_string, str):
        return percentage_string

    # Strip whitespace first: str.strip("%") alone leaves the sign in place
    # when it is followed by a space, and float() then rejects the string.
    number_text = percentage_string.strip().strip("%")
    return float(number_text) / 100


# Turn the '99%'-style approval rates into fractions so they can be compared numerically
approval_rate_raw = crowd_df["LifetimeApprovalRate"]
crowd_df["LifetimeApprovalRate"] = approval_rate_raw.apply(convert_percentage_to_float)

In [4]:
# Summary statistics of the numeric columns, taken before any rows are filtered out
crowd_df.describe()

Unnamed: 0,HITId,AssignmentId,WorkTimeInSeconds,LifetimeApprovalRate,AnswerID
count,305.0,305.0,305.0,305.0,304.0
mean,31.0,153.0,108.8,0.747443,1.473684
std,17.635752,88.190136,95.548706,0.204038,0.50013
min,1.0,1.0,2.0,0.4,1.0
25%,16.0,77.0,4.0,0.69,1.0
50%,31.0,153.0,120.0,0.8,1.0
75%,46.0,229.0,200.0,0.98,2.0
max,61.0,305.0,337.0,0.99,2.0


In [5]:
# Rows strictly below either threshold are discarded
MIN_APPROVAL_RATE = 0.5
MIN_WORK_TIME_SECONDS = 40

low_approval = crowd_df.LifetimeApprovalRate < MIN_APPROVAL_RATE
too_fast = crowd_df.WorkTimeInSeconds < MIN_WORK_TIME_SECONDS

# Negating the masks (rather than using >=) keeps rows whose value is missing
crowd_df = crowd_df[~(low_approval | too_fast)]

In [6]:
# Summary statistics of the numeric columns for the current state of crowd_df
crowd_df.describe()

Unnamed: 0,HITId,AssignmentId,WorkTimeInSeconds,LifetimeApprovalRate,AnswerID
count,183.0,183.0,183.0,183.0,183.0
mean,31.0,152.251366,174.065574,0.881257,1.508197
std,17.655121,88.228115,66.240206,0.100444,0.501304
min,1.0,1.0,50.0,0.7,1.0
25%,16.0,76.5,120.0,0.8,1.0
50%,31.0,152.0,200.0,0.85,2.0
75%,46.0,227.5,240.0,0.98,2.0
max,61.0,303.0,337.0,0.99,2.0


In [7]:
# Preview the first rows of the current crowd_df
crowd_df.head()

Unnamed: 0,HITId,HITTypeId,Title,Reward,AssignmentId,WorkerId,AssignmentStatus,WorkTimeInSeconds,LifetimeApprovalRate,Input1ID,Input2ID,Input3ID,AnswerID,AnswerLabel,FixPosition,FixValue
0,1,7QT,Is this triple correct or incorrect?,$0.50,1,2133ICYWE97,Submitted,60,0.99,wd:Q11621,wdt:P2142,792910554,1.0,CORRECT,,
2,1,7QT,Is this triple correct or incorrect?,$0.50,3,928UJANWZ12,Submitted,50,0.98,wd:Q11621,wdt:P2142,792910554,2.0,INCORRECT,,
3,1,7QT,Is this triple correct or incorrect?,$0.50,4,1726JMZQW,Submitted,80,0.7,wd:Q11621,wdt:P2142,792910554,1.0,CORRECT,,
6,2,7QT,Is this triple correct or incorrect?,$0.50,7,2133ICYWE97,Submitted,120,0.99,wd:Q603545,wdt:P2142,4300000,1.0,CORRECT,,
7,2,7QT,Is this triple correct or incorrect?,$0.50,8,928UJANWZ12,Submitted,60,0.98,wd:Q603545,wdt:P2142,4300000,1.0,CORRECT,,


In [8]:
# Group the crowd answers by task (HITId) so that a majority fix value can be determined per HIT
grouped_data = crowd_df.groupby(["HITId"])

# Revised function to apply the majority rule for correction
def apply_majority_correction(group):
    """Apply the majority fix, if there is one, to every answer of one HIT.

    A fix value wins when strictly more than half of the rows in ``group``
    propose it. The triple position to overwrite is taken from the rows that
    proposed the winning value (their most frequent ``FixPosition``), and the
    correction is then applied uniformly to all rows of the group. Rows that
    did not submit the fix therefore end up with the same corrected triple
    instead of keeping the stale value while being labelled 'CORRECTED'.

    Args:
        group: DataFrame holding the answers of a single HIT. It must contain
            the columns ``FixValue``, ``FixPosition`` and ``AnswerLabel``,
            plus the ``Input1ID`` / ``Input2ID`` / ``Input3ID`` column that
            the winning fix position refers to.

    Returns:
        A copy of ``group`` with an additional ``CorrectedValue`` column.
        Without a majority fix the copy is otherwise unchanged and
        ``CorrectedValue`` is ``None``. With a majority fix,
        ``CorrectedValue`` holds the winning value, the targeted input column
        is overwritten with it and ``AnswerLabel`` is set to 'CORRECTED' on
        every row. If the fix position is missing or unknown, no input column
        is overwritten.
    """
    # Which triple component each fix position refers to
    position_to_column = {
        'Subject': 'Input1ID',
        'Predicate': 'Input2ID',
        'Object': 'Input3ID',
    }

    corrected = group.copy()
    corrected['CorrectedValue'] = None

    # value_counts() skips missing values and sorts by descending frequency,
    # so the first entry is the most frequently proposed fix.
    fix_value_counts = group['FixValue'].value_counts()
    if fix_value_counts.empty or fix_value_counts.iloc[0] <= len(group) / 2:
        return corrected

    most_common_fix = fix_value_counts.index[0]

    # Only the rows that proposed the winning value say where it belongs.
    proposed_winner = group['FixValue'] == most_common_fix
    position_counts = group.loc[proposed_winner, 'FixPosition'].value_counts()
    target_column = None
    if not position_counts.empty:
        target_column = position_to_column.get(position_counts.index[0])

    corrected['CorrectedValue'] = most_common_fix
    if target_column is not None:
        corrected[target_column] = most_common_fix
    # Update the AnswerLabel to 'CORRECTED'
    corrected['AnswerLabel'] = 'CORRECTED'

    return corrected

# Apply the function to each group and stack the per-HIT results into one frame
# with a fresh 0..n-1 index. The groups are iterated explicitly rather than via
# GroupBy.apply: passing the grouping column to the applied function is
# deprecated there, and once it is excluded HITId would be missing from the result.
corrected_data = pd.concat(
    [apply_majority_correction(hit_answers) for _, hit_answers in grouped_data],
    ignore_index=True,
)

# Display a sample of the corrected data
corrected_data.sample(10)


Unnamed: 0,HITId,HITTypeId,Title,Reward,AssignmentId,WorkerId,AssignmentStatus,WorkTimeInSeconds,LifetimeApprovalRate,Input1ID,Input2ID,Input3ID,AnswerID,AnswerLabel,FixPosition,FixValue,CorrectedValue
37,13,7QT,Is this triple correct or incorrect?,$0.50,62,928UJANWZ12,Submitted,240,0.98,wd:Q61928601,ddis:indirectSubclassOf,wd:Q95074,1.0,CORRECT,,,
168,57,9QT,Is this triple correct or incorrect?,$0.50,281,AALKMII97,Submitted,240,0.98,wd:Q223596,wdt:P1431,wd:Q457180,1.0,CORRECT,,,
58,20,7QT,Is this triple correct or incorrect?,$0.50,97,928UJANWZ12,Submitted,236,0.98,wd:Q15239622,ddis:indirectSubclassOf,wd:Q27096213,2.0,INCORRECT,,,
147,50,9QT,Is this triple correct or incorrect?,$0.50,246,AALKMII97,Submitted,240,0.98,wd:Q931557,wdt:P750,wd:Q80948336,1.0,CORRECT,,,
12,5,7QT,Is this triple correct or incorrect?,$0.50,21,2133ICYWE97,Submitted,120,0.99,wd:Q1628022,wdt:P577,1951-01-01,1.0,CORRECT,,,
6,3,7QT,Is this triple correct or incorrect?,$0.50,11,2133ICYWE97,Submitted,140,0.99,wd:Q16911843,wdt:P577,2014-01-18,2.0,INCORRECT,Object,2014-02-18,
116,39,8QT,Is this triple correct or incorrect?,$0.50,194,GGUI83657S,Submitted,120,0.85,wd:Q814781,wdt:.P344,wd:Q40087803,1.0,CORRECT,,,
54,19,7QT,Is this triple correct or incorrect?,$0.50,91,2133ICYWE97,Submitted,140,0.99,wd:Q15715406,ddis:indirectSubclassOf,wd:Q27096213,2.0,INCORRECT,,,
87,30,8QT,Is this triple correct or incorrect?,$0.50,146,AALKMII98,Submitted,238,0.98,wd:Q23999890,wdt:P577,2015-01-05,1.0,CORRECT,,,
154,52,9QT,Is this triple correct or incorrect?,$0.50,257,HHCKW1111,Submitted,200,0.8,wd:Q696646,wdt:P3174,wd:Q1315917,1.0,CORRECT,,,


In [9]:
# Recalculate the majority vote: the most frequent label per HIT
# (value_counts() sorts by descending frequency, so position 0 is the winner)
labels_by_hit = corrected_data.groupby("HITId")["AnswerLabel"]
majority_vote = labels_by_hit.agg(lambda labels: labels.value_counts().index[0])
final_answers = majority_vote.reset_index()

# Vote counts per HIT, one column per label; absent labels count as 0
answer_distribution = labels_by_hit.value_counts().unstack(fill_value=0)

# Triple taken from the answers not labelled INCORRECT; first() picks the
# first non-null value per column within each HIT
is_not_incorrect = corrected_data["AnswerLabel"] != "INCORRECT"
input_columns = ["Input1ID", "Input2ID", "Input3ID"]
correct_inputs = corrected_data[is_not_incorrect].groupby("HITId").first()[input_columns]

# Majority label + vote distribution + inputs, all keyed by HITId
final_answers_with_distribution = (
    final_answers
    .join(answer_distribution, on="HITId")
    .join(correct_inputs, on="HITId")
)

# Batch identifier (HITTypeId) of every HIT
hit_batches = corrected_data[["HITId", "HITTypeId"]].drop_duplicates().set_index("HITId")
final_answers_with_batch = final_answers_with_distribution.join(hit_batches, on="HITId")

# Display the updated table with HITTypeId
final_answers_with_batch.sample(10)

Unnamed: 0,HITId,AnswerLabel,CORRECT,CORRECTED,INCORRECT,Input1ID,Input2ID,Input3ID,HITTypeId
19,20,INCORRECT,1,0,2,wd:Q15239622,ddis:indirectSubclassOf,wd:Q27096213,7QT
44,45,CORRECTED,0,3,0,wd:Q1032889,wdt:P58,wd:Q4762311,9QT
18,19,INCORRECT,1,0,2,wd:Q15715406,ddis:indirectSubclassOf,wd:Q27096213,7QT
35,36,CORRECTED,0,3,0,wd:Q28974159,wdt:P750,wd:Q907311,8QT
7,8,INCORRECT,0,0,3,,,,7QT
52,53,CORRECT,2,0,1,wd:Q6782400,wdt:P945,wd:Q17,9QT
48,49,CORRECT,2,0,1,wd:Q4335275,wdt:P520,wd:Q52382294,9QT
11,12,CORRECT,3,0,0,wd:Q104649845,ddis:indirectSubclassOf,wd:Q43229,7QT
30,31,CORRECT,2,0,1,wd:Q841233,wdt:P2142,10696220,8QT
23,24,CORRECTED,0,3,0,wd:Q427386,wdt:P2142,176997168,8QT


In [10]:
from statsmodels.stats.inter_rater import fleiss_kappa

# Vote counts per answer label, one row per (HITTypeId, HITId)
grouped_data = (
    crowd_df.groupby(['HITTypeId', 'HITId', 'AnswerLabel'])
    .size()
    .unstack(fill_value=0)
)

# One Fleiss' kappa per batch, computed on that batch's HIT x label count table
fleiss_kappa_results = {
    batch_id: fleiss_kappa(batch_counts.reset_index(level=0, drop=True))
    for batch_id, batch_counts in grouped_data.groupby(level=0)
}

# Tabular form for display and for merging on HITTypeId
fleiss_kappa_df = pd.DataFrame(
    list(fleiss_kappa_results.items()),
    columns=['HITTypeId', 'FleissKappa'],
)

fleiss_kappa_df.head()

Unnamed: 0,HITTypeId,FleissKappa
0,7QT,0.236364
1,8QT,0.04
2,9QT,0.19911


In [11]:
# Attach each batch's Fleiss' kappa to its HITs via the shared HITTypeId
final_joined_df = (
    final_answers_with_batch
    .merge(fleiss_kappa_df, on='HITTypeId')
    .reset_index(drop=True)
)
final_joined_df.sample(10)


Unnamed: 0,HITId,AnswerLabel,CORRECT,CORRECTED,INCORRECT,Input1ID,Input2ID,Input3ID,HITTypeId,FleissKappa
20,21,INCORRECT,1,0,2,wd:Q708135,ddis:indirectSubclassOf,wd:Q618779,7QT,0.236364
55,56,CORRECT,2,0,1,wd:Q10623856,wdt:P20,wd:Q2096,9QT,0.19911
28,29,CORRECTED,0,3,0,wd:Q639070,wdt:P161,Q5423258,8QT,0.04
30,31,CORRECT,2,0,1,wd:Q841233,wdt:P2142,10696220,8QT,0.04
37,38,CORRECTED,0,3,0,wd:Q885281,wdt:P161,wd:Q3087184,8QT,0.04
27,28,CORRECT,3,0,0,wd:Q15055043,wdt:P2142,511200000,8QT,0.04
29,30,CORRECT,3,0,0,wd:Q23999890,wdt:P577,2015-01-05,8QT,0.04
44,45,CORRECTED,0,3,0,wd:Q1032889,wdt:P58,wd:Q4762311,9QT,0.19911
43,44,CORRECTED,0,3,0,wd:Q2188914,wdt:P161,Q17350908,9QT,0.19911
50,51,CORRECT,3,0,0,wd:Q2235250,wdt:P161,wd:Q599673,9QT,0.19911


In [12]:
# Keep every HIT whose majority label is anything other than "INCORRECT"
is_rejected = final_joined_df.AnswerLabel == "INCORRECT"
final_only_correct = final_joined_df[~is_rejected]
final_only_correct.sample(10)

Unnamed: 0,HITId,AnswerLabel,CORRECT,CORRECTED,INCORRECT,Input1ID,Input2ID,Input3ID,HITTypeId,FleissKappa
31,32,CORRECTED,0,3,0,Q1168152,wdt:P161,wd:Q105825,8QT,0.04
44,45,CORRECTED,0,3,0,wd:Q1032889,wdt:P58,wd:Q4762311,9QT,0.19911
56,57,CORRECT,2,0,1,wd:Q223596,wdt:P1431,wd:Q457180,9QT,0.19911
33,34,CORRECTED,0,3,0,Q908556,wdt:P161,wd:Q17386547,8QT,0.04
51,52,CORRECT,3,0,0,wd:Q696646,wdt:P3174,wd:Q1315917,9QT,0.19911
22,23,CORRECTED,0,3,0,wd:Q598752,wdt:P577,2011-01-01,8QT,0.04
23,24,CORRECTED,0,3,0,wd:Q427386,wdt:P2142,176997168,8QT,0.04
21,22,CORRECTED,0,3,0,wd:Q1720855,wdt:P577,2010-01-01,8QT,0.04
17,18,CORRECT,3,0,0,wd:Q104649845,ddis:indirectSubclassOf,wd:Q43229,7QT,0.236364
38,39,CORRECT,2,0,1,wd:Q814781,wdt:.P344,wd:Q40087803,8QT,0.04


In [13]:
# Write the aggregated answers to a CSV file in the working directory, without the index column
final_only_correct.to_csv("crowdsourcing_final.csv", index=False)

In [18]:
# Look up the aggregated crowd answer for one (Input1ID, Input2ID) pair
subject_matches = final_only_correct['Input1ID'] == "wd:Q885281"
predicate_matches = final_only_correct['Input2ID'] == "wdt:P161"
filtered_df = final_only_correct[subject_matches & predicate_matches]

In [19]:
# Show the row(s) selected by the lookup
filtered_df

Unnamed: 0,HITId,AnswerLabel,CORRECT,CORRECTED,INCORRECT,Input1ID,Input2ID,Input3ID,HITTypeId,FleissKappa
37,38,CORRECTED,0,3,0,wd:Q885281,wdt:P161,wd:Q3087184,8QT,0.04


In [20]:
# Series.item() raises ValueError unless filtered_df holds exactly one row,
# so an ambiguous or empty lookup fails here instead of producing a wrong answer.
correct = filtered_df["CORRECT"].item()
# The CORRECTED column only exists when at least one answer carries that label.
corrected = filtered_df["CORRECTED"].item() if "CORRECTED" in filtered_df else 0
incorrect = filtered_df["INCORRECT"].item()
result = filtered_df["Input3ID"].item()
kappa = round(filtered_df["FleissKappa"].item(), 2)

# Answers labelled 'CORRECTED' back the value being returned, so they count as
# support. Ignoring them reported "0 support votes and 0 reject votes" for
# every corrected triple.
support = correct + corrected

answer = f"{result} - according to the crowd, who had an inter-rater agreement of {kappa} in this batch. The answer distribution for this specific task was {support} support votes and {incorrect} reject votes."

In [24]:
# Print the bare entity id, but only when the value really starts with the
# "wd:" prefix. A substring test would also match "wd" in the middle of a value
# and then cut off three unrelated leading characters.
WD_PREFIX = "wd:"
if result.lower().startswith(WD_PREFIX):
    print(result[len(WD_PREFIX):])

Q3087184
