In [1]:
from pandas import Series, DataFrame
import pandas as pd
import math
import numpy as np

## Q1 Read in data

In [2]:
# Gold-standard answers: tab-separated file with no header row, so the two
# column names are supplied explicitly.
gold = pd.read_csv(
    'gold.txt',
    sep='\t',
    header=None,
    names=['url', 'category'],
)

In [3]:
# Turk-provided labels: tab-separated file with no header row; each row is
# named as (turk, url, category).
labels = pd.read_csv(
    'labels.txt',
    sep='\t',
    header=None,
    names=['turk', 'url', 'category'],
)

## Q2 Split into two DataFrames

In [4]:
# Rows of `labels` whose url also appears in the gold DataFrame
url_in_gold = labels['url'].isin(gold['url'])
labels_on_gold = labels.loc[url_in_gold]

In [5]:
# Rows of `labels` whose url does NOT appear in the gold DataFrame
# (the complement of the url-in-gold selection).
labels_unknown = labels.loc[~labels['url'].isin(gold['url'])]

## Q3 Compute accuracies of turks

In [6]:
# Pair every turk's guess with the gold answer for the same url.
# Both inputs have a 'category' column; the suffixes turn them into
# 'category_guess' (from the turk labels) and 'category_answer' (from gold).
labels_and_answers = labels_on_gold.merge(
    gold,
    on='url',
    suffixes=('_guess', '_answer'),
)

In [7]:
# First column of the rater_goodness table: how many gold-set urls each
# turk rated. value_counts() keeps its most-frequent-first ordering.
turk_rating_count = (
    labels_and_answers['turk']
    .value_counts()
    .rename_axis('turk')
    .reset_index(name='number_of_ratings')
)

In [8]:
# Second column of the rater_goodness table: accuracy.
#
# Mark each guess as 1 when it matches the gold answer and 0 otherwise;
# storing ints (rather than booleans) makes the per-turk mean a plain average.
labels_and_answers['result'] = (
    labels_and_answers['category_guess'] == labels_and_answers['category_answer']
).astype(int)

In [9]:
# Fraction of correct guesses per turk (mean of the 0/1 'result' column)
turk_accuracies = (
    labels_and_answers
    .groupby('turk')['result']
    .mean()
    .rename('average_correctness')
    .reset_index()
)

In [10]:
# One row per turk: number of ratings alongside average correctness
rater_goodness = turk_rating_count.merge(turk_accuracies, on='turk')

In [11]:
# Index the table by turk id.
# The original in-place set_index raised KeyError when this cell was executed
# a second time, because 'turk' had already been moved out of the columns.
# Rebinding behind a column check makes the cell safe to re-run.
if 'turk' in rater_goodness.columns:
    rater_goodness = rater_goodness.set_index('turk')

## Q4 Odds ratios

In [12]:
# Odds of a correct rating: odds = p / (1.001 - p), where p is the turk's
# average correctness. With 1.001 in the denominator, p == 1 does not
# divide by zero.
rater_goodness['odds'] = rater_goodness['average_correctness'].pipe(
    lambda p: p / (1.001 - p)
)

## Q5 Most accurate turks

In [23]:
# Filter to turks who have rated 20 or more urls (as counted in
# 'number_of_ratings')
mask = rater_goodness['number_of_ratings'].ge(20)
twenty_plus_raters = rater_goodness.loc[mask]

In [27]:
# Top 10 turks (among those with 20 or more url ratings) by average
# correctness; the index holds the turk ids.
(
    twenty_plus_raters
    .sort_values('average_correctness', ascending=False)
    .head(10)
    .index
)

Index(['A2U0R4X38GUKZE', 'A22C0PJUBFJTI0', 'A23YQUBXZPKILZ', 'ATVALOQVDCMZW',
       'A1HIXWH4OXT8S4', 'A3220HG1O83HQ4', 'A20PWAB7G3HDHU', 'A32W20KGQXS0LL',
       'AJSJVK40F5HM6', 'A31OCN4MNHUQ6W'],
      dtype='object', name='turk')

## Q6 Most accurate turks