## 3) Data Annotation
Create Amazon Mechanical Turk HITs. Note that this is from June 2019, so the code may no longer be compatible with the current Amazon Mechanical Turk API. It is primarily provided as a reference. Because we are not releasing the data at this time, the image links within the qualifying task will no longer work.

### Setup

In [None]:
import json
import pandas as pd
from amt_utils import AMT
%load_ext autoreload
%autoreload 2

In [None]:
# CSV that is loaded into the dataframe the HITs are created from.
csv_file = '../data/second_MTurk_test.csv'
# Drop the trailing '.csv' and append '_filled.csv' to get the path that the
# dataframe is saved to once HIT ids / results have been filled in.
csv_stem = csv_file[:-len('.csv')]
output_csv_file = csv_stem + '_filled.csv'

### Load Dataframe

In [None]:
# Load the CSV at `csv_file`, using its 'index' column as the dataframe index.
df = pd.read_csv(csv_file, index_col='index')

In [None]:
# Display the loaded dataframe for a quick visual check.
df

### Create HIT

In [None]:
# Build the AMT helper from amt_utils.
# NOTE(review): production=False presumably targets the MTurk sandbox rather
# than the live marketplace — confirm in amt_utils before spending money.
amt = AMT(production=False)
# Printing the balance doubles as a check that the connection works.
balance = amt.balance()
print("Account balance:", balance)

In [None]:
# Read the HTML for the task. The context manager closes the file handle as
# soon as the contents are read instead of leaving it open.
with open('./MTurkTask.html', 'r') as html_file:
    html_layout = html_file.read()

# HTMLQuestion envelope; the `{}` placeholder inside the CDATA section is
# filled with the task HTML below.
QUESTION_XML = """<HTMLQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2011-11-11/HTMLQuestion.xsd">
        <HTMLContent><![CDATA[{}]]></HTMLContent>
        <FrameHeight>650</FrameHeight>
        </HTMLQuestion>"""
# Complete question XML that is passed to amt.create_hits.
original_xml = QUESTION_XML.format(html_layout)

In [None]:
# only needed once; will throw an error if the qualifying task already exists
# A dedicated variable name is used so the main `csv_file` path from the Setup
# section is not overwritten (re-running the "Load Dataframe" cell would
# otherwise load the qualifying-task CSV instead).
qualifying_csv_file = '../data/qualifying_task_filled.csv'
qualifying_df = pd.read_csv(qualifying_csv_file, index_col='index')
# example_indices are passed through to amt_utils unchanged.
amt.generate_qualifying_task(qualifying_df, example_indices=[28,132,331,3])

In [None]:
# Worker ids that should receive the qualification and/or the notification.
good_workers = ['SOME_ID_HERE']
#amt.give_qualification('TASK_ID_HERE', good_workers)
# Subject and body of the e-mail sent to every worker in `good_workers`.
notification = {
    "Subject": "New HITs Available",
    "MessageText": "I have just posted the full batch of HITs for the Question/Response Classification Task. Note that it is under a new account, but your qualifications have been transferred. You can view the HITs here: https://www.mturk.com/mturk/preview?groupId=ID_HERE",
}
amt.email_workers(good_workers, notification)

In [None]:
# Time units, in seconds, used to spell out the durations below.
_MINUTE = 60
_DAY = 24 * 60 * _MINUTE

# Workers must hold the given qualification type with a value equal to 100.
_qualification_requirement = {
    'QualificationTypeId': 'ID_HERE',
    'Comparator': 'EqualTo',
    'IntegerValues': [100],
}

# Attributes shared by every HIT that is created from the dataframe.
TaskAttributes = {
    'MaxAssignments': 1,  # a single Turker answers each HIT
    'LifetimeInSeconds': 4 * _DAY,  # HITs stay listed for 4 days
    'AssignmentDurationInSeconds': 5 * _MINUTE,  # at most 5 minutes per HIT
    'Reward': '0.06',  # 6 cents per HIT; at ~20 s per HIT that is roughly $10.80/hr
    'Title': 'Answer 2 Yes/No Questions about 2 Image/Question/Response Triples',
    'Keywords': 'visual question answering',
    'Description': 'Determine whether the question asked about an image is valid and whether the response given is valid. You can then simply copy/paste the relevant section of the response.',
    'QualificationRequirements': [_qualification_requirement],
    'AutoApprovalDelayInSeconds': 7 * _DAY,  # auto-approve after 7 days
}

In [None]:
# Post the HITs; create_hits is expected to add a 'hit_id' column to `df` in place.
amt.create_hits(original_xml, TaskAttributes, df)
# if you use df.head(5) or something which creates a copy of the df, this may not be true
# Check before saving so that a dataframe without HIT ids never overwrites the output CSV.
assert 'hit_id' in df.columns, "expected create_hits to add a 'hit_id' column to df"
df.to_csv(output_csv_file, index_label='index')

### Retrieve Results

In [None]:
# reload the csv if necessary (e.g. in a fresh kernel); this reads the file
# written to `output_csv_file` after the HITs were created
df = pd.read_csv(output_csv_file, index_col='index')

In [None]:
# Ask amt_utils for the HITs that are currently reviewable; this list is what
# the delete step at the end of the notebook operates on.
reviewable_hits = amt.get_reviewable_HITs()

In [None]:
# Fill `df` with the collected results and keep the returned HIT ids for approval.
# NOTE(review): presumably populate_results mutates `df` in place — confirm in amt_utils.
hitIds = amt.populate_results(df)
# Persist the updated dataframe to the output CSV.
df.to_csv(output_csv_file, index_label="index")

### Approve and Delete HITs

In [None]:
# Number of HIT ids returned by populate_results; check this before approving.
len(hitIds)

In [None]:
# Approve the HITs whose ids are in `hitIds`.
amt.approve_HITs(hitIds)

In [None]:
# Delete the HITs in `reviewable_hits`; that list was fetched before the
# results were populated, so re-fetch it first if more HITs have finished since.
amt.delete_HITs(reviewable_hits)

#### Emergency Code to Delete All HITs

# Emergency cleanup: fetch HITs via get_all_HITs (presumably every HIT on the
# account — confirm in amt_utils). Note that this overwrites `hitIds`.
# The trailing comment shows an optional filter_fn that keeps only HITs whose
# 'HITStatus' is 'Reviewable'.
hitIds = amt.get_all_HITs()#filter_fn=lambda x: x['HITStatus'] == 'Reviewable')
# Uncomment the calls below as needed to approve, expire and delete those HITs.
#amt.approve_HITs(hitIds)
#amt.expire_HITs(hitIds)
#amt.delete_HITs(hitIds)