## Table of Contents
* [Setup](#Setup)
	* [estimate cost](#estimate-cost)
	* [load dataset](#load-dataset)
* [Submitting HITs](#Submitting-HITs)
* [Retrieve results](#Retrieve-results)
* [Interact with workers](#Interact-with-workers)
* [Accepting and deleting HITs](#Accepting-and-deleting-HITs) (careful with these)


In [20]:
# Run configuration for this annotation batch.
# sandbox toggles which output pickle is used here and how the MTurk connection is built later.
sandbox = True
sent_file = "/Users/nikett/quick/sciencesrl-annotation-afresh/sample_input.tsv"
# Values <= 0 (e.g. -1) mean "use every sentence"; a positive value keeps only the first N rows.
num_sentences = 50

# Sandbox and production results are written to separate pickle files.
if sandbox:
    outfile = '/Users/nikett/quick/sciencesrl-annotation-afresh/results/sandbox_v0.pkl'
else:
    outfile = '/Users/nikett/quick/sciencesrl-annotation-afresh/results/batch_v1.pkl'

# Settings shared by every HIT in the batch; passed to prepare_hit and expected_cost.
# NOTE(review): 'amount' is presumably the reward per assignment in USD, and 'duration' /
# 'lifetime' presumably seconds (1 hour / 2 days) -- confirm against mturk_utils.
static_params = dict(
    title="Answer simple questions about the actions in a sentence.",
    description="Answer simple questions about the actions in a sentence.",
    keywords=['English verbs'],
    frame_height=1000,
    amount=0.01,
    duration=3600 * 1,
    lifetime=3600 * 24 * 2,
    max_assignments=5,
    locales=['US', 'CA', 'AU', 'NZ', 'GB'],
)

In [21]:
import numpy as np
import pandas as pd
import re
import pickle
import boto
import json
import os
import random
from copy import deepcopy
from tqdm import tqdm

from IPython.core.display import HTML
from mturk_utils.annotation_collection import prepare_hit
from mturk_utils.annotation_collection import write_task_page
from mturk_utils.mturk import expected_cost
import mturk_ai2
from mturk_utils.mturk import MTurk

# Submitting HITs

In [22]:
# Credentials come from the mturk_ai2 module (access_key / access_secret_key attributes).
turk_account = mturk_ai2
rw_host = 'mechanicalturk.amazonaws.com'

if sandbox:
    # No host is passed in sandbox mode.
    # NOTE(review): presumably MTurk defaults to the sandbox endpoint -- confirm in mturk_utils.mturk.
    amt_con = MTurk(turk_account.access_key, turk_account.access_secret_key)
else:
    # Production runs point the connection explicitly at rw_host.
    amt_con = MTurk(turk_account.access_key, turk_account.access_secret_key, host=rw_host)

# Last expression of the cell: its value is rendered as the cell output.
amt_con.get_account_balance()

$10,000.00

In [4]:
# Read the tab-separated sentence file; row 0 supplies the column names.
df = pd.read_csv(sent_file, delimiter='\t', header=0)
print("First few inputs are ...")
# head() defaults to the first five rows, shown as the cell output.
df.head()

First few inputs are ...


Unnamed: 0,sentid,sent,verbspans
0,1,The life span of a worker bee varies according...,7-8
1,2,"When a chemical change takes place , new subst...","4-5,10-11"


In [5]:
def removeNonAscii(s):
    """Return ``s`` with every character whose ordinal is 128 or higher removed.

    Characters are dropped, not replaced, so the result can be shorter than the input.
    """
    kept_chars = [ch for ch in s if ord(ch) < 128]
    return "".join(kept_chars)

# A positive num_sentences keeps only the first rows; any other value uses the whole frame.
sentence_rows = df.head(num_sentences) if num_sentences > 0 else df

# One prepared HIT per input row; sentence text is stripped of non-ASCII characters first.
build_hit_group = [
    prepare_hit(row.sentid, removeNonAscii(row.sent), row.verbspans, static_params)
    for index, row in sentence_rows.iterrows()
]

# Write the HTML of one randomly chosen HIT via write_task_page.
preview_hit = random.choice(build_hit_group)
write_task_page(preview_hit['html'])

# Last expression of the cell: the value returned by expected_cost is shown as output.
expected_cost(build_hit_group, static_params, amt_con)

0.1

In [None]:
# Submit the prepared HITs one at a time.
# An explicit loop (rather than a list comprehension) keeps the results of the HITs that
# were already created in hit_group if a later create_html_hit call raises part-way
# through the batch, so they can still be inspected afterwards.
hit_group = []
for single_hit in tqdm(build_hit_group):
    hit_group.append(amt_con.create_html_hit(single_hit))

# Retrieve results

In [15]:
#from mturk_utils.annotation_collection import create_result
from mturk_utils.annotation_collection import get_assignments
from mturk_utils.annotation_collection import pickle_this
from mturk_utils.annotation_collection import filter_hits_by_date

# Date window handed to filter_hits_by_date; both ends are the same day here.
# NOTE(review): tuples look like (year, month, day) -- confirm against filter_hits_by_date.
start_date = (2017, 12, 22)
end_date = (2017, 12, 22)

# Fetch every HIT on the account, then narrow to the date window above.
all_hits = amt_con.get_all_hits()
recent_hits = filter_hits_by_date(all_hits, start_date, end_date)

# Collect the assignments for those HITs and persist them to outfile.
all_assignments = get_assignments(amt_con.connection, recent_hits)
pickle_this(all_assignments, outfile)

In [None]:
# %%time
# all_hits = amt_con.get_reviewable_hits(detailed=False)

In [None]:
# start_date = (2017, 12, 15)
# end_date = (2017, 12, 15)
# recent_hits = filter_hits_by_date(all_hits, start_date, end_date)

In [None]:
#%%time
# new_4b_assignemnts_10_26 = get_assignments(amt_con.connection, new_4b_hits)

In [None]:
# pickle_this(new_4b_assignemnts_10_26, 'new_4b_assignemnts_10_26.pkl')

# Interact with workers

## reject assignments and ban workers

In [None]:
def ban_bad_workers(mturk_connection, worker_ids):
    """Block every worker in ``worker_ids`` with a fixed explanation.

    Args:
        mturk_connection: object exposing ``block_worker(worker_id, reason)``.
        worker_ids: iterable of worker identifiers.

    Prints one 'blocking <id>' line per worker before the block call is made.
    """
    # The reason text is the same for every worker, so it is built once up front.
    block_reason = """
        Your HITs contained many sentences with action verbs, but they were marked 'no action verbs'
        """
    for worker_id in worker_ids:
        print('blocking ' + str(worker_id))
        mturk_connection.block_worker(worker_id, block_reason)

In [None]:
# The loader provided by mturk_utils.annotation_collection is named un_pickle_this;
# the name unpickle_this is never imported or defined in this notebook, so the
# original call raised NameError on a fresh kernel. Import it here so this cell
# runs on its own.
from mturk_utils.annotation_collection import un_pickle_this

# Worker ids to block, loaded from a previously saved pickle file.
workers_to_ban = un_pickle_this('workers_to_ban_prod_1_4.pkl')

In [None]:
# WARNING: this call is live (not commented out). Running it blocks every worker in
# workers_to_ban through the underlying connection object (amt_con.connection).
# list() materializes workers_to_ban before it is handed to ban_bad_workers.
ban_bad_workers(amt_con.connection, list(workers_to_ban))

In [None]:
# The loader provided by mturk_utils.annotation_collection is named un_pickle_this;
# the name unpickle_this is never imported or defined in this notebook, so the
# original call raised NameError on a fresh kernel. Import it here so this cell
# runs on its own.
from mturk_utils.annotation_collection import un_pickle_this

# Assignment ids to reject, loaded from a previously saved pickle file.
to_reject = un_pickle_this('assignments_to_reject_prod_1_4.pkl')

In [None]:
def reject_assignments(mturk_connection, assignments_to_reject, feedback_message=None):
    """Reject each assignment in ``assignments_to_reject`` with a feedback message.

    Args:
        mturk_connection: object exposing ``reject_assignment(assignment_id, feedback)``.
        assignments_to_reject: sized collection of assignment ids (``len()`` is taken
            before any rejection is attempted).
        feedback_message: text sent with every rejection. When omitted, the original
            hard-coded message is used, so existing two-argument calls behave as before.

    Returns:
        The number of assignments that were submitted for rejection, i.e.
        ``len(assignments_to_reject)``. Assignments whose rejection raised
        ``MTurkRequestError`` are still included in this count.
    """
    # NOTE(review): the default text talks about "frames" and "characters", which does not
    # match the action-verb wording used when blocking workers in this notebook -- it looks
    # carried over from another task. Pass feedback_message explicitly for this task.
    default_feedback = """
    Your HITs contained many frames with characters, but they were marked 'empty frame'
    """
    if feedback_message is None:
        feedback_message = default_feedback

    reject_count = len(assignments_to_reject)
    for assignment_id in tqdm(assignments_to_reject):
        try:
            mturk_connection.reject_assignment(assignment_id, feedback_message)
        except boto.mturk.connection.MTurkRequestError:
            # Best effort: report and move on to the next assignment.
            print('assignment ' + str(assignment_id) + ' already accepted or rejected')

    return reject_count

In [None]:
# reject_assignments(amt_con.connection, list(to_reject))

In [None]:
def pay_bonuses(bonuses_to_pay, mturk_connection=None, rate_per_item=0.01, bonus_reason=None):
    """Grant a per-item bonus for every entry in ``bonuses_to_pay``.

    Args:
        bonuses_to_pay: mapping of assignment id -> dict with keys ``'count'`` (number of
            items to pay for) and ``'worker_id'``.
        mturk_connection: object exposing ``grant_bonus(worker_id, assignment_id, price,
            reason)``. Defaults to the notebook-level ``amt_con.connection``, which is what
            the original implementation always used.
        rate_per_item: bonus paid per counted item (default 0.01, as before).
        bonus_reason: text attached to each bonus. Defaults to the original hard-coded text.

    Returns:
        The sum of the bonus amounts submitted.
    """
    if mturk_connection is None:
        mturk_connection = amt_con.connection
    if bonus_reason is None:
        # NOTE(review): this default wording refers to a different task ("Flinstones
        # character labels"); pass bonus_reason explicitly when paying for this task.
        bonus_reason = 'For Flinstones character labels written.'

    total_paid = 0
    for assignment_id, details in tqdm(bonuses_to_pay.items()):
        item_count = details['count']
        worker_id = details['worker_id']
        # Round to whole cents: rate * count in binary floating point can produce values
        # such as 0.30000000000000004, which is not a valid currency amount.
        bonus_amount = boto.mturk.price.Price(round(rate_per_item * item_count, 2))
        total_paid += bonus_amount.amount
        mturk_connection.grant_bonus(worker_id, assignment_id, bonus_amount, bonus_reason)
    return total_paid

In [None]:
# bonuses = unpickle_this('bonuses_to_pay_3.pkl')

In [None]:
# pay_bonuses(bonuses)

# Accepting and deleting HITs

Uncomment only when ready to accept or delete hits

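Approve assignments — the cells below approve every assignment loaded from `outfile`, so reject any bad assignments carefully before running them.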

In [None]:
from mturk_utils.annotation_collection import un_pickle_this
# Reload the assignments that the retrieval cell saved to outfile with pickle_this.
# NOTE(review): un_pickle_this presumably wraps pickle loading -- only point it at
# files produced by this notebook, never at untrusted pickles.
accept_results = un_pickle_this(outfile)

In [None]:
# Approve every assignment stored in accept_results.
# accept_results is indexed by its own iteration keys, and each value is iterated as a
# collection of assignments (the key name suggests several turkers per entry).
e_count = 0
for assignment_threeturkers in tqdm(accept_results):
    for a in accept_results[assignment_threeturkers]:
        # The try/except sits around each individual approval so that one failing
        # assignment does not skip the remaining assignments of the same entry.
        try:
            amt_con.approve_assignment(assignment=a)
        except boto.mturk.connection.MTurkRequestError as e:
            # print(...) with a single argument behaves the same on Python 2 and 3
            # and matches the print calls used elsewhere in this notebook.
            print(e)
            e_count += 1
print(str(e_count) + " exceptions")

disable hits

In [None]:
# _ = [amt_con.disable_hit(hit) for hit in tqdm(all_hits)]

Delete all HITs

In [23]:
# WARNING: this call is live (not commented out), so "Run All" will execute it.
# NOTE(review): judging by its name, delete_all_hits removes every HIT reachable through
# amt_con, not only the HITs created by this notebook -- confirm before running.
amt_con.delete_all_hits()