In [None]:
import boto3
import pandas as pd
from logging import log, CRITICAL, ERROR, INFO, DEBUG, WARN
from datetime import datetime, timedelta

### Setup Environment
* Verify that you have set AWS credentials in your user data
* Location: ~/.aws/credentials
* Verify you are in the correct sandbox / production setting

Format: 

[default]  
aws_access_key_id=XXXXXXX  
aws_secret_access_key=XXXXX


In [None]:
def check_response(resp):
    """Log the whole response at ERROR level unless its HTTP status is 200.

    Args:
        resp: response dict containing ``ResponseMetadata.HTTPStatusCode``.

    Returns:
        None. The function only logs; it never raises on a non-200 status.
    """
    http_status = resp['ResponseMetadata']['HTTPStatusCode']
    if http_status == 200:
        return
    log(ERROR, resp)

# Toggle between the MTurk requester sandbox and the production endpoint.
SANDBOX = True
if not SANDBOX:
    # Production: use the default endpoint for the region.
    mturk = boto3.client('mturk', region_name='us-east-1')
else:
    # Sandbox: point the client at the requester-sandbox endpoint.
    sandbox_ep = 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'
    mturk = boto3.client(
        'mturk',
        region_name='us-east-1',
        endpoint_url=sandbox_ep,
    )

def get_balance():
    """Return the ``AvailableBalance`` field of the account-balance response.

    The response is passed through ``check_response`` first, which logs it
    when the HTTP status is not 200.
    """
    response = mturk.get_account_balance()
    check_response(response)
    return response['AvailableBalance']
    
def print_env():
    """Print the available balance and report which environment is active.

    The sandbox is announced with ``print``; production is reported through
    the logger at WARN level.
    """
    print("Available Balance: ", get_balance())
    if not SANDBOX:
        log(WARN, "PRODUCTION Environment")
        return
    print("SANDBOX Environment")
        
        

In [None]:
# Show the available balance and whether the sandbox or production is active.
print_env()

### Worker Qualifications

https://requestersandbox.mturk.com/qualification_types

Search for these strings:   
* Training and Qualification Phase
* Production Phase

* Sandbox:   
* * Training: ...  
* * Live: ...

#### Accuracy and Coverage Qualifications
* Sandbox:
* * Coverage: ...  
* * Accuracy: ...

* Production
* * Coverage: ...
* * Accuracy: ...


In [None]:
# Fetch the workers holding the given qualification type and print each
# worker's id together with the time the qualification was granted.
# NOTE(review): a single call is made with no NextToken handling, so this
# presumably shows only the first page of results -- confirm against the API.
res = mturk.list_workers_with_qualification_type(QualificationTypeId="...")
for rec in res['Qualifications']:
    print(rec['WorkerId'], rec["GrantTime"])

In [None]:
# Grant the qualification type to a single worker (fill in both ids first).
mturk.associate_qualification_with_worker(WorkerId="...", QualificationTypeId="...")

In [None]:
# Remove the qualification type from a single worker (fill in both ids first).
mturk.disassociate_qualification_from_worker(WorkerId="...", QualificationTypeId="...")

In [None]:

def hits_df(hits):
    """Convert a list of HIT records into a tidy DataFrame.

    Args:
        hits: list of HIT dicts (keys such as ``HITId``, ``HITTypeId``,
            ``HITStatus`` ...). May be empty.

    Returns:
        A DataFrame with a fixed column set and order. API field names are
        renamed to short snake_case names, and ``h_id`` / ``t_id`` hold the
        last five characters of the HIT id and HIT type id. Columns that no
        record provides are filled with NaN instead of raising.
    """
    # Single source of truth for the output columns and their order.
    cols = ['h_id', 't_id', 'status', 'review_status', 'reward', 'title', 'create_date', 'expire_date',
            'duration_sec', 'auto_approve_sec',
            'assign_available', 'assign_completed', 'assign_pending', 'MaxAssignments',
            'id', 'hit_type', 'hit_group_id', 'Keywords', 'Description']
    if not len(hits):
        return pd.DataFrame(columns=cols)

    renamed = (
        pd.DataFrame.from_records(hits)
        # Bulky fields that are not useful in a summary table; tolerate
        # records that do not carry them at all.
        .drop(columns=['QualificationRequirements', 'Question'], errors='ignore')
        .rename(columns={
            'HITId': 'id',
            'HITTypeId': 'hit_type',
            'HITGroupId': 'hit_group_id',
            'HITStatus': 'status',
            'HITReviewStatus': 'review_status',
            'AssignmentDurationInSeconds': 'duration_sec',
            'AutoApprovalDelayInSeconds': 'auto_approve_sec',
            'NumberOfAssignmentsAvailable': 'assign_available',
            'NumberOfAssignmentsCompleted': 'assign_completed',
            'NumberOfAssignmentsPending': 'assign_pending',
            'CreationTime': 'create_date',
            'Expiration': 'expire_date',
            'Reward': 'reward',
            'Title': 'title'})
    )

    # Short suffixes make ids easier to eyeball in the notebook output.
    renamed['h_id'] = renamed['id'].str.slice(-5)
    renamed['t_id'] = renamed['hit_type'].str.slice(-5)

    # reindex (rather than plain selection) so an optional field missing from
    # every record becomes a NaN column instead of a KeyError.
    return renamed.reindex(columns=cols)



In [None]:
def refresh():
    """Fetch every HIT on the account and return them as a DataFrame.

    Prints the current environment, then pages through ``list_hits`` 100
    results at a time, following ``NextToken`` until a page is empty or no
    token is returned.

    Returns:
        The DataFrame built by ``hits_df`` from all collected HIT records
        (an empty frame when the account has no HITs).
    """
    print_env()
    records = []
    resp = mturk.list_hits(MaxResults=100)

    while resp.get("NumResults", 0) > 0:
        records.extend(resp['HITs'])
        print("Returned: ", resp["NumResults"], " results")

        # A response without a NextToken is treated as the last page;
        # indexing it directly would raise KeyError.
        next_tok = resp.get("NextToken")
        if not next_tok:
            break

        print("Trying next token: ", next_tok)
        resp = mturk.list_hits(MaxResults=100, NextToken=next_tok)

    return hits_df(records)

In [None]:
# Load the current HITs into a DataFrame; re-run this cell to refresh the view.
hits = refresh()


In [None]:
# Number of HITs in each status.
hits.status.value_counts()

In [None]:
# Count HITs per (title, hit_type, status) combination, largest groups first.
(
    hits
    .groupby(["title", "hit_type", 'status'])
    .size()
    .rename("hit_count")
    .reset_index()
    .sort_values("hit_count", ascending=False)
)

### Expire and Delete HITs

In [None]:
# HIT type id to operate on in the cells below (fill in before running).
GEN = "..."

In [None]:
# Review the assignment counts of the HITs of type GEN before expiring/deleting them.
hits[hits.hit_type == GEN][['id', 'review_status', 'assign_available', 'assign_completed', 'assign_pending', 'MaxAssignments']]

In [None]:
        
def expire(hit_id):
    """Set the HIT's expiration to 2000-01-01, a date far in the past.

    Args:
        hit_id: id of the HIT whose expiration is updated.

    The response is passed to ``check_response``, which logs it when the
    HTTP status is not 200.
    """
    resp = mturk.update_expiration_for_hit(
        HITId=hit_id,
        ExpireAt=datetime(2000, 1, 1),
    )
    check_response(resp)
        
def delete(hit_id):
    """Delete the HIT with the given id and log any non-200 response.

    Args:
        hit_id: id of the HIT to delete.
    """
    check_response(mturk.delete_hit(HITId=hit_id))

In [None]:
# Select the ids of every HIT of type GEN whose status is not "Unassignable",
# then expire and immediately delete each one. This is destructive.
# NOTE(review): check_response only logs failures, so delete() is still
# attempted after a failed expire() -- confirm this is intended.
to_expire = hits[(hits.hit_type == GEN) &( hits.status != "Unassignable")].id
for id_to_exp in to_expire:
    print(id_to_exp)
    expire(id_to_exp)
    print("deleting...")
    delete(id_to_exp)
