In [1]:
# Colab-only cell: runs google.colab's files.upload() and keeps its return
# value in `uploaded` (not referenced again by the cells shown here).
# NOTE(review): presumably this is how 'credentials.csv', read by the Auth
# cell, ends up in the working directory — confirm.
from google.colab import files
uploaded = files.upload()

### Auth

In [0]:
import boto3
import pandas as pd

# --- Credentials ---
# The key pair comes from the first data row of a CSV file that has the
# columns 'Access key ID' and 'Secret access key'.
CREDENTIALS_FILE = 'credentials.csv'
credential_rows = pd.read_csv(CREDENTIALS_FILE).to_dict('records')
credentials = credential_rows[0]
aws_access_key_id, aws_secret_access_key = (
    credentials['Access key ID'],
    credentials['Secret access key'],
)

# --- Endpoint ---
# The sandbox endpoint is the default.
region_name = 'us-east-1'
endpoint_url = 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'

# Uncomment this line to use in production
# endpoint_url = 'https://mturk-requester.us-east-1.amazonaws.com'

# --- Client ---
# Every later cell talks to MTurk through this single `client` object.
client_settings = dict(
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    endpoint_url=endpoint_url,
    region_name=region_name,
)
client = boto3.client('mturk', **client_settings)

In [3]:
# Query the account balance through the configured endpoint and print the
# raw response dict (balance plus response metadata).
account_balance_response = client.get_account_balance()
print(account_balance_response)

{'AvailableBalance': '10000.00', 'ResponseMetadata': {'RequestId': '6383206e-b7e8-44ec-a6d8-38b7f8b2bd0e', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '6383206e-b7e8-44ec-a6d8-38b7f8b2bd0e', 'content-type': 'application/x-amz-json-1.1', 'content-length': '31', 'date': 'Mon, 25 May 2020 14:55:58 GMT'}, 'RetryAttempts': 0}}


In [0]:
import pandas as pd

def sent_length(text):
  """Return the number of whitespace-separated tokens in ``text``.

  Tokens are produced by ``str.split()`` with no separator, so any run of
  whitespace (repeated spaces, tabs, newlines) counts as a single delimiter
  and leading/trailing whitespace is ignored. Splitting on a literal ``' '``
  would instead count an empty token for every extra space and report a
  length of 1 for an empty string.

  Args:
    text: The sentence to measure (a ``str``).

  Returns:
    The token count as an ``int``; ``0`` for an empty or all-whitespace
    string.
  """
  return len(text.split())
  
# Load the lab dataset from the gist URL, then derive two columns:
#   length - token count of each row's 'text' (computed by sent_length)
#   pay    - True where a row's length is strictly greater than the mean length
data_url = "https://gist.githubusercontent.com/bshmueli/c99fc0abf56460e644bd610bf3024e83/raw/720285d133c85d94e0aa3fe3edcc199f6d99e3f7/lab4-data.csv"
df = pd.read_csv(data_url)
token_counts = df['text'].apply(sent_length)
df['length'] = token_counts
df['pay'] = token_counts > token_counts.mean()

### Create HITs

In [0]:
# Durations expressed in seconds, as the MTurk *InSeconds parameters expect.
one_minute = 60 # seconds
one_hour = 60 * one_minute
one_day = 24 * one_hour


def _create_emotion_hit_type(reward):
  """Register a HIT type for the labelling task and return the API response.

  The two HIT types used by this notebook differ only in their reward, so all
  shared settings live here to keep them from drifting apart.

  Args:
    reward: Reward per assignment, passed verbatim as the ``Reward`` string
      of ``client.create_hit_type``.

  Returns:
    The response dict of ``client.create_hit_type``; the ID-extraction cell
    reads its 'HITTypeId' key.
  """
  # Worker locale must be one of the listed countries.
  # see https://docs.aws.amazon.com/AWSMechTurk/latest/AWSMturkAPI/ApiReference_QualificationRequirementDataStructureArticle.html#ApiReference_QualificationType-IDs
  # NOTE(review): RequiredToPreview appears to be documented as deprecated in
  # favour of ActionsGuarded — confirm whether both should still be sent.
  locale_requirement = {
      'QualificationTypeId': '00000000000000000071',
      'Comparator': 'In',
      'LocaleValues': [
          {'Country': country_code}
          for country_code in ('TW', 'IN', 'CA', 'SG', 'US')
      ],
      'RequiredToPreview': True,
      'ActionsGuarded': 'PreviewAndAccept',
  }

  # adult content alert
  # see https://docs.aws.amazon.com/AWSMechTurk/latest/AWSMturkAPI/ApiReference_QualificationRequirementDataStructureArticle.html#ApiReference_QualificationType-IDs
  adult_requirement = {
      'QualificationTypeId': '00000000000000000060',
      'Comparator': 'EqualTo',
      'IntegerValues': [1],
      'ActionsGuarded': 'PreviewAndAccept',
  }

  return client.create_hit_type(
      AutoApprovalDelayInSeconds=one_hour, # Payment is 1 hour after completion
      AssignmentDurationInSeconds=30 * one_minute, # 30 minutes to complete each HIT
      Reward=reward,
      Title='Semantic Segmentation',
      Keywords='text,emotion,language,nctu',
      Description='Choose the emotion mostly matching your feeling in the sentence',
      QualificationRequirements=[locale_requirement, adult_requirement],
  )


# Higher-reward type ('1.00') and lower-reward type ('0.5'); the _10/_5
# suffixes are the names the following cells rely on.
hit_type_response_10 = _create_emotion_hit_type('1.00')
hit_type_response_5 = _create_emotion_hit_type('0.5')


In [0]:
# Keep only the 'HITTypeId' of each create_hit_type response; the HIT-creation
# loops pass these IDs to create_hit_with_hit_type.
hit_type_id_10, hit_type_id_5 = (
    type_response['HITTypeId']
    for type_response in (hit_type_response_10, hit_type_response_5)
)

#### Texts longer than the average length (pay == True)

In [0]:
# Post one HIT of the hit_type_id_10 type for every row flagged pay == True.
for row_label, row in df.iterrows():
  # Rows that are not flagged for this HIT type are skipped here.
  if not row['pay']:
    continue

  # The row's 'idx' value selects the hosted task page and is also stored as
  # the HIT's RequesterAnnotation.
  page_id = str(row['idx'])

  # ExternalQuestion XML pointing the worker's frame at the hosted page.
  question = '\n'.join([
      '<?xml version="1.0" encoding="UTF-8"?>',
      '<ExternalQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd">',
      '  <ExternalURL>https://sychen6192.github.io/' + page_id + '.html</ExternalURL>',
      '  <FrameHeight>800</FrameHeight>',
      '</ExternalQuestion>',
  ])

  response = client.create_hit_with_hit_type(
      HITTypeId=hit_type_id_10,
      MaxAssignments=3,  # 3 assignments per HIT
      LifetimeInSeconds=8 * 7 * one_day,  # HITs expire in 8 weeks
      Question=question,
      RequesterAnnotation=page_id,
  )

#### Texts at or below the average length (pay == False)

In [0]:
# Post one HIT of the hit_type_id_5 type for every row whose `pay` flag is
# false.
for idx, item in df.iterrows():
  # Use a truthiness test rather than `is False`: an identity comparison only
  # matches the Python `False` singleton, so if pandas hands the flag back as
  # a numpy.bool_ the test would fail and every row would be skipped silently.
  if not item.pay:
    # The row's 'idx' value selects the hosted task page and is also stored
    # as the HIT's RequesterAnnotation.
    page_id = str(item.idx)

    # ExternalQuestion XML pointing the worker's frame at the hosted page.
    question='''<?xml version="1.0" encoding="UTF-8"?>
<ExternalQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd">
  <ExternalURL>https://sychen6192.github.io/'''+page_id+'''.html</ExternalURL>
  <FrameHeight>800</FrameHeight>
</ExternalQuestion>'''

    response = client.create_hit_with_hit_type(
        HITTypeId=hit_type_id_5,
        MaxAssignments=3, # 3 assignments per HIT
        LifetimeInSeconds=8 * 7 * one_day, # HITs expire in 8 weeks
        Question=question,
        RequesterAnnotation=page_id,
    )

In [0]:
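# Reference snippet (not executed): list the assignments of the first HIT only.
# Each entry of 'HITs' is a dict, so the ID has to be read from its 'HITId' key.
# list_hits_response = client.list_hits()
# hit_id = list_hits_response['HITs'][0]['HITId']
# response = client.list_assignments_for_hit(HITId=hit_id)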

In [0]:
# Paginators for the two list operations used below.
hits_paginator = client.get_paginator('list_hits')
assignments_paginator = client.get_paginator('list_assignments_for_hit')

# Lazily walk every page of HITs and yield each HIT's ID; pages are fetched
# only as the loop below consumes them.
all_hit_ids = (
    hit_record['HITId']
    for hits_page in hits_paginator.paginate()
    for hit_record in hits_page['HITs']
)

# For each HIT, page through its assignments and print every 'Answer'
# payload exactly as returned.
for current_hit_id in all_hit_ids:
    for assignments_page in assignments_paginator.paginate(HITId=current_hit_id):
        for assignment in assignments_page['Assignments']:
            print(assignment['Answer'])

<?xml version="1.0" encoding="ASCII"?><QuestionFormAnswers xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2005-10-01/QuestionFormAnswers.xsd"><Answer><QuestionIdentifier>valence</QuestionIdentifier><FreeText>5</FreeText></Answer><Answer><QuestionIdentifier>arousal</QuestionIdentifier><FreeText>5</FreeText></Answer><Answer><QuestionIdentifier>dominance</QuestionIdentifier><FreeText>5</FreeText></Answer></QuestionFormAnswers>
<?xml version="1.0" encoding="ASCII"?><QuestionFormAnswers xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2005-10-01/QuestionFormAnswers.xsd"><Answer><QuestionIdentifier>color</QuestionIdentifier><FreeText>blue</FreeText></Answer></QuestionFormAnswers>
<?xml version="1.0" encoding="ASCII"?><QuestionFormAnswers xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2005-10-01/QuestionFormAnswers.xsd"><Answer><QuestionIdentifier>darknessVal</QuestionIdentifier><FreeText>deeppink</FreeText></Answer></Qu