In [12]:
import dotenv
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes so edits to local .py files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
import datetime
import os

import boto3
import dotenv
import joblib
import pandas as pd
import numpy as np
# Unseeded generator: it is initialised from fresh OS entropy, so draws differ
# on every kernel start.
# NOTE(review): `rng` is not referenced by any other cell shown in this notebook.
rng = np.random.default_rng()

from mtclient import MTClient
import mtglobals

In [3]:
# Scratch check of a sample's value distribution (requires a `sample` array to be defined first)
#unique, counts = np.unique(sample, return_counts=True)
#print(np.asarray((unique, counts)).T)

In [4]:
# Create the project's MTurk client wrapper, used by every later cell.
# The saved output of this cell shows that construction reports the account
# balance - presumably it connects to the MTurk API; confirm in mtclient.
client = MTClient()

Your account balance is 2470.88


### (a) Get all HITs since `date_cutoff`

In [6]:
# Fetch the list of HITs through the client. Per the saved output of this cell,
# the call logs the date range it queries and saves the downloaded HIT data to
# a pickle file. NOTE(review): the start date is not set in this notebook -
# presumably it is configured inside the client; confirm.
all_hits = client.download_all_hits()

Downloading list of hits from 2021-10-30 00:00:00+00:00 to 2022-09-21 16:55:09.434973-07:53
p2:cDrOJMHmHZ6hUjLmq+7CSMGp5gErDsytccKarewnaG68+IBkXp9ZfHEh62Vl9p4=
Saving downloaded HIT data to ../results_2stage/all_hit_data.pkl


In [7]:
# Keep only the stage-1 HITs, recognised by the phrase "instantly unlock" in their title
stage1_hits = list(filter(lambda hit: "instantly unlock" in hit['Title'], all_hits))
# Show (title, HIT id, creation time) for each match so the ids can be read off the output
[(hit['Title'], hit['HITId'], hit['CreationTime']) for hit in stage1_hits]

[('Quick 3-question survey about work [<15 seconds], instantly unlock 2nd-stage HIT with higher reward',
  '3EHIMLB7GKKSFG8JX9I12SH0EU48H4',
  datetime.datetime(2022, 9, 21, 16, 21, 14, tzinfo=tzlocal()))]

In [8]:
# Important: put the HITIds for the stage-1 HIT whose submissions you want to process here
# Only HITs in `stage1_hits` whose HITId appears in this list are processed in the next step.
stage1_ids = ['3BCRDCM0OEV9AO8YCYWZ0G46CS4K6A']

### (b) Restrict to the stage-1 HIT(s) specified above and collect the IDs of all workers who submitted them

In [19]:
# Narrow the title-filtered stage-1 HITs down to the ids requested in `stage1_ids`.
# The selection gets its own name so that `stage1_hits` is left untouched: this
# cell can then be re-run after editing `stage1_ids` without first re-running
# the title-filter cell.
selected_stage1_hits = [h for h in stage1_hits if h['HITId'] in stage1_ids]
# Requested ids that matched nothing would otherwise silently yield an empty
# submitter list, so report them explicitly.
found_ids = {h['HITId'] for h in selected_stage1_hits}
missing_ids = [hit_id for hit_id in stage1_ids if hit_id not in found_ids]
if missing_ids:
    print(f"WARNING: no stage-1 HIT found for id(s) {missing_ids}; "
          "check `stage1_ids` and whether the HIT list needs to be re-downloaded")
# Finally, get the list of submitter IDs (one entry per submission)
all_stage1_submitters = []
for cur_hit in selected_stage1_hits:
    cur_hit_id = cur_hit['HITId']
    cur_submissions = client.get_hit_submissions(cur_hit_id)
    # Number of submissions retrieved for this HIT
    print(len(cur_submissions))
    cur_submitters = [s['WorkerId'] for s in cur_submissions]
    all_stage1_submitters.extend(cur_submitters)

1000


In [20]:
# Number of stage-1 submissions collected. There is one entry per submission,
# so a worker id may appear more than once if several HIT ids were given.
#print(all_stage1_submitters, end='')
len(all_stage1_submitters)

1000

### (c) For each stage-1 submitter, create, launch, and record info for their custom stage-2 HIT

In [14]:
# Convert the stage-2 question to xml, if it hasn't been converted already
# Load the project's .env file so STAGE2_HTML_FPATH is available in the environment
dotenv.load_dotenv(mtglobals.dotenv_fpath)
stage2_html_fpath = os.getenv("STAGE2_HTML_FPATH")
# os.getenv returns None when the variable is unset; fail here with a clear
# message instead of passing None on to the XML generation step.
if not stage2_html_fpath:
    raise RuntimeError(
        f"STAGE2_HTML_FPATH is not set; define it in {mtglobals.dotenv_fpath} or in the environment"
    )
stage2_xml_fpath = mtglobals.gen_xml(stage2_html_fpath)

In [None]:
# Load the already-launched HITs, if the file exists, for quick checking
if os.path.isfile(mtglobals.stage2_launched_fpath):
    launched_df = pd.read_csv(mtglobals.stage2_launched_fpath)
else:
    launched_df = None
# `launched_df` is a snapshot read once, before the loop, so it cannot see HITs
# launched during this run. Track those worker ids separately so that a worker
# who appears more than once in `all_stage1_submitters` (e.g. because several
# stage-1 HIT ids were processed) never gets a second stage-2 HIT.
launched_this_run = set()
# Loop over workers who submitted stage 1
for worker_num, cur_worker_id in enumerate(all_stage1_submitters):
    print(f"Processing worker #{worker_num}")
    if cur_worker_id in launched_this_run:
        print(f"Stage 2 HIT for worker {cur_worker_id} already launched")
        continue
    # check_launched signals "not launched yet" with a qual num of -1
    launched_qual_name, launched_qual_num, launched_offer_amt = mtglobals.check_launched(launched_df, cur_worker_id)
    if launched_qual_num != -1:
        print(f"Stage 2 HIT for worker {cur_worker_id} already launched")
        continue
    # If we're here, this worker_id hasn't had a stage 2 HIT launched yet
    cur_qual_info = mtglobals.get_current_qual()
    cur_qual_name = cur_qual_info['qual_name']
    cur_qual_id = cur_qual_info['qual_id']
    last_qual_num = cur_qual_info['last_qual_num']
    last_offer = cur_qual_info['last_offer_amt']
    print(f"Last used qual num: {last_qual_num}")
    print(f"Last used offer amt: {last_offer}")
    # Generate random offer amount (randomly sampled by mtglobals.random_wage)
    cur_offer_amt = mtglobals.random_wage()
    # The qual num increments by one per launched HIT and wraps back to 0 after
    # 99, so it always stays in 0-99. It is passed both to the qualification
    # assignment and to the qualification restriction below.
    # NOTE(review): the original comment described 100 as the highest allowed
    # value, but `% 100` never yields 100 - confirm which is intended.
    cur_qual_num = (last_qual_num + 1) % 100
    print(f"Processing worker {cur_worker_id}, offer {cur_offer_amt}, qual_num {cur_qual_num}")
    # Using the functions to create the HIT params and content
    # Generate the xml for the question
    stage2_question = mtglobals.gen_custom_hit(stage2_xml_fpath, cur_worker_id,
                                               cur_offer_amt)
    # Assign the custom qualification num for this worker
    qual_response = client.assign_stage2_quals(cur_worker_id, cur_qual_name,
                                               cur_qual_id, cur_qual_num,
                                               cur_offer_amt)
    print("Qualification assignment response:")
    print(qual_response)
    # Generate the data structure which enforces the qualification restriction
    stage2_requirements = mtglobals.gen_qual_restriction(cur_qual_id, cur_qual_num)
    # And launch the HIT
    cur_hit_title = f'Custom workplace survey HIT for worker id {cur_worker_id}'
    cur_hit_description = f'Custom workplace survey HIT for worker id {cur_worker_id}, 30 questions, ~15mins to complete'
    cur_hit_keywords = 'survey,workplace,work'
    launch_response = client.launch_custom_hit(cur_worker_id, cur_offer_amt,
                                               cur_hit_title, cur_hit_description,
                                               cur_hit_keywords, stage2_question,
                                               stage2_requirements)
    # The HIT now exists, so remember the worker for the rest of this run
    launched_this_run.add(cur_worker_id)
    print("Launch response:")
    print(launch_response['ResponseMetadata']['HTTPStatusCode'])
    launched_time = launch_response['HIT']['CreationTime']
    # The response includes several fields that will be helpful later
    cur_hit_type_id = launch_response['HIT']['HITTypeId']
    cur_hit_id = launch_response['HIT']['HITId']
    print(f"Created HITId {cur_hit_id} for worker {cur_worker_id}")
    custom_url = client.mturk_environment['preview_url'] + f"?groupId={cur_hit_type_id}"
    print(f"You can work the HIT here: {custom_url}")
    mtglobals.write_log(
        f"Created custom stage-2 HIT for worker {cur_worker_id}, offer {cur_offer_amt}, URL {custom_url}"
    )
    # And append the data for this iteration to the .csv of launched workers
    launched_fpath = mtglobals.add_posted_worker(cur_worker_id, cur_offer_amt, cur_qual_name, cur_qual_id,
                                       cur_qual_num, cur_hit_id, cur_hit_type_id, custom_url, launched_time)
    print(f"New launched HIT successfully added to {launched_fpath}")
    print("=====[ end loop iteration ]=====")


Processing worker #0
Stage 2 HIT for worker A22V0BPWJIVMHW already launched
Processing worker #1
Stage 2 HIT for worker A1PAY3X73PQ16S already launched
Processing worker #2
Stage 2 HIT for worker A16XDO6MW9ZKMI already launched
Processing worker #3
Stage 2 HIT for worker A1A6R5X6YELRAV already launched
Processing worker #4
Stage 2 HIT for worker A2HILJXCL1Z6JU already launched
Processing worker #5
Stage 2 HIT for worker A27W7YF5LN69UP already launched
Processing worker #6
Stage 2 HIT for worker A2KWGCHV2FILJO already launched
Processing worker #7
Stage 2 HIT for worker A2IP0EIWXDWAMJ already launched
Processing worker #8
Stage 2 HIT for worker A2UPJWDQFVHZV8 already launched
Processing worker #9
Stage 2 HIT for worker ARHELGKHTCBAV already launched
Processing worker #10
Stage 2 HIT for worker A32XW59UVW9YCO already launched
Processing worker #11
Stage 2 HIT for worker A1OREUR6RUEVOX already launched
Processing worker #12
Stage 2 HIT for worker A157YDLCSQDF1T already launched
Processing

### Now run 02b_NotifyStage2.ipynb