In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import datetime
import os

import pandas as pd
import numpy as np
import boto3
import joblib
import pytz
pacific = pytz.timezone('US/Pacific')

import mtglobals

In [3]:
# Build the MTurk client through the project helper. `client` is the object whose
# notify_workers() method is called by notify_worker() later in this notebook.
# NOTE(review): gen_client() presumably also prints the account balance shown in
# this cell's output -- confirm in mtglobals.
client, mturk_environment = mtglobals.gen_client()

Your account balance is 498654.83


## Load the launched Stage 2 HITs

In [4]:
# Read the CSV of launched Stage 2 HITs; its location is configured in mtglobals
launched_csv_fpath = mtglobals.stage2_launched_fpath
stage2_launched_df = pd.read_csv(launched_csv_fpath)
# Number of launched rows
stage2_launched_df.shape[0]

3242

## Functions used in the loop

In [5]:
def notify_worker(cur_worker_id, custom_msg):
    """Send a notification message to a single worker.

    Uses the module-level `client` (created in the gen_client cell) and a fixed
    subject line.

    Parameters
    ----------
    cur_worker_id : worker ID to notify; sent as a one-element WorkerIds list.
    custom_msg : text used as the message body.

    Returns
    -------
    Whatever `client.notify_workers` returns, unmodified.
    """
    recipients = [cur_worker_id]
    # Alternative body (disabled in the original): MessageText=payment_msg
    return client.notify_workers(
        Subject='Custom Workplace Survey 2nd-Stage HIT',
        MessageText=custom_msg,
        WorkerIds=recipients,
    )

def update_launched_worker(cur_worker_id, notified_time, launched_fpath=None):
    """Record in the launched-workers CSV that a worker has been notified.

    The CSV is re-read, the `notified_time` cell of every row whose `worker_id`
    equals `cur_worker_id` is set, and the file is written back in place
    (without the index).

    Parameters
    ----------
    cur_worker_id : worker ID whose row(s) should be updated.
    notified_time : value stored in the `notified_time` column (the loop passes
        a timezone-aware datetime).
    launched_fpath : optional path of the CSV to update. Defaults to
        `mtglobals.stage2_launched_fpath`, which is what the original
        two-argument call used.

    Returns
    -------
    The path of the CSV that was updated.
    """
    if launched_fpath is None:
        launched_fpath = mtglobals.stage2_launched_fpath
    launched_df = pd.read_csv(launched_fpath)

    # A column that is entirely empty is parsed as float NaN; store it as object
    # so a datetime/string can be assigned without relying on implicit upcasting.
    if 'notified_time' in launched_df.columns:
        launched_df['notified_time'] = launched_df['notified_time'].astype(object)
    else:
        launched_df['notified_time'] = None

    # Boolean-mask assignment must go through .loc; .at only supports a single
    # scalar row/column label pair.
    worker_mask = launched_df['worker_id'] == cur_worker_id
    launched_df.loc[worker_mask, 'notified_time'] = notified_time

    launched_df.to_csv(launched_fpath, index=False)
    return launched_fpath

In [6]:
# Loop over the launched Stage 2 workers and notify each one that has not been
# notified yet. Exactly one entry is appended to `results` per row.
results = []
for _, cur_row in stage2_launched_df.iterrows():
    cur_worker_id = cur_row['worker_id']
    cur_result = {'worker_id': cur_worker_id}
    print(f"Processing worker {cur_worker_id}")
    custom_url = cur_row['url']
    # But, if notified_time is already non-null, then skip (they've already been notified)
    notified_time = cur_row['notified_time']
    if not pd.isna(notified_time):
        cur_result['notify_success'] = True
        cur_result['notify_time'] = notified_time
        results.append(cur_result)
        continue

    # And notify worker about their custom HIT, with link
    custom_msg = ("Hello, you have qualified for a custom HIT based on your completion of the "
              "initial workplace survey HIT. Please visit the following URL to access your "
              "custom HIT (if the link does not work, please email us at columbiatextlab@gmail.com): "
              f"{custom_url} \n\nIf "
              "this link does not work, please also try searching for \"Columbia TextLab\" on the "
              "web UI, or by visiting this URL: "
              "https://worker.mturk.com/projects?filters%5Bsearch_term%5D=textlab&page_size=20&page_number=1")
    notify_success = False
    failure_reason = None
    try:
        notify_response = notify_worker(cur_worker_id, custom_msg)
        # notify_workers reports per-worker delivery failures in the response
        # body instead of raising, so a call that returns is not necessarily a
        # delivered message.
        failure_statuses = (notify_response or {}).get('NotifyWorkersFailureStatuses') or []
        if failure_statuses:
            failure_reason = "; ".join(
                f"{status.get('NotifyWorkersFailureCode')}: {status.get('NotifyWorkersFailureMessage')}"
                for status in failure_statuses
            )
            print(f"Error with notify_worker: {failure_reason}")
        else:
            print("Successfully notified")
            notify_success = True
    except Exception as e:
        # Best-effort: keep going with the remaining workers, but remember why this one failed
        failure_reason = f"{type(e).__name__}: {e}"
        print(f"Error with notify_worker: {e}")

    cur_result['notify_success'] = notify_success
    # Time of the attempt (recorded for failures too)
    notified_time = datetime.datetime.now(pacific)
    cur_result['notify_time'] = notified_time
    results.append(cur_result)

    # Log the results of this loop iteration
    if notify_success:
        mtglobals.write_log(f"Notified worker {cur_worker_id}")
        # And update the .csv of launched workers, so a re-run skips this worker
        launched_fpath = update_launched_worker(cur_worker_id, notified_time)
        print(f"{launched_fpath} successfully updated")
    else:
        # Log the actual failure instead of always claiming a RequestError
        mtglobals.write_log(f"Failed to notify {cur_worker_id} because of {failure_reason}")
    print("=====[ end loop iteration ]=====")


Processing worker A1PTH9KTRO06EG
Processing worker A13YTGRLTS80MU
Processing worker AROOCBM042SJD
Processing worker A1T643M1P572AA
Processing worker A3I6KVLRM43E8D
Processing worker A304UJAE051J89
Processing worker A2SENAPNSXG9L
Processing worker ACCQN14OV2GG3
Processing worker A16TH68KJQZXKC
Processing worker A34GSERJN16K5N
Processing worker A18B29YUSCWQTT
Processing worker A2ZCNTKI76RIWI
Processing worker A28RL79ONUXTET
Processing worker AWW4ZOM0FU56A
Processing worker AJ9IY4IHOGB8
Processing worker A2BNFKU3IXQQ7W
Processing worker A34HAO70WP7YVT
Processing worker A2CK0OXMPOR9LE
Processing worker A2L2DIY8QMSL8V
Processing worker A3H6KECCE83132
Processing worker A3NXP1GPRJIZ0A
Processing worker A39D2O58VQ5GMO
Processing worker AE4P5KVUE9HST
Processing worker A2VZGR99TJC2BZ
Processing worker A2JFL3H254VGZ7
Processing worker A2QPMJ4GADLUW2
Processing worker A11F3MA5FWH6SJ
Processing worker AS4NIEQJWCG3M
Processing worker A3E8GX5T7ZERPV
Processing worker A3K3VRE4AL5GC0
Processing worker 

In [7]:
# Sanity check: the loop appends one result per launched row, so this should
# equal the number of rows in stage2_launched_df.
len(results)

3242

In [8]:
# (Disabled) Replace the timezone-aware datetimes in notify_time with naive datetimes
#for cur_result in results:
#    #cur_result['notify_time_naive'] = cur_result['notify_time'].replace(tzinfo=None)
#    print(type(cur_result['notify_time']))

In [9]:
# Peek at the last result record to check its keys and value types
results[-1]

{'worker_id': 'A2FG1X6DQT1MSK',
 'notify_success': True,
 'notify_time': datetime.datetime(2022, 2, 2, 16, 32, 14, 47705, tzinfo=<DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD>)}

In [10]:
# One row per worker; the columns come from the keys of the per-worker result
# dicts built in the loop (worker_id, notify_success, notify_time).
result_df = pd.DataFrame(results)

In [11]:
# Show only the workers whose notification did not succeed
failed_mask = ~result_df['notify_success']
result_df.loc[failed_mask]

Unnamed: 0,worker_id,notify_success,notify_time
151,A3F3DEDK8V0MIY,False,2022-02-02 16:21:44.519354-08:00
188,A2WO40ZF0ISEQS,False,2022-02-02 16:21:45.613459-08:00
202,A234QZB4MU0QZF,False,2022-02-02 16:21:46.316852-08:00
205,A2YP03GHMUNAHQ,False,2022-02-02 16:21:46.891295-08:00
726,A1ZQS6LHJ8PDBK,False,2022-02-02 16:21:47.911979-08:00
...,...,...,...
2973,AU3Y03566DKU4,False,2022-02-02 16:29:01.154911-08:00
2981,A2B3B0T0V8RTH8,False,2022-02-02 16:29:05.000791-08:00
3010,A217IK0RA64CQO,False,2022-02-02 16:29:15.166156-08:00
3024,A3R3UB4E8U9J5O,False,2022-02-02 16:29:20.552203-08:00


In [12]:
# Drop the notify_time column before export. Rebinding the name (rather than
# inplace=True) together with errors='ignore' keeps this cell idempotent:
# re-running it no longer raises KeyError once the column is already gone.
result_df = result_df.drop(columns=['notify_time'], errors='ignore')

In [13]:
# Tally of successful vs. failed notifications
result_df['notify_success'].value_counts()

True     3147
False      95
Name: notify_success, dtype: int64

In [14]:
# Export the per-worker results (notify_time was dropped in an earlier cell) to a Stata .dta file.
# NOTE(review): the file name says "suspended_workers_20220121" while the contents are
# notification results -- confirm this is the intended output path.
result_df.to_stata("../results_2stage/suspended_workers_20220121.dta")