In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import datetime

import pandas as pd
import numpy as np
import boto3
import joblib

from mtclient import MTClient
import mtglobals

In [3]:
# Create the MTurk client used for every notify call below.
# NOTE(review): the output recorded under this cell reports the LIVE requester
# API, so running the notification loop messages real workers.
client = MTClient()

Loading environment variables from .env
Using LIVE MTurk requester API
Your account balance is 4644.17


### (a) Load the .csv containing all launch info

In [4]:
# Read the stage-2 launch records from the path configured in mtglobals,
# then display how many rows (launched workers) were loaded.
launched_csv_fpath = mtglobals.stage2_launched_fpath
stage2_launched_df = pd.read_csv(launched_csv_fpath)
stage2_launched_df.shape[0]

290

### (b) Send notifications for all not-already-notified workers

(this is checked based on whether or not the `notified_time` column is filled for
that worker)

In [5]:
# Notify every launched worker who has not been notified yet.
#
# Builds `results`: one dict per row of stage2_launched_df with the keys
#   'worker_id'      - the worker's id
#   'notify_success' - True if the worker is (or already was) notified
#   'notify_time'    - when they were notified; pd.NaT if the attempt failed
results = []
for _, cur_row in stage2_launched_df.iterrows():
    cur_worker_id = cur_row['worker_id']
    cur_result = {'worker_id': cur_worker_id}
    print(f"Processing worker {cur_worker_id}")

    # A non-null notified_time means this worker was notified on an earlier
    # run: carry the stored time forward and do not message them again.
    notified_time = cur_row['notified_time']
    if not pd.isna(notified_time):
        cur_result['notify_success'] = True
        cur_result['notify_time'] = notified_time
        results.append(cur_result)
        continue

    # Otherwise, send the notification now.
    custom_msg = mtglobals.gen_custom_msg(cur_row['hit_type_id'])
    notify_success = False
    notify_error = None
    try:
        client.notify_worker(cur_worker_id, custom_msg)
        print("Successfully notified")
        notify_success = True
    except Exception as e:
        # Best-effort: one failing worker must not abort the whole batch.
        # Keep the exception so the log records what actually went wrong.
        notify_error = e
        print(f"Error with notify_worker: {e}")

    # Only stamp a time when the notification actually went out. A null time
    # is how the launched .csv marks "not notified", so failures stay null.
    notified_time = datetime.datetime.now(mtglobals.local_tz) if notify_success else pd.NaT
    cur_result['notify_success'] = notify_success
    cur_result['notify_time'] = notified_time
    results.append(cur_result)

    # Log the results of this loop iteration
    if notify_success:
        mtglobals.write_log(f"Notified worker {cur_worker_id}")
        # And update the .csv of launched workers so a re-run skips this worker
        launched_fpath = mtglobals.update_launched_worker(cur_worker_id, notified_time)
        print(f"{launched_fpath} successfully updated")
    else:
        # Report the real exception rather than assuming a RequestError.
        mtglobals.write_log(f"Failed to notify {cur_worker_id}: {notify_error!r}")
    print("=====[ end loop iteration ]=====")


Processing worker AMWIBULQ4S8K3
Processing worker A85FXHMLP0978
Processing worker A3VPD34C23PQTQ
Processing worker A1FMK21DN5YUDC
Processing worker A6O1VFV32LBS8
Processing worker AXTDEGFFQ0ZE7
Processing worker A37AWU2VH1ZIXX
Processing worker A348DIWEUQGAZA
Processing worker A3F966OJMV2DUB
Processing worker AUKALHTHFF9CY
Processing worker A3M9ZVPFDH5BGJ
Processing worker A1BWB7YNYB6SIK
Processing worker AGZMOKYKPXN8X
Processing worker A3PYT74DUPKM9R
Processing worker A2IC2PVR57RY5C
Processing worker AVEV8X13Y4JSR
Processing worker A2JIYLV1WGFURS
Processing worker AY52II1OC5RZ1
Processing worker A165NK27A4EMG4
Processing worker A23JDIW9TXD4ZH
Processing worker A3OO6TSHQEFF5Y
Processing worker A3RBO5QSANY9W0
Processing worker A2Q7O0E83L1W74
Processing worker ACXP8KHFX06KR
Processing worker A1BDMG2RNPP0EZ
Processing worker A36CN4SSP6UN3D
Processing worker AG6UL22QLCKOG
Processing worker A3HSFRJPM3E89Z
Processing worker A149YZJBFRDWBJ
Processing worker A1T5FB9SJMCDG1
Processing worker A3

In [6]:
# Check how many workers we have total.
# The loop above appends exactly one record per row of stage2_launched_df
# (on both the already-notified and the newly-notified paths), so this count
# should equal len(stage2_launched_df).
len(results)

290

In [7]:
# Replace the dts with naive dts
# NOTE: everything below is disabled scratch code for stripping tzinfo from
# notify_time; this cell currently executes nothing.
#for cur_result in results:
#    #cur_result['notify_time_naive'] = cur_result['notify_time'].replace(tzinfo=None)
#    print(type(cur_result['notify_time']))

In [8]:
# Spot-check the structure of a result record by displaying the last one
results[-1]

{'worker_id': 'AQQZ96P85B3B',
 'notify_success': True,
 'notify_time': datetime.datetime(2022, 11, 21, 15, 21, 53, 616790, tzinfo=<DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD>)}

### (c) Create a dataset recording notification success/failure and notification time for each worker

In [9]:
# One row per worker; the columns come from the record keys built in the
# notification loop: worker_id, notify_success, notify_time.
result_df = pd.DataFrame(results)

In [10]:
# Show only the workers whose notification did not go through
# (an empty frame means every worker was notified).
failed_mask = ~result_df['notify_success']
result_df.loc[failed_mask]

Unnamed: 0,worker_id,notify_success,notify_time


In [11]:
# Drop the per-worker timestamp before export, leaving worker_id and
# notify_success. Rebinding with errors='ignore' (instead of inplace=True)
# keeps this cell safe to re-run: a second run is a no-op rather than a
# KeyError on the already-removed column.
result_df = result_df.drop(columns=['notify_time'], errors='ignore')

In [12]:
# Tally how many workers were notified successfully vs. not
result_df['notify_success'].value_counts()

True    190
Name: notify_success, dtype: int64

### (d) Export notification error data to Stata .dta format (to exclude suspended workers, if need be)

In [13]:
# Write the per-worker notification results to a timestamped Stata file,
# so each export gets its own file name.
timestamp = mtglobals.gen_timestamp()
dta_fpath = f"../results_2stage/suspended_workers_{timestamp}.dta"
result_df.to_stata(dta_fpath)