Notebook to facilitate the management of Prolific studies

By Warren Woodrich Pettine, M.D. Last updated 2023-11-16

In [1]:
import os
import numpy as np
import pandas as pd
import dill as pickle
import requests
from datetime import datetime, timedelta

from prolificinterfacefunctions import getProlificRequestHeader, prolificEndpointURLS, studyParams, payProlificSubjects, \
    getSessionData, createReplicationStudy

from dbinterfacefunctions import connectDroneReconDB, getPriorProlificRecruitment, getAnalysisData

from analysisfunctions import calcBrierScore, calcCalibration, convertCalibration, cleanDataOnlyProlificSubjects

# Launch studies
## Create study parameters

In [None]:
# --- Study configuration ---------------------------------------------------
# Every value in this cell is forwarded to `studyParams` below; the resulting
# `params` object is what the "Post draft" cell sends as the request body.
prolific_ids = None

payment_participate = 4
payment_performance = 2
total_available_places = 1
internal_name = 'Drone Recon Development pilot 2'
exclude_prior_subjects = True
webapp_use = 'both'
description_type = 'both'

# HTML shown to participants. The payment amounts are interpolated so the text
# cannot drift from the values passed to `studyParams`.
# The original string ended with an unmatched '</p>' (no opening tag), which an
# HTML parser turns into an extra empty paragraph; it has been removed.
study_description = (
    '<p>Answer questions then play a game where you perform reconnaissance on different types of drones! You will:</p>'
    '<ul>'
    '<li>Answer a series of confidential questions (~5 min);</li>'
    '<li>Complete a tutorial that you must pass to move onto the main game (~5 min);</li>'
    '<li>Play the main game (~10-20 min).</li>'
    '</ul> '
    '<p>If you do well on the game, you will be invited back to play again.</p>'
    '<p>Payment:</p>'
    '<ul>'
    f'<li>${payment_participate} for active participation (what the hourly rate is based on);</li>'
    f'<li>${payment_performance} bonus for high performance.</li>'
    '</ul> '
)

# NOTE(review): estimated_completion_time is fixed at 20 while the description
# above adds up to roughly 20-30 minutes -- confirm the intended value/units.
params = studyParams(
    prolific_ids=prolific_ids,
    description_type=description_type,
    payment_participate=payment_participate,
    exclude_prior_subjects=exclude_prior_subjects,
    study_description=study_description,
    payment_performance=payment_performance,
    total_available_places=total_available_places,
    estimated_completion_time=20,
    internal_name=internal_name,
    webapp_use=webapp_use,
)

display('Params created')

## Post draft
Post the study to one's Prolific account

In [None]:
# Credentials: fill in your own Prolific API token before running this cell.
prolific_requester_token = None # ENTER YOUR PROLIFIC TOKEN HERE

# Endpoint that receives the study definition, and the request header built
# from the token above.
url_post_study_draft = 'https://api.prolific.co/api/v1/studies/'
header = getProlificRequestHeader(
    prolific_requester_token=prolific_requester_token,
)

# Send `params` (built in the "Create study parameters" cell) as the JSON body.
r_create_draft = requests.post(
    url_post_study_draft,
    headers=header,
    json=params,
)

# Show the HTTP response object, then the decoded JSON body as the cell output.
print(r_create_draft)
r_create_draft.json()

# Manage Studies
## Pay subjects
### Load subject data and calculate the adjusted reward

In [None]:
# --- Query window and study -------------------------------------------------
# These three values drive BOTH database queries in this cell. Previously the
# trial query used hard-coded dates and the session query was called with
# end_date=start_date, so the window declared here was effectively ignored.
start_date = '2023-08-06'
end_date = '2023-08-19'
external_study_id = 'put_study_id_here'

# Trial-level data for the window above.
data = getAnalysisData(
    cursor=None,
    start_date=start_date,
    end_date=end_date,
    external_source='prolific',
    task=None,
    size_bool=True,
    confidence_bool=True,
    session_completed_bool=True,
    sleep_bool=False,
)

data = cleanDataOnlyProlificSubjects(data)

# Get the external IDs, keeping only those longer than 22 characters.
# NOTE(review): presumably this drops placeholder/test IDs that are shorter
# than a real Prolific participant ID -- confirm the intended cutoff.
external_ids = data.external_id.unique()
idx = [len(external_id) > 22 for external_id in external_ids]
external_ids = external_ids[idx]

# One slot per subject for each summary metric.
n_ids = len(external_ids)
accuracy = np.zeros(n_ids)
brier_score = np.zeros(n_ids)
calibration = np.zeros(n_ids)
calibration_converted_score = np.zeros(n_ids)
accuracy_calibrationconverted_mean = np.zeros(n_ids)

for i, external_id in enumerate(external_ids):
    # Get the data for the current external ID
    data_subject = data.loc[data.external_id == external_id, :]
    # Only trials from the 'test' block contribute to the scores; select them once.
    test_trials = data_subject.loc[data_subject.block == 'test', :]

    accuracy[i] = test_trials.correct.mean()
    # NOTE(review): confidence is cast to int before scoring -- confirm it is
    # stored on an integer scale, otherwise this truncates.
    estimates_ = test_trials.confidence.to_numpy().astype(int)
    outcomes_ = test_trials.correct.to_numpy().astype(int)

    brier_score[i] = calcBrierScore(estimates_, outcomes_)
    calibration[i] = calcCalibration(estimates_, outcomes_)
    calibration_converted_score[i] = convertCalibration(estimates_, outcomes_)
    # Payment criterion used downstream: mean of accuracy and converted calibration.
    accuracy_calibrationconverted_mean[i] = (accuracy[i] + calibration_converted_score[i]) / 2

# Per-subject summary table (the raw `calibration` array is computed above but,
# as before, is not included as a column).
task_summaries_df = pd.DataFrame({
    'external_ID': external_ids,
    'accuracy': accuracy,
    'brier_score': brier_score,
    'calibration_converted_score': calibration_converted_score,
    'accuracy_calibrationconverted_mean': accuracy_calibrationconverted_mean,
})

# Session records for the same window and study. The right merge keeps every
# scored subject, even when no matching session row is found.
data_session = getSessionData(
    start_date=start_date,
    end_date=end_date,
    cursor=None,
    external_study_id=external_study_id,
    task=None,
)
data_session = pd.merge(data_session, task_summaries_df, on='external_ID', how='right')

### Issue payment

In [None]:
# --- Payment settings -------------------------------------------------------
# estimate_cost is forwarded to payProlificSubjects.
# NOTE(review): presumably True means "compute costs without paying" --
# confirm against payProlificSubjects before running with real funds.
estimate_cost = False
hit_id = 'put_study_id_here'
completion_payment=3.00
performance_payment=2.00
# Threshold on the per-subject mean of accuracy and converted calibration
# (the `accuracy_calibrationconverted_mean` column of `data_session`).
accuracy_calibrationconverted_mean_threshold = 0.8
prolific_requester_token = None # ENTER YOUR PROLIFIC TOKEN HERE

# `data_session` comes from the "Load subject data" cell above.
base_cost, bonus = payProlificSubjects(
    data_session=data_session,
    prolific_requester_token=prolific_requester_token,
    completion_payment=completion_payment,
    performance_payment=performance_payment,
    hit_id=hit_id,
    cursor=None,
    conn=None,
    skip_balance_check=True,
    assignment_statuses=['Submitted', 'Approved'],
    estimate_cost=estimate_cost,
    platform_fee_percentage=0.2,
    accuracy_calibrationconverted_mean_threshold=accuracy_calibrationconverted_mean_threshold,
)

# NOTE(review): the total assumes `base_cost + bonus` yields a sequence that
# `sum` can add up (e.g. two lists concatenated) -- confirm the return types.
print(f'Base subject = {base_cost[0]}, Prolific = {base_cost[1]}, Total = {sum(base_cost+bonus)}')

## Re-test study
Programmatically create a retest study

In [None]:
# --- Run options ------------------------------------------------------------
# All three flags are forwarded unchanged to createReplicationStudy.
get_estimate = False

drop_retest = False
drop_recruited = False

# --- Source study window ----------------------------------------------------
start_date = '2023-08-22'
end_date = '2023-09-08' # Of when they performed the initial test
start_date_recruitment = '2023-09-08' #Check for prior recruitment
external_study_id = 'put_study_id_here'

# --- Re-test study definition -----------------------------------------------
payment_participate = 3
payment_performance = 2
internal_name = 'Drone Re-test 2, pilot 1'
recruitment_note = 'This is the second retest of the drone recon study.'

# HTML shown to participants. The original string opened a <p> that was never
# closed and ended with an unmatched '</p>'; the first paragraph is now closed
# explicitly and the stray closing tag is removed. The visible text is unchanged.
study_description = (
    "<p>Come play Drone Recon again!  The agency needs you. There's a new batch of enemy drones, and we require our best agents.</p>"
    '<p>Payment:</p>'
    '<ul>'
    f'<li>${payment_participate} for active participation (what the hourly rate is based on);</li>'
    f'<li>${payment_performance} bonus for high performance.</li>'
    '</ul> '
)

# NOTE(review): asd_subjects_only is set here but is not passed to any call in
# this cell -- confirm whether it should restrict the eligible subjects.
asd_subjects_only = True
n_subjects = 234
webapp_use = 'task'
project_id = 'enter_project_id_here'
target_study_hits = [] # list of study hits from which to recruit. If empty, all hits are used.
completion_code = 'put_completion_code_here'
failed_attention_code = 'put_failed_attention_code_here'

# Subjects are eligible when they have a completed session in the source window.
data_session = getSessionData(
    start_date=start_date,
    end_date=end_date,
    cursor=None,
    external_study_id=external_study_id,
    task=None,
)
eligible_prolific_subject_ids = data_session[data_session.session_completed].external_ID.unique()

# connectDroneReconDB is called unconditionally, so every run of this cell
# requests a fresh cursor/connection pair (presumably a new DB connection).
cursor, conn = connectDroneReconDB()

study_id, external_ids = createReplicationStudy(
    start_date,
    end_date,
    target_study_hits=target_study_hits,
    recruitment_note=recruitment_note,
    task="category_metacog-v0",
    get_estimate=get_estimate,
    internal_name=internal_name,
    cursor=cursor,
    payment_participate=payment_participate,
    payment_performance=payment_performance,
    conn=conn,
    eligible_prolific_subject_ids=eligible_prolific_subject_ids,
    start_date_recruitment=start_date_recruitment,
    webapp_url='https://dronerecongame.azurewebsites.net',
    webapp_use=webapp_use,
    message_body=None,
    n_subjects=n_subjects,
    study_title="The agency needs you again for Drone Recon!",
    study_description=study_description,
    project_id=project_id,
    completion_code=completion_code,
    failed_attention_code=failed_attention_code,
    drop_retest=drop_retest,
    drop_recruited=drop_recruited,
)

print(f'Study ID is {study_id}')
print(len(external_ids))