References:

- https://api.qualtrics.com/docs
- https://api.qualtrics.com/reference
- https://researcher-help.prolific.co/hc/en-gb/articles/360009220993-Recording-participant-IDs-in-your-study-survey
- https://blog.mturk.com/getting-great-survey-results-from-mturk-and-qualtrics-be1704ff9786
- https://www.cloudresearch.com/resources/blog/workerid-and-all-mturk-fields-sent-to-qualtrics/
- https://medium.com/@jlroo/integrating-a-qualtrics-survey-with-amazon-mechanical-turk-561cdeebe239
- https://blog.mturk.com/tutorial-understanding-requirements-and-qualifications-99a26069fba2

In [1]:
import socket
import pandas as pd
from timeit import default_timer as timer
import os
import requests
import json
import numpy as np
import pyarrow.parquet as pq
from glob import glob

print('Hostname:', socket.gethostname())
if 'samuel' in socket.gethostname().lower():
    path_to_data='../../data'
else:
    path_to_data='/scratch/spf248/twitter/data'
    
model='BERT'
iteration='0'

# Setting user Parameters
with open(os.path.join(path_to_data,'keys/qualtrics/apiToken'),'r') as f:
    apiToken = eval(f.readline())
dataCenter = "nyu.ca1"
SurveyName = "labor-market-tweets"
SurveySourceID = "SV_4VMcMBs2MXU1tdz"
QuestionTemplateID = "QID1"
QuestionConsentID = "QID2"
QuestionWorkerID = "QID3"
QuestionCompletionID = "QID4"
QuestionDescriptionID = "QID5"

country_code = "US"
print(country_code)

block_size=50
print('# tweets per worker:', block_size)

max_tweets=2400
print('Max. # tweets:', max_tweets)

Hostname: Samuels-MBP.home
US
# tweets per worker: 50
Max. # tweets: 2400


In [2]:
def get_env_var(varname,default):
    
    if os.environ.get(varname) != None:
        var = int(os.environ.get(varname))
        print(varname,':', var)
    else:
        var = default
        print(varname,':', var,'(Default)')
    return var

# Choose Number of Nodes To Distribute Credentials: e.g. jobarray=0-4, cpu_per_task=20, credentials = 90 (<100)
SLURM_JOB_ID            = get_env_var('SLURM_JOB_ID',0)
SLURM_ARRAY_TASK_ID     = get_env_var('SLURM_ARRAY_TASK_ID',0)
SLURM_ARRAY_TASK_COUNT  = get_env_var('SLURM_ARRAY_TASK_COUNT',1)

SLURM_JOB_ID : 0 (Default)
SLURM_ARRAY_TASK_ID : 0 (Default)
SLURM_ARRAY_TASK_COUNT : 1 (Default)


# Import Sample

In [3]:
checks={'US':['I lost my job today.','I got hired today.']}[country_code]
print('Attention Checks:\n')
print('\n'.join(checks))

Attention Checks:

I lost my job today.
I got hired today.


In [4]:
tweets=pq.ParquetDataset(glob(os.path.join(path_to_data,'classification',country_code,'labeling',model,iteration,'sample','*.parquet'))).read().to_pandas()
tweets=tweets.drop_duplicates('tweet_id')
tweets.tweet_id=tweets.tweet_id.astype(str)
tweets=np.array_split(tweets,SLURM_ARRAY_TASK_COUNT)[SLURM_ARRAY_TASK_ID]
print('# tweets:', tweets.shape[0])

# tweets: 9800


In [15]:
ids_labeled=list(pd.read_pickle(os.path.join(path_to_data,'classification',country_code,'labeling',model,iteration,'labels.pkl'))['tweet_id'].values)
print('# labeled tweets:', len(ids_labeled))

tweets=tweets[-tweets.tweet_id.isin(ids_labeled)].reset_index(drop=True)
print('# remaining tweets:', tweets.shape[0])

if tweets.shape[0]>=max_tweets:
    tweets=tweets.sample(n=max_tweets,random_state=0)
    print('# remaining tweets after sampling:', tweets.shape[0])

# labeled tweets: 9565
# remaining tweets: 235


In [16]:
n_workers=(tweets.shape[0]//(block_size-len(checks)))*2
print('# workers (2 workers per tweets + 2 attention checks per worker):', n_workers)

# workers (2 workers per tweets + 2 attention checks per worker): 8


In [17]:
# Create two version of each tweet
tweets_0=tweets.sample(frac=1,random_state=0).set_index('tweet_id')['text']
tweets_0.index=tweets_0.index.map(lambda x:x+'-v0')
tweets_1=tweets.sample(frac=1,random_state=1).set_index('tweet_id')['text']
tweets_1.index=tweets_1.index.map(lambda x:x+'-v1')

# Split tweets into chunks with two labels per tweet
chunks=np.array_split(tweets_0,n_workers//2)+np.array_split(tweets_1,n_workers//2)

# Add attention checks
chunks=[chunk.append(pd.Series({
'check-0-worker-'+str(i):checks[0],
'check-1-worker-'+str(i):checks[1]})).sample(frac=1,random_state=0) 
for i,chunk in enumerate(chunks)]

print('# Tweets by chunk:', np.mean([chunk.shape[0] for chunk in chunks]))
print('# Tweets to label:', sum([chunk.shape[0] for chunk in chunks]))

# Tweets by chunk: 60.75
# Tweets to label: 486


# Library

In [None]:
def create_survey(SurveyName="MySurvey", apiToken=apiToken, dataCenter=dataCenter):

    baseUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions".format(
        dataCenter)
    
    headers = {
        "x-api-token": apiToken,
        "content-type": "application/json",
        "Accept": "application/json"
    }

    data = {
        "SurveyName": SurveyName,
        "Language": "EN",
        "ProjectCategory": "CORE"
    }

    response = requests.post(baseUrl, json=data, headers=headers)
    
    if json.loads(response.text)["meta"]["httpStatus"] != '200 - OK':
        print(json.loads(response.text)["meta"]["httpStatus"])
    
    SurveyID = json.loads(response.text)['result']['SurveyID']
    DefaultBlockID = json.loads(response.text)['result']['DefaultBlockID']

    return SurveyID, DefaultBlockID

In [None]:
def get_options(SurveyID, apiToken=apiToken, dataCenter=dataCenter):

    baseUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/options".format(
        dataCenter, SurveyID)
    
    headers = {
        "x-api-token": apiToken,
    }

    response = requests.get(baseUrl, headers=headers)
    
    if json.loads(response.text)["meta"]["httpStatus"] != '200 - OK':
        print(json.loads(response.text)["meta"]["httpStatus"])
        
    return json.loads(response.text)["result"]

In [None]:
def update_options(SurveyOptions, SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    
    baseUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/options".format(
        dataCenter, SurveyID)
    
    headers = {
    'accept': "application/json",
    "content-type": "application/json",
    "x-api-token": apiToken,
    }

    response = requests.put(baseUrl, json=SurveyOptions, headers=headers)
    
    if json.loads(response.text)["meta"]["httpStatus"] != '200 - OK':
        print(json.loads(response.text)["meta"]["httpStatus"])

In [None]:
def get_flow(SurveyID, apiToken=apiToken, dataCenter=dataCenter):

    baseUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/flow".format(
        dataCenter, SurveyID)
    
    headers = {
        "x-api-token": apiToken,
    }

    response = requests.get(baseUrl, headers=headers)
    
    if json.loads(response.text)["meta"]["httpStatus"] != '200 - OK':
        print(json.loads(response.text)["meta"]["httpStatus"])
        
    return json.loads(response.text)["result"]

In [None]:
def update_flow(SurveyFlow, SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    
    baseUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/flow".format(
        dataCenter, SurveyID)
    
    headers = {
    'accept': "application/json",
    "content-type": "application/json",
    "x-api-token": apiToken,
    }

    response = requests.put(baseUrl, json=SurveyFlow, headers=headers)
    
    if json.loads(response.text)["meta"]["httpStatus"] != '200 - OK':
        print(json.loads(response.text)["meta"]["httpStatus"])

In [None]:
def create_block(BlockName, SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    
    baseUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/blocks".format(
        dataCenter, SurveyID)

    headers = {
       'accept': "application/json",
       'content-type': "application/json",
       "x-api-token": apiToken,
    }

    BlockTemplate = {
    "Type": "Standard",
    "Description": BlockName,
    }
    
    response = requests.post(baseUrl, json=BlockTemplate, headers=headers)
    
    if json.loads(response.text)["meta"]["httpStatus"] != '200 - OK':
        print(json.loads(response.text)["meta"]["httpStatus"])
    
    BlockID = json.loads(response.text)['result']['BlockID']
    FlowID = json.loads(response.text)['result']['FlowID']
    
    return BlockID, FlowID

In [None]:
def get_block(BlockID, SurveyID, apiToken=apiToken, dataCenter=dataCenter):

    baseUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/blocks/{2}".format(
        dataCenter, SurveyID, BlockID)
    
    headers = {
        "x-api-token": apiToken,
    }

    response = requests.get(baseUrl, headers=headers)
    
    if json.loads(response.text)["meta"]["httpStatus"] != '200 - OK':
        print(json.loads(response.text)["meta"]["httpStatus"])
        
    return json.loads(response.text)["result"]

In [None]:
def update_block(BlockData, BlockID, SurveyID, apiToken=apiToken, dataCenter=dataCenter):

    baseUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/blocks/{2}".format(
        dataCenter, SurveyID, BlockID)

    headers = {
       'accept': "application/json",
       'content-type': "application/json",
       "x-api-token": apiToken,
    }

    response = requests.put(baseUrl, json=BlockData, headers=headers)
    
    if json.loads(response.text)["meta"]["httpStatus"] != '200 - OK':
        print(json.loads(response.text)["meta"]["httpStatus"])

In [None]:
def create_question(QuestionData, SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    
    baseUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/questions".format(
        dataCenter, SurveyID)

    headers = {
       'accept': "application/json",
       'content-type': "application/json",
       "x-api-token": apiToken,
    }

    response = requests.post(baseUrl, json=QuestionData, headers=headers)
    
    if json.loads(response.text)["meta"]["httpStatus"] != '200 - OK':
        print(json.loads(response.text)["meta"]["httpStatus"])
    
    return json.loads(response.text)['result']['QuestionID']

In [None]:
def get_question(QuestionID, SurveyID, apiToken=apiToken, dataCenter=dataCenter):

    baseUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/questions/{2}".format(
        dataCenter, SurveyID, QuestionID)
    
    headers = {
        "x-api-token": apiToken,
    }

    response = requests.get(baseUrl, headers=headers)
    
    if json.loads(response.text)["meta"]["httpStatus"] != '200 - OK':
        print(json.loads(response.text)["meta"]["httpStatus"])
        
    return json.loads(response.text)["result"]

In [None]:
def update_question(QuestionData, QuestionID, SurveyID, apiToken=apiToken, dataCenter=dataCenter):

    baseUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/questions/{2}".format(
        dataCenter, SurveyID, QuestionID)

    headers = {
       'accept': "application/json",
       'content-type': "application/json",
       "x-api-token": apiToken,
    }

    response = requests.put(baseUrl, json=QuestionData, headers=headers)
    
    if json.loads(response.text)["meta"]["httpStatus"] != '200 - OK':
        print(json.loads(response.text)["meta"]["httpStatus"])

# Create Survey

In [None]:
print('Create New Survey')
start = timer()

SurveyID, BlockID = create_survey(SurveyName)

print("Done in", round(timer()-start), "sec")

In [None]:
print('Fetch Template Question')
QuestionTemplateData = get_question(QuestionTemplateID, SurveySourceID)

In [None]:
start = timer()
print("Create Questions")

for i,chunk in enumerate(chunks):
    
    BlockData = get_block(BlockID, SurveyID)
    BlockData['Type'] = 'Standard'
    update_block(BlockData, BlockID, SurveyID)

    print('Worker', i+1)
    BlockID, FlowID = create_block("Worker "+str(i+1), SurveyID)

    BlockData = get_block(BlockID, SurveyID)
    BlockData['Type'] = 'Default'
    update_block(BlockData, BlockID, SurveyID)
    
    for (tweet_id, tweet) in chunk.iteritems():
    
        text='Please answer the following questions about the following tweet:\n\n"'+tweet+'""'
        QuestionID = create_question(QuestionTemplateData, SurveyID)
        QuestionData = get_question(QuestionID, SurveyID)
        QuestionData['QuestionText'] = tweet
        QuestionData['QuestionDescription'] = tweet
        QuestionData['QuestionText_Unsafe'] = tweet
        QuestionData['DataExportTag'] = 'ID_'+tweet_id
        update_question(QuestionData, QuestionID, SurveyID)
    
    BlockData = get_block(BlockID, SurveyID)
    BlockData['Options'] = {
    "BlockLocking": "false",
    "RandomizeQuestions": "false",
    "BlockVisibility": "Collapsed",
    }
    update_block(BlockData, BlockID, SurveyID)

print("Done in", round(timer()-start), "sec")

In [None]:
BlockData = get_block(BlockID, SurveyID)
BlockData['Type'] = 'Standard'
update_block(BlockData, BlockID, SurveyID)

print('Create Completion Block')
BlockID, FlowID = create_block("Completion", SurveyID)

BlockData = get_block(BlockID, SurveyID)
BlockData['Type'] = 'Default'
update_block(BlockData, BlockID, SurveyID)

print('Create Completion Question')
QuestionCompletionData = get_question(QuestionCompletionID, SurveySourceID)
QuestionID = create_question(QuestionCompletionData, SurveyID)
QuestionData = get_question(QuestionID, SurveyID)
QuestionData['DataExportTag'] = 'QIDCompletion'
update_question(QuestionData, QuestionID, SurveyID)

print('Close Block')
BlockData = get_block(BlockID, SurveyID)
BlockData['Options'] = {
"BlockLocking": "false",
"RandomizeQuestions": "false",
"BlockVisibility": "Collapsed",
}
update_block(BlockData, BlockID, SurveyID)

In [None]:
SurveyFlow = get_flow(SurveyID)

print('Randomize Survey Flow')
# Create a Randomizer Drawing One Block At Random Except Intro And Completion Block
Randomizer = {
'Type': 'BlockRandomizer',
'FlowID': 'FL_'+str(max([int(el['FlowID'].split('_')[1]) for el in SurveyFlow['Flow']])+1),
'SubSet': '1',
'EvenPresentation': True,
'Flow':SurveyFlow['Flow'][1:-1]}

SurveyFlow['Flow'] = [
SurveyFlow['Flow'][0],
Randomizer,
SurveyFlow['Flow'][-1],
]

SurveyFlow['Properties']['Count']+=1
SurveyFlow['Properties'].update({'RemovedFieldsets': []})

print('Embbeded Worker ID')
EmbeddedData = {'Type': 'EmbeddedData',
 'FlowID': 'FL_'+str(max([int(el['FlowID'].split('_')[1]) for el in SurveyFlow['Flow']])+1),
 'EmbeddedData': [{'Description': 'Random ID',
   'Type': 'Custom',
   'Field': 'Random ID',
   'VariableType': 'String',
   'DataVisibility': [],
   'AnalyzeText': False,
   'Value': '${rand://int/1000000000:9999999999}'}]}

SurveyFlow['Flow'] = [EmbeddedData]+SurveyFlow['Flow']
SurveyFlow['Properties']['Count']+=1

update_flow(SurveyFlow, SurveyID)

In [None]:
# Switch Default Block From Current ...
BlockData = get_block(BlockID, SurveyID)
BlockData['Type'] = 'Standard'
update_block(BlockData, BlockID, SurveyID)

# ... to Intro
BlockID = SurveyFlow['Flow'][1]['ID']
BlockData = get_block(BlockID, SurveyID)
BlockData['Type'] = 'Default'
update_block(BlockData, BlockID, SurveyID)

print('Add Consent Question')
QuestionConsentData = get_question(QuestionConsentID, SurveySourceID)
QuestionID = create_question(QuestionConsentData, SurveyID)
QuestionData = get_question(QuestionID, SurveyID)
QuestionData['DataExportTag'] = 'QIDConsent'
update_question(QuestionData, QuestionID, SurveyID)

print('Add Worker ID Question')
QuestionWorkerData = get_question(QuestionWorkerID, SurveySourceID)
QuestionID = create_question(QuestionWorkerData, SurveyID)
QuestionData = get_question(QuestionID, SurveyID)
QuestionData['DataExportTag'] = 'QIDWorker'
update_question(QuestionData, QuestionID, SurveyID)

print('Add Description Question')
QuestionDescriptionData = get_question(QuestionDescriptionID, SurveySourceID)
QuestionID = create_question(QuestionDescriptionData, SurveyID)
QuestionData = get_question(QuestionID, SurveyID)
QuestionData['DataExportTag'] = 'QIDDescription'
update_question(QuestionData, QuestionID, SurveyID)

print('Close Intro Block')
BlockData = get_block(BlockID, SurveyID)
BlockData['Options'] = {
"BlockLocking": "false",
"RandomizeQuestions": "false",
"BlockVisibility": "Collapsed",
}
BlockData['Description'] = 'Intro'
update_block(BlockData, BlockID, SurveyID)

In [None]:
print('Update Survey Options')

SurveyOptions = get_options(SurveyID)

SurveyOptions.update({
 'BackButton': 'false',
 'SaveAndContinue': 'true',
 'SurveyProtection': 'PublicSurvey',
 'BallotBoxStuffingPrevention': 'true',
 'NoIndex': 'Yes',
 'SecureResponseFiles': 'true',
 'SurveyExpiration': None,
 'SurveyTermination': 'DefaultMessage',
 'Header': '',
 'Footer': '',
 'ProgressBarDisplay': 'None',
 'PartialData': '+3 days',
 'PreviousButton': ' ← ',
 'NextButton': ' → ',
 'SkinLibrary': 'nyu',
 'SkinType': 'templated',
 'Skin': {'brandingId': None,
  'templateId': '*base',
  'overrides': {'contrast': 0.3, 'questionsContainer': {'on': True}}},
 'NewScoring': 1,
 'CustomStyles': [],
 'QuestionsPerPage': '1',
 'PageTransition': 'fade',
 'EOSMessage': '',
 'ShowExportTags': 'false',
 'CollectGeoLocation': 'false',
 'SurveyTitle': 'Online Survey Software | Qualtrics Survey Solutions',
 'SurveyMetaDescription': 'Qualtrics sophisticated online survey software solutions make creating online surveys easy. Learn more about Research Suite and get a free account today.',
 'PasswordProtection': 'No',
 'AnonymizeResponse': 'No',
 'Password': '',
 'RefererCheck': 'No',
 'RefererURL': 'http://',
 'UseCustomSurveyLinkCompletedMessage': None,
 'SurveyLinkCompletedMessage': '',
 'SurveyLinkCompletedMessageLibrary': '',
 'ResponseSummary': 'No',
 'EOSMessageLibrary': '',
 'EmailThankYou': 'false',
 'ThankYouEmailMessageLibrary': None,
 'ThankYouEmailMessage': None,
 'ValidateMessage': 'false',
 'ValidationMessageLibrary': None,
 'InactiveSurvey': 'DefaultMessage',
 'PartialDataCloseAfter': 'LastActivity',
 'ActiveResponseSet': None,
 'InactiveMessageLibrary': '',
 'InactiveMessage': '',
 'AvailableLanguages': {'EN': []},
 'SurveyLanguage': 'EN',
 'SurveyStartDate': None,
 'SurveyExpirationDate': None})

update_options(SurveyOptions, SurveyID)