References:

- https://api.qualtrics.com/docs
- https://api.qualtrics.com/reference
- https://researcher-help.prolific.co/hc/en-gb/articles/360009220993-Recording-participant-IDs-in-your-study-survey
- https://blog.mturk.com/getting-great-survey-results-from-mturk-and-qualtrics-be1704ff9786
- https://www.cloudresearch.com/resources/blog/workerid-and-all-mturk-fields-sent-to-qualtrics/
- https://medium.com/@jlroo/integrating-a-qualtrics-survey-with-amazon-mechanical-turk-561cdeebe239
- https://blog.mturk.com/tutorial-understanding-requirements-and-qualifications-99a26069fba2

In [2]:
import ast
import json
import os
import socket
from glob import glob
from timeit import default_timer as timer

import numpy as np
import pandas as pd
import pyarrow.parquet as pq
import requests

# Choose the data root from the host name: a relative path on the machine whose
# name contains 'samuel', the scratch directory everywhere else.
print('Hostname:', socket.gethostname())
if 'samuel' in socket.gethostname().lower():
    path_to_data='../../data'
else:
    path_to_data='/scratch/spf248/twitter/data'

# Setting user Parameters
# The first line of the key file is parsed as a Python literal (e.g. a quoted
# string). ast.literal_eval accepts literals only, so the contents of the file
# can never be executed as code the way a plain eval() would allow.
with open(os.path.join(path_to_data,'keys/qualtrics/apiToken'),'r') as f:
    apiToken = ast.literal_eval(f.readline().strip())
dataCenter = "nyu.ca1"
SurveyName = "labor-market-tweets"
# Survey holding the template questions that are copied into the new survey,
# followed by the IDs of those questions inside it.
SurveySourceID = "SV_4VMcMBs2MXU1tdz"
QuestionTemplateID = "QID1"
QuestionConsentID = "QID2"
QuestionWorkerID = "QID3"
QuestionCompletionID = "QID4"
QuestionDescriptionID = "QID5"

country_code = "US"
n_workers = 20 # Number of workers
block_size = 50 # Number of tweets per worker
print(country_code)
print('# n_workers:', n_workers)
print('block_size:', block_size)

Hostname: Samuels-MBP
US
# n_workers: 20
block_size: 50


# Import Sample

In [3]:
# Attention-check sentences available for each country code; only the list for
# the configured country is kept.
attention_checks_by_country = {
    'US': ['I lost my job today.', 'I got hired today.'],
}
checks = attention_checks_by_country[country_code]
print('Attention Checks:\n', '\n'.join(checks), sep='\n')

Attention Checks:

I lost my job today.
I got hired today.


In [4]:
# Each worker block holds block_size items, len(checks) of which are attention
# checks; every tweet is shown twice, hence the division by two.
labels_per_worker = block_size - len(checks)
n_tweets = n_workers * labels_per_worker // 2
print('# Tweets (2 workers per tweets + 2 attention checks):', n_tweets)

# Read every parquet file of the labeling folder into a single frame.
parquet_files = glob(
    os.path.join(path_to_data, 'classification', country_code, 'labeling', '*.parquet')
)
tweets = pq.ParquetDataset(parquet_files).read().to_pandas()

# Draw a seeded subsample and report how many distinct tweet ids it contains.
tweets = tweets.sample(n=n_tweets, random_state=0)
print('# Unique Tweets:', len(tweets.drop_duplicates('tweet_id')))

# Tweets (2 workers per tweets + 2 attention checks): 480
# Unique Tweets: 480


In [5]:
# Distinct values of the 'keyword' column in the sample, in sorted order.
keywords = sorted(tweets['keyword'].unique())
print('Keywords:\n', '\n'.join(keywords), sep='\n')

Keywords:

fired
hired
job
laid_off
position
quit
random
unemployed
work


In [6]:
# Distinct values of the 'target' column in the sample, in sorted order.
targets = sorted(tweets['target'].unique())
print('Targets:\n', '\n'.join(targets), sep='\n')

Targets:

random
target_anyone_hiring
target_here_is_a_job_opportunity_you_might_be_interested_in
target_i_am_currently_not_working
target_i_am_searching_for_a_new_position
target_i_got_hired_today
target_i_lost_my_job_today
target_i_recently_started_working_at_my_new_job
target_i_was_fired_earlier_this_week
target_looking_for_a_new_position
target_now_i_am_unemployed


In [31]:
# Build two independently shuffled copies of the sample (text indexed by tweet
# id). The copy number doubles as the shuffle seed and is appended to every id
# as '-v0' / '-v1' so the two labels of a tweet stay distinguishable.
shuffled_copies = []
for version in (0, 1):
    shuffled = tweets.sample(frac=1, random_state=version).set_index('tweet_id')['text']
    shuffled.index = shuffled.index.map(
        lambda tweet_id, v=version: tweet_id + '-v' + str(v)
    )
    shuffled_copies.append(shuffled)
tweets_0, tweets_1 = shuffled_copies

# Split tweets into chunks with two labels per tweet
chunks = np.array_split(pd.concat([tweets_0, tweets_1]), n_workers)

In [32]:
# Add Attention Checks
# Add Attention Checks
# Each worker chunk receives both attention checks under worker-specific ids
# and is then shuffled with a fixed seed. pd.concat is used to extend the chunk
# because Series.append was removed in pandas 2.0; the result is the same.
tweets_to_label = pd.concat([
    pd.concat([
        chunk,
        pd.Series({
            'check-0-worker-'+str(i): checks[0],
            'check-1-worker-'+str(i): checks[1],
        }),
    ]).sample(frac=1, random_state=0)
    for i, chunk in enumerate(chunks)
])

In [33]:
# Total number of items (tweets plus attention checks) across all chunks.
print('# Tweets to label:', len(tweets_to_label))

# Tweets to label: 1000


# Library

In [8]:
def create_survey(SurveyName="MySurvey", apiToken=apiToken, dataCenter=dataCenter):
    """Create a new survey definition and return its identifiers.

    POSTs the survey name, language "EN" and project category "CORE" to the
    ``survey-definitions`` endpoint of the given data center. When the reply's
    ``meta.httpStatus`` differs from ``'200 - OK'`` the status is printed; the
    ``result`` section is read in either case.

    Args:
        SurveyName: Name sent as ``SurveyName`` in the request body.
        apiToken: Value sent in the ``x-api-token`` header.
        dataCenter: Host prefix used to build the endpoint URL.

    Returns:
        Tuple ``(SurveyID, DefaultBlockID)`` read from the reply's ``result``.
    """
    endpoint = "https://{0}.qualtrics.com/API/v3/survey-definitions".format(
        dataCenter)
    request_headers = {
        "x-api-token": apiToken,
        "content-type": "application/json",
        "Accept": "application/json",
    }
    survey_spec = {
        "SurveyName": SurveyName,
        "Language": "EN",
        "ProjectCategory": "CORE",
    }

    # Decode the reply body once and reuse it for the status and the result.
    reply = json.loads(
        requests.post(endpoint, json=survey_spec, headers=request_headers).text
    )
    status = reply["meta"]["httpStatus"]
    if status != '200 - OK':
        print(status)

    created = reply['result']
    return created['SurveyID'], created['DefaultBlockID']

In [9]:
def get_options(SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    """Fetch the options of a survey definition.

    Sends a GET to ``survey-definitions/<SurveyID>/options``. A reply whose
    ``meta.httpStatus`` is not ``'200 - OK'`` has its status printed.

    Args:
        SurveyID: Identifier of the survey to query.
        apiToken: Value sent in the ``x-api-token`` header.
        dataCenter: Host prefix used to build the endpoint URL.

    Returns:
        The ``result`` section of the decoded reply.
    """
    endpoint = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/options".format(
        dataCenter, SurveyID)
    request_headers = {"x-api-token": apiToken}

    # Decode the reply body once and reuse it for the status and the result.
    reply = json.loads(requests.get(endpoint, headers=request_headers).text)
    status = reply["meta"]["httpStatus"]
    if status != '200 - OK':
        print(status)

    return reply["result"]

In [10]:
def update_options(SurveyOptions, SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    """Replace the options of a survey definition.

    PUTs ``SurveyOptions`` as JSON to ``survey-definitions/<SurveyID>/options``
    and prints the reply's ``meta.httpStatus`` when it is not ``'200 - OK'``.

    Args:
        SurveyOptions: JSON-serialisable options payload.
        SurveyID: Identifier of the survey to modify.
        apiToken: Value sent in the ``x-api-token`` header.
        dataCenter: Host prefix used to build the endpoint URL.

    Returns:
        None.
    """
    endpoint = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/options".format(
        dataCenter, SurveyID)
    request_headers = {
        'accept': "application/json",
        "content-type": "application/json",
        "x-api-token": apiToken,
    }

    reply = json.loads(
        requests.put(endpoint, json=SurveyOptions, headers=request_headers).text
    )
    status = reply["meta"]["httpStatus"]
    if status != '200 - OK':
        print(status)

In [11]:
def get_flow(SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    """Fetch the flow of a survey definition.

    Sends a GET to ``survey-definitions/<SurveyID>/flow``. A reply whose
    ``meta.httpStatus`` is not ``'200 - OK'`` has its status printed.

    Args:
        SurveyID: Identifier of the survey to query.
        apiToken: Value sent in the ``x-api-token`` header.
        dataCenter: Host prefix used to build the endpoint URL.

    Returns:
        The ``result`` section of the decoded reply.
    """
    endpoint = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/flow".format(
        dataCenter, SurveyID)
    request_headers = {"x-api-token": apiToken}

    # Decode the reply body once and reuse it for the status and the result.
    reply = json.loads(requests.get(endpoint, headers=request_headers).text)
    status = reply["meta"]["httpStatus"]
    if status != '200 - OK':
        print(status)

    return reply["result"]

In [12]:
def update_flow(SurveyFlow, SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    """Replace the flow of a survey definition.

    PUTs ``SurveyFlow`` as JSON to ``survey-definitions/<SurveyID>/flow`` and
    prints the reply's ``meta.httpStatus`` when it is not ``'200 - OK'``.

    Args:
        SurveyFlow: JSON-serialisable flow payload.
        SurveyID: Identifier of the survey to modify.
        apiToken: Value sent in the ``x-api-token`` header.
        dataCenter: Host prefix used to build the endpoint URL.

    Returns:
        None.
    """
    endpoint = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/flow".format(
        dataCenter, SurveyID)
    request_headers = {
        'accept': "application/json",
        "content-type": "application/json",
        "x-api-token": apiToken,
    }

    reply = json.loads(
        requests.put(endpoint, json=SurveyFlow, headers=request_headers).text
    )
    status = reply["meta"]["httpStatus"]
    if status != '200 - OK':
        print(status)

In [13]:
def create_block(BlockName, SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    """Create a block in a survey and return its identifiers.

    POSTs a block of ``Type`` "Standard" whose ``Description`` is ``BlockName``
    to ``survey-definitions/<SurveyID>/blocks``. When the reply's
    ``meta.httpStatus`` differs from ``'200 - OK'`` the status is printed; the
    ``result`` section is read in either case.

    Args:
        BlockName: Text sent as the block's ``Description``.
        SurveyID: Identifier of the survey receiving the block.
        apiToken: Value sent in the ``x-api-token`` header.
        dataCenter: Host prefix used to build the endpoint URL.

    Returns:
        Tuple ``(BlockID, FlowID)`` read from the reply's ``result``.
    """
    endpoint = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/blocks".format(
        dataCenter, SurveyID)
    request_headers = {
        'accept': "application/json",
        'content-type': "application/json",
        "x-api-token": apiToken,
    }
    block_spec = {
        "Type": "Standard",
        "Description": BlockName,
    }

    # Decode the reply body once and reuse it for the status and the result.
    reply = json.loads(
        requests.post(endpoint, json=block_spec, headers=request_headers).text
    )
    status = reply["meta"]["httpStatus"]
    if status != '200 - OK':
        print(status)

    created = reply['result']
    return created['BlockID'], created['FlowID']

In [14]:
def get_block(BlockID, SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    """Fetch one block of a survey definition.

    Sends a GET to ``survey-definitions/<SurveyID>/blocks/<BlockID>``. A reply
    whose ``meta.httpStatus`` is not ``'200 - OK'`` has its status printed.

    Args:
        BlockID: Identifier of the block to fetch.
        SurveyID: Identifier of the survey owning the block.
        apiToken: Value sent in the ``x-api-token`` header.
        dataCenter: Host prefix used to build the endpoint URL.

    Returns:
        The ``result`` section of the decoded reply.
    """
    endpoint = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/blocks/{2}".format(
        dataCenter, SurveyID, BlockID)
    request_headers = {"x-api-token": apiToken}

    # Decode the reply body once and reuse it for the status and the result.
    reply = json.loads(requests.get(endpoint, headers=request_headers).text)
    status = reply["meta"]["httpStatus"]
    if status != '200 - OK':
        print(status)

    return reply["result"]

In [15]:
def update_block(BlockData, BlockID, SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    """Replace the definition of one block of a survey.

    PUTs ``BlockData`` as JSON to ``survey-definitions/<SurveyID>/blocks/<BlockID>``
    and prints the reply's ``meta.httpStatus`` when it is not ``'200 - OK'``.

    Args:
        BlockData: JSON-serialisable block payload.
        BlockID: Identifier of the block to modify.
        SurveyID: Identifier of the survey owning the block.
        apiToken: Value sent in the ``x-api-token`` header.
        dataCenter: Host prefix used to build the endpoint URL.

    Returns:
        None.
    """
    endpoint = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/blocks/{2}".format(
        dataCenter, SurveyID, BlockID)
    request_headers = {
        'accept': "application/json",
        'content-type': "application/json",
        "x-api-token": apiToken,
    }

    reply = json.loads(
        requests.put(endpoint, json=BlockData, headers=request_headers).text
    )
    status = reply["meta"]["httpStatus"]
    if status != '200 - OK':
        print(status)

In [16]:
def create_question(QuestionData, SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    """Create a question in a survey and return its identifier.

    POSTs ``QuestionData`` as JSON to ``survey-definitions/<SurveyID>/questions``.
    When the reply's ``meta.httpStatus`` differs from ``'200 - OK'`` the status
    is printed; the ``result`` section is read in either case.

    Args:
        QuestionData: JSON-serialisable question payload.
        SurveyID: Identifier of the survey receiving the question.
        apiToken: Value sent in the ``x-api-token`` header.
        dataCenter: Host prefix used to build the endpoint URL.

    Returns:
        The ``QuestionID`` read from the reply's ``result``.
    """
    endpoint = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/questions".format(
        dataCenter, SurveyID)
    request_headers = {
        'accept': "application/json",
        'content-type': "application/json",
        "x-api-token": apiToken,
    }

    # Decode the reply body once and reuse it for the status and the result.
    reply = json.loads(
        requests.post(endpoint, json=QuestionData, headers=request_headers).text
    )
    status = reply["meta"]["httpStatus"]
    if status != '200 - OK':
        print(status)

    return reply['result']['QuestionID']

In [17]:
def get_question(QuestionID, SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    """Fetch one question of a survey definition.

    Sends a GET to ``survey-definitions/<SurveyID>/questions/<QuestionID>``. A
    reply whose ``meta.httpStatus`` is not ``'200 - OK'`` has its status printed.

    Args:
        QuestionID: Identifier of the question to fetch.
        SurveyID: Identifier of the survey owning the question.
        apiToken: Value sent in the ``x-api-token`` header.
        dataCenter: Host prefix used to build the endpoint URL.

    Returns:
        The ``result`` section of the decoded reply.
    """
    endpoint = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/questions/{2}".format(
        dataCenter, SurveyID, QuestionID)
    request_headers = {"x-api-token": apiToken}

    # Decode the reply body once and reuse it for the status and the result.
    reply = json.loads(requests.get(endpoint, headers=request_headers).text)
    status = reply["meta"]["httpStatus"]
    if status != '200 - OK':
        print(status)

    return reply["result"]

In [18]:
def update_question(QuestionData, QuestionID, SurveyID, apiToken=apiToken, dataCenter=dataCenter):
    """Replace the definition of one question of a survey.

    PUTs ``QuestionData`` as JSON to
    ``survey-definitions/<SurveyID>/questions/<QuestionID>`` and prints the
    reply's ``meta.httpStatus`` when it is not ``'200 - OK'``.

    Args:
        QuestionData: JSON-serialisable question payload.
        QuestionID: Identifier of the question to modify.
        SurveyID: Identifier of the survey owning the question.
        apiToken: Value sent in the ``x-api-token`` header.
        dataCenter: Host prefix used to build the endpoint URL.

    Returns:
        None.
    """
    endpoint = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/questions/{2}".format(
        dataCenter, SurveyID, QuestionID)
    request_headers = {
        'accept': "application/json",
        'content-type': "application/json",
        "x-api-token": apiToken,
    }

    reply = json.loads(
        requests.put(endpoint, json=QuestionData, headers=request_headers).text
    )
    status = reply["meta"]["httpStatus"]
    if status != '200 - OK':
        print(status)

# Create Survey

In [19]:
print('Create New Survey')
start = timer()

# BlockID starts out as the default block returned with the new survey.
SurveyID, BlockID = create_survey(SurveyName=SurveyName)

elapsed_sec = round(timer() - start)
print("Done in", elapsed_sec, "sec")

Create New Survey
Done in 2 sec


In [20]:
print('Fetch Template Question')
# The template question is read from the source survey, not the new one.
QuestionTemplateData = get_question(
    QuestionID=QuestionTemplateID,
    SurveyID=SurveySourceID,
)

Fetch Template Question


In [21]:
start = timer()
print("Create Questions")

# Series.items() yields (index label, value) pairs, here (tweet id, tweet text).
# It replaces Series.iteritems(), which was removed in pandas 2.0.
for i,(tweet_id, tweet) in enumerate(tweets_to_label.items()):

    # Every block_size-th item opens the block of the next worker.
    starts_new_block = i%block_size==0

    if starts_new_block:

        # Turn the block referenced so far into a 'Standard' block ...
        BlockData = get_block(BlockID, SurveyID)
        BlockData['Type'] = 'Standard'
        update_block(BlockData, BlockID, SurveyID)

        # ... create the block of the next worker ...
        print('Block', i//block_size+1)
        BlockID, FlowID = create_block("Worker "+str(i//block_size+1), SurveyID)

        # ... and mark it 'Default'.
        # NOTE(review): presumably new questions are added to the 'Default'
        # block, since create_question takes no block argument - confirm
        # against the Qualtrics API documentation.
        BlockData = get_block(BlockID, SurveyID)
        BlockData['Type'] = 'Default'
        update_block(BlockData, BlockID, SurveyID)

    # Copy the template question into the survey, then overwrite its text
    # fields with the tweet and tag it with the tweet id for the export.
    QuestionID = create_question(QuestionTemplateData, SurveyID)
    QuestionData = get_question(QuestionID, SurveyID)
    QuestionData['QuestionText'] = tweet
    QuestionData['QuestionDescription'] = tweet
    QuestionData['QuestionText_Unsafe'] = tweet
    QuestionData['DataExportTag'] = 'ID_'+tweet_id
    update_question(QuestionData, QuestionID, SurveyID)

    if starts_new_block:

        # Set the block options once, right after its first question exists.
        BlockData = get_block(BlockID, SurveyID)
        BlockData['Options'] = {
        "BlockLocking": "false",
        "RandomizeQuestions": "false",
        "BlockVisibility": "Collapsed",
        }
        update_block(BlockData, BlockID, SurveyID)

print("Done in", round(timer()-start), "sec")

Create Questions
Block 1
Block 2
Block 3
Block 4
Block 5
Block 6
Block 7
Block 8
Block 9
Block 10
Block 11
Block 12
Block 13
Block 14
Block 15
Block 16
Block 17
Block 18
Block 19
Block 20
Done in 1602 sec


In [22]:
# Set the block currently referenced by BlockID back to Type 'Standard'.
# NOTE(review): presumably this releases the 'Default' role for the block
# created below - confirm against the Qualtrics API documentation.
BlockData = get_block(BlockID, SurveyID)
BlockData['Type'] = 'Standard'
update_block(BlockData, BlockID, SurveyID)

print('Create Completion Block')
# BlockID and FlowID are rebound to the newly created "Completion" block.
BlockID, FlowID = create_block("Completion", SurveyID)

# Mark the new block as 'Default' before its question is created.
BlockData = get_block(BlockID, SurveyID)
BlockData['Type'] = 'Default'
update_block(BlockData, BlockID, SurveyID)

print('Create Completion Question')
# Copy the completion question from the source survey into the new survey,
# then re-read the copy to set the tag used for it in the data export.
QuestionCompletionData = get_question(QuestionCompletionID, SurveySourceID)
QuestionID = create_question(QuestionCompletionData, SurveyID)
QuestionData = get_question(QuestionID, SurveyID)
QuestionData['DataExportTag'] = 'QIDCompletion'
update_question(QuestionData, QuestionID, SurveyID)

print('Close Block')
# Re-read the block (it now holds the question) and overwrite its options.
BlockData = get_block(BlockID, SurveyID)
BlockData['Options'] = {
"BlockLocking": "false",
"RandomizeQuestions": "false",
"BlockVisibility": "Collapsed",
}
update_block(BlockData, BlockID, SurveyID)

Create Completion Block
Create Completion Question
Close Block


In [23]:
# Fetch the current flow; the rest of this cell edits this dict locally and
# sends it back with a single update_flow call at the end.
SurveyFlow = get_flow(SurveyID)

print('Randomize Survey Flow')
# Create a Randomizer Drawing One Block At Random Except Intro And Completion Block
# Its FlowID is 'FL_' plus one more than the largest numeric FlowID suffix among
# the top-level flow elements. It wraps every element except the first and the
# last one.
# NOTE(review): assumes the first element is the intro block and the last one
# the completion block - confirm against the fetched flow.
Randomizer = {
'Type': 'BlockRandomizer',
'FlowID': 'FL_'+str(max([int(el['FlowID'].split('_')[1]) for el in SurveyFlow['Flow']])+1),
'SubSet': '1',
'EvenPresentation': True,
'Flow':SurveyFlow['Flow'][1:-1]}

# New top-level flow: first element, randomizer, last element.
SurveyFlow['Flow'] = [
SurveyFlow['Flow'][0],
Randomizer,
SurveyFlow['Flow'][-1],
]

# One flow element (the randomizer) was added.
SurveyFlow['Properties']['Count']+=1
SurveyFlow['Properties'].update({'RemovedFieldsets': []})

print('Embbeded Worker ID')
# Embedded-data element defining the field 'Random ID' from a rand://int piped
# expression. Its FlowID is computed from the rewritten top-level list, which
# already contains the randomizer, so the two new FlowIDs differ.
EmbeddedData = {'Type': 'EmbeddedData',
 'FlowID': 'FL_'+str(max([int(el['FlowID'].split('_')[1]) for el in SurveyFlow['Flow']])+1),
 'EmbeddedData': [{'Description': 'Random ID',
   'Type': 'Custom',
   'Field': 'Random ID',
   'VariableType': 'String',
   'DataVisibility': [],
   'AnalyzeText': False,
   'Value': '${rand://int/1000000000:9999999999}'}]}

# The embedded-data element goes first, ahead of every other flow element.
SurveyFlow['Flow'] = [EmbeddedData]+SurveyFlow['Flow']
SurveyFlow['Properties']['Count']+=1

# Push the rewritten flow to the survey.
update_flow(SurveyFlow, SurveyID)

Randomize Survey Flow
Embbeded Worker ID


In [24]:
# Switch Default Block From Current ...
BlockData = get_block(BlockID, SurveyID)
BlockData['Type'] = 'Standard'
update_block(BlockData, BlockID, SurveyID)

# ... to Intro
BlockID = SurveyFlow['Flow'][1]['ID']
BlockData = get_block(BlockID, SurveyID)
BlockData['Type'] = 'Default'
update_block(BlockData, BlockID, SurveyID)

print('Add Consent Question')
QuestionConsentData = get_question(QuestionConsentID, SurveySourceID)
QuestionID = create_question(QuestionConsentData, SurveyID)
QuestionData = get_question(QuestionID, SurveyID)
QuestionData['DataExportTag'] = 'QIDConsent'
update_question(QuestionData, QuestionID, SurveyID)

print('Add Worker ID Question')
QuestionWorkerData = get_question(QuestionWorkerID, SurveySourceID)
QuestionID = create_question(QuestionWorkerData, SurveyID)
QuestionData = get_question(QuestionID, SurveyID)
QuestionData['DataExportTag'] = 'QIDWorker'
update_question(QuestionData, QuestionID, SurveyID)

print('Add Description Question')
QuestionDescriptionData = get_question(QuestionDescriptionID, SurveySourceID)
QuestionID = create_question(QuestionDescriptionData, SurveyID)
QuestionData = get_question(QuestionID, SurveyID)
QuestionData['DataExportTag'] = 'QIDDescription'
update_question(QuestionData, QuestionID, SurveyID)

print('Close Intro Block')
BlockData = get_block(BlockID, SurveyID)
BlockData['Options'] = {
"BlockLocking": "false",
"RandomizeQuestions": "false",
"BlockVisibility": "Collapsed",
}
BlockData['Description'] = 'Intro'
update_block(BlockData, BlockID, SurveyID)

Add Consent Question
Add Worker ID Question
Add Description Question
Close Intro Block


In [25]:
print('Update Survey Options')

# Option values written on top of whatever the survey currently reports.
option_overrides = {
    'BackButton': 'false',
    'SaveAndContinue': 'true',
    'SurveyProtection': 'PublicSurvey',
    'BallotBoxStuffingPrevention': 'true',
    'NoIndex': 'Yes',
    'SecureResponseFiles': 'true',
    'SurveyExpiration': None,
    'SurveyTermination': 'DefaultMessage',
    'Header': '',
    'Footer': '',
    'ProgressBarDisplay': 'None',
    'PartialData': '+3 days',
    'PreviousButton': ' ← ',
    'NextButton': ' → ',
    'SkinLibrary': 'nyu',
    'SkinType': 'templated',
    'Skin': {
        'brandingId': None,
        'templateId': '*base',
        'overrides': {'contrast': 0.3, 'questionsContainer': {'on': True}},
    },
    'NewScoring': 1,
    'CustomStyles': [],
    'QuestionsPerPage': '1',
    'PageTransition': 'fade',
    'EOSMessage': '',
    'ShowExportTags': 'false',
    'CollectGeoLocation': 'false',
    'SurveyTitle': 'Online Survey Software | Qualtrics Survey Solutions',
    'SurveyMetaDescription': 'Qualtrics sophisticated online survey software solutions make creating online surveys easy. Learn more about Research Suite and get a free account today.',
    'PasswordProtection': 'No',
    'AnonymizeResponse': 'No',
    'Password': '',
    'RefererCheck': 'No',
    'RefererURL': 'http://',
    'UseCustomSurveyLinkCompletedMessage': None,
    'SurveyLinkCompletedMessage': '',
    'SurveyLinkCompletedMessageLibrary': '',
    'ResponseSummary': 'No',
    'EOSMessageLibrary': '',
    'EmailThankYou': 'false',
    'ThankYouEmailMessageLibrary': None,
    'ThankYouEmailMessage': None,
    'ValidateMessage': 'false',
    'ValidationMessageLibrary': None,
    'InactiveSurvey': 'DefaultMessage',
    'PartialDataCloseAfter': 'LastActivity',
    'ActiveResponseSet': None,
    'InactiveMessageLibrary': '',
    'InactiveMessage': '',
    'AvailableLanguages': {'EN': []},
    'SurveyLanguage': 'EN',
    'SurveyStartDate': None,
    'SurveyExpirationDate': None,
}

# Read the current options, apply the overrides in place, and send them back.
SurveyOptions = get_options(SurveyID)
SurveyOptions.update(option_overrides)
update_options(SurveyOptions, SurveyID)

Update Survey Options
