In [8]:
from collections import defaultdict

import numpy as np
import pandas as pd

In [9]:
# Build a lookup from scenario text to its correct answer, read from answers.csv.
answers = pd.read_csv('answers.csv')
questions = answers["Example Scenario in Survey"]
correct = answers["Correct Answer"]
# Both sides are stringified so later lookups compare plain str values.
a_dict = {str(scenario): str(expected) for scenario, expected in zip(questions, correct)}

In [11]:
# Load the raw survey responses.
df = pd.read_csv('responses.csv')
# Optional questions are excluded from everything that follows.
ignore = {"Q629", "Q1051", "Q227", "Q401", "Q1052", "Q549", "Q1053", "Q200", "Q401.1", "Q254.1", "Q202", "Q229"}
# Discard the row whose index label is 1; the index is not reset afterwards,
# so later cells address rows by position (iat), not by label.
df = df.drop(index=1)

# Map each retained question id (column name between Q176 and QID1054,
# inclusive) to the question text stored in the first row of the frame.
descriptionsDict = dict()
start = df.columns.get_loc("Q176")
end = df.columns.get_loc("QID1054")
for position, column in enumerate(df.columns[start:end + 1], start=start):
    if column not in ignore:
        descriptionsDict[column] = df.iat[0, position]

In [12]:
#categorize questions by type
# Ordered (marker text, question type) rules. The first marker found in a
# question's description decides its type, so the attention-check rules must
# stay ahead of the generic ones.
sectionRules = [
    ('attention check. Please go ahead and select the option that begins with "N".', 1),
    ('attention check. Please go ahead and select the option that begins with "H".', 2),
    ('attention check. Please go ahead and select the option that begins with "P".', 3),
    ("collection and sharing", 1),
    ("Our app collects:", 2),
    ("Why", 3),
]
section1 = set()
section2 = set()
section3 = set()
sectionSets = {1: section1, 2: section2, 3: section3}

# typeDict maps question id -> 1, 2 or 3. descriptionsDict already holds exactly
# the non-ignored question columns, so it is iterated directly.
typeDict = dict()
uncategorized = []
for question, description in descriptionsDict.items():
    # Computed afresh for every question: a question that matches no rule must
    # never inherit the type assigned to the previous question.
    entry = next((qType for marker, qType in sectionRules if marker in description), None)
    if entry is None:
        uncategorized.append(question)
        continue
    sectionSets[entry].add(question)
    typeDict[question] = entry

# Fail loudly instead of silently mislabelling questions.
if uncategorized:
    raise ValueError(
        "Could not determine the question type for: " + ", ".join(map(str, uncategorized))
    )

In [13]:
# Map each question id to its correct answer by matching the scenario text
# embedded in the question description against the keys of a_dict.
correctAnswerDict = dict()
for question, description in descriptionsDict.items():
    scenario = description
    # Remove the fixed lead-in phrases when present.
    for leadIn in ("Data usage scenario:", "Our app collects:"):
        if leadIn in scenario:
            scenario = scenario.replace(leadIn, "")
    # The scenario ends where the first available marker starts; markers are
    # tried in this priority order, falling back to the end of the text.
    cutoff = len(scenario)
    for marker in ("Select", "Why", "Is"):
        if marker in scenario:
            cutoff = scenario.index(marker)
            break
    # Drops the first character and the character just before the cutoff,
    # then trims whitespace and a single trailing period.
    scenario = scenario[1:cutoff - 1].strip()
    if scenario[-1] == ".":
        scenario = scenario[:-1]
    # Prefer an exact key; otherwise take the first key containing the scenario.
    if scenario in a_dict:
        matched = scenario
    else:
        matched = next((known for known in a_dict if scenario in known), None)
    # Questions with no match are left out of correctAnswerDict.
    if matched is not None:
        correctAnswerDict[question] = a_dict[matched]

In [15]:
#potential answers, for indexing
collection = ['Collected', 'Shared', 'Both', 'Neither']
types = ['Location', 'Personal Information', 'Financial Information', 'Health and Fitness', 'Messages', 'Photos or Videos', 'Audio Files', 'Files and docs', 'Calendar', 'Contacts', 'App Activity', 'Web Browsing', 'App Information and Performance', 'Device or Other Identifiers', 'None of the Above']
purpose = ['App functionality', 'Analytics', 'Developer communications', 'Advertising or marketing', 'Fraud prevention, security, and compliance', 'Personalization', 'Account management', 'None of the Above']
# Per question type (1, 2, 3): the option list an answer must belong to and the
# number of leading characters used when fuzzy-matching a non-exact answer.
optionsByType = {1: (collection, 6), 2: (types, 9), 3: (purpose, 9)}

#calculating index of correct answer
# Keyed by (participantId, questionId). Missing keys read back as "" because
# these are defaultdict(str); the results cell relies on that for unanswered rows.
answerDict = defaultdict(str)
indexGivenDict = defaultdict(str)
indexExpectedDict = defaultdict(str)
attentionChecks = {"Q203", "Q326", "Q892", "Q176", "Q554", "Q871", "Q230", "Q474", "Q944"}
rowIds = []
start = df.columns.get_loc("Q176")
end = df.columns.get_loc("QID1054")
# Looked up once instead of once per cell of the response table.
participantCol = df.columns.get_loc("Q162")

#participants who did not pass the attention check
remove = set()
for i in range(start, end + 1):
    questionId = df.columns[i]
    if questionId in ignore:
        continue
    options, prefixLength = optionsByType[typeDict[questionId]]
    # Row 0 is read as question text elsewhere, so participants start at row 1.
    for j in range(1, len(df)):
        participantId = df.iat[j, participantCol]
        rowId = (participantId, questionId)
        rowIds.append(rowId)
        rawAnswer = str(df.iat[j, i])
        if rawAnswer == "nan":
            answerDict[rowId] = ""
            continue
        answer = rawAnswer.strip()
        if not answer:
            # A whitespace-only cell is unanswered. Without this guard its empty
            # prefix is a substring of every option and it would be recorded as
            # the first option in the list.
            answerDict[rowId] = ""
            continue
        if answer not in options:
            # Fuzzy match: the first option that contains the answer's leading
            # characters. Only this question type's options are considered, so
            # the index lookups below can never see a foreign answer.
            prefix = answer[:prefixLength]
            answer = next((word for word in options if prefix in word), None)
            if answer is None:
                # Unrecognised answer: no entry is stored, so it reads back as "".
                continue
        answerDict[rowId] = answer
        #remove questions that don't pass the attentionChecks
        if questionId in attentionChecks and answer != correctAnswerDict[questionId]:
            remove.add(participantId)
        indexGivenDict[rowId] = options.index(answer)
        indexExpectedDict[rowId] = options.index(correctAnswerDict[questionId])

In [16]:
# Map each participant id (column Q162) to the value in the last column of the
# response table, which this notebook treats as the prompt condition.
idColumn = df.columns.get_loc("Q162")
conditionDict = {df.iat[row, idColumn]: df.iat[row, -1] for row in range(len(df))}
# The first row contributes an entry keyed by the question text; remove it.
conditionDict.pop('What is your Prolific ID?')

'condition'

In [17]:
#generating dataframe columns
# One entry per (participant, question) pair collected in rowIds.
participantIds = [p for p, q in rowIds]
questionIds = [q for p, q in rowIds]
descriptions = [descriptionsDict[q] for q in questionIds]
# NOTE: `types` and `answers` reuse names bound earlier in the notebook (the
# option list and the answers frame); the names are kept so any later cell that
# reads them sees the same values as before.
types = [typeDict[q] for q in questionIds]
answers = [answerDict[rowId] for rowId in rowIds]
correctAnswers = [correctAnswerDict[q] for q in questionIds]
conditions = [conditionDict[p] for p in participantIds]
givenIndices = [indexGivenDict[rowId] for rowId in rowIds]
expectedIndices = [indexExpectedDict[rowId] for rowId in rowIds]

# Build the frame in one step; dict order fixes the column order.
results = pd.DataFrame({
    'Question ID': questionIds,
    'Participant ID': participantIds,
    'Description': descriptions,
    'Answer': answers,
    'Correct Answer': correctAnswers,
    'Question Type': types,
    'Prompt Condition': conditions,
    'Answer Index': givenIndices,
    'Correct Answer Index': expectedIndices,
})
# Snapshot of the unfiltered table.
results.to_csv("weird.csv")

# Keep only answered rows (empty answer = not selected in the study) from
# participants who passed every attention check. The explicit copy makes the
# column assignments below act on an independent frame rather than a slice of
# the unfiltered one, which avoids SettingWithCopyWarning.
answered = results["Answer"] != ""
passedChecks = ~results["Participant ID"].isin(remove)
results = results[answered & passedChecks].copy()

# 1 when the given answer equals the correct answer, else 0.
results['Correctness'] = (results['Answer'] == results['Correct Answer']).astype(int)

# Sign of (given index - expected index): -1, 0 or 1. Every remaining row has
# integer indices, but the columns are object-typed because unanswered rows
# held "", hence the cast before subtracting.
indexDelta = results['Answer Index'].astype(int) - results['Correct Answer Index'].astype(int)
results['Order Difference'] = np.sign(indexDelta)

In [None]:
# Write the filtered, scored results table to disk.
outputPath = "output.csv"
results.to_csv(outputPath)