In [28]:
from sqlalchemy import create_engine, MetaData, Table
import json
import pandas as pd

In [29]:
import warnings
warnings.filterwarnings("ignore")

In [30]:
# Connection settings for the database that holds the experiment's table.
# NOTE(review): "..." looks like a placeholder for the real database URL —
# confirm before running.
table_name = "JOR"
db_url = "..."

In [31]:
# Column of the table that stores each participant's datastring
# (parsed as JSON further down).
data_column_name = 'datastring'

# Boilerplate SQLAlchemy setup: create the engine and reflect the existing
# table's columns from the database instead of declaring them by hand.
engine = create_engine(db_url)
metadata = MetaData()
# Kept so that code relying on the bound metadata (implicit execution via
# `s.execute()`) keeps working.
metadata.bind = engine
# `autoload_with=engine` names the engine used for reflection explicitly; the
# bare `autoload=True` flag is deprecated in SQLAlchemy 1.4 and removed in 2.0.
table = Table(table_name, metadata, autoload_with=engine)

# SELECT over every row of the table; the statement is executed later.
s = table.select()

In [32]:
# Run the SELECT on an explicit connection and pull every row into a list.
# A raw result object can only be iterated once, so re-running a cell that
# loops over `rows` would silently see nothing; a list can be re-read, and
# the connection is released as soon as the rows have been fetched.
with engine.connect() as conn:
    rows = conn.execute(s).fetchall()

In [33]:
# Status codes a row must carry to be kept.
# NOTE(review): the list spans every integer from 0 through 7, although it is
# meant to select "subjects who completed experiment" — confirm that all
# eight codes should really pass the filter.
statuses = list(range(8))

# Only rows belonging to these worker IDs are kept.
workerids = ["A1QUQ0TV9KVD4C"]

In [34]:
# Per-participant metadata columns copied into `conditions`, in output order.
condition_fields = (
    'uniqueid',
    'beginhit',
    'endhit',
    'status',
    'counterbalance',
    'workerid',
    'codeversion',
)

data = []
conditions = []

for row in rows:
    # Skip rows whose status is not whitelisted or whose worker is not one of
    # the selected workerids.
    if row['status'] not in statuses or row['workerid'] not in workerids:
        continue
    # Metadata goes to `conditions`; the raw datastring goes to `data`, so
    # the two lists stay aligned by position.
    conditions.append({field: row[field] for field in condition_fields})
    data.append(row[data_column_name])

In [40]:
# One row per retained participant. The sorted view is only this cell's
# displayed output; condition_frame itself keeps its original row order.
condition_frame = pd.DataFrame(data=conditions)
condition_frame.sort_values(by='beginhit')

Unnamed: 0,uniqueid,beginhit,endhit,status,counterbalance,workerid,codeversion
0,A1QUQ0TV9KVD4C:3DL65MZB8EMN1MVMTLR3CLOF4WQCED,2022-03-21 20:19:02,,6,26,A1QUQ0TV9KVD4C,2.1


In [36]:
# Now we have all participant datastrings in a list.
# Let's make it a bit easier to work with:

# Parse each participant's datastring as a JSON object and keep its 'data'
# sub-object. Unusable datastrings are still skipped (best effort), but only
# for the failures we expect, and the skips are reported instead of vanishing.
subject_data = []
skipped_subjects = []  # positions in `data` whose datastring could not be used
for subject_index, subject_json in enumerate(data):
    try:
        subject_dict = json.loads(subject_json)
        subject_data.append(subject_dict['data'])
    except (TypeError, ValueError, KeyError):
        # TypeError: the datastring is not text (e.g. None) or the parsed JSON
        #            is not an object;
        # ValueError: malformed JSON (json.JSONDecodeError is a subclass);
        # KeyError: the parsed object has no 'data' key.
        # A bare `except:` would also swallow KeyboardInterrupt and real bugs.
        skipped_subjects.append(subject_index)

if skipped_subjects:
    print("Skipped", len(skipped_subjects), "of", len(data),
          "datastrings; positions in `data`:", skipped_subjects)

In [37]:
# Make sure every trialdata dict carries its record's uniqueid, in case the
# experiment code did not store it there itself.
for participant_records in subject_data:
    for entry in participant_records:
        entry['trialdata']['uniqueid'] = entry['uniqueid']

In [38]:
# Flatten the per-participant lists into one list holding the trialdata
# recorded by each psiturk.recordTrialData(trialdata) call.
trialdata = [
    entry['trialdata']
    for participant_records in subject_data
    for entry in participant_records
]

In [39]:
# Collect every participant's trial records into a single pandas DataFrame
# (one of many possible tools for the analysis) and display it.
df = pd.DataFrame(data=trialdata)
df

Unnamed: 0,rt,responses,type,trial_type,trial_index,time_elapsed,internal_node_id,uniqueid,stimulus,key_press,button_pressed
0,5801.2,"{""name"":""mary""}",NAME,survey-html-form,0,5846,0.0-0.0-0.0,A1QUQ0TV9KVD4C:3DL65MZB8EMN1MVMTLR3CLOF4WQCED,,,
1,3285.0,,,html-keyboard-response,1,9335,0.0-1.0,A1QUQ0TV9KVD4C:3DL65MZB8EMN1MVMTLR3CLOF4WQCED,Welcome to the <strong>Judgment of Recency</st...,,
2,10396.0,,,html-button-response,2,19947,0.0-2.0-0.0,A1QUQ0TV9KVD4C:3DL65MZB8EMN1MVMTLR3CLOF4WQCED,<p class='inst-justified'>According to Wikiped...,,3.0
3,1613.7,,,html-button-response,3,21562,0.0-2.0-1.0,A1QUQ0TV9KVD4C:3DL65MZB8EMN1MVMTLR3CLOF4WQCED,<p class='inst'>How often do you realize you'v...,,0.0
