In [81]:
import pandas as pd
import os
from expanalysis.utils import get_pages
import json
import operator
import itertools
from collections import defaultdict

In [82]:
# Show every column and never truncate cell contents when rendering DataFrames.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None

In [83]:
def correct_reverse_coding(row):
    """Return the integer response of a survey row, undoing reverse coding.

    Rows whose ``id`` is ``'ai_survey_5_options'`` are scored as ``6 - response``;
    every other row is returned as the plain integer response.

    :param row: mapping-like object with the keys ``'id'`` and ``'response'``
    :return: the (possibly reversed) response as an ``int``
    """
    needs_reversal = row['id'] == 'ai_survey_5_options'
    score = int(row['response'])
    return 6 - score if needs_reversal else score
    
def map_value_to_option(row):
    """Describe the option a respondent picked as human-readable text.

    The row's ``'response'`` is converted to an int and compared against the
    (whitespace-stripped, int-converted) ``'value'`` of each entry in ``'options'``.

    :param row: mapping-like object with the keys ``'response'``, ``'options'`` and ``'text'``
    :return: ``"<question text> - Selected Option: <option text>"`` for the first
        matching option, or ``None`` when no option matches
    """
    selected = int(row['response'])
    choices = row['options']
    question = row['text']

    # Lazily scan the options and stop at the first one whose value matches.
    first_match = next(
        (choice for choice in choices if int(choice['value'].strip()) == selected),
        None,
    )
    if first_match is None:
        return None
    return f"{question} - Selected Option: {first_match['text']}"

In [84]:
def get_results(url=None,access_token=None):
    '''Fetch results through ``get_pages``, but only when an access token is supplied.

    :param url: the expfactory results API url; when omitted, the
        ``http://expfactory.org/new_api/results/84/`` endpoint is used
    :param access_token: a token obtained at expfactory.org/token when the user is logged in
    :return: whatever ``get_pages`` returns, or ``None`` (after printing a notice)
        when no access token was given
    '''
    # Identity checks are the idiomatic (and __eq__-proof) way to test for None.
    if url is None:
        url = "http://expfactory.org/new_api/results/84/"
    if access_token is None:
        print("You must provide an access_token to authenticate to the API.")
        return None
    return get_pages(url=url, access_token=access_token)

In [85]:
# Battery IDs (as strings) whose results are downloaded from the results API.
battery_id = ["259"]

In [86]:
# Read the API token from a local file so it is not hard-coded in the notebook.
with open('access_token.txt', 'r') as file:
    # Strip surrounding whitespace: a trailing newline in the file would
    # otherwise become part of the token.
    access_token = file.read().strip()
url = "http://expfactory.org/new_api/results/84/" # default url

In [87]:
# Download the results of every requested battery. `results` ends up with one
# entry per battery ID, each being whatever get_results returned for that battery.
results = []
for battery in battery_id:  # not named `id`, which would shadow the builtin for the whole kernel
    battery_url = f"http://expfactory.org/new_api/results/{battery}/"
    single_result = get_results(url=battery_url, access_token=access_token)
    results.append(single_result)


Retrieving http://expfactory.org/new_api/results/259/
Retrieving http://expfactory.org/new_api/results/259/?limit=10&offset=10
Retrieving http://expfactory.org/new_api/results/259/?limit=10&offset=20
Retrieving http://expfactory.org/new_api/results/259/?limit=10&offset=30
Found 34 results!


In [88]:
##################################################################
#                       Define variables
##################################################################
# Root directory in which one output folder per worker is created.
# It can be overridden through the NETWORK_OUTPUT_DIR environment variable so the
# notebook also runs on other machines; the original location remains the default.
file_path = os.environ.get(
    "NETWORK_OUTPUT_DIR",
    "/Users/jahrios/Documents/Stanford/Poldrack Lab/Python Code/network_output",
)

# exp_ids a worker must have completed to be reported as having finished the battery.
surveys = ['demographics_survey__stanford', 'ai_survey']

# Subject roster; its 'expfactory_id' and 'subject_id' columns are read further down.
subject_info = pd.read_csv('shared_control_subjects.csv')


In [89]:
# Flatten the per-battery result lists into one flat list of result records.
# Entries that are already single records (dicts) are kept as they are, so
# re-running this cell on an already flattened list is a no-op. Without this
# guard, a second run would iterate each dict and replace the records by their keys.
temp_results = []
for entry in results:
    if isinstance(entry, dict):
        temp_results.append(entry)
    else:
        temp_results.extend(entry)
results = temp_results

In [90]:
# Gather the worker id attached to every result record, then reduce the
# list to the set of distinct workers.
workers = [record['worker']['id'] for record in results]

unique_workers = set(workers)

In [91]:
# For every unique worker, make sure an output directory exists and record which
# experiments and surveys that worker has completed.
# Each entry of all_subs has the form [worker_id, exp_id, exp_id, ...].
num_unique_worker = len(unique_workers)

# Group the completed exp_ids by worker in a single pass over the results
# (keeps the order in which the results were returned) instead of rescanning
# the whole result list once per worker.
completed_by_worker = defaultdict(list)
for record in results:
    if record['completed'] == True:  # same explicit comparison as the original check
        completed_by_worker[record['worker']['id']].append(record['experiment']['exp_id'])

all_subs = []
# Iterate over a sorted copy rather than pop()-ing from the set: unique_workers stays
# intact, so this cell can be re-run, and the worker order becomes deterministic.
for current_worker in sorted(unique_workers):
    # exist_ok avoids the separate (racy) existence check
    os.makedirs(os.path.join(file_path, current_worker), exist_ok=True)
    tasks_completed = [current_worker] + completed_by_worker.get(current_worker, [])
    all_subs.append(tasks_completed)

In [92]:
# Reshape all_subs into [worker_id, [completed exp_ids]] pairs.
# Inspect this list to see which subject completed what.
info_array = [[sub[0], list(sub[1:])] for sub in all_subs]

In [93]:
# Report the workers whose set of completed tasks equals the required surveys exactly.
required_surveys = set(surveys)
workers_who_have_completed = [
    worker
    for worker, completed_tasks in info_array
    if set(completed_tasks) == required_surveys
]

print(f'There are {len(workers_who_have_completed)} subject(s) who have completed the network battery.  They are:')
print(workers_who_have_completed)

There are 15 subject(s) who have completed the network battery.  They are:
['323a83de-05e3-42dd-a975-4d8407f8bf35', '686fb9b6-423d-4130-82ed-dd94c65b7c71', '641d914a-7477-480a-9951-c34376399207', '518bb4c0-1eb3-454e-9c07-57b7f14a7bc2', '9d16d983-101a-464c-9056-d0526355d134', '2a894bdd-3a63-4490-82a8-60aa04ff2891', '69383d84-3462-4b90-b37d-5bc10da0cf1f', 'f86b2c13-eb4c-47a3-bb45-68b01088e20c', '63ba282e-0156-45ae-8070-677e8b43be09', '56a06a94-cbf4-4a97-9b11-c903cc44669d', '22285773-a87e-4a68-a101-66bfd5cca2ed', '3854412c-03ce-4be4-82b8-d548ed62c787', '80e2dc89-dc36-4c85-9028-07a45c0fa569', 'a14a7c9d-937a-436c-84f1-cd979080ec0b', '06fbed4e-909b-4183-86d8-db2d58716fd5']


In [94]:
# Collect the 'ai_survey' responses of every worker that appears in the subject roster.
# survey_data maps worker id -> DataFrame built from the result's 'data', transposed.
# The roster ids are put into a set once instead of rebuilding a list for every result.
known_workers = set(subject_info['expfactory_id'])

survey_data = {}
for record in results:
    if record['experiment']['exp_id'] != 'ai_survey':
        continue
    worker_id = record['worker']['id']
    if worker_id in known_workers:
        # A later result for the same worker overwrites an earlier one.
        survey_data[worker_id] = pd.DataFrame(record['data']).T

In [95]:
# Lookup table from expfactory worker id to the study's subject id.
worker_to_subject = {
    expfactory_id: subject_id
    for expfactory_id, subject_id in zip(subject_info['expfactory_id'], subject_info['subject_id'])
}
# Re-key the survey frames by subject id instead of worker id.
subject_data = {
    worker_to_subject[worker]: frame
    for worker, frame in survey_data.items()
}

In [96]:
# Score every subject's survey.
# survey_results maps subject id -> {'data': <the subject's frame>, 'result': <mean score or None>}.
survey_results = {}
for subject, df in subject_data.items():

    print(subject)
    if df.empty:
        # Store the subject's own (empty) frame. Previously `df` was only assigned in the
        # other branch, so this was either undefined or the previous subject's frame.
        survey_results[subject] = {'data': df, 'result': None}
        continue

    # The columns are added in place, i.e. the frames held in subject_data gain them too.
    df['corrected_value'] = df.apply(correct_reverse_coding, axis=1)
    df['results'] = df.apply(map_value_to_option, axis=1)

    # The survey score is the mean of the reverse-coding-corrected responses.
    result = df['corrected_value'].mean()

    survey_results[subject] = {'data': df, 'result': result}

s019
s018
s013
s011
s007
s017
s014
s016
s012
s010
s015
s008
s009
s006
s005
s004


In [101]:
# Build a one-column table ('survey_result') indexed by subject id that holds each
# subject's 'result' entry from survey_results.
# NOTE(review): the displayed table is ordered by subject id although the subjects were
# processed in a different order, which appears to come from the nested-dict
# constructor. Confirm before restructuring this expression.
result_df = pd.DataFrame({'survey_result': {subject: details['result'] for subject, details in survey_results.items()}})

In [103]:
# Write the per-subject scores to disk. The target directory is created first,
# because to_csv does not create missing directories.
os.makedirs('output', exist_ok=True)
result_df.to_csv('output/ai_survey_metrics.csv')

In [102]:
# Display the per-subject survey scores as the cell output.
result_df

Unnamed: 0,survey_result
s004,2.142857
s005,3.0
s006,2.714286
s007,3.428571
s008,4.0
s009,3.142857
s010,1.857143
s011,1.571429
s012,2.428571
s013,3.428571
