In [168]:
import pandas as pd 
import numpy as np 
import re
from openai import OpenAI
import ast
from tqdm import tqdm 
import matplotlib.pyplot as plt 
import plotly.express as px
import statsmodels.formula.api as smf
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats import spearmanr, pearsonr
import pickle
import itertools
import os


In [2]:
client = OpenAI(api_key=open("/Users/hi387/Desktop/open_ai_key.txt", "r").read().strip())

In [28]:
df = pd.read_csv("../data/102_tasks_with_sources_clean.csv")
df.head()

Unnamed: 0,Task Name,Source,Link,Stimulus Complex,Goal Directives
0,9 Dot Problem,Maier 1930,https://doi.org/10.1037/h0073232,Participants are given a 3x3 grid of nine dots...,"Within a limited amount of time, participants ..."
1,Abstract grid task,Adams et al. 2021,https://doi.org/10.1038/s41586-021-03380-y,"Participants are given a shared grid, in which...",The goal is to make the grid appear exactly sy...
2,Advertisement writing,Whiting et al. 2019,https://dl.acm.org/doi/pdf/10.1145/3359311,Participants were given the description of a p...,The goal was to write an online text advertise...
3,Aerospace Investment (Role-playing),Sanpietro 2019,https://www.pon.harvard.edu/daily/teaching-neg...,Participants are randomly paired for a scored ...,Participants are explicitly instructed that th...
4,Allocating resources to programs,Whiting et al. 2019,https://dl.acm.org/doi/pdf/10.1145/3359311,Participants are given a list of complex compe...,The goal is to collectively decide how to allo...


In [43]:
def get_task_string(task):
    return f"""{task['Task Name']}
    Setup: {task['Stimulus Complex']}
    Goal : {task['Goal Directives']}
    """  

    

def task_description_to_dimensions(ref_task_list):
    system_prompt = f"You will be presented with a description of {len(ref_task_list)} different tasks." + """ After looking at all of the task descriptions, determine the set of dimensions along which the tasks vary. For each of these dimensions, provide a short description of the dimension, along with how it can be operationalized. As an example, if the dimension is 'Time-solvability', the question can be 'Is a participant able to come up with a provably correct solution, assuming sufficiewnt ability, time, motivation, and resources? After providing the different dimensions that best describe these tasks, provide an overall rationale for your choice of these tasks as well. 
    
        YOUR ANSWER MUST FOLLOW THIS JSON FORMAT : {"dimensions": [{[DIMENSION 1 NAME] : [DIMENSION 1 DESCRIPTION]}, {[DIMENSION 2 NAME] : [DIMENSION 2 DESCRIPTION], ...], 'rationale' : [YOUR RATIONALE]}"""

    prompts = []
    
    for i, row in ref_task_list.iterrows():
        prompts.append(f"Task {i+1} is: {get_task_string(row)}")
        
    messages = [{"role": "system", "content": system_prompt}]
    
    for prompt in prompts:
        messages.append({"role": "user", "content": prompt})
    
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=messages,
        temperature=0)
    
    return response

In [194]:
iteration = 3

In [195]:
num_samples = 100
sample = df.sample(n = num_samples).reset_index()
sample

Unnamed: 0,index,Task Name,Source,Link,Stimulus Complex,Goal Directives
0,58,Public goods game,Tomassini and Antonioni 2020,https://doi.org/10.3389/fphy.2020.00058,Participants receive the same amount of tokens...,Participants are instructed that their goal is...
1,7,Arithmetic problem 1,Shaw 1963,https://collections.uakron.edu/digital/collect...,Participants see a hypothetical story problem ...,The goal is to pair each man with one of the m...
2,28,Euclidean traveling salesperson,Bernstein et al. 2018,https://doi.org/10.1073/pnas.1802407115,"Participants get a map with multiple cities, w...",The goal is to find and select the precise sho...
3,3,Aerospace Investment (Role-playing),Sanpietro 2019,https://www.pon.harvard.edu/daily/teaching-neg...,Participants are randomly paired for a scored ...,Participants are explicitly instructed that th...
4,84,The N light bulbs game,Yahosseini and Moussaïd 2020,https://doi.org/10.1038/s41598-020-59946-9,Participants see 10 light bulbs.\nSome of the ...,The goal is to get the most points by setting ...
...,...,...,...,...,...,...
95,52,New Recruit,"Overbeck, Neale, and Govan 2010",https://doi.org/10.1016/j.obhdp.2010.02.004,Participants are randomly paired for a negotia...,Participants are explicitly instructed that th...
96,75,Sender-Receiver game,Gneezy 2005,https://doi.org/10.1257/0002828053828662,Participants are paired for a communication-ba...,Each participant's goal is to maximize their o...
97,24,Estimating Factual Quantities,"Silver, Mellers, and Tetlock 2021",https://doi.org/10.1016/j.jesp.2021.104157,Participants are given a series of questions o...,The goal is to answer as many questions from t...
98,64,Ravens Matrices,Weidmann and Deming 2020,https://doi.org/10.3386/w27071,"Participants are presented with a pattern, whi...",The goal is to complete the pattern by identif...


In [196]:
list(sample['index'])

[58,
 7,
 28,
 3,
 84,
 34,
 51,
 97,
 16,
 99,
 36,
 53,
 66,
 32,
 65,
 95,
 82,
 43,
 37,
 61,
 83,
 80,
 29,
 10,
 56,
 47,
 73,
 71,
 79,
 57,
 25,
 78,
 12,
 54,
 62,
 35,
 77,
 67,
 45,
 87,
 15,
 69,
 50,
 38,
 89,
 49,
 68,
 14,
 40,
 33,
 18,
 6,
 96,
 30,
 90,
 13,
 98,
 0,
 39,
 19,
 9,
 31,
 72,
 70,
 100,
 8,
 48,
 76,
 27,
 42,
 60,
 23,
 11,
 5,
 22,
 59,
 93,
 21,
 44,
 101,
 20,
 85,
 4,
 46,
 26,
 74,
 17,
 94,
 41,
 81,
 88,
 55,
 1,
 2,
 86,
 52,
 75,
 24,
 64,
 92]

In [197]:
response = task_description_to_dimensions(sample)
response

ChatCompletion(id='chatcmpl-8hsd09qkW015se2B9RAteklkPMVEC', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{"dimensions": [\n    {"Complexity": "The level of difficulty involved in completing the task, which can be operationalized by measuring the number of steps, rules, or pieces of information that must be managed to achieve the task goal."},\n    {"Collaboration Requirement": "The extent to which the task requires participants to work together with others, which can be operationalized by whether the task is designed for individual or group participation and the degree of interdependence among participants."},\n    {"Cognitive Load": "The amount of mental effort required to perform the task, which can be operationalized by assessing the memory demands, the need for attentional focus, and the level of problem-solving involved."},\n    {"Time Constraint": "The degree to which the task is time-bound, which can be operationalized by t

In [198]:
response.choices[0].message.content.replace('\n', '')

'{"dimensions": [    {"Complexity": "The level of difficulty involved in completing the task, which can be operationalized by measuring the number of steps, rules, or pieces of information that must be managed to achieve the task goal."},    {"Collaboration Requirement": "The extent to which the task requires participants to work together with others, which can be operationalized by whether the task is designed for individual or group participation and the degree of interdependence among participants."},    {"Cognitive Load": "The amount of mental effort required to perform the task, which can be operationalized by assessing the memory demands, the need for attentional focus, and the level of problem-solving involved."},    {"Time Constraint": "The degree to which the task is time-bound, which can be operationalized by the presence of a time limit and the strictness of that time limit."},    {"Quantitative Reasoning": "The extent to which the task involves numerical computation, estima

In [199]:
response_dict = ast.literal_eval(response.choices[0].message.content.replace('\n', ''))

In [200]:
response_dict['tasks'] = list(sample['index'])

In [201]:
import json
path = '../data/task_to_dimension_responses/' + str(num_samples) + '_tasks_' + str(iteration) + '.json'
with open(path, 'w') as f:
    json.dump(response_dict, f, indent = 4)

In [202]:
files = os.listdir('../data/task_to_dimension_responses/')

path = '../data/task_to_dimension_responses/'

num_iterations = []
num_tasks = []
dimensions = []
rationales = []


for file in files:
    
    data = json.load(open(path + file))
    num_tasks_file = file.split('_')[0]
    iteration_file = file.split('_')[2].split('.')[0]
    for dimension in data['dimensions']:
        num_tasks.append(int(num_tasks_file))
        num_iterations.append(iteration_file)
        dimensions.append(list(dimension.keys())[0])
        rationales.append(list(dimension.values())[0])
        
df_data = pd.DataFrame({'num_tasks' : num_tasks, 'iteration_num' : num_iterations, 'dimension' : dimensions, 'operationalization' : rationales})
df_data.head()
    

Unnamed: 0,num_tasks,iteration_num,dimension,operationalization
0,25,2,Complexity,The level of difficulty involved in completing...
1,25,2,Collaboration,Whether the task requires participants to work...
2,25,2,Creativity,The extent to which the task requires novel th...
3,25,2,Time Pressure,The degree to which a task is constrained by t...
4,25,2,Cognitive Load,The amount of mental effort and working memory...


In [207]:
%%capture output
for num_task in df_data.sort_values(by = 'num_tasks', ascending = True).num_tasks.unique():
    sliced = df_data[df_data.num_tasks == num_task]
    print('Number of tasks tested per iteration = ', num_task)
    print('Number of iterations', sliced['iteration_num'].nunique())
    print('Number of dimensions per iteration', sliced.groupby('iteration_num')['dimension'].nunique())
    print('Num unique dimensions total', sliced['dimension'].nunique())
    print('Dimensions : ', sliced.sort_values(by = 'dimension')['dimension'].unique())
    
    print('\n'*3)

In [208]:
with open('../data/task_to_dimension_responses/summary.txt', 'w') as f:
    f.write(output.stdout)