In [1]:
import openai
import pandas as pd
import json
import itertools
import os
import sys
import logging
import concurrent.futures
from datetime import datetime
from tenacity import RetryCallState
import jsonlines
import random
import numpy as np
import re

from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
    before_sleep_log
)  # for exponential backoff

#####################################
## GLOBAL CONSTANTS ##
# Load the OpenAI API key from the local credentials JSON file.
with open('../../creds/openai_creds.json', 'r') as f:
    data = json.load(f)
RANDOM_SEED = 416
API_KEY = data['api_key']
# Reference responses; this file reads the 'prompt', 'response' and 'target'
# columns of this table.
example_df = pd.read_csv("../../data/gt_main2.csv")
# Objects used as Alternate Uses Task items.
AUT_ITEMS = ["box", "fork", "lightbulb", "spoon", "table"]
# Prompt templates. [OBJECT_NAME], [N] and [EXAMPLES] are placeholders that
# make_prompt() substitutes.
PROMPTS = {
    "zero_shot": "What are some creative uses for [OBJECT_NAME]? The goal is to come up with a creative idea, which is an idea that strikes people as clever, unusual, interesting, uncommon, humorous, innovative, or different. List [N] creative uses for [OBJECT_NAME].",

    "implicit": "What are some creative uses for [OBJECT_NAME]? The goal is to come up with a creative idea, which is an idea that strikes people as clever, unusual, interesting, uncommon, humorous, innovative, or different. Here are example creative uses: [EXAMPLES] Based on the examples, list [N] creative uses for [OBJECT_NAME] that sounds like the examples.",

    "explicit": "What are some creative uses for [OBJECT_NAME]? The goal is to come up with a creative idea, which is an idea that strikes people as clever, unusual, interesting, uncommon, humorous, innovative, or different. Here are example creative uses: [EXAMPLES] Carefully study the examples and their style, then list [N] creative uses for [OBJECT_NAME] that resemble the given examples. Match the style, length, and complexity of the creative ideas in the examples.",
}
# Sampling parameters forwarded to the completion API.
TEMPERATURE = 0.7
FREQUENCY_PENALTY = 1.5
PRESCENCE_PENALTY = 1

# Define originality quartiles
# np.percentile expects q on a 0-100 scale, so the quartiles are 25 and 75
# (0.25 would be the 0.25th percentile).
FIRST_Q = np.percentile(example_df['target'], 25)
THIRD_Q = np.percentile(example_df['target'], 75)

# Seed the stdlib RNG (used by random.sample below) from the shared constant.
random.seed(RANDOM_SEED)
#####################################


def handle_prompt(args):
    """Fill in a prompt template and return the model's completion for it.

    Args:
        args: A 7-item sequence, in order: prompt_base, object_name,
            examples, n_examples, temperature, frequency_penalty,
            presence_penalty. Packed into one argument so the call can be
            passed around as a single value.

    Returns:
        Whatever generate_responses() returns for the filled-in prompt.
    """
    (
        prompt_base,
        object_name,
        examples,
        n_examples,
        temperature,
        frequency_penalty,
        presence_penalty,
    ) = args
    filled_prompt = make_prompt(prompt_base, object_name, examples, n_examples)
    return generate_responses(
        filled_prompt, temperature, frequency_penalty, presence_penalty
    )


def make_prompt(prompt_base, object_name, examples, n_examples):
    """Substitute the placeholders of a prompt template.

    Args:
        prompt_base: Template text containing any of the placeholders
            [OBJECT_NAME], [N] and [EXAMPLES].
        object_name: Text inserted for every [OBJECT_NAME].
        examples: Iterable of strings rendered as a "- " bulleted list and
            inserted for [EXAMPLES].
        n_examples: Value inserted (via str()) for every [N].

    Returns:
        The template with all three placeholders replaced.
    """
    filled = prompt_base.replace("[OBJECT_NAME]", object_name).replace(
        "[N]", str(n_examples)
    )
    # Each example starts on its own line; entries are joined by a single
    # space and the list is terminated with a newline.
    bullets = ['\n- ' + idea for idea in examples]
    example_block = " ".join(bullets) + "\n"
    return filled.replace("[EXAMPLES]", example_block)


@retry(
    wait=wait_random_exponential(multiplier=30, min=1, max=60),
    stop=stop_after_attempt(30),
    before_sleep=before_sleep_log(logging, logging.INFO),
)
def generate_responses(prompt, temperature, frequency_penalty, presence_penalty):
    """Request one completion for ``prompt`` and return its text.

    The call is wrapped in a tenacity retry: up to 30 attempts with a
    randomized exponential wait (min=1, max=60), logging at INFO before
    each sleep.

    Args:
        prompt: Prompt text sent to the model.
        temperature: Sampling temperature forwarded to the API.
        frequency_penalty: Frequency penalty forwarded to the API.
        presence_penalty: Presence penalty forwarded to the API.

    Returns:
        The "text" field of the first choice in the API response.
    """
    openai.api_key = API_KEY
    completion = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=temperature,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty,
        max_tokens=2000,
    )
    return completion["choices"][0]["text"]


def get_examples(df, prompt, n_examples, seed=416):
    """Draw a reproducible random sample of responses for one prompt.

    Args:
        df: DataFrame with at least 'prompt' and 'response' columns.
        prompt: Value of the 'prompt' column to select rows by.
        n_examples: Number of rows to sample.
        seed: Random state passed to DataFrame.sample for reproducibility.

    Returns:
        A list holding the 'response' values of the sampled rows.
    """
    matching_rows = df.loc[df['prompt'] == prompt]
    sampled_rows = matching_rows.sample(n=n_examples, random_state=seed)
    return sampled_rows['response'].tolist()


def split_ideas(x):
    """Split a block of model output into a list of lower-cased ideas.

    Each line of ``x`` is treated as one idea. A leading "- " bullet marker
    (optionally preceded by whitespace) is removed, surrounding whitespace is
    trimmed, the text is lower-cased, and empty lines are dropped.

    Only the bullet at the start of the line is removed; a "- " that occurs
    inside an idea (e.g. "a drum - for kids") is part of the idea and kept.

    Args:
        x: Multi-line string, one idea per line.

    Returns:
        List of cleaned, non-empty idea strings in their original order.
    """
    ideas = []
    for line in x.split("\n"):
        # Anchor at the line start so only the bullet prefix is stripped.
        idea = re.sub(r'^\s*(?:-\s+)+', '', line).strip().lower()
        if idea:
            ideas.append(idea)
    return ideas


def remove_number_prefix(response):
    """Drop a leading list number such as "3." and trim whitespace.

    Only a run of digits followed by a period at the very start of the
    string is removed; numbers elsewhere in the text are left alone.

    Args:
        response: A single idea string.

    Returns:
        The string without the leading number, stripped of surrounding
        whitespace.
    """
    numbered = re.match(r'^\d+\.', response)
    remainder = response[numbered.end():] if numbered else response
    return remainder.strip()


def log_before_sleep(retry_state: RetryCallState):
    """Log, at INFO level, the sleep duration of the upcoming retry.

    Args:
        retry_state: Retry state object; its ``next_action.sleep`` value is
            reported in the message.
    """
    message = f"Waiting {retry_state.next_action.sleep} seconds before retrying..."
    logging.info(message)
    

def _generate_ai_ideas(aut_item, seed_examples, n_ideas):
    """Ask the model for ``n_ideas`` uses of ``aut_item``; return cleaned ideas."""
    prompt_args = (
        PROMPTS['implicit'],
        "a " + aut_item,
        seed_examples,
        n_ideas,
        TEMPERATURE,
        FREQUENCY_PENALTY,
        PRESCENCE_PENALTY,
    )
    ideas = [remove_number_prefix(x) for x in split_ideas(handle_prompt(prompt_args))]
    if len(ideas) != n_ideas:
        # The model is free to return more or fewer lines than requested.
        logging.warning(
            "Requested %d AI ideas for item %s but parsed %d",
            n_ideas, aut_item, len(ideas),
        )
    return ideas


def _label_ideas(ideas, source):
    """Append a " (SOURCE: <source>)" suffix to every idea."""
    return [f"{idea} (SOURCE: {source})" for idea in ideas]


def make_stimuli_sets():
    """Build the idea lists for every condition, one row per AUT item.

    For each item in ``AUT_ITEMS``:
      * 'human': 8 responses sampled from ``example_df``.
      * 'few_gpt_unlabled': model ideas from a request for 2, followed by 6
        ideas sampled from 'human'.
      * 'many_gpt_unlabled': model ideas from a request for 6, followed by 2
        ideas sampled from 'human'.
      * 'few_gpt_labled' / 'many_gpt_labled': the same ideas as the unlabeled
        lists with " (SOURCE: AI)" or " (SOURCE: HUMAN)" appended.
      * 'item_name': the item itself.

    Labels are assigned from where each idea came from rather than from its
    position in the combined list, so they stay correct even when the model
    returns a different number of ideas than requested.

    Side effects:
        Writes "stimuli_sets.csv" (lists per cell) and "stimuli_str_sets.csv"
        (each list joined into one "br"-prefixed, newline-separated string)
        to the current working directory.

    Returns:
        Tuple ``(stim_df, stim_str_df)`` of the two DataFrames written.
    """
    sep = "br"
    stimuli_str_sets = []
    stimuli_sets = []
    for aut_item in AUT_ITEMS:
        logging.info("Making stimuli for item %s", aut_item)
        stimuli = {}
        stimuli['human'] = get_examples(example_df, aut_item, 8, seed=RANDOM_SEED)
        # A different seed gives the model its own set of human examples.
        gpt_human_seeds = get_examples(example_df, aut_item, 8, seed=RANDOM_SEED + 10)

        # Keep AI and human ideas separate until after labeling. The order of
        # the random.sample calls (6 first, then 2) is kept so seeded runs
        # draw the same human ideas as before.
        few_ai = _generate_ai_ideas(aut_item, gpt_human_seeds, 2)
        few_human = [remove_number_prefix(x) for x in random.sample(stimuli['human'], 6)]
        many_ai = _generate_ai_ideas(aut_item, gpt_human_seeds, 6)
        many_human = [remove_number_prefix(x) for x in random.sample(stimuli['human'], 2)]

        stimuli['few_gpt_unlabled'] = few_ai + few_human
        stimuli['many_gpt_unlabled'] = many_ai + many_human
        stimuli['few_gpt_labled'] = _label_ideas(few_ai, "AI") + _label_ideas(few_human, "HUMAN")
        stimuli['many_gpt_labled'] = _label_ideas(many_ai, "AI") + _label_ideas(many_human, "HUMAN")
        stimuli['item_name'] = aut_item

        # String form: every idea on its own line, each prefixed with `sep`.
        stimuli_str = {
            k: (f"{sep} " + f"\n{sep} ".join(v)) if k != 'item_name' else v
            for k, v in stimuli.items()
        }
        stimuli_sets.append(stimuli)
        stimuli_str_sets.append(stimuli_str)

    stim_df = pd.DataFrame(stimuli_sets)
    stim_df.to_csv("stimuli_sets.csv")

    stim_str_df = pd.DataFrame(stimuli_str_sets)
    stim_str_df.to_csv("stimuli_str_sets.csv")
    return stim_df, stim_str_df

def main():
    """Generate the stimuli and the per-ordering condition table.

    Builds the stimuli sets, then creates one row for every permutation of
    ``AUT_ITEMS``. In each row the i-th condition is paired with the i-th
    item of that ordering, and the row stores the item, its full idea list,
    and each idea in its own column.

    Side effects:
        Configures logging to write INFO messages to a timestamped
        "get_conditions_<date>.log" file and writes "condition_df.csv" to
        the current working directory (in addition to the files written by
        make_stimuli_sets()).

    Returns:
        The condition DataFrame that was written to "condition_df.csv".
    """
    now = datetime.now()
    date_string = now.strftime("%Y-%m-%d-%H-%M-%S")
    log_file = f"get_conditions_{date_string}.log"
    # Without explicit configuration the root logger stays at WARNING and the
    # INFO progress messages below would be dropped.
    logging.basicConfig(filename=log_file, level=logging.INFO)

    logging.info("Making stimuli sets")
    stim_df, stim_str_df = make_stimuli_sets()

    logging.info("Making experiment conditions for Qualtrics loop&merge")

    CONDITIONS = [
        "human",
        "few_gpt_unlabled",
        "many_gpt_unlabled",
        "few_gpt_labled",
        "many_gpt_labled",
    ]

    # Index the stimuli once by item name instead of filtering the frame for
    # every (ordering, condition) pair. setdefault keeps the first row per
    # item, matching a "first match" lookup.
    stimuli_by_item = {}
    for row in stim_df.to_dict('records'):
        stimuli_by_item.setdefault(row['item_name'], row)

    conditions_data = []
    for item_order in itertools.permutations(AUT_ITEMS):
        condition_data = {'idx': "_".join(item_order)}
        # The i-th condition receives the i-th item of this ordering.
        for cond_str, cond_item in zip(CONDITIONS, item_order):
            idea_set = stimuli_by_item[cond_item][cond_str]
            condition_data[f'{cond_str}_item'] = cond_item
            condition_data[f'{cond_str}_set'] = idea_set

            # Due to Qualtrics loop and merge, we have to add each answer as
            # its own column, e.g. human_r0, human_r1, ...
            for r, idea in enumerate(idea_set):
                condition_data[f'{cond_str}_r{r}'] = idea
        conditions_data.append(condition_data)

    cond_df = pd.DataFrame(conditions_data)
    cond_df.to_csv("condition_df.csv")
    return cond_df


# Run the full pipeline when this cell/script executes; cond_df is bound to
# whatever main() returns.
cond_df = main()

