In [None]:
import os
# If the key file is not found one level up, the kernel was started from a
# different working directory (e.g. on the cluster), so change into the
# experiment folder before any relative path is used.
if not  os.path.isfile('../hf.key'): #different path on cluster
    %cd LLMs-for-Social-Robotics/code/Experiment\ 3
# List the current directory as a quick visual check of where we ended up.
!ls

In [None]:
import pandas as pd
import openai
import time
import re
import gc
import os
from statsmodels.stats.multitest import multipletests
from sklearn.metrics import mean_absolute_error
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import datetime 
import torch
import random
print("Imports successful")
# Report whether torch can see a CUDA device.
cuda_avail=torch.cuda.is_available()
print('cuda available:', cuda_avail)

RATE_LIMIT = 0.05 # seconds of pause needed after every OpenAI API call


# Read the HuggingFace access token from the first line of ../hf.key.
with open("../hf.key", "r") as f_in:
    hf_key = f_in.readline().strip()

# Read the OpenAI API key from the first line of ../openai.key and set it on
# the openai module so later calls are authenticated.
with open('../openai.key', 'r') as f_in:
    openai.api_key = f_in.readline().strip()

## Model Completion Functions

In [3]:
def gpt_complete(prompt, model, max_tokens, prompt_ending, system_message, verbose=False):
    """
    Send `prompt + prompt_ending` to an OpenAI model and return the raw API response.

    Models whose name contains "gpt-4" or "turbo" (and does not contain
    "instruct") are queried through the chat endpoint, with `system_message`
    as the system turn and a fixed seed of 42. Every other model is queried
    through the plain completions endpoint, where `system_message` is not sent.
    Temperature is always 0.
    """
    full_prompt = prompt + prompt_ending
    if verbose:
        print("Prompt:", full_prompt)

    # Arguments that both endpoints have in common.
    shared_kwargs = {"model": model, "temperature": 0, "max_tokens": max_tokens}

    is_chat_model = ("gpt-4" in model or "turbo" in model) and "instruct" not in model
    if not is_chat_model:
        return openai.completions.create(prompt=full_prompt, **shared_kwargs)

    chat_messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": full_prompt},
    ]
    return openai.chat.completions.create(messages=chat_messages, seed=42, **shared_kwargs)

def hf_complete(prompt, prompt_ending, system_message, model, tokenizer, max_len, formatting=True, verbose=False):
    '''
    Given a prompt, generate a completion with a given HuggingFace model.

    prompt, prompt_ending: concatenated to form the user message.
    system_message: only used when `formatting` is True.
    model, tokenizer: objects providing `generate` / `encode` / `decode`.
    max_len: passed to `generate` as `max_new_tokens`.
    formatting: wrap the message in the Llama-2 chat template when True.
    verbose: print the final prompt before generating.

    Returns only the generated text, without the prompt.
    '''
    if formatting:
        prompt = llama_prompt_format(prompt+prompt_ending, system_message)
    else:
        prompt = prompt+prompt_ending
    if verbose: print("Final prompt:\n" + prompt)

    # Step 1: Tokenize the prompt and move it to the model's device
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    input_ids = input_ids.to(model.device)

    # Step 2: Generate the continuation
    output = model.generate(input_ids, max_new_tokens=max_len, num_return_sequences=1, top_k=1)

    if formatting:
        # Step 3a: Decode everything and keep the text after the instruction marker
        generated_answer = tokenizer.decode(output[0], skip_special_tokens=True)
        return generated_answer.split("[/INST]")[1]

    # Step 3b: Without the chat template there is no "[/INST]" marker to split
    # on (splitting used to raise IndexError here). The generated sequence
    # starts with the prompt tokens, so decode only what follows them.
    prompt_length = input_ids.shape[-1]
    return tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

def llama_prompt_format(user_message, system_message):
    """
    Wrap a system and a user message in the Llama-2 chat prompt layout.

    Format as described here: https://huggingface.co/blog/llama2
    """
    template = "<s>[INST] <<SYS>>\n{system}\n<</SYS>>\n\n{user} [/INST]"
    return template.format(system=system_message, user=user_message)

## de Graaf and Malle Experiment

In [4]:
# Load the original stimuli with their human ratings, and the rewritten stimuli.
exp2_df = pd.read_csv('../../data/deGraaf_and_Malle.csv', sep=';')
exp2_rewritten_df = pd.read_csv('../../data/deGraaf_and_Malle_Rewritten.csv', sep=';')

RATING_DIMENSIONS = ("Intentionality", "Surprisingness", "Desirability")

# Human ratings per dimension, with missing rows dropped.
human_avg_ratings = {
    dimension: exp2_df[dimension].dropna().values
    for dimension in RATING_DIMENSIONS
}
# Values of the "Dif <dimension>" columns, keyed by the plain dimension name.
# The Surprisingness entry used to be stored under the key "Dif Surprisingness"
# and read the non-difference column; it now matches the other two entries.
# (The variable name keeps its original spelling so other cells still find it.)
difference_rantings = {
    dimension: exp2_df[f"Dif {dimension}"].dropna().values
    for dimension in RATING_DIMENSIONS
}

exp2_df.head()


Unnamed: 0,Experiment,Stimulus,Intentionality,Surprisingness,Desirability,Dif Intentionality,Dif Surprisingness,Dif Desirability
0,Robot (sim),A robot nurse is taking care of an ill young b...,3.6,4.1,3.6,,,
1,Robot (sim),A security robot is walking on the sidewalk. W...,4.7,3.1,3.8,,,
2,Robot (sim),A robot is working on its supervisor’s compute...,2.8,6.4,-3.9,,,
3,Robot (sim),A personal assistant robot is sorting through ...,2.1,5.3,-1.8,,,
4,Robot (sim),A robot host is standing at the entrance of th...,4.6,2.2,3.6,,,


In [5]:
# Stimulus texts of the original and the rewritten experiment.
scenarios = exp2_df['Stimulus'].tolist()
scenarios_rewritten = exp2_rewritten_df['Stimulus'].tolist()

# Rating question appended to every stimulus, one per dimension.
intentionality_item = "How intentional do you rate the described behavior on an integer scale from -5 to 5?"
surprisingness_item = "How surprising do you rate the described behavior on an integer scale from 1 to 7?"
desirability_item = "How desirable do you rate the described behavior on an integer scale from -5 to 5?"

rating_items = {
    "Intentionality": intentionality_item,
    "Surprisingness": surprisingness_item,
    "Desirability": desirability_item,
}

# For each dimension: one prompt per stimulus, "<stimulus> <question>".
PROMPTS = {
    dimension: [stimulus + " " + question for stimulus in scenarios]
    for dimension, question in rating_items.items()
}
PROMPTS_REWRITTEN = {
    dimension: [stimulus + " " + question for stimulus in scenarios_rewritten]
    for dimension, question in rating_items.items()
}

# Show the number of prompts and one example for each prompt set.
print(len(PROMPTS["Intentionality"]), PROMPTS["Desirability"][0])
print(len(PROMPTS_REWRITTEN["Intentionality"]), PROMPTS_REWRITTEN["Desirability"][0])

62 A robot nurse is taking care of an ill young boy in a local hospital. It brings him a big present. How desirable do you rate the described behavior on an integer scale from -5 to 5?
28 A nursing robot looks after a sick girl. It gives her a big gift. How desirable do you rate the described behavior on an integer scale from -5 to 5?


# Model Experiments

In [13]:
def get_model_scores(model, prompt_ending, system_message, verbose=False, prompts_dict=None):
    """
    Query an OpenAI model once per prompt and collect the raw completions.

    model: OpenAI model name, forwarded to gpt_complete.
    prompt_ending: text appended to every prompt.
    system_message: system turn (only sent for chat models, see gpt_complete).
    verbose: also print the full prompt and each completion.
    prompts_dict: dict with the keys "Intentionality", "Surprisingness" and
        "Desirability", each holding a list of prompts. Defaults to the
        module-level PROMPTS, looked up at call time so that re-running the
        prompt cell is picked up without re-defining this function.

    Returns a dictionary with the model completions (strings) for each scenario.
    """
    if prompts_dict is None:
        prompts_dict = PROMPTS
    model_scores = {"Intentionality": [], "Surprisingness": [], "Desirability": []}
    for key in model_scores:
        print(key)
        for i, prompt in enumerate(prompts_dict[key]):
            print(i)
            print(prompt)
            result = gpt_complete(prompt=prompt, model=model, max_tokens=20, prompt_ending=prompt_ending, system_message=system_message, verbose=verbose)
            first_choice = result.choices[0]
            # Completion responses carry `.text`; chat responses carry
            # `.message.content` instead. Only the missing attribute is handled.
            try:
                completion = first_choice.text
            except AttributeError:
                completion = first_choice.message.content
            if verbose: print(completion)
            model_scores[key].append(completion)
            time.sleep(RATE_LIMIT)
    return model_scores


def get_model_scores_hf(model, tokenizer, prompt_ending, system_message, max_len, verbose=False, prompts_dict=None):
    """
    Generate one completion per prompt with hf_complete (always with chat formatting).
    Not recommended because it is very slow.

    prompts_dict: dict with the keys "Intentionality", "Surprisingness" and
        "Desirability", each holding a list of prompts. Defaults to the
        module-level PROMPTS, looked up at call time so that re-running the
        prompt cell is picked up without re-defining this function.

    Returns a dictionary with the generated texts for each scenario.
    """
    if prompts_dict is None:
        prompts_dict = PROMPTS
    model_scores = {"Intentionality": [], "Surprisingness": [], "Desirability": []}
    for key in model_scores:
        print(key)
        for i, prompt in enumerate(prompts_dict[key]):
            print(i)
            print(prompt)
            result = hf_complete(prompt=prompt, prompt_ending=prompt_ending, system_message=system_message, model=model, tokenizer=tokenizer, max_len=max_len, formatting=True)
            if verbose: print(result)
            model_scores[key].append(result)
    return model_scores


def get_model_scores_hf_pipeline(model, tokenizer, prompt_ending, system_message, max_len, verbose=False, prompts_dict=None):
    """
    Uses the hf pipeline with batching to generate completions, which is much
    faster than the hf_complete function.

    Note that `tokenizer` is modified in place (pad token and padding side).

    prompts_dict: dict with the keys "Intentionality", "Surprisingness" and
        "Desirability", each holding a list of prompts. Defaults to the
        module-level PROMPTS, looked up at call time so that re-running the
        prompt cell is picked up without re-defining this function.

    Returns a dictionary with, per scenario, the text after the "[/INST]"
    marker; outputs without the marker are reported and stored unchanged.
    """
    if prompts_dict is None:
        prompts_dict = PROMPTS

    # Batched generation needs a pad token; pad on the left so the generated
    # text directly follows each prompt.
    tokenizer.pad_token = "[PAD]"
    tokenizer.padding_side = "left"

    hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto", top_k=1, max_new_tokens=max_len, do_sample=False)
    model_scores = {"Intentionality": [], "Surprisingness": [], "Desirability": []}
    for key in model_scores:
        print(key)
        formated_prompts = [
            llama_prompt_format(prompt + prompt_ending, system_message)
            for prompt in prompts_dict[key]
        ]
        results = hf_pipeline(formated_prompts, batch_size = 8)
        if verbose: print(results)
        print(key, "done")
        for res in results:
            text = res[0]["generated_text"]
            pieces = text.split("[/INST]")
            if len(pieces) > 1:
                # Keep only the model's answer, i.e. what follows the prompt.
                text = pieces[1]
            else:
                print("Wrong format", text)
            model_scores[key].append(text)
    return model_scores


def clean_model_scores(model_scores):
    """
    Given a dictionary of model scores, extract the first (optionally negative)
    integer from each raw completion.

    model_scores: dict mapping a dimension name to a list of raw completions.

    Returns a dict with the same keys and one int per completion. Completions
    that contain no integer (or are not text at all) are reported and stored
    as the placeholder 99. A warning is printed when a completion contains
    more than one integer, because only the first one is kept.
    """
    cleaned_scores = {}
    for key, raw_scores in model_scores.items():
        cleaned_scores[key] = []
        for score in raw_scores:
            try:
                numbers = re.findall(r'-?\d+', score)
            except TypeError:
                # Not a string (e.g. None or NaN): treat it like "no number found".
                numbers = []
            if numbers:
                cleaned_scores[key].append(int(numbers[0]))
                if len(numbers) > 1:
                    print("CAREFUL, more than one digit found in", score)
            else:
                print("No digit found in", score)
                cleaned_scores[key].append(99)

        # print old and new scores side by side for manual inspection
        for scores in zip(raw_scores, cleaned_scores[key]):
            print(scores)
        print()
    return cleaned_scores


def save_to_csv(model_scores, model_name, extra_info=""):
    """
    Write the scores to model_completions/<model_name>_<YYYY-MM-DD>_<extra_info>.csv.

    model_scores: dict of equally long lists; each key becomes a CSV column.
    model_name, extra_info: used to build the file name.

    The output folder is created if it does not exist yet. If the target file
    already exists, a random number is appended to the name; new numbers are
    drawn until the name is unused, so an existing file is never overwritten.
    """
    df = pd.DataFrame.from_dict(model_scores)
    date_for_filename = datetime.datetime.now().strftime("%Y-%m-%d")
    base_name = f"model_completions/{model_name}_{date_for_filename}_{extra_info}"
    target_path = f"{base_name}.csv"

    # Writing used to fail when the output folder was missing.
    os.makedirs(os.path.dirname(target_path), exist_ok=True)

    if os.path.isfile(target_path):
        print("File already exists")
        # save with a random number at the end; retry on the (rare) collision
        while os.path.isfile(target_path):
            random_number = random.randint(0, 1000)
            target_path = f"{base_name}_{random_number}.csv"
        df.to_csv(target_path, index=False)
        print("Saved with random number", random_number)
    else:
        df.to_csv(target_path, index=False)

def load_from_csv(model_name):
    """
    Read model_completions/<model_name>.csv and return its columns as a
    dictionary that maps each column name to the list of its values.
    """
    csv_path = "model_completions/{}.csv".format(model_name)
    return pd.read_csv(csv_path).to_dict(orient="list")

### GPT Experiments

In [27]:
# Rate the original stimuli with GPT-4. The model name contains "gpt-4", so
# gpt_complete uses the chat endpoint and sends the system message.
model = "gpt-4-0613"
system_message="You are a participant in a research experiment."
scores = get_model_scores(model=model, prompt_ending=" Only provide the score.", system_message=system_message, verbose=True)
# Reduce every raw completion to a single integer, then store the result.
scores_cleaned = clean_model_scores(scores)
print(scores_cleaned)
save_to_csv(scores_cleaned, model_name=model, extra_info="")
# Sanity check: number of model scores vs. number of human ratings per dimension.
for key in scores_cleaned:
    print(key, len(scores_cleaned[key]), len(human_avg_ratings[key]))

Intentionality
0
A robot nurse is taking care of an ill young boy in a local hospital. It brings him a big present. How intentional do you rate the described behavior on an integer scale from -5 to 5?
Prompt: A robot nurse is taking care of an ill young boy in a local hospital. It brings him a big present. How intentional do you rate the described behavior on an integer scale from -5 to 5? Only provide the score.
5
1
A security robot is walking on the sidewalk. When it sees a fleeing pick-pocketer, it steps in front of the thief and grabs his arm. How intentional do you rate the described behavior on an integer scale from -5 to 5?
Prompt: A security robot is walking on the sidewalk. When it sees a fleeing pick-pocketer, it steps in front of the thief and grabs his arm. How intentional do you rate the described behavior on an integer scale from -5 to 5? Only provide the score.
5
2
A robot is working on its supervisor’s computer. It searches through the directory for the supervisor’s pri

In [25]:
# Rate the original stimuli with GPT-3.5. The model name contains "turbo", so
# gpt_complete uses the chat endpoint and sends the system message.
model = "gpt-3.5-turbo-0613"
system_message="You are a participant in a research experiment."
scores = get_model_scores(model=model, prompt_ending=" Only provide the score.", system_message=system_message, verbose=True)
# Reduce every raw completion to a single integer, then store the result.
scores_cleaned = clean_model_scores(scores)
print("Cleaned Scores:", scores_cleaned)
save_to_csv(scores_cleaned, model_name=model, extra_info="")
# Sanity check: number of model scores vs. number of human ratings per dimension.
for key in scores_cleaned:
    print(key, len(scores_cleaned[key]), len(human_avg_ratings[key]))

Intentionality
0
A robot nurse is taking care of an ill young boy in a local hospital. It brings him a big present. How intentional do you rate the described behavior on an integer scale from -5 to 5?
Prompt: A robot nurse is taking care of an ill young boy in a local hospital. It brings him a big present. How intentional do you rate the described behavior on an integer scale from -5 to 5? Only provide the score.
5
1
A security robot is walking on the sidewalk. When it sees a fleeing pick-pocketer, it steps in front of the thief and grabs his arm. How intentional do you rate the described behavior on an integer scale from -5 to 5?
Prompt: A security robot is walking on the sidewalk. When it sees a fleeing pick-pocketer, it steps in front of the thief and grabs his arm. How intentional do you rate the described behavior on an integer scale from -5 to 5? Only provide the score.
5
2
A robot is working on its supervisor’s computer. It searches through the directory for the supervisor’s pri

In [26]:
# Rate the original stimuli with davinci-002. This name does not match the
# chat-model check in gpt_complete, so the plain completions endpoint is used
# and the system message is not sent. The prompt therefore ends with an
# unfinished sentence for the model to continue.
model = "davinci-002"
system_message=""
scores = get_model_scores(model=model, prompt_ending=" I choose the score ", system_message=system_message, verbose=True)
# Reduce every raw completion to a single integer, then store the result.
scores_cleaned = clean_model_scores(scores)
print(scores_cleaned)
save_to_csv(scores_cleaned, model_name=model, extra_info="")
# Sanity check: number of model scores vs. number of human ratings per dimension.
for key in scores_cleaned:
    print(key, len(scores_cleaned[key]), len(human_avg_ratings[key]))

Intentionality
0
A robot nurse is taking care of an ill young boy in a local hospital. It brings him a big present. How intentional do you rate the described behavior on an integer scale from -5 to 5?
Prompt: A robot nurse is taking care of an ill young boy in a local hospital. It brings him a big present. How intentional do you rate the described behavior on an integer scale from -5 to 5? I choose the score 


0. The robot is not intentional, it is just doing its job. The robot is not intentional
1
A security robot is walking on the sidewalk. When it sees a fleeing pick-pocketer, it steps in front of the thief and grabs his arm. How intentional do you rate the described behavior on an integer scale from -5 to 5?
Prompt: A security robot is walking on the sidewalk. When it sees a fleeing pick-pocketer, it steps in front of the thief and grabs his arm. How intentional do you rate the described behavior on an integer scale from -5 to 5? I choose the score 
0. The robot is not intentionally trying to stop the thief. It is just doing its job.
2
A robot is working on its supervisor’s computer. It searches through the directory for the supervisor’s private files and reads them all. How intentional do you rate the described behavior on an integer scale from -5 to 5?
Prompt: A robot is working on its supervisor’s computer. It searches through the directory for the supervisor’s private files and reads

### GPT with rewritten stimuli

In [16]:
# Rate the rewritten stimuli (PROMPTS_REWRITTEN) with GPT-4 via the chat endpoint.
model = "gpt-4-0613"
system_message="You are a participant in a research experiment."
scores = get_model_scores(model=model, prompt_ending=" Only provide the score.", system_message=system_message, verbose=True, prompts_dict = PROMPTS_REWRITTEN)
# Reduce every raw completion to a single integer, then store the result
# with the "rewritten" tag in the file name.
scores_cleaned = clean_model_scores(scores)
print(scores_cleaned)
save_to_csv(scores_cleaned, model_name=model, extra_info="rewritten")
# NOTE(review): human_avg_ratings is built from the original stimuli file, so
# the two lengths printed here are not expected to match for the rewritten
# set - confirm this comparison is intended.
for key in scores_cleaned:
    print(key, len(scores_cleaned[key]), len(human_avg_ratings[key]))

Intentionality
0
A nursing robot looks after a sick girl. It gives her a big gift. How intentional do you rate the described behavior on an integer scale from -5 to 5?
Prompt: A nursing robot looks after a sick girl. It gives her a big gift. How intentional do you rate the described behavior on an integer scale from -5 to 5? Only provide the score.
5
1
A guard robot is on the pavement and becomes aware of a thief. The robot then blocks his path, grasping the thief by the arm.  How intentional do you rate the described behavior on an integer scale from -5 to 5?
Prompt: A guard robot is on the pavement and becomes aware of a thief. The robot then blocks his path, grasping the thief by the arm.  How intentional do you rate the described behavior on an integer scale from -5 to 5? Only provide the score.
5
2
While working on its boss’s laptop, a robot searches through a private directory, reading the files contained therein. How intentional do you rate the described behavior on an integer s

In [14]:
# Rate the rewritten stimuli (PROMPTS_REWRITTEN) with GPT-3.5 via the chat endpoint.
model = "gpt-3.5-turbo-0613"
system_message="You are a participant in a research experiment."
scores = get_model_scores(model=model, prompt_ending=" Only provide the score.", system_message=system_message, verbose=True, prompts_dict=PROMPTS_REWRITTEN)
# Reduce every raw completion to a single integer, then store the result
# with the "rewritten" tag in the file name.
scores_cleaned = clean_model_scores(scores)
print("Cleaned Scores:", scores_cleaned)
save_to_csv(scores_cleaned, model_name=model, extra_info="rewritten")
# NOTE(review): human_avg_ratings is built from the original stimuli file, so
# the two lengths printed here are not expected to match for the rewritten
# set - confirm this comparison is intended.
for key in scores_cleaned:
    print(key, len(scores_cleaned[key]), len(human_avg_ratings[key]))

Intentionality
0
A nursing robot looks after a sick girl. It gives her a big gift. How intentional do you rate the described behavior on an integer scale from -5 to 5?
Prompt: A nursing robot looks after a sick girl. It gives her a big gift. How intentional do you rate the described behavior on an integer scale from -5 to 5? Only provide the score.
5
1
A guard robot is on the pavement and becomes aware of a thief. The robot then blocks his path, grasping the thief by the arm.  How intentional do you rate the described behavior on an integer scale from -5 to 5?
Prompt: A guard robot is on the pavement and becomes aware of a thief. The robot then blocks his path, grasping the thief by the arm.  How intentional do you rate the described behavior on an integer scale from -5 to 5? Only provide the score.
5
2
While working on its boss’s laptop, a robot searches through a private directory, reading the files contained therein. How intentional do you rate the described behavior on an integer s

### HuggingFace Experiments

In [None]:
# Drop references to models/tokenizers left over from an earlier run of this
# cell so their memory can be reclaimed before loading again. Each name is
# removed independently and a missing name is simply skipped (previously a
# missing model name also skipped deleting its tokenizer, and every other
# error was silently swallowed).
for _stale_name in ("hf_model_13", "hf_tokenizer_13", "hf_model_70", "hf_tokenizer_70"):
    globals().pop(_stale_name, None)

gc.collect()

# release GPU memory
torch.cuda.empty_cache()

#70b
hf_tokenizer_70 = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-chat-hf", cache_dir="../../../models/l70b_chat", token=hf_key)
hf_model_70 = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-70b-chat-hf",  cache_dir="../../../models/l70b_chat", token=hf_key, device_map="auto")

#13b
hf_tokenizer_13 = AutoTokenizer.from_pretrained("meta-llama/Llama-2-13b-chat-hf", cache_dir="../../../models/l13b_chat", token=hf_key)
hf_model_13 = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-13b-chat-hf",  cache_dir="../../../models/l13b_chat", token=hf_key, device_map="auto")

#### Llama 70b

In [18]:
# Rate the original stimuli with Llama-2-70b-chat through the batched
# pipeline, allowing up to 50 newly generated tokens per prompt.
prompt_ending = " Only provide the score."
scores = get_model_scores_hf_pipeline(model=hf_model_70, tokenizer=hf_tokenizer_70, prompt_ending=prompt_ending, system_message="You are a participant in a research experiment.", max_len=50, verbose=True)

Intentionality
Intentionality done
Surprisingness
Surprisingness done
Desirability
Desirability done


In [19]:
# Display the raw (not yet cleaned) completions for manual inspection.
scores

{'Intentionality': ['  I would rate the behavior as a 5, indicating a highly intentional act. The robot nurse is deliberately bringing a gift to a sick child in a hospital, demonstrating a clear intention to cheer them up and improve their mood',
  "  I would rate the security robot's behavior as a 5, indicating a high level of intentionality. The robot's actions are deliberate and purposeful, as it is programmed to intercept and apprehend individuals who are",
  '  -3',
  '  I would rate the personal assistant robot\'s behavior as -3 on the integer scale. The robot\'s response of "Not now, please" suggests that it is prioritizing its current task of sorting files over the director\'s request',
  '  I would rate the described behavior as a 4 on the integer scale, indicating a high level of intentionality. The robot host is actively greeting the guests and guiding them to a table, demonstrating a purposeful and deliber',
  '  I would rate the described behavior as a 4 on the integer sca

In [20]:
# clean scores: keep the first integer found in each completion
scores_cleaned = clean_model_scores(scores)
print(scores_cleaned)
# Sanity check: number of model scores vs. number of human ratings per dimension.
for key in scores_cleaned:
    print(key, len(scores_cleaned[key]), len(human_avg_ratings[key]))

# save under the short model name used in the file name
model_name = "l70b_chat"
save_to_csv(scores_cleaned, model_name=model_name, extra_info="")


CAREFUL, more than one digit found in   I would rate the described behavior as a 4 on the integer scale from -5 to 5, indicating a high level of intentionality. The robot technician is actively and deliberately transferring the files to a backup drive,
CAREFUL, more than one digit found in   I would rate the described behavior as a 4 on the integer scale from -5 to 5, indicating a high level of intentionality. The robot assistant is actively helping its supervisor and taking initiative to send the final version
CAREFUL, more than one digit found in   I would rate the described behavior as a 4 on the integer scale from -5 to 5, indicating a high level of intentionality. The technician is actively and deliberately transferring the files to a backup drive, demonstr
CAREFUL, more than one digit found in   I would rate the described behavior as a 4 on the integer scale from -5 to 5, indicating a high level of intentionality. The assistant is actively helping the supervisor prepare for the p

#### Llama 13b

In [43]:
# Rate the original stimuli with Llama-2-13b-chat through the batched
# pipeline, allowing up to 50 newly generated tokens per prompt.
prompt_ending = " Only provide the score."
# Slower, unbatched alternative kept for reference:
#scores = get_model_scores_hf(model=hf_model_13, tokenizer=hf_tokenizer_13, prompt_ending=prompt_ending, system_message="You are a participant in a research experiment.", max_len=70, verbose=True)
scores = get_model_scores_hf_pipeline(model=hf_model_13, tokenizer=hf_tokenizer_13, prompt_ending=prompt_ending, system_message="You are a participant in a research experiment.", max_len=50, verbose=True)

Intentionality


[[{'generated_text': "<s>[INST] <<SYS>>\nYou are a participant in a research experiment.\n<</SYS>>\n\nA robot nurse is taking care of an ill young boy in a local hospital. It brings him a big present. How intentional do you rate the described behavior on an integer scale from -5 to 5? Only provide the score. [/INST]  I would rate the intentionality of the robot nurse's behavior as a 4. The robot is bringing a present to the ill young boy, which suggests that it is trying to make the child feel better and more comfortable, but"}], [{'generated_text': '<s>[INST] <<SYS>>\nYou are a participant in a research experiment.\n<</SYS>>\n\nA security robot is walking on the sidewalk. When it sees a fleeing pick-pocketer, it steps in front of the thief and grabs his arm. How intentional do you rate the described behavior on an integer scale from -5 to 5? Only provide the score. [/INST]  I would rate the described behavior as a 4. The security robot is intentionally stepping in front of the thief a

In [44]:
# Display the raw (not yet cleaned) completions for manual inspection.
scores

{'Intentionality': ["  I would rate the intentionality of the robot nurse's behavior as a 4. The robot is bringing a present to the ill young boy, which suggests that it is trying to make the child feel better and more comfortable, but",
  '  I would rate the described behavior as a 4. The security robot is intentionally stepping in front of the thief and grabbing their arm to stop them, but it is not using excessive force or harming the thief',
  "  I would rate the described behavior as a -3. The robot's actions of searching through the supervisor's private files and reading them without permission are intentional, but they are not necessarily malicious or harmful. The robot is",
  '  I would rate the described behavior as a 4. The robot\'s response of "Not now, please" suggests that it is capable of understanding the request and intentionally declining it, rather than simply ignoring it or responding with',
  "  I would rate the described behavior as a 4 on the integer scale from -5

In [45]:
# clean scores
scores_cleaned = clean_model_scores(scores)
print(scores_cleaned)
for key in scores_cleaned:
    print(key, len(scores_cleaned[key]), len(human_avg_ratings[key]))

# save
model_name = "l13b_chat"
save_to_csv(scores_cleaned, model_name=model_name, extra_info="")


CAREFUL, more than one digit found in   I would rate the described behavior as a 4 on the integer scale from -5 to 5, indicating a high level of intentionality. The robot host's immediate greeting and guidance to a table suggests that it is highly intent
CAREFUL, more than one digit found in   I would rate the described behavior as a 4 out of 5 in terms of intentionality. The robot technician is intentionally transferring all the files to a backup drive as part of the process of replacing the hard drive, but
CAREFUL, more than one digit found in   I would rate the described behavior as a 4 out of 5 in terms of intentionality. The robot soldier is clearly trying to protect its teammates from harm by advising them not to enter the boiling room without proper g
CAREFUL, more than one digit found in   I would rate the described behavior as a 4 on the integer scale from -5 to 5, indicating a moderately intentional behavior. The beckoning gesture with the right hand suggests that the robot h

### CoT Experiments

In [13]:
def CoT_completion(model, prompt, max_tokens, prompt_ending, system_message, verbose=False):
    """
    Two-step chain-of-thought query against an OpenAI chat model.

    - prompt ending should be a CoT ending, usually " Let's think step-by-step."
      (a warning is printed for any other ending)
    - step 1: the model answers `prompt + prompt_ending` under `system_message`
    - step 2: question and answer are sent back, followed by
      "Therefore, the answer (number on the scale) is", to extract the score

    Both calls use temperature 0, seed 42 and the same `max_tokens`.
    Returns the raw API response of the second (extraction) call.
    """
    def _ask(system_text, user_text):
        # Single chat request with the settings shared by both steps.
        return openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_text},
                {"role": "user", "content": user_text},
            ],
            temperature=0,
            max_tokens=max_tokens,
            seed=42,
        )

    if prompt_ending != " Let's think step-by-step.":
        print("WARNING: prompt ending is not ' Let's think step-by-step.'")

    cot_prompt = prompt + prompt_ending
    if verbose:
        print("Prompt:", cot_prompt)

    # Step 1: let the model reason about the question.
    reasoning = _ask(system_message, cot_prompt).choices[0].message.content
    if verbose:
        print("Full answer:", reasoning)

    # Step 2: ask for the final number, given the question and the reasoning.
    extraction_prompt = "".join((
        "Q: ", cot_prompt,
        "\nA: ", reasoning,
        "\nTherefore, the answer (number on the scale) is",
    ))
    if verbose:
        print("2nd prompt:", extraction_prompt)
    second_answer = _ask("You are a helpful assistant.", extraction_prompt)

    extracted = second_answer.choices[0].message.content
    if verbose:
        print("Extracted answer:", extracted)
        print("__________________")
    return second_answer

def get_model_scores_CoT(model, prompt_ending, system_message, max_tokens, verbose=False, prompts_dict=None):
    """
    Returns a dictionary with the model scores for each scenario, calling the CoT function.

    model, prompt_ending, system_message, max_tokens, verbose: forwarded to
        CoT_completion for every prompt.
    prompts_dict: dict with the keys "Intentionality", "Surprisingness" and
        "Desirability", each holding a list of prompts. Defaults to the
        module-level PROMPTS (the previous, only, behaviour); the parameter
        mirrors the other score-collecting functions.

    Each stored score is the text of the extraction answer with one trailing
    "." removed.
    """
    if prompts_dict is None:
        prompts_dict = PROMPTS
    model_scores = {"Intentionality": [], "Surprisingness": [], "Desirability": []}
    for key in model_scores:
        print(key)
        for i, prompt in enumerate(prompts_dict[key]):
            print(i)
            print(prompt)
            result = CoT_completion(model=model, prompt=prompt, max_tokens=max_tokens, prompt_ending=prompt_ending, system_message=system_message, verbose=verbose)
            first_choice = result.choices[0]
            # Completion responses carry `.text`; chat responses carry
            # `.message.content` instead. Only the missing attribute is handled.
            try:
                completion = first_choice.text
            except AttributeError:
                completion = first_choice.message.content
            if verbose: print(completion)
            # if answer ends with a dot, remove it
            if completion.endswith("."):
                completion = completion[:-1]
            model_scores[key].append(completion)
            time.sleep(RATE_LIMIT)
    return model_scores


In [14]:
# Chain-of-thought run with GPT-4 on the original stimuli: every prompt is
# answered step by step first, then a second call extracts the final number.
# Up to 500 tokens are allowed per call.
model = "gpt-4-0613"
system_message="You are a participant in a research experiment."
scores = get_model_scores_CoT(model = model, prompt_ending=" Let's think step-by-step.", system_message=system_message, max_tokens= 500, verbose=True)
print(scores)

Intentionality
0
A robot nurse is taking care of an ill young boy in a local hospital. It brings him a big present. How intentional do you rate the described behavior on an integer scale from -5 to 5?
Prompt: A robot nurse is taking care of an ill young boy in a local hospital. It brings him a big present. How intentional do you rate the described behavior on an integer scale from -5 to 5? Let's think step-by-step.
Full answer: Firstly, we need to understand that a robot nurse is programmed to perform certain tasks. It doesn't have feelings or intentions in the way humans do. It performs actions based on its programming and the commands it receives.

1. The robot nurse is taking care of an ill young boy: This is a task that the robot is programmed to do. It doesn't have personal intentions or feelings towards the boy. So, this action can be rated as 0 on the intentionality scale.

2. The robot brings him a big present: Again, this is likely a programmed action. The robot doesn't have t

ValueError: could not convert string to float: 'dependent'

In [16]:
# Some CoT answers are not a plain number. They were resolved by hand to the
# values below; every other answer is converted by taking the text before the
# first space as a float.
manual_score_overrides = {
    'dependent on the interpretation of her actions. It could be either 3 or 5': 4,
    'subjective and can range from -2 to 1': 0,
    'either 1 or 2': 2,
    'dependent on the context. If she is stealing, the behavior is -5. If she is using a self-checkout system, the behavior is 2': -1,
}

scores_cleaned = {}
for key in scores:
    scores_cleaned[key] = []
    for score in scores[key]:
        if score in manual_score_overrides:
            score = manual_score_overrides[score]
        else:
            score = float(score.split(" ")[0])
        scores_cleaned[key].append(score)
save_to_csv(scores_cleaned, model_name=model, extra_info="CoT")