### Setup

In [1]:
import os
import time

import numpy as np
import openai
from tqdm.notebook import tqdm

# Location of the file that holds your OpenAI API key (~/.ssh/openai)
api_key_location = os.path.expanduser("~/.ssh/openai")

# Read the key from disk (dropping surrounding whitespace) and hand it to the OpenAI client
with open(api_key_location) as key_file:
    openai.api_key = key_file.read().strip()

### Generate prompts

In [None]:
# generate_prompt takes a pair of indices into the list of concept names and
# returns the relatedness-rating prompt for those two concepts.
def generate_prompt(current_pair, concept_labels=None):
    """Build the relatedness-rating prompt for a pair of concepts.

    Args:
        current_pair: Indexable pair; elements 0 and 1 are indices into the
            list of concept names.
        concept_labels: Optional sequence of concept names. When omitted, the
            notebook-level `labels` variable is used, so existing
            one-argument calls keep working.

    Returns:
        The prompt string: the rating instruction followed by the two concept
        names and a trailing "Rating:" cue.
    """
    if concept_labels is None:
        # Fall back to the notebook global; it must be defined before this call.
        concept_labels = labels
    first_concept = concept_labels[current_pair[0]]
    second_concept = concept_labels[current_pair[1]]
    prompt = "How related are the two concepts on a scale of 0 (unrelated) to 1 (highly related)? Reply with a numerical rating and no other text."
    prompt += f'\nConcept 1: {first_concept}\nConcept 2: {second_concept}\nRating:'
    return prompt

# Sanity check: build the prompt for the first two concepts, wrap it in the
# chat-style message list, and show both.
current_pair = (0, 1)
prompt = generate_prompt(current_pair)
messages = [{"role": "user", "content": prompt}]
print(prompt)
print(messages)

### Configure GPT

In [None]:
# Chat model that is queried for every concept pair
model = "gpt-3.5-turbo"
# Sampling temperature; higher values make GPT more creative
temperature = 1
# Run identifier; in the cells shown here it only tags the output filename
seed = 1

### Collect GPT Responses


In [None]:
def openai_chat_completion(messages, model, temperature, max_attempts=5, retry_wait=5):
    """Request a chat completion, retrying when the API call raises.

    Args:
        messages: Chat messages to send, e.g. [{"role": "user", "content": ...}].
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.
        max_attempts: Number of calls to try before giving up (default 5).
        retry_wait: Seconds to sleep after a failed attempt (default 5).

    Returns:
        (choices, created) on success, where `choices` is a list with one
        plain dict per returned choice and `created` is the response's
        `created` attribute. (False, False) if every attempt raised.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            response = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                temperature=temperature,
            )
        except Exception as e:
            # If the error persists through the final attempt, give up.
            if attempt >= max_attempts:
                return False, False
            # Stay quiet on the first failures and get more verbose as they repeat.
            if attempt >= 4:
                print(f'Attempt {attempt} failed: {e}')
            elif attempt >= 3:
                print(f'Attempt {attempt} failed.')
            # Wait before pinging the API again in case the error is due to a rate limit.
            time.sleep(retry_wait)
            continue
        choices = [dict(choice.items()) for choice in response.choices]
        return choices, response.created
    # Only reached when max_attempts is not positive, i.e. no call was made.
    return False, False

In [None]:
# Pairwise ratings, filled in symmetrically; entries that are never rated stay at 0.
predicted_similarities = np.zeros((len(labels), len(labels)))
visited_pairs = []
# Number of replies to request for one pair before skipping it.
max_parse_attempts = 10

for idx1 in tqdm(range(len(labels))):
    for idx2 in tqdm(range(len(labels)), desc=f'Generating predictions {idx1}/{len(labels)}'):

        current_pair = (idx1, idx2)

        # We assume that the similarity matrix is symmetric, so a pair whose
        # mirror image has already been rated is skipped.
        if current_pair in visited_pairs:
            continue

        # The request is identical on every retry, so build it once per pair.
        prompt = generate_prompt(current_pair)
        messages = [{'role': 'user', 'content': prompt}]

        for attempt in range(1, max_parse_attempts + 1):
            choices, created = openai_chat_completion(messages, model, temperature)
            answer = None
            try:
                # The reply is expected to start with the numeric rating.
                answer = choices[0]['message']['content'].strip().split()[0]
                rating = float(answer)
            except (TypeError, KeyError, IndexError, AttributeError, ValueError) as e:
                # Either the API call itself failed (`choices` is False) or the
                # reply did not start with a number; ask again.
                print(f'Attempt {attempt}/{max_parse_attempts} for pair {current_pair} gave no usable rating: {answer!r} ({e})')
                continue

            predicted_similarities[idx1, idx2] = rating
            predicted_similarities[idx2, idx1] = rating  # Because we assume the matrix is symmetric
            visited_pairs.append((idx1, idx2))
            if idx1 != idx2:
                visited_pairs.append((idx2, idx1))
            break
        else:
            # No parseable answer after max_parse_attempts replies: skip this
            # entry. It is not marked as visited, so the mirrored pair can
            # still fill it in later.
            print(f'Skipping pair {current_pair}: no numeric rating after {max_parse_attempts} attempts; entry left at 0.')


# Make sure the directory for the model predictions exists
predictions_dir = 'RC/predictions/RC-fruits'
os.makedirs(predictions_dir, exist_ok=True)

# Write the similarity matrix to disk; the filename records model, temperature and seed
predictions_path = f'{predictions_dir}/RC-fruits-{model}-{temperature}-{seed}.npy'
np.save(predictions_path, predicted_similarities)

### Analyze the data