# Collect GPT Ratings
A framework for collecting GPT ratings, given a file of human ratings and several preset parameters (temperature, number of responses, GPT model, etc.).

In [1]:
import numpy as np, pandas as pd
from tqdm import tqdm
import json
import csv
import re

In [2]:
from openai import OpenAI
# Shared API client; get_raw_response() sends its chat-completion requests through it.
# NOTE(review): presumably picks up credentials from the environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()

In [3]:
# Load the human ratings and derive the array of unique words to be rated

# Keep only the word and response columns, exposing the response as 'rating'
human_ratings = (
    pd.read_csv('glasgow_valence.csv')[['word', 'Response']]
    .rename(columns={'Response': 'rating'})
)

# Unique words in order of first appearance, minus the "This is a test; click 1."
# item (presumably a catch trial rather than a real stimulus -- confirm).
wordlist = human_ratings['word'].unique()
wordlist = wordlist[wordlist != "This is a test; click 1."]
#wordlist = wordlist[:100] # option to limit the number of words (total ~870)
wordlist

array(['groom (bride)', 'nail (finger)', 'hold (store)',
       'process (understand)', 'hooker (prostitute)', 'star (celebrity)',
       'stir (cook)', 'prick (stab)', 'boxer (dog)', 'drop (fall)',
       'bass (guitar)', 'mad (crazy)', 'tense (verb)', 'coast (glide)',
       'club (tool)', 'cast (plaster bandage)', 'peer (look)',
       'couch (express)', 'fork (road)', 'kiwi (fruit)', 'fast (quick)',
       'cock (bird)', 'mass (religious)', 'deal (with)',
       'contract (agreement)', 'sick (ill)', 'plant (flower)',
       'pride (lions)', 'select (choose)', 'sole (only)',
       'right (correct)', 'head (body)', 'watch (wrist)', 'book (read)',
       'party (celebration)', 'live (to exist)', 'rocket (go quickly)',
       'speaker (orator)', 'bat (club)', 'stall (kiosk)',
       'live (not recorded)', 'stick (thin piece of wood)',
       'text (written work)', 'bank (rely)', 'range (outdoors)',
       'present (show)', 'litter (animal babies)', 'tweet (Twitter)',
       'poach (co

In [17]:
# Model snapshot used as the default for every request
GPT_MODEL = 'gpt-4o-mini-2024-07-18'

# System message sent ahead of each rating prompt
SYSTEM_PROMPT = (
    "You are a helpful assistant. "
    "Your job is to rate the semantic dimensions associated with different words."
)

# Request settings: default sampling temperature, completions requested per
# prompt (passed as n), and the per-completion token cap (passed as max_tokens)
TEMPERATURE = 1
NUMBER_RESPONSES = 50
MAX_TOKENS = 10

In [20]:
# CSV files that accumulate the raw GPT responses, one file per rated dimension
CACHE_FILENAME_VALENCE = 'gpt_valence_ratings.csv'
CACHE_FILENAME_CONCRETENESS = 'gpt_concreteness_ratings.csv'

# Rating instructions placed at the top of each prompt (see construct_prompt)
DEFINITION_VALENCE = (
    "Valence is a measure of value or worth. "
    "A word is POSITIVE if it represents something considered good, "
    "whereas a word is NEGATIVE if it represents something considered bad.\n"
    "Please indicate on a scale from 1 to 7 the valence of the word "
    "on a scale of VERY NEGATIVE to VERY POSITIVE, "
    "with the midpoint representing NEUTRAL:"
)
DEFINITION_CONCRETENESS = (
    "Concreteness is a measure of how concrete or abstract something is. "
    "A word is CONCRETE if it represents something that exists in a definite "
    "physical form in the real world. "
    "In contrast, a word is ABSTRACT if it represents more of a concept or idea.\n"
    "Please indicate on a scale from 1 to 7 how concrete you think the word is "
    "on a scale of VERY ABSTRACT to VERY CONCRETE, "
    "with the midpoint being neither especially abstract nor concrete:"
)

In [21]:
def construct_prompt(word, definition):
    """
    Build the user prompt sent to GPT for one word.

    The prompt consists of the rating instructions (e.g. DEFINITION_VALENCE), the
    word itself (e.g. 'apple'), a request for a single number, and a trailing
    "Answer: " cue, each separated by a blank line.
    """
    template = (
        "{}\n\n"
        "{}\n\n"
        "Please respond with a single number.\n\n"
        "Answer: "
    )
    return template.format(definition, word)

def get_raw_response(prompt, system_prompt=SYSTEM_PROMPT, gpt_model=GPT_MODEL, temp=TEMPERATURE):
    """
    Ask a prompt to GPT and collect the response object.

    Args:
        prompt: Text sent as the user message.
        system_prompt: Text sent as the system message (defaults to SYSTEM_PROMPT).
        gpt_model: Model name passed to the API (defaults to GPT_MODEL).
        temp: Sampling temperature (defaults to TEMPERATURE).

    Returns:
        The object returned by client.chat.completions.create. The request asks
        for NUMBER_RESPONSES completions (n), each capped at MAX_TOKENS tokens.
    """
    response = client.chat.completions.create(
        model=gpt_model,
        messages=[
            # Use the caller-supplied system prompt; the global SYSTEM_PROMPT is
            # only the default value of the parameter.
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=temp,
        max_tokens=MAX_TOKENS,
        n=NUMBER_RESPONSES,
    )
    return response

def get_string_ratings(response):
    """
    Extract the message text of every choice in a response object.

    Returns a list with one entry per element of response.choices, in order.
    """
    contents = []
    for completion in response.choices:
        contents.append(completion.message.content)
    return contents

def update_cache(word, responses, filename):
    """
    Append the responses for one word to the CSV cache file.

    Each response becomes its own row of the form (word, response). The file is
    opened in append mode, so existing rows are kept and a missing file is created.
    """
    with open(filename, 'a', newline='', encoding='utf-8') as cache_file:
        csv.writer(cache_file).writerows((word, rating) for rating in responses)

def collect_responses(filename, definition, words=None):
    """
    Collect all GPT ratings and save them to the dedicated cache file.

    Words that already have at least one row in the cache file are skipped, so an
    interrupted run can be resumed without querying the same word again.

    Args:
        filename: Path of the CSV cache file (rows of word, response).
        definition: Rating instructions handed to construct_prompt
            (e.g. DEFINITION_VALENCE).
        words: Optional iterable of words to rate; defaults to the global wordlist.
    """
    if words is None:
        words = wordlist

    # Load the cached words once, in read mode. Opening with 'a+' would place the
    # stream at the end of the file, so a reader would see no rows and no word
    # would ever be recognised as cached.
    try:
        with open(filename, newline='', encoding='utf-8') as file:
            cached_words = {row[0] for row in csv.reader(file) if row}
    except FileNotFoundError:
        # No cache yet: nothing to skip; update_cache creates the file on first write.
        cached_words = set()

    for word in tqdm(words):
        if word in cached_words:
            continue

        prompt = construct_prompt(word, definition)
        raw_responses = get_raw_response(prompt)
        string_ratings = get_string_ratings(raw_responses)
        update_cache(word, string_ratings, filename)
        # Keep the in-memory view in sync so a repeated word is not queried twice.
        cached_words.add(word)

In [22]:
# Collect GPT ratings for VALENCE; responses are appended to the valence cache file
collect_responses(
    filename=CACHE_FILENAME_VALENCE,
    definition=DEFINITION_VALENCE,
)

100%|█████████████████████████████████████████| 871/871 [10:04<00:00,  1.44it/s]


In [23]:
# Collect GPT ratings for CONCRETENESS; responses are appended to the concreteness cache file
collect_responses(
    filename=CACHE_FILENAME_CONCRETENESS,
    definition=DEFINITION_CONCRETENESS,
)

100%|█████████████████████████████████████████| 871/871 [10:49<00:00,  1.34it/s]
