In [2]:
from openai import OpenAI
import os
import json
from dotenv import load_dotenv
from pathlib import Path
from pydantic import BaseModel

extracted_answers = []
query_file = "queries.json"


dotenv_path = Path('../global_environment.env')
load_dotenv(dotenv_path=dotenv_path)
openai_key = os.getenv('OPENAI_KEY')

client = OpenAI(
        api_key=openai_key
)

if(openai_key):
    print('Key loaded successfully')
else:
    print('API Key not found')

Key loaded successfully


In [None]:
class SignalTags(BaseModel):
    signal_index: str
    tags: list[str]

def getCategories(signal):
    signal_index = signal['signal_index']
    print('Extracting from ', signal_index)
    with open(query_file, 'r') as f:
        queries = json.load(f)
    descriptions = signal['sensory_descriptions'] + signal['emotional_descriptions'] + signal['associative_descriptions']
    desc_list = '\n'.join(descriptions)
    response = client.beta.chat.completions.parse(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an assistant that processes descriptions of haptic feedback and returns json objects"},
                {"role": "user", "content":"Signal index: "+str(signal_index)+": "+ queries['tagPrompt']+'\n'+desc_list},
                ],
                response_format=SignalTags
        )
    print(response.choices[0].message.content)
    return json.loads(response.choices[0].message.content)

with open('./collated_descriptions.json', 'r') as f:
    data = json.load(f)
outputData = []
for signal in data:
    outputData.append(getCategories(signal))

with open('./tagged-signals.json', 'w') as f:
    json.dump(outputData, f)

Extracting from  113


KeyError: 'tagsPrompt'

In [None]:
# Function to extract keywords using GPT API
def extract_keywords(description, word_type):
    print("Extracting from \n", description)
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an assistant that extracts keywords from answers describing haptic sensations. Sensory data refers to answers to the question \'How would you describe the signal to other people\', emotional data refers to answers to the question \'How does the sensation make you feel? Can you attach any emotions to it?\', and association data refers to answers to the question \'Does this remind you something you have felt before? If so can you associate any actions or objects with the signal?\'. You will be extracting the respective keywords from the answers provided."},
                {"role": "user", "content": f"Extract the key {word_type} keywords, separated by commas, from the following answer:\n\n{description}\n\nKeywords:"}
            ]
        )
        print(response.choices[0].message)
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error extracting keywords: {e}")
        return "Error" 



def extract_sensory_keywords(sensory_descriptions, signal_index):
    with open(query_file, 'r') as f:
        queries = json.load(f)
    print("Extracting sensory keywords for signal index ", signal_index)
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": queries['agentInstructions']},
                {"role": "user", "content": queries['sensoryExtractionPrompt']+"\n"+sensory_descriptions}
            ]
        )
        print(response.choices[0].message)
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error extracting keywords: {e}")
        return "Error" 

def extract_emotional_keywords(emotional_descriptions, signal_index):
    with open(query_file, 'r') as f:
        queries = json.load(f)
    print("Extracting emotional keywords for signal index ", signal_index)
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": queries['agentInstructions']},
                {"role": "user", "content": queries['emotionalExtractionPrompt']+"\n"+emotional_descriptions}
            ]
        )
        print(response.choices[0].message)
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error extracting keywords: {e}")
        return "Error" 

def extract_emotional_keywords(metaphor_descriptions, signal_index):
    with open(query_file, 'r') as f:
        queries = json.load(f)
    print("Extracting metaphor keywords for signal index ", signal_index)
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": queries['agentInstructions']},
                {"role": "user", "content": queries['metaphorsExtractionPrompt']+"\n"+metaphor_descriptions}
            ]
        )
        print(response.choices[0].message)
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error extracting keywords: {e}")
        return "Error" 

In [25]:
# Function to process the JSON file and extract keywords
def process_signal_data(input_file, output_file):
    with open(input_file, 'r') as f:
        data = json.load(f)
    
    extracted_data = {}

    for signal_index, entries in data.items():
        extracted_data[signal_index] = []
        sensory_query = ""
        emotional_query = ""
        associative_query = ""
        sensory_data = []
        emotional_data = []
        associative_data = []
        for entry in entries:
            file_name = entry["file_name"]
            sensory_description = entry.get("free_text_sensory", "N.A.")
            if sensory_description != "N.A.":
                sensory_query = sensory_query + sensory_description +"\n"
            emotional_description = entry.get("free_text_emotional", "N.A.")
            if emotional_description != "N.A.":
                emotional_query = emotional_query + emotional_description +"\n"
            associative_description = entry.get("free_text_association", "N.A.")
            if associative_description !="N.A.":
                associative_query = associative_query + associative_description +"\n"
            
            #sensory_keywords = extract_keywords(sensory_description, "sensory")
            #emotional_keywords = extract_keywords(emotional_description, "emotional")
            #associative_keywords = extract_keywords(associative_description, "associative")
            #sensory_data.append(sensory_keywords)
            #emotional_data.append(emotional_keywords)
            #associative_data.append(associative_keywords)
        sensory_keywords = extract_sensory_keywords(sensory_query, signal_index)
        emotional_keywords = extract_emotional_keywords(emotional_query, signal_index)
        associative_keywords = extract_emotional_keywords(associative_query, signal_index)
        sensory_data.append(sensory_keywords)
        emotional_data.append(emotional_keywords)
        associative_data.append(associative_keywords)
        
        extracted_data[signal_index].append({
            "sensory_keywords": sensory_data,
            "emotional_keywords": emotional_data,
            "associative_keywords": associative_data
        })
    
    # Save the extracted data to a JSON file
    with open(output_file, 'w') as f:
        json.dump(extracted_data, f, indent=4)


In [26]:
def main():
    input_file = "signal_descriptions.json"  # Replace with the path to the JSON file
    output_file = "extracted_keywords_even_better.json"  # Output file for saving the results
    
    print("Processing signal descriptions...")
    process_signal_data(input_file, output_file)
    print(f"Keywords have been extracted and saved to {output_file}")

if __name__ == "__main__":
    main()

Processing signal descriptions...
Extracting sensory keywords for signal index  113
ChatCompletionMessage(content='slow heartbeat, vibrations, phone vibrating, animal heartbeat, buzz, slight buzz, buzz notification, bumpy-feeling, pause, beats', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)
Extracting metaphor keywords for signal index  113
ChatCompletionMessage(content='anxious, active, fuzzy, enjoyable, calm, anxiety, comforted, reassuring, alert, attention, greasy, pulse-check', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)
Extracting metaphor keywords for signal index  113
ChatCompletionMessage(content='heartbeat, alarm, phone vibrating, sonography, mobile notification, alert, doorbell ring, car crash, video game', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)
Extracting sensory keywords for signal index  114
ChatCompletionMessage(content='buzzes, low-high-low, long buzz, little