In [1]:
# Check whether PyTorch can see a CUDA-capable GPU; the cell displays True when one is available.
# NOTE(review): no other cell visible here uses torch (the model calls go through the
# OpenAI API), so this looks like an environment sanity check only - confirm.

import torch
torch.cuda.is_available()

True

In [2]:
"""
Developing function to identify and resolve toponyms, and detect emotions in context 
on either side of each toponym.  Context length is based on trying different lengths,
with the final context length chosen based on which gives the most likely detected emotion
with the highest confidence score.

"""

# Make the OpenAI API key available to the client through the OPENAI_API_KEY environment
# variable without hardcoding it in the notebook. A key that is already set in the
# environment is left untouched; otherwise it is requested interactively, so the secret
# is never saved in the notebook file or its outputs.

import os
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Access libraries
from openai import OpenAI
from pydantic import BaseModel
import pandas as pd
import json
import numpy as np

# Create the OpenAI client (it picks up the API key from the OPENAI_API_KEY environment variable).

client = OpenAI()

# Load the "Instructions" text file into memory so the request below can pass it to the model.

PROMPT_FILE = "openai_EmotionSpatialDetect2_prompt.txt"
with open(PROMPT_FILE, mode="r", encoding="utf-8") as prompt_file:
    instructions = prompt_file.read()

# Call the model to recognize and resolve toponyms and to detect the emotion in the context
# on either side of each toponym. The context length is not fixed: the tool schema below asks
# the model to try different lengths and report the one with the highest confidence score.

# Sample passage to analyse.
input_text = "I know them by sight. We're becoming friends now. I'm glad not to have to travel alone. Age 16, November 1941, Le Chambon-sur-Lignon, France. I'm in Le Chambon-sur-Lignon. It's way up on a high plateau in the mountains. It's beautiful here. What an amazing difference from Gurs! I'm staying at La Guespy, which is a home for refugees.  My friend just arrived, and before she came she saw her mother for an hour that morning through the fence, and then they put them all on cattle wagons and they were deported. And now they're gone.  We are not sure, but we think they took them to Drancy.  Martin and I are working in the new carpentry shop now. I don't really like carpentry, but at least I know how to do it, and at least I am safe."

# Function-tool definition. With "strict": True every property must be listed in
# "required" and "additionalProperties" must be False at each object level.
toponym_emotion_tool = {
    "type": "function",
    "name": "identify_toponyms_and_emotions",
    "description": "Read the input text string and identify the toponyms in the text, disambiguate their locations, and perform emotion detection on surrounding context.",
    "parameters": {
        "type": "object",
        "required": [
            "input_text",
            "toponyms"
        ],
        "properties": {
            "input_text": {
                "type": "string",
                "description": "The text string from which to identify toponyms and their surrounding context."
            },
            "toponyms": {
                "type": "array",
                "description": "Array of identified toponyms, each containing properties of location details and emotional context.",
                "items": {
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "description": "The name of the toponym identified."
                        },
                        "latitude": {
                            "type": "number",
                            "description": "The latitude coordinate of the toponym."
                        },
                        "longitude": {
                            "type": "number",
                            "description": "The longitude coordinate of the toponym."
                        },
                        "emotion": {
                            "type": "string",
                            "description": "The most likely detected emotion around the toponym.",
                            "enum": [
                                "anger",
                                "disgust",
                                "fear",
                                "joy",
                                "sadness",
                                "surprise",
                                "neutral"
                            ]
                        },
                        "confidence_score": {
                            "type": "number",
                            "description": "The confidence score for the detected emotion, on a scale of 0 to 1."
                        },
                        "context": {
                            "type": "string",
                            "description": "The block of text surrounding the toponym used for emotion detection, whose length is determined based on trying different lengths and seeing which one gives the highest confidence score for the most likely detected emotion."
                        },
                        "context_length": {
                            # A character count is a whole number, so ask for an integer.
                            "type": "integer",
                            "description": "The length, in characters including spaces, of the final block of text surrounding the toponym used for emotion detection."
                        }
                    },
                    "required": [
                        "name",
                        "latitude",
                        "longitude",
                        "emotion",
                        "confidence_score",
                        "context",
                        "context_length"
                    ],
                    "additionalProperties": False
                }
            }
        },
        "additionalProperties": False
    },
    "strict": True
}

response = client.responses.create(
    model="gpt-4.1-2025-04-14",
    instructions=instructions,
    input=input_text,
    text={
        "format": {
            "type": "text"
        }
    },
    reasoning={},
    tools=[toponym_emotion_tool],
    # Force the model to call the function. With the default tool choice the model may
    # answer in plain text instead, and the response would then carry no function-call
    # arguments to parse.
    tool_choice={
        "type": "function",
        "name": "identify_toponyms_and_emotions"
    },
    temperature=1,
    max_output_tokens=32768,
    top_p=1,
    store=True
)

In [6]:
# Take the function-call arguments from the response, parse them as JSON, and build a
# DataFrame with one row per toponym. Then add a code for each detected emotion.

# Select the function-call item explicitly instead of assuming it is the first output
# item; the output list can also hold other item types (for example a text message).
function_call = next(
    (item for item in response.output if getattr(item, "type", None) == "function_call"),
    None,
)
if function_call is None:
    raise ValueError("The model response contains no function call to parse.")

output = json.loads(function_call.arguments)

# Naming the columns keeps their order stable and still yields a usable (empty) frame
# when the model finds no toponyms at all.
toponym_columns = [
    "name",
    "latitude",
    "longitude",
    "emotion",
    "confidence_score",
    "context",
    "context_length",
]
df = pd.DataFrame(output["toponyms"], columns=toponym_columns)

# Codes follow the alphabetical order of the emotion labels. They are kept as strings,
# with "Unknown" for any label that is not in the mapping.
emotion_codes = {
    "anger": "0",
    "disgust": "1",
    "fear": "2",
    "joy": "3",
    "neutral": "4",
    "sadness": "5",
    "surprise": "6",
}
df["emotion_numeric"] = df["emotion"].map(emotion_codes).fillna("Unknown")

df

Unnamed: 0,name,latitude,longitude,emotion,confidence_score,context,context_length,emotion_numeric
0,Le Chambon-sur-Lignon,45.06081,4.302941,joy,0.88,"I'm glad not to have to travel alone. Age 16, ...",328,3
1,France,46.603354,1.888334,neutral,0.81,"I'm glad not to have to travel alone. Age 16, ...",197,4
2,Le Chambon-sur-Lignon,45.06081,4.302941,joy,0.91,"November 1941, Le Chambon-sur-Lignon, France. ...",247,3
3,Gurs,43.231991,-0.755542,sadness,0.96,It's way up on a high plateau in the mountains...,255,5
4,La Guespy,45.06069,4.31102,neutral,0.79,What an amazing difference from Gurs! I'm stay...,186,4
5,Drancy,48.925797,2.445665,sadness,0.92,before she came she saw her mother for an hour...,316,5


In [7]:
# Export the results to CSV. The "utf-8-sig" encoding writes a byte-order mark so that
# spreadsheet programs detect UTF-8 and show accented place names correctly. The file is
# opened in the default write mode, which replaces any existing file of the same name.

RESULTS_FILE = "Results2_ToponymsEmotions_FewSentences.csv"
df.to_csv(RESULTS_FILE, index=False, encoding="utf-8-sig")