In [1]:
import os
import pandas as pd
import json
import openai
from langchain_openai import ChatOpenAI

In [2]:
# Input files: one CSV per source (faz, spiegel, zeit), each carrying the date
# 2024-11-21 in its file name.
faz_csv_path = 'data/faz_2024-11-21.csv'
spiegel_csv_path = 'data/spiegel_2024-11-21.csv'
zeit_csv_path = 'data/zeit_2024-11-21.csv'

# Read them in a fixed order (FAZ, Spiegel, Zeit) into one DataFrame each.
df_faz = pd.read_csv(faz_csv_path)
df_spiegel = pd.read_csv(spiegel_csv_path)
df_zeit = pd.read_csv(zeit_csv_path)

In [8]:
# Look the key up once in the environment (None when OPENAI_API_KEY is unset),
# publish it on the `openai` module and hand the same value to the client.
openai_api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = openai_api_key
client = openai.OpenAI(api_key=openai_api_key)

In [9]:
# Labels the classifier may return. Shared by the JSON schema and the prompt so
# the two cannot drift apart.
_SENTIMENT_LABELS = ("positive", "negative", "neutral")

# JSON schema handed to `with_structured_output`. The `enum` restricts the
# `sentiment` field to the three labels instead of leaving it a free-form string.
_SENTIMENT_JSON_SCHEMA = {
    "title": "sentiment_classification",
    "description": "Classify the text as expressing a sentiment about the given entity.",
    "type": "object",
    "properties": {
        "sentiment": {
            "type": "string",
            "enum": list(_SENTIMENT_LABELS),
            "description": "The sentiment result, either 'positive', 'negative', or 'neutral'.",
        },
        "explanation": {
            "type": "string",
            "description": "The reasoning behind the sentiment classification.",
        },
    },
    "required": ["sentiment", "explanation"],
}

# Structured-output runnables keyed by model name, so that classifying a whole
# DataFrame does not build a new ChatOpenAI client for every row.
_STRUCTURED_LLM_CACHE = {}


def _get_structured_llm(model):
    """Return the structured-output runnable for `model`, building it on first use."""
    if model not in _STRUCTURED_LLM_CACHE:
        llm = ChatOpenAI(model=model)
        _STRUCTURED_LLM_CACHE[model] = llm.with_structured_output(_SENTIMENT_JSON_SCHEMA)
    return _STRUCTURED_LLM_CACHE[model]


def _build_sentiment_prompt(text, entity, examples):
    """Render the few-shot classification prompt for one (text, entity) pair."""
    rendered_examples = "\n".join(
        f"Example {i}:\nInput:\nText:\n{ex['text']}\nEntity:\n{ex['entity']}\n"
        f"Output:\nsentiment: {ex['sentiment']}\nexplanation: {ex['explanation']}\n"
        for i, ex in enumerate(examples or [], start=1)
    )

    # Only announce examples when there are some; an empty header would just
    # confuse the prompt.
    examples_section = ""
    if rendered_examples:
        examples_section = (
            "\n    Here are some examples to help with the classification:\n\n"
            f"    {rendered_examples}\n"
        )

    labels = ", ".join(_SENTIMENT_LABELS)

    # The categories are spelled out explicitly: the prompt is sent on its own,
    # so there is nothing "above" it the model could refer to.
    return f"""
    Classify the text into one of the following categories: {labels}.
    Base the classification on how the text is talking about the specified entity.
    If the text is not talking about the entity, the sentiment is neutral.
{examples_section}
    Now, classify the following text:

    Input:
    Text: "{text}"
    Entity: "{entity}"

    Provide the response in JSON format with the following structure:
    {{
        "sentiment": "<positive, negative, or neutral>",
        "explanation": "<Reasoning behind the classification>"
    }}
    """


def predict_sentiment_mini_formatted(text, entity, examples, model="gpt-4o-mini"):
    """Classify the sentiment a text expresses towards a given entity.

    Builds a few-shot prompt from `examples`, sends it to the chat model and
    asks for a structured answer with the fields `sentiment` and `explanation`.

    Args:
        text: The text to classify; it is interpolated into the prompt as-is.
        entity: The entity whose treatment in `text` is being judged.
        examples: Iterable of mappings with the keys 'text', 'entity',
            'sentiment' and 'explanation', rendered as few-shot examples.
            May be empty or None, in which case no examples are included.
        model: Name of the chat model passed to `ChatOpenAI`.

    Returns:
        Whatever the structured-output runnable's `invoke` returns. For a
        JSON-schema dict this is presumably a dict with 'sentiment' and
        'explanation' keys; confirm against the installed LangChain version.

    Raises:
        KeyError: If an example lacks one of the four expected keys.
        Errors raised by the model client are propagated unchanged.
    """
    prompt = _build_sentiment_prompt(text, entity, examples)
    structured_llm = _get_structured_llm(model)
    return structured_llm.invoke(prompt)

# Few-shot examples for the classifier prompt, parsed from a UTF-8 JSON file
# in the working directory.
with open("examples.json", mode='r', encoding='utf-8') as examples_file:
    examples = json.loads(examples_file.read())

In [19]:
# Create the output directory up front: otherwise a missing folder only
# surfaces in `to_csv`, after every row has already been sent to the model.
os.makedirs('output', exist_ok=True)

# One classifier call per FAZ row: `content` is the text, `politician_name`
# the entity, and `examples` the shared few-shot examples.
# NOTE(review): the new column holds the objects returned by the classifier;
# `to_csv` writes their string form, so despite the column name the file may
# not contain strict JSON. Confirm what downstream readers expect.
df_faz['json_response_sentiment_task'] = df_faz.apply(
    lambda x: predict_sentiment_mini_formatted(x['content'], x['politician_name'], examples),
    axis=1,
)
df_faz.to_csv('output/faz_sentiment_output-2024-11-21.csv', index=False)

In [20]:
# Create the output directory up front: otherwise a missing folder only
# surfaces in `to_csv`, after every row has already been sent to the model.
os.makedirs('output', exist_ok=True)

# One classifier call per Spiegel row: `content` is the text, `politician_name`
# the entity, and `examples` the shared few-shot examples.
# NOTE(review): the new column holds the objects returned by the classifier;
# `to_csv` writes their string form, so despite the column name the file may
# not contain strict JSON. Confirm what downstream readers expect.
df_spiegel['json_response_sentiment_task'] = df_spiegel.apply(
    lambda x: predict_sentiment_mini_formatted(x['content'], x['politician_name'], examples),
    axis=1,
)
df_spiegel.to_csv('output/spiegel_sentiment_output-2024-11-21.csv', index=False)

In [None]:
# Create the output directory up front: otherwise a missing folder only
# surfaces in `to_csv`, after every row has already been sent to the model.
os.makedirs('output', exist_ok=True)

# One classifier call per Zeit row: `content` is the text, `politician_name`
# the entity, and `examples` the shared few-shot examples.
# NOTE(review): the new column holds the objects returned by the classifier;
# `to_csv` writes their string form, so despite the column name the file may
# not contain strict JSON. Confirm what downstream readers expect.
df_zeit['json_response_sentiment_task'] = df_zeit.apply(
    lambda x: predict_sentiment_mini_formatted(x['content'], x['politician_name'], examples),
    axis=1,
)
df_zeit.to_csv('output/zeit_sentiment_output-2024-11-21.csv', index=False)