1. JSON Mode (Gemini):
   - Wird direkt bei der Modellkonfiguration eingestellt.
   - Das Modell generiert Ausgaben direkt im spezifizierten JSON-Format.
   - Erfordert weniger Prompt-Engineering, da das Format in der Modellkonfiguration festgelegt ist.

2. Structured Output (Claude mit PydanticOutputParser):
   - Verwendet Formatierungsanweisungen im Prompt.
   - Das Modell generiert Text, der dem angeforderten Format entsprechen soll.
   - Ein Parser (PydanticOutputParser) validiert und strukturiert die Ausgabe nachträglich.
   - Erfordert mehr Prompt-Engineering, da das gewünschte Format im Prompt beschrieben werden muss.

Hauptunterschied: Bei JSON Mode erfolgt die Strukturierung direkt durch das Modell, während bei Structured Output die Strukturierung durch einen nachgelagerten Parser erfolgt, basierend auf Anweisungen im Prompt.


In [10]:
import os
from dotenv import load_dotenv
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List
from data_models import EmotionAnalysisOutput
from src_llm_pipeline.model import create_llm

# Laden der Umgebungsvariablen
load_dotenv()

# Initialisierung des ChatAnthropic-Modells
llm = create_llm(model_name="claude-3-5-sonnet-20240620")
llm = llm.with_structured_output(EmotionAnalysisOutput)

# Erstellung des Pydantic Output Parsers
parser = PydanticOutputParser(pydantic_object=EmotionAnalysisOutput)

# Erstellung des Prompts mit Formatierungsanweisungen
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an expert in emotion analysis based on the theory of Lisa Feldmann Barrett. Provide a structured analysis of the given text."),
    ("human", "Analyze the following text and provide an emotion analysis: {input_text}\n\n{format_instructions}")
])

# Zusammenstellung der Chain
chain = prompt | llm | parser

# Testtext
test_input = """
I can't believe I finally got the job I've been dreaming of for years! 
It's been such a long journey with so many ups and downs. 
I'm excited but also a bit nervous about starting in a new environment.
"""

# Ausführung der Analyse
try:
    result = chain.invoke({
        "input_text": test_input,
        "format_instructions": parser.get_format_instructions()
    })
    print("Analyse erfolgreich:")
    print(result)
except Exception as e:
    print(f"Fehler bei der Analyse: {e}")
    # Wenn es Probleme gibt, versuchen wir, die Rohausgabe zu erhalten
    raw_output = llm.invoke(prompt.format(
        input_text=test_input,
        format_instructions=parser.get_format_instructions()
    ))
    print("Rohausgabe des Modells:")
    print(raw_output)

Fehler bei der Analyse: "Input to ChatPromptTemplate is missing variables {'format_instructions'}.  Expected: ['format_instructions', 'input_text'] Received: ['input_text']\nNote: if you intended {format_instructions} to be part of the string and not a variable, please escape it with double curly braces like: '{{format_instructions}}'."
Rohausgabe des Modells:
core_affect_analysis=CoreAffectAnalysis(thought="To analyze the core affect, I'll consider both valence and arousal:\n1. Valence: The text indicates overall positive feelings ('finally got the job I've been dreaming of'), suggesting high pleasantness.\n2. Arousal: The language used ('can't believe', 'excited', 'nervous') suggests a heightened state of activation.\n3. The phrase 'ups and downs' implies a history of varying emotional states, but the current state seems predominantly positive with some tension.\n4. The mix of excitement and nervousness indicates a complex emotional state with high arousal.", valence='Predominantly p

In [1]:
import importlib
from pathlib import Path
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers.pydantic import PydanticOutputParser
from langchain_core.utils.json_schema import dereference_refs
from model import create_llm
from langsmith import Client, traceable
from langsmith.run_helpers import get_current_run_tree
import os
from typing import List
from uuid import UUID
from data_models import UserNeed

path_folder = Path("./inputs/sp0")

# Load environment variables
load_dotenv()

# Dynamically import the module
models = importlib.import_module("data_models")

def load_system_prompt(filename: str, folder: Path = Path("./prompts")):
    return (folder / filename).read_text()

def load_input(filename: str, folder: Path = path_folder):
    return (folder / filename).read_text()

def run_llm_chain(context_sphere: str, user_id: str):
    @traceable(metadata={"user_id": user_id})
    def invoke_chain():
        llm_chain = LLM_chain()
        result = llm_chain.invoke({"context_sphere": context_sphere})
        run = get_current_run_tree()
        print(f"run_llm_chain Run Id: {run.id}")
        return result, run.id

    return invoke_chain()

def LLM_chain():
    queries_schema = dereference_refs(models.EmotionAnalysisOutput.model_json_schema())
    queries_schema.pop("$defs", None)

    prompt = ChatPromptTemplate(
        [
            ("human", load_system_prompt("LFB_role_setting_prompt.md")),
            ("ai", load_system_prompt("LFB_role_feedback_prompt.md")),
            ("human", load_system_prompt("user_task_prompt.md"))
        ]
    ).partial(context_sphere="context_sphere")

    llm = create_llm(
        model_name="gemini-1.5-flash-002",
        temperature=0.5,
        response_mime_type="application/json",
        response_schema=queries_schema,
    )

    chain = prompt | llm | PydanticOutputParser(pydantic_object=models.EmotionAnalysisOutput)

    return chain

def provide_feedback(client: Client, run_id: UUID, analysis_name: str, classification: str, thought: str):
    """
    Simplified feedback mechanism to include only the classification result.
    """
    client.create_feedback(
        run_id=run_id,
        key=f"{analysis_name}_analysis",
        value=f"{classification}",
        comment=f"{thought}"
    )

if __name__ == "__main__":
    client = Client()
    dataset_name = "Emotion Analysis Full Dataset"

    # Directory containing input files
    input_folder = path_folder

    # Process each file in the input folder
    for input_file in input_folder.glob("*.md"):
        user_id = input_file.stem  # Extract user_id from the file name

        # Load input data
        context_sphere = load_input(input_file.name)

        # Get results and run ID from LLM chain
        emotion_analysis, run_id = run_llm_chain(context_sphere, user_id)

        print("RUN:", run_id)
        print("USER ID:", user_id)
        print(emotion_analysis)

        # Provide feedback for each type of analysis with only classification
        provide_feedback(
            client,
            run_id,
            "valence",
            emotion_analysis.valence.aspect.classification,
            emotion_analysis.valence.aspect.thought
        )
        
        provide_feedback(
            client,
            run_id,
            "engagement",
            emotion_analysis.engagement.aspect.classification,
            emotion_analysis.engagement.aspect.thought
        )
        
        provide_feedback(
            client,
            run_id,
            "polarization",
            emotion_analysis.polarization.aspect.classification,
            emotion_analysis.polarization.aspect.thought
        )
        print(emotion_analysis.user_needs)
        

run_llm_chain Run Id: ff711f95-5141-4c60-af16-287a64c212b3
RUN: ff711f95-5141-4c60-af16-287a64c212b3
USER ID: user_193947_threads_cleaned
valence=ValenceAnalysis(aspect=EmotionalAspect(thought="The overall valence of the user's comments is strongly negative.  Their language is often cynical, critical, and sarcastic, reflecting a negative emotional response to the political events and figures involved.  While there are moments of humor, the dominant sentiment expressed across the different articles is one of negativity, skepticism, and disapproval.  This consistent negative valence indicates a strong negative emotional response to the political climate.", classification='Negative')) engagement=EngagementAnalysis(aspect=EmotionalAspect(thought="The user, identified as 193947, demonstrates consistent engagement across multiple articles related to Austrian politics, specifically focusing on the Ibiza scandal and its aftermath.  Their comments reveal a keen interest in the political machina

AttributeError: 'EmotionAnalysisOutput' object has no attribute 'user_needs'

In [6]:
# Angenommen, wir haben ein emotion_analysis Objekt

# Valence
print("Valence:")
print(f"Thought: {emotion_analysis.valence.aspect.thought}")
print(f"Classification: {emotion_analysis.valence.aspect.classification}")
print()

# Engagement
print("Engagement:")
print(f"Thought: {emotion_analysis.engagement.aspect.thought}")
print(f"Classification: {emotion_analysis.engagement.aspect.classification}")
print()

# Polarization
print("Polarization:")
print(f"Thought: {emotion_analysis.polarization.aspect.thought}")
print(f"Classification: {emotion_analysis.polarization.aspect.classification}")
print()

# User Need
print("User Need:")
print(f"Thought: {emotion_analysis.user_need.thought}")

# Basic Needs
basic_needs_list = [need.value for need in emotion_analysis.user_need.basic_needs]
print("Basic Needs:")
for need in basic_needs_list:
    print(f"- {need}")

# User Needs
user_needs_list = [need.value for need in emotion_analysis.user_need.user_needs]
print("User Needs:")
for need in user_needs_list:
    print(f"- {need}")

Valence:
Thought: The overall valence of the user's comments is strongly negative.  Their language is often cynical, critical, and sarcastic, reflecting a negative emotional response to the political events and figures involved.  While there are moments of humor, the dominant sentiment expressed across the different articles is one of negativity, skepticism, and disapproval.  This consistent negative valence indicates a strong negative emotional response to the political climate.
Classification: Negative

Engagement:
Thought: The user, identified as 193947, demonstrates consistent engagement across multiple articles related to Austrian politics, specifically focusing on the Ibiza scandal and its aftermath.  Their comments reveal a keen interest in the political machinations, expressing cynicism, skepticism, and at times, humor. The length and detail of their contributions, especially in the article about Strache's Facebook activity, indicate a high level of engagement and sustained int

In [7]:
import csv

def save_emotion_analysis_to_csv(emotion_analysis, filename="emotion_analysis.csv"):
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        # Schreiben der Überschriften
        writer.writerow(["Category", "Subcategory", "Value"])

        # Valence
        writer.writerow(["Valence", "Thought", emotion_analysis.valence.aspect.thought])
        writer.writerow(["Valence", "Classification", emotion_analysis.valence.aspect.classification])

        # Engagement
        writer.writerow(["Engagement", "Thought", emotion_analysis.engagement.aspect.thought])
        writer.writerow(["Engagement", "Classification", emotion_analysis.engagement.aspect.classification])

        # Polarization
        writer.writerow(["Polarization", "Thought", emotion_analysis.polarization.aspect.thought])
        writer.writerow(["Polarization", "Classification", emotion_analysis.polarization.aspect.classification])

        # User Need
        writer.writerow(["User Need", "Thought", emotion_analysis.user_need.thought])

        # Basic Needs
        for need in emotion_analysis.user_need.basic_needs:
            writer.writerow(["Basic Need", need.name, need.value])

        # User Needs
        for need in emotion_analysis.user_need.user_needs:
            writer.writerow(["User Need", need.name, need.value])

    print(f"Data has been saved to {filename}")

# Verwendung der Funktion
save_emotion_analysis_to_csv(emotion_analysis)

Data has been saved to emotion_analysis.csv


In [19]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

def create_analysis_folder():
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    folder_name = f"analysis_{timestamp}"
    os.makedirs(folder_name, exist_ok=True)
    return folder_name

def load_data(filename):
    return pd.read_csv(filename)

def format_labels(labels):
    formatted = {
        'KEEP_ME_ENGAGED': 'Keep me engaged',
        'UPDATE_ME': 'Update me',
        'EDUCATE_ME': 'Educate me',
        'GIVE_ME_PERSPECTIVE': 'Give me perspective',
        'DIVERT_ME': 'Divert me',
        'INSPIRE_ME': 'Inspire me',
        'CONNECT_ME': 'Connect me',
        'HELP_ME': 'Help me',
        'KNOWLEDGE': 'Knowledge',
        'UNDERSTANDING': 'Understanding',
        'FEELING': 'Feeling',
        'DOING': 'Doing'
    }
    return [formatted[label] for label in labels]

def plot_analysis(data, folder_name):
    sns.set(style="whitegrid")

    # Grundbedürfnisse analysieren
    basic_needs = ['KNOWLEDGE', 'UNDERSTANDING', 'FEELING', 'DOING']
    basic_data = data[basic_needs].mean().sort_values() * 100
    basic_df = basic_data.reset_index()
    basic_df.columns = ['Basic Needs', 'Percentage']

    plt.figure(figsize=(8, 6))
    sns.barplot(data=basic_df, x='Percentage', y='Basic Needs', palette="muted")
    plt.title("Basic Needs Analysis")
    plt.xlabel("Erfüllung (%)")
    plt.ylabel("")
    plt.xlim(0, 100)
    plt.yticks(ticks=range(len(basic_needs)), labels=format_labels(basic_needs))
    plt.tight_layout()
    plt.savefig(os.path.join(folder_name, "basic_needs_analysis.png"))
    plt.close()

    # Benutzerbedürfnisse analysieren
    user_needs = [
        'KEEP_ME_ENGAGED', 'UPDATE_ME', 'EDUCATE_ME',
        'GIVE_ME_PERSPECTIVE', 'DIVERT_ME', 'INSPIRE_ME',
        'CONNECT_ME', 'HELP_ME'
    ]
    user_data = data[user_needs].mean().sort_values() * 100
    user_df = user_data.reset_index()
    user_df.columns = ['User Needs', 'Percentage']

    plt.figure(figsize=(8, 6))
    sns.barplot(data=user_df, x='Percentage', y='User Needs', palette="muted")
    plt.title("User Needs Analysis")
    plt.xlabel("Erfüllung (%)")
    plt.ylabel("")
    plt.xlim(0, 100)
    plt.yticks(ticks=range(len(user_needs)), labels=format_labels(user_needs))
    plt.tight_layout()
    plt.savefig(os.path.join(folder_name, "user_needs_analysis.png"))
    plt.close()

def main():
    csv_filename = "emotion_analysis.csv"
    data = load_data(csv_filename)
    folder_name = create_analysis_folder()
    plot_analysis(data, folder_name)
    print(f"Analysis completed. Results saved in the folder '{folder_name}'.")

if __name__ == "__main__":
    main()


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=basic_df, x='Percentage', y='Basic Needs', palette="muted")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=user_df, x='Percentage', y='User Needs', palette="muted")


Analysis completed. Results saved in the folder 'analysis_20240926_130730'.
