In [1]:
import pandas as pd
import numpy as np
from transformers import pipeline




In [2]:
def get_texts(key):
    with open(f'exports/{key}_full.txt', 'r') as f:
        text = f.read()
    return text.split('--------------------NEW-TEXT------------------')

In [3]:
emotion_model = pipeline("text-classification", model="visegradmedia-emotion/Emotion_RoBERTa_german6_v7", top_k=None, truncation=True)

Device set to use mps:0


In [20]:
joy_s = 'Freude'
fear_s = 'Angst'
anger_s = 'ich bin Wütend'
sadness_s = 'Traurigkeit'
disgust_s = 'Ekel'
none_s = 'Neutral'

In [21]:
print('JOY: ', emotion_model(joy_s))
print('FEAR: ', emotion_model(fear_s))
print('ANGER: ', emotion_model(anger_s))
print('SADNESS: ', emotion_model(sadness_s))
print('DISGUST: ', emotion_model(disgust_s))
print('NONE: ', emotion_model(none_s))

JOY:  [[{'label': 'LABEL_4', 'score': 0.977495014667511}, {'label': 'LABEL_5', 'score': 0.008196782320737839}, {'label': 'LABEL_0', 'score': 0.007576040457934141}, {'label': 'LABEL_3', 'score': 0.006525225006043911}, {'label': 'LABEL_2', 'score': 0.003997484687715769}, {'label': 'LABEL_1', 'score': 0.001897070906125009}]]
FEAR:  [[{'label': 'LABEL_1', 'score': 0.9911304712295532}, {'label': 'LABEL_5', 'score': 0.007378977723419666}, {'label': 'LABEL_0', 'score': 0.00580309284850955}, {'label': 'LABEL_3', 'score': 0.004539343062788248}, {'label': 'LABEL_4', 'score': 0.0035139520186930895}, {'label': 'LABEL_2', 'score': 0.001936954678967595}]]
ANGER:  [[{'label': 'LABEL_0', 'score': 0.9243784546852112}, {'label': 'LABEL_2', 'score': 0.04132488742470741}, {'label': 'LABEL_5', 'score': 0.027078239247202873}, {'label': 'LABEL_3', 'score': 0.015537630766630173}, {'label': 'LABEL_4', 'score': 0.003564917016774416}, {'label': 'LABEL_1', 'score': 0.0031422206666320562}]]
SADNESS:  [[{'label': '

In [4]:
emotion_label_map = {
    'LABEL_0': 'anger',
    'LABEL_1': 'fear',
    'LABEL_2': 'disgust',
    'LABEL_3': 'sadness',
    'LABEL_4': 'joy',
    'LABEL_5': 'none'
}

In [37]:
def split_text_into_chunks(text, max_length):
    return [text[i:i+max_length] for i in range(0, len(text), max_length)]

In [34]:

def analyze_emotions(text_lists, max_length=512, normalization_size=20):
    """
    Analysiert mehrere Listen von Texten auf Emotionen, unterteilt lange Texte in Abschnitte
    und normalisiert die Ergebnisse auf eine feste Anzahl von Texten pro Liste.

    :param text_lists: Liste von Listen mit Texten zur Analyse
    :param emotion_model: Funktion oder Modell zur Emotionserkennung
    :param emotion_label_map: Wörterbuch zur Übersetzung von Labels in lesbare Emotionsnamen
    :param max_length: Maximale Länge eines Textabschnitts
    :param normalization_size: Anzahl der Zeilen, auf die die Ergebnisse normalisiert werden
    :return: DataFrame mit Emotionswerten pro Textliste
    """
    def split_text_into_chunks(text, max_length):
        """Hilfsfunktion, um Texte in Abschnitte aufzuteilen."""
        chunks = [text[i:i + max_length] for i in range(0, len(text), max_length)]
        chunk_lenghs = [len(chunk.split(' ')) for chunk in chunks]
        return chunks, chunk_lenghs

    # Liste zur Speicherung der Durchschnittswerte für jede Textliste
    averages = []

    for texts in text_lists:
        # Liste zur Speicherung von Emotionsergebnissen
        emotion_results = []
        

        for text in texts:
            # Text in Abschnitte aufteilen, falls er zu lang ist
            chunks = split_text_into_chunks(text, max_length)
            chunk_lenghs = chunks[1]
            chunks = chunks[0]

            for chunk in chunks:
                # Emotionen für jeden Abschnitt analysieren
                emotions = emotion_model(chunk)

                # Extrahiere Ergebnisse und speichere sie in einer Liste von Dictionnaries
                for item in emotions:
                    for item in item:
                        emotion_label = item['label']
                        emotion_score = item['score']

                        # Übersetzung des Labels in die Emotion
                        emotion_name = emotion_label_map.get(emotion_label, emotion_label)  # Fallback, falls Label fehlt

                        emotion_results.append({emotion_name: emotion_score})

        # DataFrame aus den Ergebnissen erstellen
        emotions_df = pd.DataFrame(emotion_results)

        # Fehlende Werte mit 0 ersetzen
        emotions_df = emotions_df.fillna(0)



        # Durchschnitt der Emotionen berechnen
        average_emotions = emotions_df.mean().to_dict()
        averages.append(average_emotions)

    # Endgültiges DataFrame erstellen, mit einer Zeile pro Textliste
    final_df = pd.DataFrame(averages, index=['gpt4o', 'gpt35t', 'perplexity', 'clde', 'human'])

    # Fehlende Spalten mit 0 auffüllen (falls nicht alle Labels in allen Textlisten vorkommen)
    final_df = final_df.fillna(0)

    return final_df


In [83]:
def analyze_emotions(text_lists):

    def get_chunks(text):
        chunks = [text[i:i + 512] for i in range(0, len(text), 512)]
        chunk_lenghs = [len(chunk) for chunk in chunks]
        #print(chunks)
        #print(chunk_lenghs)
        return chunks, chunk_lenghs


    def get_w_avg(column, weights):
        column = np.array(column, dtype='float')
        weights = np.array(weights, dtype='float')
        return np.average(column, weights=weights)


    g_df = pd.DataFrame(
        columns=['anger', 'fear', 'disgust', 'sadness', 'joy', 'none'],
    )

    for modell in text_lists:

        chunk_lens = []
        chunks     = []

        for text in modell:
            i_chunk = get_chunks(text)
            chunk_lens.extend(i_chunk[1])
            chunks.extend(i_chunk[0])

        #print(chunks)

        df = pd.DataFrame(
            columns=['anger', 'fear', 'disgust', 'sadness', 'joy', 'none'],
        )

        for chunk in chunks:
            chunk_em = {}
            #print(chunk)
            chunk_emotions = emotion_model(inputs=chunk)
            for i_emotion in chunk_emotions:
                for emotion in i_emotion:
                    emotion_label = emotion_label_map[emotion['label']]
                    emotion_score = emotion['score']
                    chunk_em[emotion_label]=emotion_score

            #print(chunk_em)
            

            
            df = pd.concat([df, pd.DataFrame([chunk_em])], ignore_index=True)
            

        
        print(len(df['anger'].values.tolist()), len(chunk_lens))
        
        final = {
            "anger": get_w_avg(df['anger'].values.tolist(), chunk_lens),
            "fear": get_w_avg(df['fear'].values.tolist(), chunk_lens),
            "disgust":get_w_avg(df['disgust'].values.tolist(), chunk_lens),
            "sadness":get_w_avg(df['sadness'].values.tolist(), chunk_lens),
            "joy":get_w_avg(df['joy'].values.tolist(), chunk_lens),
            "none":get_w_avg(df['none'].values.tolist(), chunk_lens),
        }


        g_df = pd.concat([g_df, pd.DataFrame([final])], ignore_index=True)
    
    return g_df


        


In [32]:
text_lists = [get_texts('gpt4o'), get_texts('gpt35t'), get_texts('perplexity'), get_texts('clde'), get_texts('human')]

In [None]:
df = analyze_emotions(text_lists)
df

In [84]:
analyze_emotions(text_lists).to_csv('includes/emotions_2.csv')

  df = pd.concat([df, pd.DataFrame([chunk_em])], ignore_index=True)


209 209


  g_df = pd.concat([g_df, pd.DataFrame([final])], ignore_index=True)
  df = pd.concat([df, pd.DataFrame([chunk_em])], ignore_index=True)


145 145


  df = pd.concat([df, pd.DataFrame([chunk_em])], ignore_index=True)


220 220


  df = pd.concat([df, pd.DataFrame([chunk_em])], ignore_index=True)


142 142


  df = pd.concat([df, pd.DataFrame([chunk_em])], ignore_index=True)


223 223


In [None]:
"""

         LABEL_0    LABEL_1     LABEL_2     LABEL_3     LABEL_4     LABEL_5
gpt4o
gpt35t
perplexity
clde


"""