# Sentiment analysis on document level


## Base Line for binary classification

In [18]:
from transformers import pipeline
import numpy as np

sentiment_pipeline = pipeline("sentiment-analysis")

  from .autonotebook import tqdm as notebook_tqdm
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use mps:0


In [2]:
def load_data(file_path):
    with open(file_path, 'r') as file:
        return file.read()

In [44]:
test_data = "Feuerzangenbowle (German: is a traditional German alcoholic drink for which a rum-soaked sugarloaf is set on fire and drips into mulled wine. It is often part of a Christmas or New Year's Eve tradition. The name translates literally as fire-tongs punch, Bowle meaning punch being borrowed from English.The popularity of the drink was boosted in Germany by the 1944 comedy film Die Feuerzangenbowle. It is a traditional drink of some German fraternities, who also call it Krambambuli, as the red color is reminiscent of a cherry liqueur of that name which was manufactured by the distillery Der Lachs zu Danzig [de] (in Gdańsk).[1][2]Procedure Feuerzangenbowle is prepared in a bowl, similar to a fondue set, which usually is suspended over a small burner (Rechaud). The bowl is filled with heated dry red wine spiced with cinnamon sticks, cloves, star anise and orange peel, similar to mulled wine. The Feuerzange was originally a pair of tongs, but nowadays it is common for a purpose-designed metal grate mounted on top of the bowl to hold the Zuckerhut (sugarloaf), a 250-gram (9 oz) lump of sugar. The sugar is soaked with rum and set alight, melting and caramelizing. The rum should have at least 54% alcohol by volume (ABV), such as the high-ABV Austrian rum Stroh 80, and be at room temperature in order to burn properly. More rum is poured with a ladle until all the sugar has melted and mixed with the wine. The resulting punch is served in mugs while the burner keeps the bowl warm. For some the ceremony is more important than the drink itself, celebrating the gathering of friends and conveying a notion of Gemütlichkeit."

In [45]:
# load data from a file as junks inta array

def load_data_as_chunks(file_path):
    with open(file_path, 'r') as file:
        data = file.readlines()
    return [line.strip() for line in data if line.strip()]


In [46]:
len(test_data.split())

277

In [47]:
def join_data(tokens, chunk_size=400):
    return [' '.join(tokens[i:i+chunk_size]) for i in range(0, len(tokens), chunk_size)]

In [48]:
test_data = join_data(data.split(), chunk_size=300)

In [49]:

def document_sentiment_analysis_binary(data : list[str], pipeline=sentiment_pipeline, seed= 0, random_aprox=False):

    doc_analysis = {}

    if random_aprox:

        np.random.seed(seed)
        # natural random numbers for testing
        random_scores = np.unique(np.random.randint(0, len(data), 10))
        data = [test_data[i] for i in random_scores]

    analysis = pipeline(data)

    if analysis["label" == "NEGATIVE"] != None:
        negative_prob = np.sum([doc_analysis["score"] for doc_analysis in analysis if doc_analysis["label"] == "NEGATIVE" ]) / len(analysis)
    else:
        negative_prob = 0
    if analysis["label" == "POSITIVE"] != None:
        positive_prob = np.sum([doc_analysis["score"] for doc_analysis in analysis if doc_analysis["label"] == "POSITIVE" ])  / len(analysis)
    else:
        positive_prob = 0

    if negative_prob > positive_prob:
        doc_analysis["label"] = "NEGATIVE"
        doc_analysis["score"] = negative_prob
    else:
        doc_analysis["label"] = "POSITIVE"
        doc_analysis["score"] = positive_prob

    return doc_analysis

In [50]:
len(test_data[0].split())

277

In [51]:
sentiment_pipeline(test_data)


[{'label': 'NEGATIVE', 'score': 0.7614216804504395}]

In [55]:
document_sentiment_analysis_binary(test_data, random_aprox=True)

{'label': 'NEGATIVE', 'score': 0.7614216804504395}

In [43]:
data = "Feuerzangenbowle (German: is a traditional German alcoholic drink for which a rum-soaked sugarloaf is set on fire and drips into mulled wine. It is often part of a Christmas or New Year's Eve tradition. The name translates literally as fire-tongs punch, Bowle meaning punch being borrowed from English.The popularity of the drink was boosted in Germany by the 1944 comedy film Die Feuerzangenbowle. It is a traditional drink of some German fraternities, who also call it Krambambuli, as the red color is reminiscent of a cherry liqueur of that name which was manufactured by the distillery Der Lachs zu Danzig [de] (in Gdańsk).[1][2]Procedure Feuerzangenbowle is prepared in a bowl, similar to a fondue set, which usually is suspended over a small burner (Rechaud). The bowl is filled with heated dry red wine spiced with cinnamon sticks, cloves, star anise and orange peel, similar to mulled wine. The Feuerzange was originally a pair of tongs, but nowadays it is common for a purpose-designed metal grate mounted on top of the bowl to hold the Zuckerhut (sugarloaf), a 250-gram (9 oz) lump of sugar. The sugar is soaked with rum and set alight, melting and caramelizing. The rum should have at least 54% alcohol by volume (ABV), such as the high-ABV Austrian rum Stroh 80, and be at room temperature in order to burn properly. More rum is poured with a ladle until all the sugar has melted and mixed with the wine. The resulting punch is served in mugs while the burner keeps the bowl warm. For some the ceremony is more important than the drink itself, celebrating the gathering of friends and conveying a notion of Gemütlichkeit."

In [168]:
analysis_approx

{'label': 'NEGATIVE', 'score': 0.46918564372592503}

In [None]:
# subjectivity detection in newspaper sentences
# A sentence is subjective if its content is based on or influenced by personal feelings, tastes, or opinions. Otherwise, the sentence is objective. (Antici et al., 2023).
# https://huggingface.co/GroNLP/mdebertav3-subjectivity-english
# https://checkthat.gitlab.io/clef2023/task2/


pipe = pipeline("text-classification", model="GroNLP/mdebertav3-subjectivity-english")

Device set to use mps:0


In [65]:
def document_sentiment_analysis(data : list[str]):

    doc_analysis = {}
    analysis = sentiment_pipeline(data)

    unique_labels = np.unique([doc["label"] for doc in analysis])

    arg_max_labels = 0

    for label in unique_labels:

        label_prob = np.sum([doc_analysis["score"] for doc_analysis in analysis if doc_analysis["label"] == label ]) / len(analysis)

        if arg_max_labels < label_prob:
            doc_analysis["label"] = label
            doc_analysis["score"] = label_prob

    return doc_analysis

## Multivariant Classification

In [None]:
unique_labels = np.unique([doc["label"] for doc in analysis])

array(['POSITIVE'], dtype='<U8')

In [73]:
# Multilingual GoEmotions Classifier
pipe = pipeline("text-classification", model="AnasAlokla/multilingual_go_emotions")

Device set to use mps:0


In [74]:
pipe(negative_text)

[{'label': 'neutral', 'score': 0.9459356069564819},
 {'label': 'neutral', 'score': 0.8973721861839294},
 {'label': 'neutral', 'score': 0.9459723234176636},
 {'label': 'neutral', 'score': 0.8502932190895081},
 {'label': 'neutral', 'score': 0.946921169757843},
 {'label': 'neutral', 'score': 0.9041789770126343},
 {'label': 'neutral', 'score': 0.6865472793579102},
 {'label': 'neutral', 'score': 0.6713699102401733},
 {'label': 'neutral', 'score': 0.7492476105690002},
 {'label': 'neutral', 'score': 0.9403115510940552},
 {'label': 'neutral', 'score': 0.8996682167053223},
 {'label': 'optimism', 'score': 0.8210803866386414},
 {'label': 'neutral', 'score': 0.9212598204612732},
 {'label': 'neutral', 'score': 0.8895576596260071},
 {'label': 'neutral', 'score': 0.7807997465133667}]