In [72]:
# Import libraries adn load dependencies
import torch
import numpy as np
import pandas as pd

# Models:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification # for german
from pattern.en import sentiment # for english
import nltk # for english
from nltk.sentiment import SentimentIntensityAnalyzer # for english
nltk.download('vader_lexicon') # for english
from pysentimiento import create_analyzer # for spanish

# Accuracy
# from utils import evaluate_performance, transform_scores #ACTION: uncomment and put utils function away
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
def evaluate_performance(df, sentiment_column, label_column):
    """
    This function evaluates the performance of a sentiment analysis model by calculating accuracy, generating a confusion matrix, and creating a classification report. It takes a DataFrame with true sentiment labels and predicted sentiment scores as input.

    Parameters:
        df: DataFrame
            A DataFrame containing true sentiment labels in the column specified by 'label_column' and predicted sentiment scores in the column specified by 'sentiment_column'.
        sentiment_column: str
            The name of the column in the DataFrame containing the predicted sentiment scores.
        label_column: str
            The name of the column in the DataFrame containing the true sentiment labels.

    Returns:
        tuple
            A tuple containing the following elements:
            - accuracy: float
                The accuracy of the sentiment analysis model.
            - unique_predicted: ndarray
                An array containing the unique predicted sentiment labels.
            - cm_df: DataFrame
                A DataFrame representing the confusion matrix for better visualization.
            - report: str
                The classification report containing precision, recall, F1-score, and support for each class.
    """

    # Calculate the accuracy
    accuracy = accuracy_score(df[label_column], df[sentiment_column])

    # Find unique predicted sentiment labels
    unique_predicted = df[sentiment_column].unique()

    # Assign true and predicted labels
    true_labels = df[label_column]
    predicted_labels = df[sentiment_column]

    # Create the confusion matrix
    cm = confusion_matrix(true_labels, predicted_labels)

    # Convert the confusion matrix to a DataFrame for better visualization
    labels = np.unique(np.concatenate((true_labels, predicted_labels)))
    cm_df = pd.DataFrame(cm, index=labels, columns=labels)


    # Generate the classification report
    report = classification_report(true_labels, predicted_labels)

    return accuracy, unique_predicted, cm_df, report

def transform_scores(df, sentiment_column):
    """
    This function transforms sentiment scores into three-dimensional sentiment labels (positive/neutral/negative). It takes a DataFrame containing sentiment scores as input and returns a list of corresponding sentiment labels.

    Parameters:
        df: DataFrame
            A DataFrame containing sentiment scores, typically in a column named 'sentiment_bert'.
            
    Returns:
        list of str
            A list of sentiment labels ('positive', 'neutral', or 'negative') based on the input sentiment scores.
    """

    sentiment_3_labels = []
    for score in df['sentiment_bert']: 
        if score > 0.6:
            sentiment_label = "positiv"
        elif score < 0.4:
            sentiment_label = "negativ"
        else:
            sentiment_label = "neutral"
        sentiment_3_labels.append(sentiment_label)
    return sentiment_3_labels

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\joana\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


# More sentiment models and their accuracy per language

## Intro
This document is used to explore diffrent sentiment models which were trained specifically for sentiment per language and evaluate their performance in order to find the most accurate model compared to the multilingual model. 

### Load labeled data: d1.1


In [39]:
# Load labeled CSV files into a DataFrame
df_de_1 = pd.read_csv('https://raw.githubusercontent.com/svisel22/SS23-BIPM-Analytics-Lab---Group-4-repository/main/Preprocessing/data_clean/labeled-data/labeled-de_clean_1-1.csv', sep=';')
df_en_1 = pd.read_csv('https://raw.githubusercontent.com/svisel22/SS23-BIPM-Analytics-Lab---Group-4-repository/main/Preprocessing/data_clean/labeled-data/labeled-en_clean_1-1_not101010.csv')
df_es_1 = pd.read_csv('https://raw.githubusercontent.com/svisel22/SS23-BIPM-Analytics-Lab---Group-4-repository/main/Preprocessing/data_clean/labeled-data/labeled-es_clean_1-1.csv', sep=';')

### Load labeled data: data condensed for english
Explanation why we used data condensed for english

In [40]:
# Load labeled CSV file into a DataFrame
df_en_con = pd.read_csv('https://raw.githubusercontent.com/svisel22/SS23-BIPM-Analytics-Lab---Group-4-repository/main/Preprocessing/data_clean/labeled-data/labeled-en_clean_con_sen.csv')

## German
For the german data we used one additional model to the multilingual only, because then we focused on clustering.

### Model: AutoTokenizer and AutoModelForSequenceClassification.from_pretrained("oliverguhr/german-sentiment-bert")
The model used was trained on 1.834 million German-language samples sepcifically for sentiment classification.
https://huggingface.co/oliverguhr/german-sentiment-bert

In [89]:
# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("oliverguhr/german-sentiment-bert")
model = AutoModelForSequenceClassification.from_pretrained("oliverguhr/german-sentiment-bert")

# Create an empty list to store the sentiment scores
sentiment_scores = []

# Iterate over the 'data' column in the DataFrame
for text in df_de_1['data']:
    # Tokenize the input text
    tokens = tokenizer.encode_plus(text, padding="max_length", truncation=True, max_length=128, return_tensors="pt")

    # Perform the sentiment analysis
    with torch.no_grad():
        logits = model(**tokens)[0]

    # Convert logits to predicted label (positive/negative)
    predicted_label = torch.argmax(logits, dim=1).item()
    sentiment = "positive" if predicted_label == 1 else "negative"

    # Append the sentiment score to the list
    sentiment_scores.append(sentiment)

# Add the sentiment scores as a new column in the DataFrame
df_de_1['sentiment'] = sentiment_scores

df_de_1

Unnamed: 0,data,player,language,publishedAt,Label,sentiment_bert,sentiment_score_positive,sentiment_score_negative,sentiment_score_neutral,combined_sentiment_score,average_sentiment_score,sentiment
14,"zweimal verwandelte palacios, es wurde ein ube...",palacios,de,2023-03-19T20:01:45Z,positiv,neutral,5e-06,0.999991,3e-06,-0.999986,"[[tensor(3.3496e-06), tensor(5.2880e-06), tens...",negative
23,schalke: reis nimmt den spieler ausdrucklich i...,palacios,de,2023-04-03T08:12:20Z,neutral,neutral,0.00021,0.999716,7.4e-05,-0.999506,"[[tensor(7.3707e-05), tensor(0.0002), tensor(0...",negative
31,palacios (r.) verletzte sich leicht im hinspie...,palacios,de,2023-04-17T06:50:19Z,neutral,neutral,5e-06,0.999991,4e-06,-0.999986,"[[tensor(3.6115e-06), tensor(4.9578e-06), tens...",negative
41,"""auf jeden fall"" sei das ein ganz grosser tag,...",palacios,de,2023-04-20T20:54:03Z,positiv,neutral,0.001677,0.997806,0.000517,-0.996128,"[[tensor(0.0005), tensor(0.0017), tensor(0.997...",negative
58,beim abschlusstraining trug letzterer zumindes...,palacios,de,2023-05-17T11:20:50Z,neutral,neutral,5e-06,0.999992,4e-06,-0.999987,"[[tensor(3.5287e-06), tensor(4.9880e-06), tens...",negative
63,bayerleverkusen argentinischer weltmeister pal...,palacios,de,2023-05-14T18:07:21Z,negativ,neutral,6e-06,0.999991,3e-06,-0.999985,"[[tensor(3.4372e-06), tensor(5.6462e-06), tens...",negative
83,"""es gibt im moment nicht viel besseres in der ...",frimpong,de,2023-03-31T07:59:57Z,positiv,neutral,0.015271,0.981306,0.003423,-0.966035,"[[tensor(0.0034), tensor(0.0153), tensor(0.981...",negative
93,der fc bayern munchen interessiert sich nach i...,frimpong,de,2023-04-06T16:49:00Z,neutral,neutral,6e-06,0.999991,4e-06,-0.999985,"[[tensor(3.6224e-06), tensor(5.5068e-06), tens...",negative
95,frimpong hat noch bis vertrag in bayerleverkus...,frimpong,de,2023-04-07T07:42:27Z,neutral,neutral,5e-06,0.999991,4e-06,-0.999986,"[[tensor(3.6396e-06), tensor(5.1672e-06), tens...",negative
102,hincapie (l.) und tah bedanken sich fur die fa...,frimpong,de,2023-04-14T08:16:22Z,positiv,neutral,0.005233,0.991056,0.003711,-0.985823,"[[tensor(0.0037), tensor(0.0052), tensor(0.991...",negative


In [41]:
def german_sentiment_model(df):
    # Load the tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained("oliverguhr/german-sentiment-bert")
    model = AutoModelForSequenceClassification.from_pretrained("oliverguhr/german-sentiment-bert")

    # Create an empty list to store the sentiment scores
    sentiment_scores = []

    # Iterate over the 'data' column in the DataFrame
    for text in df['data']:
        # Tokenize the input text
        tokens = tokenizer.encode_plus(text, padding="max_length", truncation=True, max_length=128,
                                       return_tensors="pt")

        # Perform the sentiment analysis
        with torch.no_grad():
            logits = model(**tokens)[0]

        # Convert logits to predicted label (positive/negative)
        predicted_label = torch.argmax(logits, dim=1).item()
        sentiment = "positive" if predicted_label == 1 else "negativ"

        # Append the sentiment score to the list
        sentiment_scores.append(sentiment)

    # Add the sentiment scores as a new column in the DataFrame
    df['sentiment_bert'] = sentiment_scores

    return df


# Perform sentiment analysis on the DataFrame
df_de_1 = german_sentiment_model(df_de_1)

# Print the updated DataFrame
print(df_de_1)



#ACTION: NOTE FOR JOANA: check whether this makes a new dataframe instead of appending a column. If so: Change & check what 

                                                  data    player language  \
0    trainer alonso vor den mitgereisten fans in mo...  palacios       de   
1    zudem ist die konkurrenzsituation auf der dopp...  palacios       de   
2    wie auch palacios sah der defensive mittelfeld...  palacios       de   
3    ,,er ist eine option", erklart alonso, der im ...  palacios       de   
4       allerdings waren in andrich und dem argenti...  palacios       de   
..                                                 ...       ...      ...   
434  vor allem monacos krepin diatta und bayerlever...  hincapie       de   
435     vor allem monacos krepin diatta und bayerle...  hincapie       de   
436  in der funften minute der nachspielzeit sah ba...  hincapie       de   
437  une febrilite deconcertante venant du bayerlev...  hincapie       de   
438  sekunden waren gespielt, als andrich nach here...  hincapie       de   

              publishedAt    Label sentiment_bert  
0    2023-02-24T09:33:3

#### Evaluate model performance for german bert model
TODO and todo code

In [42]:
# Drop rows where 'Label' is NaN or empty
df_de_1.dropna(subset=['Label'], inplace=True)

In [65]:
print('Performance evaluation for oliverguhr/german-sentiment-bert')

# Evaluate the performance of the model
accuracy_de, unique_predicted_de, confusion_matrix_de, classification_report_de = evaluate_performance(df_de_1_pos_neg, 'sentiment_bert', 'Label')

# Print the evaluation results
print('Confusion matrix: ')
print(confusion_matrix_de)
print('Classification report: ')
print(classification_report_de)

Performance evaluation for oliverguhr/german-sentiment-bert
Confusion matrix: 
         negativ  positiv
negativ       10        0
positiv       10        0
Classification report: 
              precision    recall  f1-score   support

     negativ       0.50      1.00      0.67        10
     positiv       0.00      0.00      0.00        10

    accuracy                           0.50        20
   macro avg       0.25      0.50      0.33        20
weighted avg       0.25      0.50      0.33        20



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## English - d1.1
TODO

### Why we used the pipeline for the english and spanish models
TODO


### Model 1: sentiment-analysis from bert-base-uncased
TODO
https://huggingface.co/bert-base-uncased

In [44]:
# Initiate model
sentiment_classifier_en = pipeline('sentiment-analysis', model='bert-base-uncased')

# Apply sentiment analysis on the 'data' column  and store the sentiment in a new column "sentiment_bert"
df_en_1['sentiment_bert'] = df_en_1['data'].apply(lambda x: sentiment_classifier_en(x)[0]['score'])

# Print the updated dataframe
df_en_1.head()

Unnamed: 0,data,player,language,publishedAt,Label,sentiment_bert
0,ten if you included the toe-poked volley to te...,palacios,en,2023-02-16T23:56:00Z,,0.679857
1,bayerleverkusen took the lead again in the st ...,palacios,en,2023-02-23T20:50:50Z,,0.731629
2,wissam ben yedder levelled straight away from ...,palacios,en,2023-02-23T20:53:59Z,positiv,0.725886
3,"midfielders: leandro paredes (juventus), angel...",palacios,en,2023-03-03T16:40:46Z,neutral,0.763633
4,midfielders: rodrigo de paul (atletico madrid)...,palacios,en,2023-03-03T18:17:37Z,neutral,0.760541


### Model 2: Sentiment Intensity Analyzer from nltk

In [45]:
# Create an instance of the VADER sentiment analyzer
sid = SentimentIntensityAnalyzer()

# Function to get sentiment polarity
def get_sentiment(text):
    sentiment_scores = sid.polarity_scores(text)
    return sentiment_scores['compound']


# Apply sentiment analysis to the "data" column and store the sentiment in a new column "sentiment_nltk"
df_en_1['sentiment_nltk'] = df_en_1['data'].apply(get_sentiment)

# Print the updated dataframe
df_en_1.head()


Unnamed: 0,data,player,language,publishedAt,Label,sentiment_bert,sentiment_nltk
0,ten if you included the toe-poked volley to te...,palacios,en,2023-02-16T23:56:00Z,,0.679857,0.0
1,bayerleverkusen took the lead again in the st ...,palacios,en,2023-02-23T20:50:50Z,,0.731629,-0.0516
2,wissam ben yedder levelled straight away from ...,palacios,en,2023-02-23T20:53:59Z,positiv,0.725886,0.2263
3,"midfielders: leandro paredes (juventus), angel...",palacios,en,2023-03-03T16:40:46Z,neutral,0.763633,0.0
4,midfielders: rodrigo de paul (atletico madrid)...,palacios,en,2023-03-03T18:17:37Z,neutral,0.760541,0.0


#### Evaluate model performance for all english models
TODO 

In [46]:
# Drop rows where 'Label' is NaN or empty
df_en_1.dropna(subset=['Label'], inplace=True)

In [67]:
print('Performance evaluation for bert-base-uncased')

# Transform score into three-dimensional label for Performance evaluation
sentiment_3_labels = transform_scores(df_en_1, 'sentiment_3_label_bert')
df_en_1['sentiment_3_label_bert'] = sentiment_3_labels

# Evaluate the performance of the model
accuracy_en_bert, unique_predicted_en_bert, confusion_matrix_en_bert, classification_report_en_bert = evaluate_performance(df_en_1, 'sentiment_3_label_bert', 'Label')

# Print the evaluation results
print('Confusion matrix: ')
print(confusion_matrix_en_bert)
print('Classification report: ')
print(classification_report_en_bert)


Performance evaluation for bert-base-uncased
Confusion matrix: 
         negativ  neutral  positiv
negativ        0        0        4
neutral        0        0       13
positiv        0        0       13
Classification report: 
              precision    recall  f1-score   support

     negativ       0.00      0.00      0.00         4
     neutral       0.00      0.00      0.00        13
     positiv       0.43      1.00      0.60        13

    accuracy                           0.43        30
   macro avg       0.14      0.33      0.20        30
weighted avg       0.19      0.43      0.26        30



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [66]:
print('Performance evaluation for nltk')

# Transform score into three-dimensional label for Performance evaluation
sentiment_3_labels = transform_scores(df_en_1, 'sentiment_bert')
df_en_1['sentiment_3_label_bert'] = sentiment_3_labels

# Evaluate the performance of the model
accuracy_en_nltk, unique_predicted_en_nltk, confusion_matrix_en_nltk, classification_report_en_nltk = evaluate_performance(df_en_1, 'sentiment_3_label_bert', 'Label')

# Print the evaluation results
print('Confusion matrix: ')
print(confusion_matrix_en_nltk)
print('Classification report: ')
print(classification_report_en_nltk)

Performance evaluation for nltk
Confusion matrix: 
         negativ  neutral  positiv
negativ        0        0        4
neutral        0        0       13
positiv        0        0       13
Classification report: 
              precision    recall  f1-score   support

     negativ       0.00      0.00      0.00         4
     neutral       0.00      0.00      0.00        13
     positiv       0.43      1.00      0.60        13

    accuracy                           0.43        30
   macro avg       0.14      0.33      0.20        30
weighted avg       0.19      0.43      0.26        30



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Spanish
TODO?

### Model 1: sentiment-analysis from spanish bert: beto-sentiment-analysis
TODO
https://huggingface.co/finiteautomata/beto-sentiment-analysis
Although this model was trained on tweets, not on news data, it is a popular model to use for sentiment analysis of spanish data.
The base model is BETO, a BERT model trained on spanish data (for further information: https://github.com/dccuchile/beto).

In [49]:
sentiment_classifier_es_beto = pipeline('sentiment-analysis', model='finiteautomata/beto-sentiment-analysis')

In [50]:
# Apply sentiment analysis on the 'data' column  and store the sentiment in a new column "sentiment_beto"
df_es_1['sentiment_beto'] = df_es_1['data'].apply(lambda x: sentiment_classifier_es_beto(x)[0]['score'])

# Print the updated dataframe
df_es_1.head()

Unnamed: 0,data,player,language,publishedAt,Label,sentiment_beto
0,adeyemi firmo el que es su primer gol en lo qu...,palacios,es,2023-01-29T18:25:03Z,,0.468314
1,"el club aleman, que siempre se ha caracterizad...",palacios,es,2023-01-31T20:41:38Z,,0.99652
2,"alberto fernandez el presidente de la afa, cl...",palacios,es,2023-02-09T18:32:38Z,,0.926711
3,alberto fernandez tambien participaron los ot...,palacios,es,2023-02-12T21:13:55Z,,0.961068
4,fue el momento en que desde las tribunas se de...,palacios,es,2023-02-13T01:05:15Z,,0.988847


### Model 2: sentiment-analysis from another spanish bert: bert-base-spanish-wwm-uncased
TODO

In [51]:
sentiment_classifier_es_bert = pipeline('sentiment-analysis', model='dccuchile/bert-base-spanish-wwm-uncased')

In [52]:
# Apply sentiment analysis on the 'data' column  and store the sentiment in a new column "sentiment_bert"
df_es_1['sentiment_bert'] = df_es_1['data'].apply(lambda x: sentiment_classifier_es_bert(x)[0]['score'])

# Print the updated dataframe
df_es_1.head()

Unnamed: 0,data,player,language,publishedAt,Label,sentiment_beto,sentiment_bert
0,adeyemi firmo el que es su primer gol en lo qu...,palacios,es,2023-01-29T18:25:03Z,,0.468314,0.569327
1,"el club aleman, que siempre se ha caracterizad...",palacios,es,2023-01-31T20:41:38Z,,0.99652,0.530692
2,"alberto fernandez el presidente de la afa, cl...",palacios,es,2023-02-09T18:32:38Z,,0.926711,0.521055
3,alberto fernandez tambien participaron los ot...,palacios,es,2023-02-12T21:13:55Z,,0.961068,0.538828
4,fue el momento en que desde las tribunas se de...,palacios,es,2023-02-13T01:05:15Z,,0.988847,0.55548


#### Evaluate model performance for all spanish models
TODO 

In [53]:
# Drop rows where 'Label' is NaN or empty
df_es_1.dropna(subset=['Label'], inplace=True)

In [68]:
print('Performance evaluation for beto-sentiment-analysis')

# Transform score into three-dimensional label for Performance evaluation
sentiment_3_labels = transform_scores(df_en_1, 'sentiment_beto')
df_es_1['sentiment_3_label_beto'] = sentiment_3_labels

# Evaluate the performance of the model
accuracy_es_beto, unique_predicted_es_beto, confusion_matrix_es_beto, classification_report_es_beto = evaluate_performance(df_es_1, 'sentiment_3_label_beto', 'Label')

# Print the evaluation results
print('Confusion matrix: ')
print(confusion_matrix_es_beto)
print('Classification report: ')
print(classification_report_es_beto)

Performance evaluation for beto-sentiment-analysis
Confusion matrix: 
         negativ  neutral  positiv
negativ        0        0       10
neutral        0        0       10
positiv        0        0       10
Classification report: 
              precision    recall  f1-score   support

     negativ       0.00      0.00      0.00        10
     neutral       0.00      0.00      0.00        10
     positiv       0.33      1.00      0.50        10

    accuracy                           0.33        30
   macro avg       0.11      0.33      0.17        30
weighted avg       0.11      0.33      0.17        30



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [69]:
print('Performance evaluation for bert-base-spanish-wwm-uncased')

# Transform score into three-dimensional label for Performance evaluation
sentiment_3_labels = transform_scores(df_es_1, 'sentiment_bert')
df_es_1['sentiment_3_label_bert'] = sentiment_3_labels

# Evaluate the performance of the model
accuracy_es_bert, unique_predicted_es_bert, confusion_matrix_es_bert, classification_report_es_bert = evaluate_performance(df_es_1, 'sentiment_3_label_bert', 'Label')

# Print the evaluation results
print('Confusion matrix: ')
print(confusion_matrix_es_bert)
print('Classification report: ')
print(classification_report_es_bert)

Performance evaluation for bert-base-spanish-wwm-uncased
Confusion matrix: 
         negativ  neutral  positiv
negativ        0        0       10
neutral        0        0       10
positiv        0        0       10
Classification report: 
              precision    recall  f1-score   support

     negativ       0.00      0.00      0.00        10
     neutral       0.00      0.00      0.00        10
     positiv       0.33      1.00      0.50        10

    accuracy                           0.33        30
   macro avg       0.11      0.33      0.17        30
weighted avg       0.11      0.33      0.17        30



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## English: data condensed
TODO

### Model 1: sentiment-analysis from bert-base-uncased
TODO
https://huggingface.co/bert-base-uncased

In [56]:
# Model loaded above: sentiment_classifier_en = pipeline('sentiment-analysis', model='bert-base-uncased')

# Apply sentiment analysis on the 'data' column  and store the sentiment in a new column "sentiment_bert"
df_en_con['sentiment_bert'] = df_en_con['data'].apply(lambda x: sentiment_classifier_en(x)[0]['score'])

# Print the updated dataframe
df_en_con.head()

Unnamed: 0,data,player,language,publishedAt,Label,sentiment_bert
0,bayerleverkusen took lead minute midfielder p...,exequiel palacios,en,2023-02-23T20:50:50Z,,0.697647
1,midfielders leandro paredes juventus angel ...,exequiel palacios,en,2023-03-03T16:42:19Z,neutral,0.740338
2,half goal joshua kimmich canceled penalties pa...,exequiel palacios,en,2023-03-19T18:30:00Z,positiv,0.708196
3,by reuters bayerleverkusen s palacios scored s...,exequiel palacios,en,2023-03-19T18:42:59Z,,0.699258
4,bayerleverkusen s palacios scored second half ...,exequiel palacios,en,2023-03-19T19:05:09Z,positiv,0.7175


#### Evaluate model performance for english condensed model

In [57]:
# Drop rows where 'Label' is NaN or empty
df_en_con.dropna(subset=['Label'], inplace=True)

In [73]:
print('Performance evaluation for bert-base-uncased on english condensed')

# Transform score into three-dimensional label for Performance evaluation
sentiment_3_labels = transform_scores(df_en_con, 'sentiment_bert')
df_en_con['sentiment_3_label_bert'] = sentiment_3_labels

# Evaluate the performance of the model
accuracy_en_bert, unique_predicted_en_bert, confusion_matrix_en_bert, classification_report_en_bert = evaluate_performance(df_en_con, 'sentiment_3_label_bert', 'Label')

# Print the evaluation results
print('Confusion matrix: ')
print(confusion_matrix_en_bert)
print('Classification report: ')
print(classification_report_en_bert)


Performance evaluation for bert-base-uncased on english condensed
Confusion matrix: 
         negativ  neutral  positiv
negativ        0        1        2
neutral        0        1        9
positiv        0        0       10
Classification report: 
              precision    recall  f1-score   support

     negativ       0.00      0.00      0.00         3
     neutral       0.50      0.10      0.17        10
     positiv       0.48      1.00      0.65        10

    accuracy                           0.48        23
   macro avg       0.33      0.37      0.27        23
weighted avg       0.42      0.48      0.35        23



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Summary
As The best accuracy

# Next steps for Bayer04 Leverkusen
TODO