In [67]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from germansentiment import SentimentModel
import re
from tqdm.notebook import tqdm
import numpy as np
# suppress warnings
import warnings
warnings.filterwarnings('ignore')

# Sentiment in the #BTW17 Dataset
After gaining a good intuition for the different topics in this dataset, we now analyze the sentiment of the tweets. Here, we focus on the politicians' tweets only. Furthermore, we restrict the analysis to the last month before the election because of limited computational power.

For the sentiment analysis, we will use this classifier: https://huggingface.co/oliverguhr/german-sentiment-bert.

## Sentiment analysis in general

To get an intuition for the general sentiment of the politicians' tweets, we plot the sentiment over time.

In [2]:
# Instantiate the German sentiment classifier (SentimentModel from the
# germansentiment package) with its default arguments.
model = SentimentModel()

In [55]:
# Load the politicians' tweets from JSON; later cells read the 'text' and
# 'party' columns of this frame.
tweets = pd.read_json('../data/politician_tweets.json')

In [56]:
# construct docs
# Patterns are written as raw strings so that regex escapes such as \d, \w and
# \/ reach the regex engine verbatim instead of being parsed as (invalid)
# string escape sequences. They are compiled once, outside the loop.
mention_pattern = re.compile(r'@[A-Za-z0-9_]+')
# \w is Unicode-aware for str patterns, so hashtags containing non-ASCII
# letters (umlauts, ß) are removed completely instead of leaving a fragment
# of the tag behind in the text.
hashtag_pattern = re.compile(r'#\w+')
# NOTE(review): this pattern is permissive -- the scheme is optional, so it
# also matches lower-case dotted tokens that are not links (e.g. "u.a.").
# Left unchanged here; confirm whether that is intended.
link_pattern = re.compile(r'(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w\.-]*)')

docs = []
for text in tweets['text'].tolist():
    # remove mentions
    text = mention_pattern.sub('', text)
    # remove hashtags
    text = hashtag_pattern.sub('', text)
    # remove links
    text = link_pattern.sub('', text)
    # strip whitespaces; every document is kept as a one-element list, which
    # is the shape the prediction loop hands to the model
    docs.append([text.strip()])

In [65]:
# get sentiment per tweet: every entry of `docs` is handed to the model on its
# own, with tqdm showing progress over the full index range
sentiments = [
    model.predict_sentiment(docs[idx])
    for idx in tqdm(range(len(docs)))
]

  0%|          | 0/50907 [00:00<?, ?it/s]

In [88]:
# save labels in dataframe (np.squeeze removes the length-1 axes of the
# collected predictions so they fit into a single column)
tweets['sentiment'] = np.squeeze(sentiments)

# score: 'positive' -> 1.0, 'negative' -> -1.0, every other label -> 0.0
sentiment_labels = tweets['sentiment']
label_checks = [
    (sentiment_labels == 'positive').to_numpy(),
    (sentiment_labels == 'negative').to_numpy(),
]
tweets['sen_score'] = np.select(label_checks, [1.0, -1.0], default=0.0)

# persist the frame including the two new columns
tweets.to_json('../data/politician_tweets_sentiment.json')

In [89]:
# Average of the per-tweet scores over the whole frame.
overall_score = tweets["sen_score"].mean()
print(f'Overall sentiment: {overall_score}')

Overall sentiment: -0.11340287190366748


In [91]:
# Report the mean score per party, in order of first appearance in the frame.
for party_name in tweets['party'].unique():
    party_scores = tweets.loc[tweets['party'] == party_name, 'sen_score']
    print(f'{party_name} sentiment: {party_scores.mean()}')

Linke sentiment: -0.1923904052936311
Grüne sentiment: -0.1748747591522158
SPD sentiment: -0.045782829868743324
CDU/CSU sentiment: -0.10712999639899172
FDP sentiment: -0.08767557770729498
AfD sentiment: -0.07918552036199095
