In [25]:
import pandas as pd
from collections import defaultdict
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

from greenspectors.models.flair import FlairSentimentAnalyzer
from greenspectors.models.vader import VaderSentimentAnalyzer
from greenspectors.models.sentiment_analysis import Sentiment
from greenspectors.env import DATA_PATH

In [3]:
# Instantiate every model exactly once; the comparison cells reuse these objects.
flair_sentiment_analyzer = FlairSentimentAnalyzer()
vader_sentiment_analyzer = VaderSentimentAnalyzer()

# Display name -> analyzer, in the order the results are reported.
sentiment_analyzers = dict(
    Flair=flair_sentiment_analyzer,
    Vader=vader_sentiment_analyzer,
)

2021-10-16 19:18:34,386 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to C:\Users\Tobias\AppData\Local\Temp\tmpimqtup33


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 265512723/265512723 [01:09<00:00, 3821443.90B/s]

2021-10-16 19:19:43,968 copying C:\Users\Tobias\AppData\Local\Temp\tmpimqtup33 to cache at C:\Users\Tobias\.flair\models\sentiment-en-mix-distillbert_4.pt





2021-10-16 19:19:44,112 removing temp file C:\Users\Tobias\AppData\Local\Temp\tmpimqtup33
2021-10-16 19:19:44,139 loading file C:\Users\Tobias\.flair\models\sentiment-en-mix-distillbert_4.pt


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Tobias\AppData\Roaming\nltk_data...


In [None]:
# Tweets used as the "Positive" examples in the qualitative spot check.
# Adjacent string pieces are concatenated by the parser, so the tweet text
# (including its missing spaces after punctuation) is unchanged.
tweets_positive = [
    (
        "Great panel discussion around the topic of #Sustainability and #netzero energy!"
        "Brilliant presentation and input from @OliviaWhitlam on #siemens sustainability "
        "strategy and journey to achieving netzero carbon.some key messages byall "
        "#collaboration #transition #trust #transparency"
    ),
]

In [None]:
# Tweets used as the "Negative" examples in the qualitative spot check.
# Each entry is written as adjacent string pieces; the joined text is unchanged.
tweets_negative = [
    (
        "What a greenwash. Siemens are still helping Adani build a massive thermal "
        "coal mine in Australia. https://reuters.com/article/us-sie"
    ),
    (
        "Activists in Germany protest against new dams in the #Amazon. "
        "@Siemens: Green innovation, not Amazon destruction!"
    ),
    (
        "@Siemens u love green tech. and we love the Amazon. "
        "Innovation is not forest destruction.#saveTheAmazon #StopSiemens"
    ),
]

In [17]:
# Qualitative spot check: print each analyzer's raw prediction for every
# example tweet, grouped under the heading of the list it came from.
for analyzer_name, sentiment_analyzer in sentiment_analyzers.items():
    print("========================")
    print(analyzer_name)
    print("========================")

    for heading, section_tweets in (
        ("Positive:", tweets_positive),
        ("Negative:", tweets_negative),
    ):
        print(heading)
        for tweet in section_tweets:
            print("\t", sentiment_analyzer.predict(tweet))
    print()

Flair
Positive:
	 (<Sentiment.POSITIVE: 1>, 0.9973779916763306)
Negative:
	 (<Sentiment.NEGATIVE: 3>, -0.6935323476791382)
	 (<Sentiment.POSITIVE: 1>, 0.6526410579681396)
	 (<Sentiment.POSITIVE: 1>, 0.9387254118919373)

Vader
Positive:
	 (<Sentiment.POSITIVE: 1>, 0.6588)
Negative:
	 (<Sentiment.NEUTRAL: 2>, -0.1531)
	 (<Sentiment.POSITIVE: 1>, 0.5223)
	 (<Sentiment.POSITIVE: 1>, 0.9136)



# 2. Evaluate on hand-labelled tweets

In [5]:
# Hand-labelled tweets; the evaluation loop reads the `Text` and `Sentiment` columns.
labelled_tweets_csv = f"{DATA_PATH}/twitter_sentiment_analysis.csv"
df = pd.read_csv(labelled_tweets_csv)

In [17]:
# Ground-truth labels, one per row of the hand-labelled CSV.
labels = df["Sentiment"].tolist()

# Model name -> list of raw `predict` results, index-aligned with `labels`.
predictions = defaultdict(list)

# Iterate the text column directly: iterrows() builds a Series for every row
# and yields an index that was never used here.
for text in df["Text"]:
    # Keep the row-major order (every analyzer sees a tweet before moving on).
    for name, sentiment_analyzer in sentiment_analyzers.items():
        predictions[name].append(sentiment_analyzer.predict(text))
    

In [33]:
def process_prediction(prediction):
    """Collapse an analyzer prediction into a binary label (1 = positive, 0 = negative).

    Args:
        prediction: Indexable pair whose first item is a ``Sentiment`` member
            and whose second item is the signed score returned alongside it.

    Returns:
        int: 1 for ``Sentiment.POSITIVE`` and 0 for ``Sentiment.NEGATIVE``.
        Any other sentiment is decided by the sign of the score: 1 when the
        score is strictly greater than zero, otherwise 0.
    """
    sentiment, score = prediction[0], prediction[1]
    if sentiment == Sentiment.POSITIVE:
        return 1
    if sentiment == Sentiment.NEGATIVE:
        return 0
    # Cast so the fallback yields the same int labels as the branches above
    # rather than a bool mixed into the prediction list.
    return int(score > 0)

In [34]:
# Score every analyzer against the hand-labelled tweets.
for model_name, raw_predictions in predictions.items():
    # Reduce each raw prediction to the binary label the metrics compare against.
    model_predictions = list(map(process_prediction, raw_predictions))

    # Evaluate all four metrics up front, in reporting order.
    f1, precision, recall, accuracy = (
        metric(labels, model_predictions)
        for metric in (f1_score, precision_score, recall_score, accuracy_score)
    )

    print(model_name)
    print(f" - F1: {f1: 0.3f}")
    print(f" - Precision: {precision: 0.3f}")
    print(f" - Recall: {recall: 0.3f}")
    print(f" - Accuracy: {accuracy: 0.3f}")

Flair
 - F1:  0.667
 - Precision:  0.556
 - Recall:  0.833
 - Accuracy:  0.706
Vader
 - F1:  0.457
 - Precision:  0.348
 - Recall:  0.667
 - Accuracy:  0.441


In [None]:
# Scratch cell cleared: a bare `f1_score()` call raises TypeError (y_true and
# y_pred are required), which stops Restart & Run All on the final cell.