In [184]:
import pandas as pd

# Read the train and test splits (tab-separated) and pool them into a single
# frame with a fresh 0..n-1 index.
train = pd.read_csv('data/MetaHate<lang>_train.tsv', sep='\t')
test = pd.read_csv('data/MetaHate<lang>_test.tsv', sep='\t')
data = pd.concat((train, test), ignore_index=True)

# Partition the pooled texts by label: rows labelled 1 go to text_hate,
# rows labelled 0 go to text_no_hate.
text_hate = data['text'][data['label'].eq(1)].tolist()
text_no_hate = data['text'][data['label'].eq(0)].tolist()

In [175]:
# The ten score columns kept from the lexicon file.
emotion_cols = ['anger', 'anticipation', 'disgust', 'fear', 'joy',
                'negative', 'positive', 'sadness', 'surprise', 'trust']

# Read the tab-separated lexicon, keep only the word column plus the score
# columns, and give the word column the generic name 'word'.
nrc_df = (
    pd.read_csv('<lang>-NRC-EmoLex.txt', sep='\t')
    [['<lang> Word'] + emotion_cols]
    .rename(columns={'<lang> Word': 'word'})
)

In [176]:
# Average the score columns over all rows that share the same word, so every
# word ends up on exactly one row ('word' stays a regular column here).
nrc_df_grouped = nrc_df.groupby('word', as_index=False).mean()
# Turn the frame into a nested lookup of the form {word: {emotion: score}}.
nrc_emotion_dict = nrc_df_grouped.set_index('word').to_dict(orient='index')

In [None]:
from nltk.tokenize import word_tokenize
from collections import defaultdict
import nltk
# word_tokenize needs the Punkt tokenizer data. NLTK releases from 3.8.2
# onwards load it from the 'punkt_tab' package rather than 'punkt', so fetch
# both to keep the notebook working on old and new NLTK installations.
nltk.download('punkt')
nltk.download('punkt_tab')

def analyze_emotions(text_list, emotion_dict, tokenizer=None):
    """Sum lexicon emotion scores over every token of every text.

    Each text is lower-cased and tokenized; every token that is a key of
    ``emotion_dict`` adds that word's per-emotion values to running totals.

    Args:
        text_list: Iterable of texts. Entries that are not ``str`` (for
            example the NaN pandas produces for an empty cell) are skipped
            instead of raising ``AttributeError`` on ``.lower()``.
        emotion_dict: Mapping of the form ``{word: {emotion: value}}``.
            Tokens are lower-cased before lookup, so only lower-case keys
            can match.
        tokenizer: Optional callable taking a string and returning an
            iterable of tokens. When omitted, the notebook-level
            ``word_tokenize`` is used.

    Returns:
        A plain ``dict`` mapping each emotion encountered to its summed
        value; empty if no token matched the lexicon.
    """
    if tokenizer is None:
        # Resolved lazily so a custom tokenizer never requires NLTK data.
        tokenizer = word_tokenize

    emotion_totals = defaultdict(float)

    for text in text_list:
        # Missing values read from CSV arrive as float NaN / None.
        if not isinstance(text, str):
            continue
        for word in tokenizer(text.lower()):
            scores = emotion_dict.get(word)
            if scores is None:
                continue
            for emotion, value in scores.items():
                emotion_totals[emotion] += value

    return dict(emotion_totals)

In [None]:
# Lexicon-based emotion totals for each class, computed against the same
# word -> emotion lookup.
emotion_hate = analyze_emotions(text_list=text_hate, emotion_dict=nrc_emotion_dict)
emotion_no_hate = analyze_emotions(text_list=text_no_hate, emotion_dict=nrc_emotion_dict)

# Print both result dicts for a quick side-by-side look.
print("Hate emotions:", emotion_hate)
print("Non-hate emotions:", emotion_no_hate)

In [None]:
from nrclex import NRCLex

# Score each class with NRCLex: all texts of a class are joined with single
# spaces into one string and passed to NRCLex as a whole.
# NOTE(review): this rebinds emotion_hate / emotion_no_hate, which the
# lexicon cell earlier in the notebook assigned as plain dicts. From here on
# they are NRCLex objects, so later code that calls dict methods on them
# presumably fails — confirm which variant each downstream cell expects.
emotion_hate = NRCLex(' '.join(text_hate))
emotion_no_hate = NRCLex(' '.join(text_no_hate))

# Show the raw_emotion_scores attribute of each NRCLex result.
print(emotion_hate.raw_emotion_scores)
print(emotion_no_hate.raw_emotion_scores)

In [None]:
from textblob import TextBlob

# Pool each class's texts into one space-joined string and wrap it in a
# TextBlob.
blob_hate = TextBlob(' '.join(text_hate))
blob_no_hate = TextBlob(' '.join(text_no_hate))

# Materialise the sentences of each blob as lists; their lengths are used
# later as the normalisation denominators.
sentences_hate = [sentence for sentence in blob_hate.sentences]
sentences_no_hate = [sentence for sentence in blob_no_hate.sentences]

In [188]:
# NRCLex variant: divide every raw emotion score by the number of sentences
# in the corresponding class.
emotion_hate_percentage = dict(
    (emotion, score / len(sentences_hate))
    for emotion, score in emotion_hate.raw_emotion_scores.items()
)
emotion_no_hate_percentage = dict(
    (emotion, score / len(sentences_no_hate))
    for emotion, score in emotion_no_hate.raw_emotion_scores.items()
)

In [181]:
# Divide every summed emotion score by the number of sentences in the class.
# emotion_hate / emotion_no_hate are plain dicts when they come from
# analyze_emotions, but NRCLex objects once the NRCLex cell has run; calling
# .items() on the latter raises AttributeError. Read raw_emotion_scores when
# it exists and fall back to the object itself otherwise, so this cell works
# with either representation.
emotion_hate_percentage = {
    key: value / len(sentences_hate)
    for key, value in getattr(emotion_hate, 'raw_emotion_scores', emotion_hate).items()
}
emotion_no_hate_percentage = {
    key: value / len(sentences_no_hate)
    for key, value in getattr(emotion_no_hate, 'raw_emotion_scores', emotion_no_hate).items()
}

In [189]:
# Rebuild both score dicts with their keys in alphabetical order.
hate_sorted = {
    emotion: emotion_hate_percentage[emotion]
    for emotion in sorted(emotion_hate_percentage)
}
no_hate_sorted = {
    emotion: emotion_no_hate_percentage[emotion]
    for emotion in sorted(emotion_no_hate_percentage)
}

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Radar-chart axes: the ten emotion names, kept in alphabetical order.
categories = ['fear', 'anger', 'anticipation', 'trust', 'surprise',
              'positive', 'negative', 'sadness', 'disgust', 'joy']
categories.sort()

def radar_plot(categories, hate, nohate, title):
    """Show a radar chart comparing two series over the same categories.

    Args:
        categories: Axis labels, one per spoke of the chart.
        hate: Values for the 'Hate' polygon, one per category, in the same
            order as ``categories``.
        nohate: Values for the 'No hate' polygon, same layout as ``hate``.
        title: Title placed on the chart.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    spoke_count = len(categories)
    # Evenly spaced spokes: radians for the polygons, degrees for the labels.
    spoke_radians = np.linspace(0, 2 * np.pi, spoke_count, endpoint=False).tolist()
    spoke_degrees = np.linspace(0, 360, spoke_count, endpoint=False)

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={'polar': True})

    # Draw the two translucent polygons, 'Hate' first so 'No hate' overlays it.
    series = (
        (hate, '#d2047b', 'Hate'),
        (nohate, '#f397c2', 'No hate'),
    )
    for values, colour, legend_label in series:
        ax.fill(spoke_radians, values, color=colour, alpha=0.5, label=legend_label)

    # Category names around the rim, pushed slightly away from the plot.
    ax.set_thetagrids(spoke_degrees, categories, fontsize=12)
    ax.xaxis.set_tick_params(pad=10)

    ax.set_title(title)
    ax.legend(fontsize=12)

    # Radial grid at fixed positions 0.2 ... 1, labelled as percentages.
    ax.set_yticks([0.2,0.4,0.6,0.8,1])
    ax.set_yticklabels(["20%","40%", "60%", "80%","100%"], color="grey", size=9, fontweight="bold")
    plt.show()

# Look each value up by category name instead of relying on the sorted dicts
# having exactly the same keys, in the same order, as `categories`. An emotion
# missing from the scores is plotted as 0.0 rather than shifting the other
# values onto the wrong axes or making the lengths disagree.
radar_plot(
    categories,
    [hate_sorted.get(category, 0.0) for category in categories],
    [no_hate_sorted.get(category, 0.0) for category in categories],
    'Hate Speech Emotions',
)