### This notebook:

**Which emotions does irony tend to appear with?**

+ test with both emotion classifiers:
    + trained on ISEAR data (7 labels)
    + trained on TweetEval data (4 labels)

### Check Requirements/imports

In [None]:
# Print the installed TensorFlow version.
import tensorflow as tf
print(tf.version.VERSION)

In [None]:
pip install emojis

In [None]:
pip install contractions

In [None]:
!pip3 install -q ktrain 

In [None]:
pip install -U sklearn

In [None]:
pip install parse_version

In [None]:
import os

import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow_hub as hub

from keras.utils import np_utils

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt

# Turn on memory growth for every visible GPU, then report the runtime setup.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # The memory-growth setting has to be identical on all GPUs.
    for device in gpus:
      tf.config.experimental.set_memory_growth(device, True)
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as err:
    # Memory growth can only be set before the GPUs have been initialised.
    print(err)

# Library versions, execution mode and GPU availability.
print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)
print("GPU is", "available" if gpus else "NOT AVAILABLE")

1 Physical GPUs, 1 Logical GPUs
Version:  2.5.0
Eager mode:  True
Hub version:  0.12.0
GPU is available


In [None]:
# Mount Google Drive at /content/drive; the data and model paths used in this
# notebook all live below that mount point.
from google.colab import drive
drive.mount('/content/drive')

### Load data

In [None]:
# Load train data; header=0 together with `names` replaces the header row of
# the file with our own column names.
train_path = '/content/drive/MyDrive/TeamLab/data/semeval_taskA_corrected.csv'

df_train = pd.read_csv(
    train_path,
    header=0,
    names=['index', 'irony_label', 'tweet'],
)
                                                

In [None]:
# Preview the first rows of the training frame.
df_train.head()

Unnamed: 0,index,irony_label,tweet
0,1,1,Sweet United Nations video. Just in time for C...
1,2,1,@mrdahl87 We are rumored to have talked to Erv...
2,3,1,Hey there! Nice to see you Minnesota/ND Winter...
3,4,0,3 episodes left I'm dying over here
4,5,1,I can't breathe! was chosen as the most notabl...


In [None]:
# Check if dataset is balanced

# A tweet is either ironic (1) or non-ironic (0) -> binary classification
classes = df_train.irony_label.unique()

# Number of tweets per class: label 0 first, then label 1
for irony_value in (0, 1):
    print((df_train.irony_label == irony_value).sum())

# => Balanced

1923
1911


In [None]:
# Load test data (tab-separated, whereas the training file is read with the
# default comma separator)
test_path = '/content/drive/MyDrive/TeamLab/data/semeval_taskA_test.csv'

df_test = pd.read_csv(
    test_path,
    sep='\t',
    header=0,
    names=['index', 'irony_label', 'tweet'],
)

# Number of tweets per class: label 0 first, then label 1
for irony_value in (0, 1):
    print((df_test.irony_label == irony_value).sum())

df_test.head()

473
311


Unnamed: 0,index,irony_label,tweet
0,1,0,@Callisto1947 Can U Help?||More conservatives ...
1,2,1,"Just walked in to #Starbucks and asked for a ""..."
2,3,0,#NOT GONNA WIN http://t.co/Mc9ebqjAqj
3,4,0,@mickymantell He is exactly that sort of perso...
4,5,1,So much #sarcasm at work mate 10/10 #boring 10...


In [None]:
# Pull the tweets (inputs) and irony labels (targets) out of both frames as lists.
x_train, y_train = list(df_train.tweet), list(df_train.irony_label)
x_test, y_test = list(df_test.tweet), list(df_test.irony_label)

### Normalisation of input

Normalise:
+ hashtags
+ tagged users
+ emoji (remove)
+ urls 


In [None]:
import html
import re

import contractions
import emojis
import numpy as np
from nltk.tokenize import TweetTokenizer


def normalise_tweet(tweet):
    """Normalise a raw tweet into a single space-separated string of tokens.

    Steps, in order:
      1. decode HTML entities, spell out "&" as "and", drop "<" and ">"
      2. replace each URL with the placeholder "url"
      3. put a space in front of "@" so mentions become separate tokens,
         and replace the "#" of hashtags with a space
      4. convert emoji to their ``:alias:`` form and remove those aliases
      5. replace the remaining punctuation characters with spaces
      6. expand contractions
      7. tokenise; tokens starting with "@" become "tagged_user",
         purely numeric tokens become "digit"

    Args:
        tweet: the raw tweet text.

    Returns:
        The normalised tweet as one string with tokens joined by single spaces.
    """
    # Decode entities first so that an escaped "&amp;" ends up as "and"
    # rather than "andamp;".
    norm_tweet = html.unescape(tweet)
    norm_tweet = re.sub("&", "and", norm_tweet)
    norm_tweet = re.sub(r"[<>]", "", norm_tweet)
    # Replace only the URL itself (http and https). The previous pattern
    # "http:.*" ignored https links and swallowed everything after the URL.
    norm_tweet = re.sub(r"https?://\S+", "url", norm_tweet)
    norm_tweet = re.sub("@", " @", norm_tweet)
    norm_tweet = re.sub("#", " ", norm_tweet)

    norm_tweet = emojis.decode(norm_tweet)
    # Remove emojis: match one ":alias:" at a time (no whitespace inside, at
    # least one letter) so the text between two emoji is kept.
    norm_tweet = re.sub(r":[a-z0-9_+\-]*[a-z][a-z0-9_+\-]*:", "", norm_tweet)

    norm_tweet = re.sub(r"[-()/_;:{}=~|,\[\]]", " ", norm_tweet)

    norm_tweet = contractions.fix(norm_tweet)

    tokens = []
    for token in TweetTokenizer().tokenize(norm_tweet):
        if token.startswith("@"):
            token = "tagged_user"
        elif token.isnumeric():
            token = "digit"
        tokens.append(token)

    return " ".join(tokens)

In [None]:
# Normalise every tweet of both splits, keeping the original order.
x_train_norm = [normalise_tweet(raw_tweet) for raw_tweet in x_train]

x_test_norm = [normalise_tweet(raw_tweet) for raw_tweet in x_test]

In [None]:
# Spot-check a few normalised training tweets.
x_train_norm[10:20]

['Oh thank GOD our entire office email system is down ... the day of a big event . Santa you know JUST what to get me for xmas .',
 'But instead I am scrolling through Facebook Instagram and Twitter for hours on end accomplishing nothing .',
 'tagged_user no he bloody is not I was upstairs getting changed !',
 "Cold or warmth both suffuse one's cheeks with pink colour tone ... Do you understand the underlying difference and its texture ?",
 'Just great when you are mobile bill arrives by text',
 'crushes are great until you realize they will never be interested in you . p',
 'Buffalo sports media is smarter than all of us . Where else can you get the quality insight offered by Harrington and Busgaglia .',
 'I guess my cat also lost digit pounds when she went to the vet after I have been feeding her a few times a day . Eating food WorkingOut',
 'tagged_user tagged_user Rosenthal trading a SP for a defense only SS ? Brilliant trade .',
 'But tagged_user was trying to find us and my batte

### Load pre-trained emotion classifier (EC_RoBERTa_nonorm) - ISEAR

In [None]:
import ktrain
from ktrain import text

# Load the saved ktrain predictor of the emotion classifier trained on ISEAR data.
emotion_predictor = ktrain.load_predictor('/content/drive/MyDrive/TeamLab/my_models/EC_RoBERTa_nonorm')

In [None]:
# Display the predictor object to confirm that loading succeeded.
emotion_predictor

<ktrain.text.predictor.TextPredictor at 0x7f70d0121d90>

In [None]:
# Make predictions and store them in list (one emotion label per training tweet)
# NOTE(review): the model name ("nonorm") suggests it was trained on
# non-normalised text, while the input here is the normalised tweets --
# confirm that this mismatch is intended.
y_pred_emotion_train = emotion_predictor.predict(x_train_norm)

In [None]:
# Sanity check: the predictions come back as a list, and there should be
# exactly one prediction per training label.
print(type(y_pred_emotion_train))
print(len(y_pred_emotion_train))
print(len(y_train))

<class 'list'>
3834
3834


In [None]:
# Look at the first few predicted emotion labels.
y_pred_emotion_train[:5]

['joy', 'anger', 'joy', 'fear', 'shame']

In [None]:
# labels (ISEAR) that were actually predicted for at least one training tweet;
# a class the model never predicts on this data is NOT part of this set
emotion_labels = set(y_pred_emotion_train)

In [None]:
# Keep the predicted emotion of every tweet whose gold irony label is 1,
# in the original tweet order.
emotions_of_ironic_tweets = [
    y_pred_emotion_train[position]
    for position, irony in enumerate(y_train)
    if irony == 1
]

### Load pre-trained emotion classifier (EC_RoBERTa_TweetEval) - TweetEval

In [None]:
import ktrain
from ktrain import text

# Load the saved ktrain predictor of the emotion classifier trained on TweetEval data.
emotion_predictor_TE = ktrain.load_predictor('/content/drive/MyDrive/TeamLab/my_models/EC_RoBERTa_TweetEval')

In [None]:
# Display the predictor object to confirm that loading succeeded.
emotion_predictor_TE

<ktrain.text.predictor.TextPredictor at 0x7f70cff79990>

In [None]:
# Make predictions and store them in list (one emotion label per training tweet);
# the input is the same normalised training set used for the ISEAR model.
y_pred_emotion_train_TE = emotion_predictor_TE.predict(x_train_norm)

In [None]:
# Sanity check: the predictions come back as a list, and there should be
# exactly one prediction per training label.
print(type(y_pred_emotion_train_TE))
print(len(y_pred_emotion_train_TE))
print(len(y_train))

<class 'list'>
3834
3834


In [None]:
# Look at the first few predicted emotion labels.
y_pred_emotion_train_TE[:5]

['joy', 'anger', 'joy', 'sadness', 'joy']

In [None]:
# labels (TweetEval) that were actually predicted for at least one training
# tweet; a class the model never predicts on this data is NOT part of this set
emotion_labels_TE = set(y_pred_emotion_train_TE)

In [None]:
# Keep the predicted emotion of every tweet whose gold irony label is 1,
# in the original tweet order.
emotions_of_ironic_tweets_TE = [
    y_pred_emotion_train_TE[position]
    for position, irony in enumerate(y_train)
    if irony == 1
]

### Results

In [None]:
# Count how often each emotion was predicted for an ironic tweet (ISEAR model).
irony_emo_dict = {}
for emotion in emotions_of_ironic_tweets:
    irony_emo_dict[emotion] = irony_emo_dict.get(emotion, 0) + 1

irony_emo_dict

{'anger': 459,
 'disgust': 173,
 'fear': 225,
 'guilt': 92,
 'joy': 564,
 'sadness': 125,
 'shame': 273}

In [None]:
# Count how often each emotion was predicted for an ironic tweet (TweetEval model).
irony_emo_dict_TE = {}
for emotion in emotions_of_ironic_tweets_TE:
    irony_emo_dict_TE[emotion] = irony_emo_dict_TE.get(emotion, 0) + 1

irony_emo_dict_TE

{'anger': 421, 'joy': 1001, 'optimism': 275, 'sadness': 214}

In [None]:
# calculate percentages: share of ironic tweets per predicted emotion (ISEAR)
tot_irony_tweets = y_train.count(1)

# Sorted iteration keeps the printed order identical between runs; the
# iteration order of a set of strings is not stable across kernel restarts.
for label in sorted(emotion_labels):
    # emotion_labels is built from the predictions for ALL training tweets, so
    # a label may never occur among the ironic ones: count it as 0 instead of
    # raising a KeyError.
    num_label = irony_emo_dict.get(label, 0)
    # Guard against a dataset without any ironic tweet.
    p = round((num_label / tot_irony_tweets * 100), 2) if tot_irony_tweets else 0.0
    print("irony +", label,":", p, "%")

irony + joy : 29.51 %
irony + sadness : 6.54 %
irony + fear : 11.77 %
irony + disgust : 9.05 %
irony + shame : 14.29 %
irony + guilt : 4.81 %
irony + anger : 24.02 %


In [None]:
# Share of ironic tweets per predicted emotion (TweetEval).
# Sorted iteration keeps the printed order identical between runs; the
# iteration order of a set of strings is not stable across kernel restarts.
for label in sorted(emotion_labels_TE):
    # emotion_labels_TE is built from the predictions for ALL training tweets,
    # so a label may never occur among the ironic ones: count it as 0 instead
    # of raising a KeyError.
    num_label = irony_emo_dict_TE.get(label, 0)
    # Guard against a dataset without any ironic tweet.
    p = round((num_label / tot_irony_tweets * 100), 2) if tot_irony_tweets else 0.0
    print("irony +", label, ":", p, "%")

irony + optimism : 14.39 %
irony + joy : 52.38 %
irony + anger : 22.03 %
irony + sadness : 11.2 %


Irony often appears together with joy and anger. This seemingly contradictory pattern may be due to the fact that our definition of irony does not distinguish between irony and sarcasm. Sarcasm is thought to be more direct, more crude and to carry the intent to cause verbal harm, while irony is a more innocent form of humour.