# Predict Emotion

The main objective of this notebook is to predict the emotions expressed in tweets using the trained emotion recognition model.

In [1]:
# Add project path to the PYTHONPATH

import os
import sys
from pathlib import Path

# os.path.abspath('') is the current working directory; one level up is added as the project path.
project_root = Path(os.path.abspath('')) / '..'
sys.path.append(project_root.resolve().as_posix())

In [2]:
import pickle

## Load Tokenizer

Load `.pickle` file with the tokenizer

In [3]:
# Deserialize the tokenizer object stored in the pickle file.
# pickle can execute arbitrary code while loading, so only use a trusted file here.
tokenizer_path = Path('../datasets/sentiment_analysis/tokenizer.pickle').resolve()
tokenizer = pickle.loads(tokenizer_path.read_bytes())

## Load Model

Load the trained emotion recognition model

In [4]:
from emotion_recognition.models.lstm_conv_model import lstm_conv_model

In [5]:
# input_dim is the smaller of the tokenizer's num_words and its vocabulary size plus one.
vocabulary_size = len(tokenizer.word_index) + 1
input_dim = min(tokenizer.num_words, vocabulary_size)

# Rebuild the network, then restore its weights from the .h5 file.
# NOTE(review): 100 equals the maxlen used when padding the sequences and 4 equals the
# number of emotion classes printed in the results -- presumably sequence length and
# class count; confirm against lstm_conv_model's signature.
model = lstm_conv_model(100, input_dim, 4, embedding_dim=500)
model_weights_path = Path('../models/emotion_recognition/model_weights.h5').resolve()
model.load_weights(model_weights_path.as_posix())

W0717 13:53:39.933859 4511860160 deprecation.py:506] From /Users/rmohashi/miniconda3/envs/emodata/lib/python3.6/site-packages/tensorflow/python/keras/initializers.py:119: calling RandomUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0717 13:53:39.995800 4511860160 deprecation.py:506] From /Users/rmohashi/miniconda3/envs/emodata/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0717 13:53:40.002933 4511860160 deprecation.py:506] From /Users/rmohashi/miniconda3/envs/emodata/lib/python3.6/site-packages/tensorflow/python/ops/init_op

## Load data

Load the tweets whose emotion labels will be predicted by the model

**data_path**: Path to the `.csv` file that will be used

In [6]:
import pandas as pd

In [7]:
# Read the tweets to be labelled and preview the first five rows.
data_path = Path('../datasets/predict/1151534845396430848-1150889591160881152_cyberpunk.csv')
data_path = data_path.resolve()
data = pd.read_csv(filepath_or_buffer=data_path)
data.head(n=5)

Unnamed: 0,id,date,user,text
0,1151534845396430848,2019-07-17 16:51:16,AboveUp,@DumbCerb Apparently it originated from the Cy...
1,1151534844008173570,2019-07-17 16:51:15,patrickkingart,@mitch_murder And a bonus since it just came u...
2,1151534735992262664,2019-07-17 16:50:50,Saerzion,@CyberpunkGame Would any of these character de...
3,1151534338124800000,2019-07-17 16:49:15,DenisLaFunk_,Art by Pixkefir #cyberpunk #artwork #character...
4,1151534322639200256,2019-07-17 16:49:11,AriadneAtNaxos,@AtheneTrek Hello! I’m currently writing a fan...


## Load Encoder

Load `.pickle` file with the encoder

In [8]:
# Deserialize the label encoder stored in the pickle file; its classes_ attribute
# is used later to turn class indices back into emotion names.
# pickle can execute arbitrary code while loading, so only use a trusted file here.
encoder_path = Path('../models/emotion_recognition/encoder.pickle').resolve()
encoder = pickle.loads(encoder_path.read_bytes())

## Preprocess data

Clean the tweet text, convert it to sequences with the loaded tokenizer, and pad every sequence to a length of 100

In [9]:
from nlp.utils import preprocess
from tensorflow.keras.preprocessing.sequence import pad_sequences

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/rmohashi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [10]:
# Overwrite the text column with its preprocessed version.
# NOTE(review): this mutates `data` in place, so re-running the cell preprocesses
# already-preprocessed text.
data['text'] = preprocess(data['text'])

# Split each tweet on whitespace before handing the token lists to the tokenizer.
sequences = [tweet.split() for tweet in data['text']]
list_tokenized = tokenizer.texts_to_sequences(sequences)

# Every sequence is brought to a fixed length of 100 for the model input.
x_data = pad_sequences(list_tokenized, maxlen=100)

Time to clean up: 1.04 sec


## Results

Predict the labels and summarize how the predictions are distributed across the emotion classes

In [11]:
import numpy as np

In [12]:
# Run the model on the padded sequences. The following cells treat y_pred as a
# 2-D array with one row per input sequence and one column per class: they sum
# over axis 0, take the argmax over axis 1, and index encoder.classes_ by column.
y_pred = model.predict(x_data)

In [13]:
# Average score of each class over all predictions, printed next to its label.
mean_scores = np.sum(y_pred, axis=0) / len(y_pred)
for class_index in range(len(mean_scores)):
    print(encoder.classes_[class_index] + ": " + str(mean_scores[class_index]))

angry: 0.30098617
fear: 0.25841323
joy: 0.3491762
sadness: 0.09142412


In [14]:
# Share of inputs whose highest-scoring class is each emotion.
y_pred_argmax = y_pred.argmax(axis=1)
data_len = len(y_pred_argmax)

# np.unique returns only the class ids that were actually predicted, so the label
# must be looked up by the class id itself. Using the enumerate position instead
# would print the wrong label whenever some class is never the argmax.
predicted_classes, class_counts = np.unique(y_pred_argmax, return_counts=True)
for class_id, count in zip(predicted_classes, class_counts):
    print(encoder.classes_[class_id] + ": " + str(int(count) / data_len))

angry: 0.33281893004115226
fear: 0.2463991769547325
joy: 0.3868312757201646
sadness: 0.033950617283950615
