## Sentiment Analysis Score

Predict a sentiment polarity score with a deep learning model for every query listed in the relations file, then summarise the scores per query and per emotion

In [2]:
# Make the parent of the working directory importable by appending it to
# the module search path (sys.path).

import os
import sys
from pathlib import Path

project_root = Path(os.path.abspath('')).joinpath('../').resolve()
sys.path.append(project_root.as_posix())

## Load Tokenizer

Import and load the tokenizer from a `.pickle` file

In [3]:
import pickle
from pathlib import Path

In [4]:
# Read the pickled tokenizer object from the sentiment140 dataset directory.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
tokenizer_file = Path('../datasets/sentiment140/tokenizer.pickle').resolve()
with open(tokenizer_file, 'rb') as file:
    tokenizer = pickle.load(file)

## Load Model

Load the sentiment analysis model, using the saved weights

In [5]:
from tensorflow.keras.layers import Input, Embedding, GRU
from tensorflow.keras.layers import Dropout, GlobalMaxPooling1D
from tensorflow.keras.layers import Bidirectional, Dense
from tensorflow.keras.models import Sequential

In [6]:
# Hyper-parameters used to rebuild the network before its weights are loaded.
# NOTE(review): presumably these must match the values used when the weights
# were saved, otherwise load_weights will fail on a shape mismatch; confirm.

# Number of embedding rows needed to cover every known word index, plus one
# extra slot (Keras tokenizers reserve index 0).
vocab_size = len(tokenizer.word_index) + 1
# `num_words` is None when the tokenizer was created without a vocabulary cap;
# min(None, int) would raise TypeError, so fall back to the full vocabulary.
input_dim = (
    vocab_size
    if tokenizer.num_words is None
    else min(tokenizer.num_words, vocab_size)
)
embedding_dim = 200
input_length = 100
gru_units = 128
gru_dropout = 0.1
recurrent_dropout = 0.1
dropout = 0.1

In [7]:
# Assemble the network as a single layer list:
# embedding -> bidirectional GRU (full sequence) -> max-pool over time
# -> small dense layer with dropout -> single sigmoid output.
model = Sequential([
    Embedding(
        input_dim=input_dim,
        output_dim=embedding_dim,
        input_shape=(input_length,),
    ),
    Bidirectional(GRU(
        gru_units,
        return_sequences=True,
        dropout=gru_dropout,
        recurrent_dropout=recurrent_dropout,
    )),
    GlobalMaxPooling1D(),
    Dense(32, activation='relu'),
    Dropout(dropout),
    Dense(1, activation='sigmoid'),
])

In [8]:
# Restore the saved parameters into the freshly built model.
weights_path = Path('../models/sentiment_analysis/gru_model.h5').resolve()
weights_file = weights_path.as_posix()
model.load_weights(weights_file)

## Load Query Relations

Load the relations between queries and emotions from a `.json` file

In [9]:
import json

In [10]:
# Load the mapping that is later indexed by query to obtain its emotion label.
# The path is resolved to an absolute one, like the other paths in this notebook.
relations_path = Path('../query_relations.json').resolve()
# JSON text is UTF-8; pass the encoding explicitly so the result does not
# depend on the platform's default locale encoding.
with relations_path.open('r', encoding='utf-8') as file:
    relations = json.load(file)

## Predict polarity

Predict the polarity of the texts, using the sentiment analysis model

In [14]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nlp import preprocess
from tqdm import tqdm
import pandas as pd
import numpy as np
import re

In [15]:
# Directory whose entries are each read as a CSV dataset and scored by the model.
dataset_dir = Path('../datasets/tweepy').resolve()

In [17]:
# Model scores grouped by emotion. Each value is a list of per-file arrays
# that is joined into a single array once the loop has finished.
score_chunks = {}

# Column-oriented summary with one entry per scored query file.
query_dict = {
    'query': [],
    'mean': [],
    'max': [],
    'min': [],
    'std': [],
    'count': [],
    'emotion': []
}

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of the summary tables reproducible between runs.
dir_files = sorted(os.listdir(dataset_dir))

for filename in tqdm(dir_files):
    # The query is embedded in the file name: either a hashtag (up to the
    # first '.') or a token wrapped in colons.
    found_queries = re.findall(r'(#[^.]+|:.+:)', filename)
    if not found_queries:
        # Entry without a query in its name (e.g. a hidden file or checkpoint
        # directory): skip it instead of failing on read_csv or on `[0]`.
        continue
    query = found_queries[0]

    dataset = pd.read_csv(os.path.join(dataset_dir, filename))
    cleaned_texts = preprocess(dataset.text, quiet=True)

    predict_sequences = [text.split() for text in cleaned_texts]
    if not predict_sequences:
        # Nothing to score; np.amax / np.amin raise ValueError on empty input.
        continue

    list_tokenized_predict = tokenizer.texts_to_sequences(predict_sequences)
    # Pad/truncate to the sequence length the model was built with instead of
    # repeating the literal 100.
    x_predict = pad_sequences(list_tokenized_predict, maxlen=input_length)

    result = model.predict(x_predict)

    # A query missing from the relations file raises KeyError on purpose:
    # that is a configuration error the user has to fix.
    emotion = relations[query]
    query_dict['query'].append(query)
    query_dict['mean'].append(np.mean(result))
    query_dict['max'].append(np.amax(result))
    query_dict['min'].append(np.amin(result))
    query_dict['count'].append(len(dataset))
    query_dict['std'].append(np.std(result))
    query_dict['emotion'].append(emotion)

    score_chunks.setdefault(emotion, []).append(result)

# One array of scores per emotion, concatenated once rather than re-copied on
# every file.
data_dict = {
    emotion_label: np.concatenate(chunks)
    for emotion_label, chunks in score_chunks.items()
}

100%|██████████| 5/5 [01:05<00:00, 13.03s/it]


## Print Results

Display the score statistics for each query, grouped by emotion, followed by the statistics aggregated per emotion

In [18]:
# Show one table per emotion, in order of first appearance, containing the
# summary rows of every query mapped to that emotion.
df = pd.DataFrame(data=query_dict)
for emotion in pd.unique(df['emotion']):
    emotion_mask = df['emotion'] == emotion
    display(df.loc[emotion_mask])

Unnamed: 0,query,mean,max,min,std,count,emotion
0,#angry,0.324863,0.960293,0.01123,0.243057,455,anger


Unnamed: 0,query,mean,max,min,std,count,emotion
1,#excited,0.91503,0.998822,0.093613,0.120952,1926,joy
3,#joy,0.840753,0.998868,0.013331,0.189263,7673,joy


Unnamed: 0,query,mean,max,min,std,count,emotion
2,#fear,0.481924,0.988985,0.024805,0.205533,2041,fear


Unnamed: 0,query,mean,max,min,std,count,emotion
4,#sad,0.084684,0.946671,0.001595,0.1204,7087,sadness


In [19]:
# Summary statistics over all scores pooled per emotion, one row per emotion.
summary_columns = ('emotion', 'mean', 'max', 'min', 'std', 'count')
emotion_dict = {column: [] for column in summary_columns}

for emotion, result in data_dict.items():
    row = (
        emotion,
        np.mean(result),
        np.amax(result),
        np.amin(result),
        np.std(result),
        len(result),
    )
    # Append each statistic to its column, keeping the columns aligned.
    for column, value in zip(summary_columns, row):
        emotion_dict[column].append(value)

emotion_df = pd.DataFrame(data=emotion_dict)
display(emotion_df)

Unnamed: 0,emotion,mean,max,min,std,count
0,anger,0.324863,0.960293,0.01123,0.243057,455
1,joy,0.855656,0.998868,0.013331,0.180149,9599
2,fear,0.481924,0.988985,0.024805,0.205533,2041
3,sadness,0.084684,0.946671,0.001595,0.1204,7087
