## Predict Deep Learning Model

Predict sentiment scores for each collected query dataset using a trained deep learning (GRU) model, then summarise the predictions per query and per emotion.

In [1]:
# Add project path to the PYTHONPATH

import os
import sys
from pathlib import Path

# Append the parent of the current working directory to the import search
# path, so packages located one level above this notebook can be imported.
sys.path.append(Path.cwd().joinpath('../').resolve().as_posix())

In [2]:
import pickle
import pandas as pd
from pathlib import Path

In [3]:
# Input locations, each resolved to an absolute path relative to the
# notebook's working directory.

# Pickled tokenizer.
tokenizer_file = Path('../datasets/sentiment140/tokenizer.pickle').resolve()

# Directory holding the dataset files to run predictions on.
dataset_dir = Path('../datasets/tweepy').resolve()

# Saved weights of the sentiment analysis model.
model_weights_path = Path('../models/sentiment_analysis/model_weights.h5').resolve()

In [4]:
# Load the pickled tokenizer from disk.
# NOTE: unpickling can execute arbitrary code, so this file must come from a
# trusted source.
tokenizer = pickle.loads(tokenizer_file.read_bytes())

In [5]:
from sentiment_analysis.models.gru_model import gru_model

In [6]:
# Embedding input size: the number of words known to the tokenizer, plus one
# (presumably because a Keras Tokenizer reserves index 0 and numbers words
# from 1 -- confirm), capped at `num_words` when the tokenizer has a limit.
# `num_words` may be None (no limit); min(None, int) raises TypeError, so the
# cap is only applied when one is actually set.
vocabulary_size = len(tokenizer.word_index) + 1
if tokenizer.num_words is None:
    input_dim = vocabulary_size
else:
    input_dim = min(tokenizer.num_words, vocabulary_size)

# NOTE(review): 100 is presumably the input sequence length -- it equals the
# `maxlen` used when padding the prediction inputs later in this notebook;
# confirm against gru_model's signature. The architecture arguments have to
# match the ones used when the weights were saved for load_weights to succeed.
model = gru_model(100, input_dim, embedding_dim=200)
model.load_weights(model_weights_path.as_posix())

W0716 17:43:21.062131 140452262287168 deprecation.py:506] From /home/rmohashi/anaconda3/envs/emodata/lib/python3.6/site-packages/tensorflow/python/keras/initializers.py:119: calling RandomUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0716 17:43:21.076073 140452262287168 deprecation.py:506] From /home/rmohashi/anaconda3/envs/emodata/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0716 17:43:21.079942 140452262287168 deprecation.py:506] From /home/rmohashi/anaconda3/envs/emodata/lib/python3.6/site-packages/tensorflow/python/op

In [7]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nlp.utils import preprocess
from tqdm import tqdm
import numpy as np
import re
import json

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/rmohashi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [8]:
# Mapping from each query (as it appears in a dataset filename) to the
# emotion it stands for. Resolved to an absolute path, like the other input
# paths in this notebook.
relations_path = Path('../sentiment_analysis/query_relations.json').resolve()

# JSON is UTF-8 by specification; read it as such instead of relying on the
# platform's locale-dependent default encoding.
with relations_path.open('r', encoding='utf-8') as file:
    relations = json.load(file)

In [9]:
# Extracts the query embedded in a dataset filename: either a hashtag
# (`#` followed by everything up to the first dot) or a `:...:` alias.
query_pattern = re.compile(r'(#[^.]+|:.+:)')

# Per-query summary statistics; one entry is appended to every column for
# each dataset file processed.
query_dict = {
    'query': [],
    'mean': [],
    'max': [],
    'min': [],
    'std': [],
    'count': [],
    'emotion': []
}

# os.listdir returns entries in arbitrary order, so sort them to keep the
# resulting tables reproducible between runs. Entries that are not regular
# files, or whose name carries no query (e.g. hidden files or checkpoint
# directories), are skipped up front; they would otherwise abort the whole
# run with an IndexError when the query is extracted.
dir_files = sorted(
    filename
    for filename in os.listdir(dataset_dir)
    if os.path.isfile(os.path.join(dataset_dir, filename))
    and query_pattern.search(filename)
)

# Raw model outputs grouped by emotion. Chunks are collected in lists and
# joined once after the loop, instead of re-copying the accumulated array
# for every file.
predictions_by_emotion = {}

for filename in tqdm(dir_files):
    dataset = pd.read_csv(os.path.join(dataset_dir, filename))
    cleaned_texts = preprocess(dataset.text, quiet=True)

    # First query token found in the filename.
    query = query_pattern.search(filename).group(1)

    # Tokenize each cleaned text and pad to a fixed length.
    # NOTE(review): maxlen=100 presumably has to equal the sequence length
    # the model was built with -- confirm.
    predict_sequences = [text.split() for text in cleaned_texts]
    list_tokenized_predict = tokenizer.texts_to_sequences(predict_sequences)
    x_predict = pad_sequences(list_tokenized_predict, maxlen=100)

    result = model.predict(x_predict)

    # A query missing from the relations file raises KeyError on purpose:
    # that is a configuration error which should not be silently ignored.
    emotion = relations[query]
    query_dict['query'].append(query)
    query_dict['mean'].append(np.mean(result))
    query_dict['max'].append(np.amax(result))
    query_dict['min'].append(np.amin(result))
    query_dict['count'].append(len(dataset))
    query_dict['std'].append(np.std(result))
    query_dict['emotion'].append(emotion)

    predictions_by_emotion.setdefault(emotion, []).append(result)

# emotion -> all predictions of every query mapped to that emotion.
data_dict = {
    emotion_name: np.concatenate(chunks)
    for emotion_name, chunks in predictions_by_emotion.items()
}

100%|██████████| 26/26 [01:52<00:00,  3.68s/it]


In [10]:
# Build the per-query statistics table and show it as one sub-table per
# emotion, in order of each emotion's first appearance.
df = pd.DataFrame(query_dict)
for emotion_label in df['emotion'].unique():
    display(df.loc[df['emotion'] == emotion_label])

Unnamed: 0,query,mean,max,min,std,count,emotion
0,#depressed,0.104396,0.893191,0.001917,0.133383,687,sadness
2,:broken_heart:,0.40259,0.993368,0.001844,0.295478,1995,sadness
9,#sad,0.064449,0.964415,0.001021,0.098156,3938,sadness
18,:pensive_face:,0.423263,0.995262,0.001891,0.286538,1904,sadness
20,:crying_face:,0.3952,0.995921,0.002083,0.300229,3930,sadness
23,#depression,0.210459,0.992445,0.002672,0.210201,1989,sadness


Unnamed: 0,query,mean,max,min,std,count,emotion
1,#scary,0.374211,0.985347,0.004814,0.232148,3212,fear
6,#scared,0.290727,0.982479,0.010327,0.219454,953,fear
7,#fear,0.48011,0.994975,0.00945,0.226392,3584,fear
17,#anxious,0.442346,0.994788,0.001821,0.297688,535,fear
21,:fearful_face:,0.475567,0.996809,0.003123,0.269067,3625,fear
22,:anxious_face_with_sweat:,0.44076,0.992872,0.001507,0.282484,3946,fear
24,#worried,0.203122,0.907871,0.004884,0.210313,292,fear


Unnamed: 0,query,mean,max,min,std,count,emotion
3,:angry_face:,0.439549,0.994253,0.005171,0.272184,2526,angry
5,#angry,0.323234,0.962728,0.004849,0.236378,924,angry
8,:face_with_steam_from_nose:,0.5089,0.995374,0.005851,0.284992,1903,angry
10,:pouting_face:,0.427416,0.990881,0.00599,0.272811,3833,angry
12,#mad,0.349306,0.980547,0.001863,0.25295,1445,angry
13,:face_with_symbols_on_mouth:,0.399186,0.991673,0.003462,0.266252,2461,angry
15,#pissed,0.240351,0.948839,0.008014,0.19219,720,angry
19,#furious,0.402804,0.913972,0.005914,0.266463,164,angry


Unnamed: 0,query,mean,max,min,std,count,emotion
4,#happiness,0.877895,0.998678,0.025082,0.162128,3990,joy
11,#joy,0.862353,0.998201,0.021556,0.166724,3905,joy
14,:red_heart:,0.755782,0.998661,0.006021,0.243652,1858,joy
16,#excited,0.896179,0.998899,0.052184,0.150301,3953,joy
25,:smiling_face_with_smiling_eyes:,0.771473,0.998685,0.011343,0.235738,1994,joy


In [11]:
# Summary statistics over all predictions belonging to each emotion.

# Output column -> function that reduces one emotion's predictions to a
# single value. The order here fixes the column order of the table.
reducers = {
    'mean': np.mean,
    'max': np.amax,
    'min': np.amin,
    'std': np.std,
    'count': len,
}

# One row per emotion, in the iteration order of `data_dict`.
emotion_dict = {'emotion': list(data_dict.keys())}
for column, reduce_fn in reducers.items():
    emotion_dict[column] = [reduce_fn(predictions) for predictions in data_dict.values()]

emotion_df = pd.DataFrame(emotion_dict)
display(emotion_df)

Unnamed: 0,emotion,mean,max,min,std,count
0,sadness,0.270465,0.995921,0.001021,0.282968,14443
1,fear,0.43097,0.996809,0.001507,0.261112,16147
2,angry,0.410843,0.995374,0.001863,0.27266,13976
3,joy,0.850665,0.998899,0.006021,0.189662,15700
