## Predict Deep Learning Model

Predict the sentiment analysis label, using a deep learning model

In [1]:
# Add project path to the PYTHONPATH

import os
import sys
from pathlib import Path

sys.path.append(Path(os.path.join(os.path.abspath(''), '../')).resolve().as_posix())

In [2]:
import pickle
import pandas as pd
from pathlib import Path

In [3]:
path_str = '/home/rmohashi/hd/tweet-emotion-recognition/models/sentiment_analysis/checkpoints/LSTM_100_30_GloVe/lstm_100_30_03-0.8184.h5'
model_weights_path = Path(path_str).resolve()
dataset_dir = Path('../datasets/twitter-scraper').resolve()
tokenizer_file = Path('/home/rmohashi/hd/sentiment140/tokenizer.pickle').resolve()

In [4]:
with tokenizer_file.open('rb') as file:
    tokenizer = pickle.load(file)

In [5]:
from sentiment_analysis.models.lstm_model import lstm_model

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [6]:
input_dim = len(tokenizer.word_index) + 1
model = lstm_model(30, input_dim, None, embedding_dim=100)
model.load_weights(model_weights_path.as_posix())

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [7]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nlp.utils import preprocess
from tqdm import tqdm
import numpy as np
import re
import json

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/rmohashi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [8]:
relations_path = Path('../query_relations.json')
with relations_path.open('r') as file:
    relations = json.load(file)

In [9]:
data_dict = {}

query_dict = {
    'query': [],
    'mean': [],
    'max': [],
    'min': [],
    'std': [],
    'count': [],
    'emotion': []
}

dir_files = os.listdir(dataset_dir)

with tqdm(total=len(dir_files)) as t:
    for filename in dir_files:
        dataset = pd.read_csv(os.path.join(dataset_dir, filename))
        cleaned_texts = preprocess(dataset.text, quiet=True)

        query = re.findall(r'(#[^.]+|:.+:)', filename)[0]

        predict_sequences = [text.split() for text in cleaned_texts]
        list_tokenized_predict = tokenizer.texts_to_sequences(predict_sequences)
        x_predict = pad_sequences(list_tokenized_predict, maxlen=30)

        result = model.predict(x_predict)
        
        emotion = relations[query]
        query_dict['query'].append(query)
        query_dict['mean'].append(np.mean(result))
        query_dict['max'].append(np.amax(result))
        query_dict['min'].append(np.amin(result))
        query_dict['count'].append(len(dataset))
        query_dict['std'].append(np.std(result))
        query_dict['emotion'].append(emotion)

        if emotion in data_dict:
            data_dict[emotion] = np.concatenate([data_dict[emotion], result])
        else:
            data_dict[emotion] = result
        
        t.update()

100%|██████████| 24/24 [04:17<00:00, 10.65s/it]


In [10]:
df = pd.DataFrame(data=query_dict)
for emotion in df.emotion.unique():
    display(df[df.emotion == emotion])

Unnamed: 0,query,mean,max,min,std,count,emotion
0,:face_screaming_in_fear:,0.548185,0.998261,0.002083,0.291319,15284,fear
1,#worried,0.378944,0.993429,0.004501,0.287545,799,fear
5,:anxious_face_with_sweat:,0.433535,0.998288,0.001323,0.302745,16118,fear
16,:fearful_face:,0.489887,0.99725,0.001671,0.297598,17500,fear
22,#fear,0.546099,0.999367,0.003608,0.295192,8270,fear
23,:worried_face:,0.42703,0.997387,0.001582,0.304716,16170,fear


Unnamed: 0,query,mean,max,min,std,count,emotion
2,#depressed,0.258158,0.9973,0.001076,0.286234,2363,sadness
6,:loudly_crying_face:,0.486544,0.99914,0.001798,0.315879,16390,sadness
7,:crying_face:,0.385478,0.998847,0.001358,0.318509,16286,sadness
8,#sad,0.275516,0.999575,0.000771,0.302986,12887,sadness
11,:pensive_face:,0.413027,0.999302,0.001371,0.308612,18126,sadness
13,:broken_heart:,0.349612,0.997891,0.001377,0.306455,18825,sadness


Unnamed: 0,query,mean,max,min,std,count,emotion
3,:pouting_face:,0.424262,0.998282,0.001356,0.292892,17621,anger
9,:angry_face:,0.450032,0.998452,0.000986,0.297087,17331,anger
10,:face_with_steam_from_nose:,0.509272,0.998655,0.001377,0.306527,16333,anger
14,#pissed,0.298599,0.996117,0.002878,0.264178,1366,anger
17,:face_with_symbols_on_mouth:,0.388339,0.998416,0.001066,0.284264,19066,anger
20,#angry,0.379943,0.997759,0.003219,0.305013,1862,anger


Unnamed: 0,query,mean,max,min,std,count,emotion
4,#happiness,0.857517,0.999757,0.006958,0.207121,12870,joy
12,#excited,0.801466,0.999555,0.008384,0.232901,10323,joy
15,:beaming_face_with_smiling_eyes:,0.7113,0.999112,0.002184,0.269698,17752,joy
18,:smiling_face_with_smiling_eyes:,0.781852,0.999285,0.005255,0.252528,18846,joy
19,:face_with_tears_of_joy:,0.564255,0.998934,0.004052,0.285944,18314,joy
21,:grinning_face_with_smiling_eyes:,0.701784,0.999376,0.004433,0.275191,20482,joy


In [11]:
emotion_dict = {
    'emotion': [],
    'mean': [],
    'max': [],
    'min': [],
    'std': [],
    'count': []
}

for emotion, result in data_dict.items():
    emotion_dict['emotion'].append(emotion)
    emotion_dict['mean'].append(np.mean(result))
    emotion_dict['max'].append(np.amax(result))
    emotion_dict['min'].append(np.amin(result))
    emotion_dict['std'].append(np.std(result))
    emotion_dict['count'].append(len(result))
    
emotion_df = pd.DataFrame(data=emotion_dict)
display(emotion_df)

Unnamed: 0,emotion,mean,max,min,std,count
0,fear,0.48102,0.999367,0.001323,0.30306,74141
1,sadness,0.382682,0.999575,0.000771,0.317523,84877
2,anger,0.436439,0.998655,0.000986,0.298444,73579
3,joy,0.724023,0.999757,0.002184,0.275604,98587
