## Predict Deep Learning Model

Predict sentiment scores for the test datasets with a trained deep learning (GRU) model, then summarize the scores per query and per emotion.

In [1]:
# Add project path to the PYTHONPATH

import os
import sys
from pathlib import Path

# The project directory sits one level above the notebook's working directory;
# appending it to sys.path makes the project packages importable from here.
project_root = Path(os.path.abspath(''), '../').resolve()
sys.path.append(project_root.as_posix())

In [2]:
import pickle
import pandas as pd
from pathlib import Path

In [3]:
# Locations of the trained weights, the directory of datasets to score and the
# pickled tokenizer, each resolved to an absolute path from the working directory.
model_weights_path, dataset_dir, tokenizer_file = (
    Path(location).resolve()
    for location in (
        '../models/sentiment_analysis/model_weights.h5',
        '../datasets/test',
        '../datasets/sentiment140/tokenizer.pickle',
    )
)

In [4]:
# Restore the fitted tokenizer from disk.
# NOTE: unpickling can execute arbitrary code — only load tokenizer files from a trusted source.
tokenizer = pickle.loads(tokenizer_file.read_bytes())

In [5]:
from sentiment_analysis.models.gru_model import gru_model

In [6]:
# Number of distinct token ids the embedding layer must cover: one more than the
# number of indexed words (assuming a Keras-style tokenizer whose word_index
# starts at 1 — TODO confirm), limited by the tokenizer's word cap when one is set.
vocabulary_size = len(tokenizer.word_index) + 1
if tokenizer.num_words is None:
    # No cap configured: min(None, int) would raise TypeError, so use the full vocabulary.
    input_dim = vocabulary_size
else:
    input_dim = min(tokenizer.num_words, vocabulary_size)

# NOTE(review): the first argument (100) is presumably the input sequence length — it
# equals the maxlen passed to pad_sequences further down; confirm against gru_model.
model = gru_model(100, input_dim, embedding_dim=200)

# Load the trained weights into the freshly built architecture.
model.load_weights(model_weights_path.as_posix())

W0717 10:00:44.078151 4688356800 deprecation.py:506] From /Users/rmohashi/miniconda3/envs/emodata/lib/python3.6/site-packages/tensorflow/python/keras/initializers.py:119: calling RandomUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0717 10:00:44.106781 4688356800 deprecation.py:506] From /Users/rmohashi/miniconda3/envs/emodata/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0717 10:00:44.113732 4688356800 deprecation.py:506] From /Users/rmohashi/miniconda3/envs/emodata/lib/python3.6/site-packages/tensorflow/python/ops/init_op

In [7]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nlp.utils import preprocess
from tqdm import tqdm
import numpy as np
import re
import json

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/rmohashi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [8]:
# JSON lookup table used below to translate a query into its emotion label.
relations_path = Path('../sentiment_analysis/query_relations.json')
relations = json.loads(relations_path.read_text())

In [9]:
# Raw model predictions grouped by emotion; populated once every file has been scored.
data_dict = {}

# Column-oriented summary statistics: one entry per dataset file (i.e. per query).
query_dict = {
    'query': [],
    'mean': [],
    'max': [],
    'min': [],
    'std': [],
    'count': [],
    'emotion': []
}

# os.listdir returns entries in arbitrary order; sorting keeps the resulting
# tables in the same order on every run and every machine.
dir_files = sorted(os.listdir(dataset_dir))

# The query is encoded in the file name, either as a hashtag ('#' followed by
# everything up to the first dot) or as an emoji shortcode wrapped in colons.
query_pattern = re.compile(r'(#[^.]+|:.+:)')

# Keep only the files whose name carries a query. Anything else in the directory
# (e.g. hidden system files) is skipped up front instead of failing mid-run.
dataset_files = []
for filename in dir_files:
    query_match = query_pattern.search(filename)
    if query_match is None:
        continue
    dataset_files.append((filename, query_match.group(1)))

# Per-emotion list of prediction arrays; joined once after the loop so the
# accumulated array is not re-copied for every file.
predictions_by_emotion = {}

for filename, query in tqdm(dataset_files):
    dataset = pd.read_csv(os.path.join(dataset_dir, filename))
    cleaned_texts = preprocess(dataset.text, quiet=True)

    # Tokenize on whitespace, map tokens to ids and pad to the fixed length
    # passed to the model constructor.
    predict_sequences = [text.split() for text in cleaned_texts]
    list_tokenized_predict = tokenizer.texts_to_sequences(predict_sequences)
    x_predict = pad_sequences(list_tokenized_predict, maxlen=100)

    result = model.predict(x_predict)

    # Raises KeyError when the query has no entry in the relations file.
    emotion = relations[query]
    query_dict['query'].append(query)
    query_dict['mean'].append(np.mean(result))
    query_dict['max'].append(np.amax(result))
    query_dict['min'].append(np.amin(result))
    query_dict['count'].append(len(dataset))
    query_dict['std'].append(np.std(result))
    query_dict['emotion'].append(emotion)

    predictions_by_emotion.setdefault(emotion, []).append(result)

data_dict = {
    emotion_name: np.concatenate(chunks)
    for emotion_name, chunks in predictions_by_emotion.items()
}

100%|██████████| 8/8 [00:05<00:00,  1.57it/s]


In [10]:
# One table per emotion, listing the summary statistics of each of its queries.
df = pd.DataFrame(query_dict)
for emotion_label in df['emotion'].unique():
    rows_for_emotion = df.loc[df['emotion'] == emotion_label]
    display(rows_for_emotion)

Unnamed: 0,query,mean,max,min,std,count,emotion
0,:anxious_face_with_sweat:,0.428562,0.983642,0.004371,0.274272,199,fear
6,#worried,0.205504,0.879476,0.004883,0.210547,196,fear


Unnamed: 0,query,mean,max,min,std,count,emotion
1,#sad,0.073413,0.873629,0.002289,0.127914,200,sadness
2,:crying_face:,0.438269,0.996975,0.005851,0.296389,197,sadness


Unnamed: 0,query,mean,max,min,std,count,emotion
3,:red_heart:,0.770384,0.996633,0.042774,0.225747,200,joy
7,#joy,0.832007,0.997057,0.208914,0.152068,191,joy


Unnamed: 0,query,mean,max,min,std,count,emotion
4,:face_with_symbols_on_mouth:,0.40321,0.997371,0.010545,0.261377,194,angry
5,#pissed,0.230712,0.912333,0.008014,0.180684,200,angry


In [11]:
# Summary statistics over all predictions that share an emotion,
# computed column by column from the per-emotion prediction arrays.
emotion_names = list(data_dict)
emotion_results = [data_dict[name] for name in emotion_names]

emotion_dict = {
    'emotion': emotion_names,
    'mean': [np.mean(scores) for scores in emotion_results],
    'max': [np.amax(scores) for scores in emotion_results],
    'min': [np.amin(scores) for scores in emotion_results],
    'std': [np.std(scores) for scores in emotion_results],
    'count': [len(scores) for scores in emotion_results],
}

emotion_df = pd.DataFrame(emotion_dict)
display(emotion_df)

Unnamed: 0,emotion,mean,max,min,std,count
0,fear,0.31788,0.983642,0.004371,0.268948,395
1,sadness,0.254463,0.996975,0.002289,0.29174,397
2,joy,0.800486,0.997057,0.042774,0.195736,391
3,angry,0.315648,0.997371,0.008014,0.2401,394
