## Predict Deep Learning Model

Predict the sentiment analysis label, using a deep learning model

In [1]:
# Add project path to the PYTHONPATH

import os
import sys
from pathlib import Path

sys.path.append(Path(os.path.join(os.path.abspath(''), '../')).resolve().as_posix())

In [2]:
import pickle
import pandas as pd
from pathlib import Path

In [3]:
model_weights_path = Path('../models/model_weights.h5').resolve()
dataset_dir = Path('../datasets/tweepy').resolve()
tokenizer_file = Path('../datasets/sentiment140/tokenizer.pickle').resolve()

In [4]:
with tokenizer_file.open('rb') as file:
    tokenizer = pickle.load(file)

In [5]:
from sentiment_analysis.models.lstm_model import lstm_model

In [6]:
input_dim = min(tokenizer.num_words, len(tokenizer.word_index) + 1)
model = lstm_model(100, input_dim, embedding_dim=200)
model.load_weights(model_weights_path.as_posix())

W0712 15:29:57.347749 4454184384 deprecation.py:506] From /Users/rmohashi/miniconda3/envs/emodata/lib/python3.6/site-packages/tensorflow/python/keras/initializers.py:119: calling RandomUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0712 15:29:57.372267 4454184384 deprecation.py:506] From /Users/rmohashi/miniconda3/envs/emodata/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0712 15:29:57.380251 4454184384 deprecation.py:506] From /Users/rmohashi/miniconda3/envs/emodata/lib/python3.6/site-packages/tensorflow/python/ops/init_op

In [7]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nlp.utils import preprocess
from tqdm import tqdm
import numpy as np
import re
import json

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/rmohashi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [8]:
relations_path = Path('../sentiment_analysis/query_relations.json')
with relations_path.open('r') as file:
    relations = json.load(file)

In [9]:
data_dict = {
    'query': [],
    'mean': [],
    'max': [],
    'min': [],
    'std': [],
    'count': [],
    'emotion': []
}

dir_files = os.listdir(dataset_dir)

with tqdm(total=len(dir_files)) as t:
    for filename in dir_files:
        dataset = pd.read_csv(os.path.join(dataset_dir, filename))
        cleaned_texts = preprocess(dataset.text, quiet=True)

        query = re.findall(r'(#[^.]+|:.+:)', filename)[0]
        emotion = relations[query]

        predict_sequences = [text.split() for text in cleaned_texts]
        list_tokenized_predict = tokenizer.texts_to_sequences(predict_sequences)
        x_predict = pad_sequences(list_tokenized_predict, maxlen=100)

        result = model.predict(x_predict)

        data_dict['query'].append(query)
        data_dict['mean'].append(np.mean(result))
        data_dict['max'].append(np.amax(result))
        data_dict['min'].append(np.amin(result))
        data_dict['count'].append(len(dataset))
        data_dict['std'].append(np.std(result))
        
        data_dict['emotion'].append(emotion)
        
        t.update()

100%|██████████| 19/19 [02:43<00:00, 12.48s/it]


In [10]:
df = pd.DataFrame(data=data_dict)
for emotion in df.emotion.unique():
    display(df[df.emotion == emotion])

Unnamed: 0,query,mean,max,min,std,count,emotion
0,#afraid,0.41844,0.888443,0.007437,0.238047,174,fear
3,#anxious,0.450127,0.967831,0.00996,0.27505,392,fear
4,#scary,0.464922,0.967448,0.005703,0.23102,2230,fear
9,#scared,0.184941,0.837783,0.003517,0.164457,609,fear
11,#fear,0.469348,0.983742,0.013291,0.232901,2941,fear
13,:fearful_face:,0.462186,0.994094,0.005427,0.257716,3841,fear
14,#worried,0.162668,0.810234,0.003319,0.161735,175,fear
16,:anxious_face_with_sweat:,0.421906,0.985918,0.004652,0.26765,4872,fear


Unnamed: 0,query,mean,max,min,std,count,emotion
1,:pensive_face:,0.428619,0.98484,0.002427,0.284709,4692,sadness
8,#sad,0.061617,0.877649,0.001285,0.095459,10052,sadness
15,:crying_face:,0.393674,0.989281,0.002653,0.289744,3818,sadness
18,#depressed,0.084354,0.806006,0.003379,0.111074,676,sadness


Unnamed: 0,query,mean,max,min,std,count,emotion
2,#mad,0.365434,0.963891,0.00416,0.249319,1202,angry
6,#angry,0.246035,0.936236,0.004232,0.225551,702,angry
10,#furious,0.437734,0.962447,0.023994,0.280857,108,angry
12,#pissed,0.166272,0.953744,0.011033,0.15316,440,angry
17,:pouting_face:,0.423584,0.987128,0.003748,0.258317,14350,angry


Unnamed: 0,query,mean,max,min,std,count,emotion
5,#joy,0.806676,0.996234,0.040174,0.188624,5991,joy
7,#excited,0.88818,0.995495,0.053203,0.137351,3995,joy
