## Predict Deep Learning Model

Run a trained deep learning sentiment-analysis model over the grouped datasets and summarize its predictions per query and per emotion.

In [1]:
# Add project path to the PYTHONPATH

import os
import sys
from pathlib import Path

# Append the parent of the current working directory to the import path so
# that project modules can be imported from this notebook.
_project_root = Path(os.path.abspath('')) / '..'
sys.path.append(_project_root.resolve().as_posix())

In [2]:
import pickle
import pandas as pd
from pathlib import Path

In [11]:
# Locations of the model weights, the grouped datasets and the pickled
# tokenizer. The defaults are the paths this notebook was originally run with;
# set the environment variables below to point at your own copies without
# having to edit the notebook.
path_str = os.environ.get(
    'SENTIMENT_MODEL_WEIGHTS',
    '/home/rmohashi/hd/tweet-emotion-recognition/models/sentiment_analysis/checkpoints/CNN_100_30/cnn_100_30_02-0.8114.h5',
)
model_weights_path = Path(path_str).resolve()
dataset_dir = Path(
    os.environ.get('SENTIMENT_DATASET_DIR', '../datasets/grouped')
).resolve()
tokenizer_file = Path(
    os.environ.get('SENTIMENT_TOKENIZER_FILE', '/home/rmohashi/hd/sentiment140/tokenizer.pickle')
).resolve()

In [12]:
# Restore the tokenizer object from its pickle file.
# NOTE: unpickling can execute arbitrary code, so only load a tokenizer file
# that comes from a trusted source.
tokenizer = pickle.loads(tokenizer_file.read_bytes())

In [13]:
from sentiment_analysis.models.cnn_model import cnn_model

In [14]:
# Vocabulary size handed to the model: the tokenizer's `num_words` cap, but
# never more than the number of known words + 1. When `num_words` is None,
# `min()` would raise a TypeError under Python 3, so fall back to the full
# vocabulary size in that case.
vocab_size = len(tokenizer.word_index) + 1
num_words = tokenizer.num_words
input_dim = vocab_size if num_words is None else min(num_words, vocab_size)

# Build the CNN and restore the trained weights from the checkpoint.
# NOTE(review): 30 and embedding_dim=100 presumably have to match the settings
# the checkpoint was trained with (its name contains "100_30") - confirm.
model = cnn_model(30, input_dim, None, embedding_dim=100)
model.load_weights(model_weights_path.as_posix())

In [15]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nlp.utils import preprocess
from tqdm import tqdm
import numpy as np
import re
import json

In [16]:
# Load the query -> emotion lookup table from its JSON file.
relations_path = Path('../query_relations.json')
relations = json.loads(relations_path.read_text())

In [17]:
# Per-query summary statistics of the model output: one entry per dataset file.
query_dict = {
    'query': [],
    'mean': [],
    'max': [],
    'min': [],
    'std': [],
    'count': [],
    'emotion': []
}

# Prediction arrays collected per emotion. They are joined once after the loop
# instead of re-copying an ever-growing array for every file.
emotion_chunks = {}

dir_files = os.listdir(dataset_dir)

for filename in tqdm(dir_files):
    # The query (a "#hashtag" or a ":emoji_name:") is taken from the file name.
    query_match = re.search(r'(#[^.]+|:.+:)', filename)
    if query_match is None:
        # Not a query dataset (e.g. a hidden file or a checkpoint directory):
        # skip it instead of failing on the read or on an empty match list.
        tqdm.write('Skipping {!r}: no query found in file name'.format(filename))
        continue
    query = query_match.group(1)

    # Look the emotion up before the expensive work so that a query missing
    # from the relations file fails fast (still a KeyError, as before).
    emotion = relations[query]

    dataset = pd.read_csv(os.path.join(dataset_dir, filename))
    cleaned_texts = preprocess(dataset.text, quiet=True)

    # Tokenize the cleaned texts and pad them to the length used with the model.
    predict_sequences = [text.split() for text in cleaned_texts]
    list_tokenized_predict = tokenizer.texts_to_sequences(predict_sequences)
    x_predict = pad_sequences(list_tokenized_predict, maxlen=30)

    result = model.predict(x_predict)

    query_dict['query'].append(query)
    query_dict['mean'].append(np.mean(result))
    query_dict['max'].append(np.amax(result))
    query_dict['min'].append(np.amin(result))
    query_dict['count'].append(len(dataset))
    query_dict['std'].append(np.std(result))
    query_dict['emotion'].append(emotion)

    emotion_chunks.setdefault(emotion, []).append(result)

# All predictions per emotion, in the order the files were processed.
data_dict = {
    label: np.concatenate(chunks)
    for label, chunks in emotion_chunks.items()
}

100%|██████████| 32/32 [01:18<00:00,  2.37s/it]


In [18]:
# Turn the per-query statistics into a table and show one sub-table for each
# emotion, in order of first appearance.
df = pd.DataFrame(query_dict)
for emotion in df['emotion'].unique():
    emotion_rows = df.loc[df['emotion'] == emotion]
    display(emotion_rows)

Unnamed: 0,query,mean,max,min,std,count,emotion
0,:crying_face:,0.389819,0.999176,0.000167,0.294897,18443,sadness
5,:pensive_face:,0.415428,0.998604,0.000692,0.28615,19825,sadness
12,#sad,0.294079,0.998038,0.002275,0.276795,13181,sadness
14,#depression,0.345986,0.991077,0.003763,0.269056,435,sadness
17,:loudly_crying_face:,0.486483,0.997916,0.000692,0.298629,14866,sadness
21,#depressed,0.258416,0.998454,0.001319,0.243969,2005,sadness
27,:broken_heart:,0.355373,0.997583,0.000295,0.280944,18035,sadness


Unnamed: 0,query,mean,max,min,std,count,emotion
1,:worried_face:,0.412333,0.999603,0.000692,0.273456,15213,fear
10,#fear,0.515382,0.992322,0.015237,0.249707,3459,fear
11,#scared,0.427396,0.992474,0.009433,0.266837,426,fear
16,#worried,0.368264,0.995939,0.007296,0.261117,706,fear
19,#anxious,0.420223,0.974009,0.005944,0.28285,160,fear
24,:anxious_face_with_sweat:,0.428341,0.998668,0.001314,0.274674,17588,fear
25,#scary,0.487588,0.996501,0.014662,0.265304,699,fear
28,:face_screaming_in_fear:,0.538126,0.998601,0.002501,0.268814,12491,fear
29,:fearful_face:,0.472405,0.997369,0.001314,0.269313,16647,fear


Unnamed: 0,query,mean,max,min,std,count,emotion
2,:pouting_face:,0.428285,0.998537,0.003287,0.265999,19650,anger
9,:angry_face:,0.443163,0.997605,0.00476,0.268354,17977,anger
13,#mad,0.418193,0.995599,0.013319,0.271962,490,anger
15,#furious,0.359501,0.937911,0.010752,0.246785,85,anger
18,:face_with_symbols_on_mouth:,0.396049,0.998876,0.001594,0.259824,19658,anger
20,:face_with_steam_from_nose:,0.500348,0.998836,0.000887,0.283691,17000,anger
22,#pissed,0.301479,0.987701,0.005511,0.227096,1779,anger
31,#angry,0.340126,0.989097,0.003783,0.24013,1477,anger


Unnamed: 0,query,mean,max,min,std,count,emotion
3,:grinning_face_with_smiling_eyes:,0.693964,0.99947,0.001511,0.258356,18609,joy
4,:red_heart:,0.772531,0.999091,0.027825,0.24749,1152,joy
6,#excited,0.784173,0.999571,0.011524,0.221325,6417,joy
7,#joy,0.769934,0.99888,0.048166,0.231173,491,joy
8,:beaming_face_with_smiling_eyes:,0.702905,0.999375,0.008177,0.253736,16918,joy
23,:smiling_face_with_smiling_eyes:,0.784164,0.999633,0.005399,0.235993,19530,joy
26,#happiness,0.788945,0.999444,0.017497,0.210058,2641,joy
30,:face_with_tears_of_joy:,0.564453,0.998985,0.001483,0.265891,17155,joy


In [19]:
# Aggregate statistics over all predictions collected for each emotion.
# Each column name maps to the function that computes it from one emotion's
# prediction array; the dict order is the column order of the final table.
summaries = {
    'mean': np.mean,
    'max': np.amax,
    'min': np.amin,
    'std': np.std,
    'count': len,
}

emotion_dict = {'emotion': list(data_dict.keys())}
for column, summarize in summaries.items():
    emotion_dict[column] = [summarize(values) for values in data_dict.values()]

emotion_df = pd.DataFrame(emotion_dict)
display(emotion_df)

Unnamed: 0,emotion,mean,max,min,std,count
0,sadness,0.387273,0.999176,0.000167,0.293301,86790
1,fear,0.460389,0.999603,0.000692,0.274546,67389
2,anger,0.434587,0.998876,0.000887,0.271277,78116
3,joy,0.701787,0.999633,0.001483,0.261929,82913
