In [8]:
import numpy as np
import pandas as pd 
import os
from nltk.corpus import stopwords
from textblob import Word
import re
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')
import emoji
from nltk.corpus import stopwords


In [9]:
data = pd.read_csv(os.path.join(os.getcwd(),"../static/dataset/emoji_dataset.csv"))
data

Unnamed: 0,Text,Emotion
0,im feeling groggy and horrid,sadness
1,i could feel the muscles in my arches ankles a...,joy
2,i feel like but im not very fond of that word 💙,love
3,i have to move stop staring at the other ladie...,joy
4,i have this kind of life so my girlfriend woul...,sadness
...,...,...
19995,im feeling ive resolved to live a life of love...,joy
19996,i used feel frustrated all the time,anger
19997,im starting to feel more sociable again i actu...,joy
19998,i am feeling devastated the inner voice within...,sadness


In [10]:
data = data[['Text','Emotion']]

In [11]:
from sklearn.model_selection import train_test_split
train, validate, test = np.split(data.sample(frac=1, random_state=42), [int(.8*len(data)), int(0.9*len(data))])

In [12]:
train.shape

(16000, 2)

In [13]:
def de_repeat(text):
    pattern = re.compile(r"(.)\1{2,}")
    return pattern.sub(r"\1\1", text)

In [14]:
train['Text'] = train['Text'].apply(lambda x: ' '.join([item for item in str(x).split() if item not in stopwords.words('english')]))
train['Text'] = train['Text'].apply(lambda x: " ".join(x.lower() for x in x.split()))
train['Text'] = train['Text'].apply(lambda x: emoji.demojize(x))
train['Text'] = train['Text'].str.replace('[^\w\s]',' ')
train['Text'] = train['Text'].apply(lambda x: " ".join([Word(word).lemmatize() for word in x.split()]))
train['Text'] = train['Text'].apply(lambda x: " ".join(de_repeat(x) for x in x.split()))
train


Unnamed: 0,Text,Emotion
10650,ive reading blog year feel like shes faithful ...,joy
2041,im feeling craving naughty sweet snack choose,love
8668,im hoping theyll like new draft better time wo...,sadness
1114,crappy week still feeling agitated like day wa...,fear
13902,easily feel quite pressured routine really not...,fear
...,...,...
7382,feel bit funny actually,surprise
13492,met great people feeling may unintentionally o...,anger
10394,feel must remain faithful,joy
16865,havent felt like real good feeling welcomed op...,joy


In [15]:
validate['Text'] = validate['Text'].apply(lambda x: ' '.join([item for item in str(x).split() if item not in stopwords.words('english')]))
validate['Text'] = validate['Text'].apply(lambda x: " ".join(x.lower() for x in x.split()))
validate['Text'] = validate['Text'].apply(lambda x: emoji.demojize(x))
validate['Text'] = validate['Text'].str.replace('[^\w\s]',' ')
validate['Text'] = validate['Text'].apply(lambda x: " ".join([Word(word).lemmatize() for word in x.split()]))
validate['Text'] = validate['Text'].apply(lambda x: " ".join(de_repeat(x) for x in x.split()))
validate



Unnamed: 0,Text,Emotion
1262,feel offended think justly,anger
19010,spent two week zombie mode two week feeling fe...,sadness
7212,love idea white blouse jumper feel jumper woul...,sadness
975,couldnt help feel infuriated left building,anger
2566,think noticing prone feel jealous right helpin...,anger
...,...,...
10900,angry feeling disillusioned,sadness
7758,feel like someone need invest money could gorg...,joy
4837,id let kill matter fact im feeling frightfully...,joy
6548,feel though people find quite pleasant smiling...,joy


In [16]:
test['Text'] = test['Text'].apply(lambda x: ' '.join([item for item in str(x).split() if item not in stopwords.words('english')]))
test['Text'] = test['Text'].apply(lambda x: " ".join(x.lower() for x in x.split()))
test['Text'] = test['Text'].apply(lambda x: emoji.demojize(x))
test['Text'] = test['Text'].str.replace('[^\w\s]',' ')
test['Text'] = test['Text'].apply(lambda x: " ".join([Word(word).lemmatize() for word in x.split()]))
test['Text'] = test['Text'].apply(lambda x: " ".join(de_repeat(x) for x in x.split()))
test



Unnamed: 0,Text,Emotion
3716,think many may dislike still feel impressed ed...,surprise
10837,feel smart though,joy
6140,feel desperately fond,love
9956,also able get appointment osteopath freaking a...,love
1549,feel taste dessert sweet suit many customer,love
...,...,...
11284,id gotten past whole oh gawd im humiliated did...,sadness
11964,look see stare feel also know sympathetic glan...,love
5390,sound desperate pathetic feel frantic need anx...,fear
860,worried feeling supposed church rich dr,joy


In [17]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

train_texts = train['Text']
tokenizer = Tokenizer(15212,lower=True,oov_token='UNK')
tokenizer.fit_on_texts(train_texts)

print('Found %d unique words.' % len(tokenizer.word_index))

# texts_to_sequences: Transforms each text in texts to a sequence of integers. 
# It basically takes each word in the text and replaces it with its corresponding integer value from the word_index dictionary.

train_texts_sequences = tokenizer.texts_to_sequences(train_texts)

# pad_sequences: Ensure that all sequences in a list have the same length. 
train_texts_pad_sequences = pad_sequences(train_texts_sequences, maxlen=80, padding='post') 

Found 13470 unique words.


In [18]:
from tensorflow.keras.utils import to_categorical
emotions = {'sadness': 0, 'joy': 1, 'surprise': 2, 'love': 3, 'anger': 4, 'fear': 5}

# Step 1: Replace all emotion values with integers
train['Emotion'] = train.Emotion.replace(emotions)
train_emotion_integers = train['Emotion'].values

# Step 2: Changing the integers to binary
train_emotion_categorical = to_categorical(train_emotion_integers)
train_emotion_categorical[:6] 

array([[0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0.]], dtype=float32)

In [19]:
validate_texts = validate['Text']
validate_emotion_integers = validate.Emotion.replace(emotions)
validate_texts_sequences = tokenizer.texts_to_sequences(validate_texts)
validate_texts_pad_sequences = pad_sequences(validate_texts_sequences, maxlen=80, padding='post')
validate_emotion_categorical = to_categorical(validate_emotion_integers.values)
validate_emotion_categorical[:6]

array([[0., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0., 0.]], dtype=float32)

In [20]:
import tensorflow as tf
try:
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
  tf.config.experimental_connect_to_cluster(tpu)
  tf.tpu.experimental.initialize_tpu_system(tpu)
  print("All devices: ", tf.config.list_logical_devices('TPU'))
  tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError:
  tpu_strategy = tf.distribute.get_strategy() 

In [21]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Bidirectional,Dense,Embedding,Dropout

# instantiating the model in the strategy scope creates the model on the TPU
with tpu_strategy.scope():
    model=Sequential()
    model.add(Embedding(15212,64,input_length=80))
    model.add(Dropout(0.6))
    model.add(Bidirectional(LSTM(80,return_sequences=True)))
    model.add(Bidirectional(LSTM(160)))
    model.add(Dense(len(emotions),activation='softmax'))
    print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 80, 64)            973568    
                                                                 
 dropout (Dropout)           (None, 80, 64)            0         
                                                                 
 bidirectional (Bidirectiona  (None, 80, 160)          92800     
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 320)              410880    
 nal)                                                            
                                                                 
 dense (Dense)               (None, 6)                 1926      
                                                                 
Total params: 1,479,174
Trainable params: 1,479,174
Non-

In [22]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [23]:
hist=model.fit(train_texts_pad_sequences, train_emotion_categorical, epochs=10, validation_data = (validate_texts_pad_sequences, validate_emotion_categorical))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [24]:
test_texts = test['Text']
test_emotion_integers = test.Emotion.replace(emotions)
test_texts_sequences = tokenizer.texts_to_sequences(test_texts)
test_texts_pad_sequences = pad_sequences(test_texts_sequences, maxlen=80, padding='post')
test_emotion_categorical = to_categorical(test_emotion_integers.values)
test_emotion_categorical[:7]

array([[0., 0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.]], dtype=float32)

In [25]:
x = model.evaluate(test_texts_pad_sequences, test_emotion_categorical)
print(x)

[0.1847209930419922, 0.934499979019165]


In [26]:
model.save("../static/model/m2.hdf5")

In [27]:
from tensorflow import keras
model = keras.models.load_model("../static/model/m2.hdf5")

In [28]:
def preprocess_text(text):
    df = pd.DataFrame({'Text':[text]})
    df['Text'] = df['Text'].apply(lambda x: ' '.join([item for item in str(x).split() if item not in stopwords.words('english')]))
    df['Text'] = df['Text'].apply(lambda x: " ".join(x.lower() for x in x.split()))
    df['Text'] = df['Text'].apply(lambda x: emoji.demojize(x))
    df['Text'] = df['Text'].str.replace('[^\w\s]',' ')
    df['Text'] = df['Text'].apply(lambda x: " ".join([Word(word).lemmatize() for word in x.split()]))
    df['Text'] = df['Text'].apply(lambda x: " ".join(de_repeat(x) for x in x.split()))
    df_texts = df['Text']
    df_texts_sequences = tokenizer.texts_to_sequences(df_texts)
    df_texts_pad_sequences = pad_sequences(df_texts_sequences, maxlen=80, padding='post')
    return df_texts_pad_sequences


def mood_result(sentence):
    testing = preprocess_text(sentence)  
    result = model.predict(testing)[0]
    for key,val in emotions.items():
        print(key + ': ' + str(round(result[val]*100,2)) + ' %')
    val = np.argmax(result)
    emotion = [key for key, value in emotions.items() if value == val]
    return emotion[0]

In [37]:
sentence = "I am happy 💔 because he is angry 🎁 "
mood_result(sentence)

sadness: 2.08 %
joy: 1.34 %
surprise: 1.48 %
love: 0.9 %
anger: 90.67 %
fear: 3.53 %


'anger'

In [42]:
sentence = "A wedding can be a highly emotional event."
mood_result(sentence)

sadness: 99.96 %
joy: 0.01 %
surprise: 0.0 %
love: 0.0 %
anger: 0.01 %
fear: 0.02 %


'sadness'