In [1]:
import pandas as pd
import numpy as np
import random
from tensorflow.keras import models, Sequential, layers
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras import callbacks
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('Emotion_final.csv')

In [3]:
df = df.rename(columns={'Text': 'text', 'Emotion': 'emotion'})

In [4]:
df.head()

Unnamed: 0,text,emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [5]:
df.emotion.value_counts().count()

6

In [6]:
df.shape

(21459, 2)

In [7]:
X = df.text
df.emotion = pd.Categorical(df.emotion)

In [8]:
df['code'] = df['emotion'].cat.codes

In [9]:
y = to_categorical(df['code'].values)

In [10]:
df.head()

Unnamed: 0,text,emotion,code
0,i didnt feel humiliated,sadness,4
1,i can go from feeling so hopeless to so damned...,sadness,4
2,im grabbing a minute to post i feel greedy wrong,anger,0
3,i am ever feeling nostalgic about the fireplac...,love,3
4,i am feeling grouchy,anger,0


In [11]:
df.groupby(['emotion', 'code']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,text
emotion,code,Unnamed: 2_level_1
anger,0,2993.0
anger,1,
anger,2,
anger,3,
anger,4,
anger,5,
fear,0,
fear,1,2652.0
fear,2,
fear,3,


In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [13]:
y_train

array([[0., 0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       ...,
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0.]], dtype=float32)

In [14]:
df.head()

Unnamed: 0,text,emotion,code
0,i didnt feel humiliated,sadness,4
1,i can go from feeling so hopeless to so damned...,sadness,4
2,im grabbing a minute to post i feel greedy wrong,anger,0
3,i am ever feeling nostalgic about the fireplac...,love,3
4,i am feeling grouchy,anger,0


In [15]:
df.emotion.value_counts()

happy       7029
sadness     6265
anger       2993
fear        2652
love        1641
surprise     879
Name: emotion, dtype: int64

In [16]:
df.head()

Unnamed: 0,text,emotion,code
0,i didnt feel humiliated,sadness,4
1,i can go from feeling so hopeless to so damned...,sadness,4
2,im grabbing a minute to post i feel greedy wrong,anger,0
3,i am ever feeling nostalgic about the fireplac...,love,3
4,i am feeling grouchy,anger,0


In [17]:
### Let's tokenize the vocabulary 
tk = Tokenizer()
tk.fit_on_texts(X)
vocab_size = len(tk.word_index) + 1
print(f'There are {vocab_size} different words in your corpus')
X_train_token = tk.texts_to_sequences(X_train)
X_test_token = tk.texts_to_sequences(X_test)


### Pad your inputs
X_train_pad = pad_sequences(X_train_token, maxlen=66, dtype='float32', padding='post' )
X_test_pad = pad_sequences(X_test_token, maxlen=66, dtype='float32', padding='post')

There are 19259 different words in your corpus


In [18]:
X_train_pad.shape

(15021, 66)

In [19]:
X_test_pad.shape

(6438, 66)

In [20]:
vocab_size

19259

In [21]:
# model.add(layers.Masking(mark_value=-999))

In [22]:
import gensim.downloader as api
glove_gensim  = api.load('glove-wiki-gigaword-100') #100 dimension

In [23]:
# Vectorize
vector_size = 100
gensim_weight_matrix = np.zeros((vocab_size ,vector_size))
gensim_weight_matrix.shape
for word, index in tk.word_index.items():
    if index < vocab_size: # since index starts with zero 
        if word in glove_gensim.wv.vocab:
            gensim_weight_matrix[index] = glove_gensim[word]
        else:
            gensim_weight_matrix[index] = np.zeros(100)

  import sys


In [24]:
def create_model(EMBEDDING_DIM = 100): # this means the embedding layer will create  a vector in 100 dimension
  model_cnn = Sequential()
  model_cnn.add(layers.Embedding(input_dim = vocab_size,# the whole vocabulary size 
                            output_dim = EMBEDDING_DIM, # vector space dimension
                            input_length= X_train_pad.shape[1], # max_len of text sequence
                            weights = [gensim_weight_matrix],
                            trainable = False,
                            mask_zero=True))
  model_cnn.add(layers.Conv1D(10, kernel_size=3))
  model_cnn.add(layers.Flatten())
  #model_cnn.add(layers.Dense(60, activation="relu"))
  model_cnn.add(layers.Dense(30, activation="relu"))
  model_cnn.add(layers.Dense(6, activation="softmax"))
  model_cnn.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
  return model_cnn

In [25]:
model = create_model()

In [26]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 66, 100)           1925900   
_________________________________________________________________
conv1d (Conv1D)              (None, 64, 10)            3010      
_________________________________________________________________
flatten (Flatten)            (None, 640)               0         
_________________________________________________________________
dense (Dense)                (None, 30)                19230     
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 186       
Total params: 1,948,326
Trainable params: 22,426
Non-trainable params: 1,925,900
_________________________________________________________________


In [27]:
es = callbacks.EarlyStopping(patience=10, restore_best_weights=True)

In [28]:
history = model.fit(X_train_pad, y_train, batch_size=8, epochs=100, verbose=1, validation_split=0.3, callbacks=[es])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


In [29]:
model.evaluate(X_test_pad, y_test)



[1.2209725379943848, 0.5462876558303833]

In [30]:
def create_model2(EMBEDDING_DIM = 100): # this means the embedding layer will create  a vector in 100 dimension
  model_cnn = Sequential()
  model_cnn.add(layers.Embedding(input_dim = vocab_size,# the whole vocabulary size 
                            output_dim = EMBEDDING_DIM, # vector space dimension
                            input_length= X_train_pad.shape[1], # max_len of text sequence
                            weights = [gensim_weight_matrix],
                            trainable = False,
                            mask_zero=True))
  model_cnn.add(layers.Bidirectional(layers.LSTM(100,return_sequences=True)))
  model_cnn.add(layers.Bidirectional(layers.LSTM(100,return_sequences=True)))
  model_cnn.add(layers.Dropout(.2,))
  model_cnn.add(layers.Bidirectional(layers.LSTM(100,return_sequences=False)))
  #model_cnn.add(layers.Dense(60, activation="relu"))
  model_cnn.add(layers.Dense(30, activation="relu"))
  model_cnn.add(layers.Dense(6, activation="softmax"))
  model_cnn.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
  return model_cnn

In [31]:
model2 = create_model2()

In [32]:
es = callbacks.EarlyStopping(patience=3, restore_best_weights=True)

In [33]:
history2 = model2.fit(X_train_pad, y_train, batch_size=8, epochs=10, verbose=1, validation_split=0.3, callbacks=[es])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [34]:
model2.evaluate(X_test_pad, y_test)



[0.23301631212234497, 0.909599244594574]

In [35]:
def create_model3(EMBEDDING_DIM = 100): # this means the embedding layer will create  a vector in 100 dimension
  model_cnn = Sequential()
  model_cnn.add(layers.Embedding(input_dim = vocab_size,# the whole vocabulary size 
                            output_dim = EMBEDDING_DIM, # vector space dimension
                            input_length= X_train_pad.shape[1], # max_len of text sequence
                            weights = [gensim_weight_matrix],
                            trainable = False,
                            mask_zero=True))
  model_cnn.add(layers.Bidirectional(layers.LSTM(100,return_sequences=True)))
  model_cnn.add(layers.Dropout(.3,))
  model_cnn.add(layers.Bidirectional(layers.LSTM(100,return_sequences=True)))
  model_cnn.add(layers.Dropout(.3,))
  model_cnn.add(layers.Bidirectional(layers.LSTM(100,return_sequences=False)))
  #model_cnn.add(layers.Dense(60, activation="relu"))
  model_cnn.add(layers.Dense(30, activation="relu"))
  model_cnn.add(layers.Dropout(.3,))
  model_cnn.add(layers.Dense(6, activation="softmax"))
  model_cnn.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
  return model_cnn

In [36]:
model3 = create_model3()

In [37]:
es = callbacks.EarlyStopping(patience=3, restore_best_weights=True)

In [38]:
history3 = model3.fit(X_train_pad, y_train, batch_size=8, epochs=10, verbose=1, validation_split=0.3, callbacks=[es])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [39]:
model3.evaluate(X_test_pad, y_test)



[0.2446947693824768, 0.9103758931159973]

In [50]:
models.save_model(
    model, filepath='/Users/keirferguson/code/keir20/book-woofer/models'
)

INFO:tensorflow:Assets written to: /Users/keirferguson/code/keir20/book-woofer/models/assets


In [51]:
models.save_model(
    model2, filepath='/Users/keirferguson/code/keir20/book-woofer/models'
)



INFO:tensorflow:Assets written to: /Users/keirferguson/code/keir20/book-woofer/models/assets


INFO:tensorflow:Assets written to: /Users/keirferguson/code/keir20/book-woofer/models/assets


In [52]:
models.save_model(
    model3, filepath='/Users/keirferguson/code/keir20/book-woofer/models'
)



INFO:tensorflow:Assets written to: /Users/keirferguson/code/keir20/book-woofer/models/assets


INFO:tensorflow:Assets written to: /Users/keirferguson/code/keir20/book-woofer/models/assets


In [53]:
def get_key(value):
    dictionary={'anger':0,'fear':1,'happy':2,'love':3,'sadness':4,'surprise':5}
    for key,val in dictionary.items():
          if (val==value):
            return key

In [86]:
# def predict(sentence):
#     sentence_lst=[]
#     sentence_lst.append(sentence)
#     sentence_seq=tk.texts_to_sequences(sentence_lst)
#     sentence_padded=pad_sequences(sentence_seq,maxlen=300,padding='post')
#     ans=get_key(model.predict_classes(sentence_padded))
#     print("The emotion predicted is",ans)


def predict(sentence):
    sentence_lst=[]
    sentence_lst.append(sentence)
    sentence_seq=tk.texts_to_sequences(sentence_lst)
    sentence_padded=pad_sequences(sentence_seq,maxlen=300,padding='post')
    ans=model3.predict(sentence_padded)
    print("The emotion predicted is",ans)


In [87]:
predict(str(input('Enter a sentence : ')))

Enter a sentence : The grown-ups advised me to set drawings of open or closed boa constrictors aside, and to concentrate instead on geography, history, mathematics and grammar. So it was, at the age of six, that I abandoned a magnificent career as a painter. I’d been discouraged by the failure of my drawing number 1 and my drawing number 2. Grown-ups never understand anything on their own, and it’s tiring, for children, to be for ever and ever explaining…
The emotion predicted is [[0.5395469  0.12869728 0.2298384  0.00785758 0.04694536 0.04711451]]


In [63]:
# bi directional lstm
# dropout layers
# model.add(Bidirectional(LSTM(100,return_sequences=True)))

In [65]:
df.head(20)

Unnamed: 0,text,emotion,code
0,i didnt feel humiliated,sadness,4
1,i can go from feeling so hopeless to so damned...,sadness,4
2,im grabbing a minute to post i feel greedy wrong,anger,0
3,i am ever feeling nostalgic about the fireplac...,love,3
4,i am feeling grouchy,anger,0
5,ive been feeling a little burdened lately wasn...,sadness,4
6,ive been taking or milligrams or times recomme...,surprise,5
7,i feel as confused about life as a teenager or...,fear,1
8,i have been with petronas for years i feel tha...,happy,2
9,i feel romantic too,love,3
