<a href="https://colab.research.google.com/github/macabdul9/emoji-prediction-using-recurrent-neural-networks/blob/master/emoji_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import warnings
warnings.filterwarnings('ignore')

In [0]:
from google.colab import drive
drive.mount('/content/drive')

In [14]:
!unzip drive/My\ Drive/emoji

Archive:  drive/My Drive/emoji.zip
   creating: emoji/
  inflating: emoji/test_emoji.csv    
  inflating: emoji/train_emoji.csv   


In [0]:
# !pip install emoji

In [0]:
import emoji

In [0]:
emoji_dict = {
    0:":beating_heart:",
    1:":baseball:",
    2:":beaming_face_with_smiling_eyes:",
    3:":disappointed_face:",
    4:":fork_and_knife:"    
}

In [16]:
for each in emoji_dict.values():
  print(emoji.emojize(each))

💓
⚾
😁
😞
🍴


In [0]:
import pandas as pd
import numpy as np

In [0]:
test = pd.read_csv('emoji/test_emoji.csv', header=None)
train = pd.read_csv('emoji/train_emoji.csv', header=None)

In [128]:
test.head()

Unnamed: 0,0,1
0,I want to eat\t,4
1,he did not answer\t,3
2,he got a raise\t,2
3,she got me a present\t,0
4,ha ha ha it was so funny\t,2


In [129]:
train.head()

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [0]:
train_sentences = train[0]
train_labels = train[1]
test_sentences = test[0]
test_labels = test[1]

In [131]:
for i in range(5):
  print(train_sentences[i])
  print(emoji.emojize(emoji_dict[train_labels[i]]))

never talk to me again
😞
I am proud of your achievements
😁
It is the worst day in my life
😞
Miss you so much
💓
food is life
🍴


### Converting sentences into embeddings

In [0]:
# !unzip /content/drive/My\ Drive/glove6b50dtxt

In [0]:
glove = open('glove.6B.50d.txt')

In [0]:
embedding_index = {}

for line in glove:
  values = line.split()
  word = values[0]
  vec = np.asarray(values[1:], dtype='float')
  embedding_index[word] = vec

glove.close()

In [0]:
emb_dim = embedding_index['eat'].shape[0]

## converting sentences into word embeddings

In [0]:
def embedding_output(x):
  max_len = 10
  embedding_out = np.zeros((x.shape[0], max_len, emb_dim))
  for i in range(x.shape[0]):
    x[i] = x[i].split()
    for j in range(len(x[i])):
      try:
        embedding_out[i][j] = embedding_index[x[i][j].lower()]
      except:
        embedding_out[i][j] = np.zeros((50, ))
        
  return embedding_out
  

In [0]:
x_train = embedding_output(train_sentences)
x_test = embedding_output(test_sentences)

In [139]:
x_test[0].shape

(10, 50)

In [140]:
print(x_train.shape)
print(x_test.shape)

(132, 10, 50)
(56, 10, 50)


### Creating LSTM Architecture

In [0]:
import keras
from keras.models import *
from keras.layers import *

In [0]:
y_train = keras.utils.np_utils.to_categorical(train_labels)
y_test = keras.utils.np_utils.to_categorical(test_labels)

In [0]:
model = Sequential()

In [0]:
model.add(LSTM(64, input_shape = (10, 50), return_sequences=True)) # return seq for stacked LSTM
model.add(Dropout(0.25))
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.25))
model.add(Dense(5, activation='softmax'))

In [0]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [169]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 10, 64)            29440     
_________________________________________________________________
dropout_2 (Dropout)          (None, 10, 64)            0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 325       
Total params: 62,789
Trainable params: 62,789
Non-trainable params: 0
_________________________________________________________________


In [0]:
es =  keras.callbacks.EarlyStopping(monitor='val_acc', min_delta=0.05, patience=10)
cp = keras.callbacks.ModelCheckpoint('emoji_prediction_weights.h5',save_best_only=True, save_weights_only=True)

In [178]:
# time to train the model

hist = model.fit(x_train, y_train, batch_size=64, epochs=50, shuffle=True, validation_split=0.2, callbacks=[cp, es])

Train on 105 samples, validate on 27 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50


In [179]:
model.evaluate(x_test, y_test)



[1.9255683081490653, 0.5535714370863778]

In [0]:
pred = model.predict_classes(x_test)

In [181]:
pred[:5]

array([4, 3, 2, 2, 2])

In [0]:
model.load_weights('emoji_prediction_weights.h5')

In [183]:
model.evaluate(x_test, y_test)



[1.758664880480085, 0.44642857568604605]

In [187]:
for i in range(5):
  print(test_sentences[i])
  print(emoji.emojize(emoji_dict[pred[i]]))

['I', 'want', 'to', 'eat']
🍴
['he', 'did', 'not', 'answer']
😞
['he', 'got', 'a', 'raise']
😁
['she', 'got', 'me', 'a', 'present']
😁
['ha', 'ha', 'ha', 'it', 'was', 'so', 'funny']
😁
