In [1]:
import emoji

In [2]:
len(emoji.EMOJI_UNICODE)

2811

In [3]:
emoji_dictionary = {
    "0":"\u2764\uFE0F",
    "1":":baseball:",
    "2":":grinning_face_with_big_eyes:",
    "3":":disappointed_face:",
    "4":":fork_and_knife:",
}

In [4]:
for e in emoji_dictionary.values():
    print(emoji.emojize(e))

❤️
⚾
😃
😞
🍴


# Processing Dataset

In [5]:
import pandas as pd
import numpy as np

In [6]:
train = pd.read_csv('train_emoji.csv',header = None)
test = pd.read_csv('test_emoji.csv', header = None)

In [7]:
test.head()

Unnamed: 0,0,1
0,I want to eat\t,4
1,he did not answer\t,3
2,he got a raise\t,2
3,she got me a present\t,0
4,ha ha ha it was so funny\t,2


In [8]:
train.head()

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [9]:
data = train.values
print(data.shape)

(132, 4)


In [10]:
X_train = train[0]
Y_train = train[1]

X_test = test[0]
Y_test = test[1]

In [11]:
for i in range(5):
    print(X_train[i],emoji.emojize(emoji_dictionary[str(Y_train[i])]))

never talk to me again 😞
I am proud of your achievements 😃
It is the worst day in my life 😞
Miss you so much ❤️
food is life 🍴


# converting sentences using Embeddings using Glove

In [12]:
f = open('Datasets/glove6b50dtxt/glove.6B.50d.txt',encoding='utf-8')

In [13]:
embeddings_index = {}
cnt=0
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:],dtype='float')
    embeddings_index[word] = coefs

In [14]:
f.close()

In [15]:
emb_dim = embeddings_index['eat'].shape[0]

In [16]:
print(emb_dim)

50


# converting sentence into vectors

In [17]:
def embedding_output_train(X):
    maxLen = 10
    embedding_out = np.zeros((X.shape[0],maxLen,emb_dim))
    
    for ix in range(X.shape[0]):
        X[ix] = X[ix].split()
        for ij in range(len(X[ix])):
            try:
                embedding_out[ix][ij] = embeddings_index[X[ix][ij].lower()]
            except:
                embedding_out[ix][ij] = np.zeros((50,))
    return embedding_out
            

In [18]:
embeddings_matrix_train = embedding_output_train(X_train)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [19]:
embeddings_matrix_test = embedding_output_train(X_test)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [20]:
print(embeddings_matrix_test.shape,embeddings_matrix_train.shape)

(56, 10, 50) (132, 10, 50)


In [21]:
from keras.utils import to_categorical

Using TensorFlow backend.


In [22]:
Y_train = to_categorical(Y_train,num_classes=5)
Y_test = to_categorical(Y_test,num_classes=5)
print(Y_train.shape)
print(Y_train[0])

(132, 5)
[0. 0. 0. 1. 0.]


# Define RNN/LSTM MODEL

In [23]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential

In [50]:
model = Sequential()
model.add(LSTM(64,input_shape=(10,emb_dim)))
model.add(Dropout(0.5))
model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 64)                29440     
_________________________________________________________________
dropout_4 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 5)                 325       
_________________________________________________________________
activation_4 (Activation)    (None, 5)                 0         
Total params: 29,765
Trainable params: 29,765
Non-trainable params: 0
_________________________________________________________________


In [51]:
from keras.callbacks import EarlyStopping,ModelCheckpoint
checkpoint = ModelCheckpoint("best_model.h5",monitor='val_loss',verbose=True,save_best_only=True)
earlystop = EarlyStopping(monitor='val_acc',patience=10)
hist = model.fit(embeddings_matrix_train,Y_train,epochs=100,batch_size=64,shuffle=True,validation_split=0.2,callbacks=[earlystop,checkpoint])

Train on 105 samples, validate on 27 samples
Epoch 1/100
Epoch 00001: val_loss improved from inf to 1.60425, saving model to best_model.h5
Epoch 2/100
Epoch 00002: val_loss improved from 1.60425 to 1.59984, saving model to best_model.h5
Epoch 3/100
Epoch 00003: val_loss did not improve from 1.59984
Epoch 4/100
Epoch 00004: val_loss did not improve from 1.59984
Epoch 5/100
Epoch 00005: val_loss did not improve from 1.59984
Epoch 6/100




Epoch 00006: val_loss did not improve from 1.59984
Epoch 7/100
Epoch 00007: val_loss did not improve from 1.59984
Epoch 8/100
Epoch 00008: val_loss did not improve from 1.59984
Epoch 9/100
Epoch 00009: val_loss did not improve from 1.59984
Epoch 10/100
Epoch 00010: val_loss did not improve from 1.59984
Epoch 11/100
Epoch 00011: val_loss did not improve from 1.59984
Epoch 12/100
Epoch 00012: val_loss did not improve from 1.59984
Epoch 13/100
Epoch 00013: val_loss improved from 1.59984 to 1.59453, saving model to best_model.h5
Epoch 14/100
Epoch 00014: val_loss improved from 1.59453 to 1.55823, saving model to best_model.h5
Epoch 15/100
Epoch 00015: val_loss improved from 1.55823 to 1.51942, saving model to best_model.h5
Epoch 16/100
Epoch 00016: val_loss improved from 1.51942 to 1.48202, saving model to best_model.h5
Epoch 17/100
Epoch 00017: val_loss improved from 1.48202 to 1.44638, saving model to best_model.h5
Epoch 18/100
Epoch 00018: val_loss improved from 1.44638 to 1.40750, sav

Epoch 33/100
Epoch 00033: val_loss improved from 0.95087 to 0.93815, saving model to best_model.h5
Epoch 34/100
Epoch 00034: val_loss did not improve from 0.93815
Epoch 35/100
Epoch 00035: val_loss did not improve from 0.93815
Epoch 36/100
Epoch 00036: val_loss did not improve from 0.93815
Epoch 37/100
Epoch 00037: val_loss did not improve from 0.93815
Epoch 38/100
Epoch 00038: val_loss did not improve from 0.93815
Epoch 39/100
Epoch 00039: val_loss did not improve from 0.93815
Epoch 40/100
Epoch 00040: val_loss did not improve from 0.93815
Epoch 41/100
Epoch 00041: val_loss did not improve from 0.93815
Epoch 42/100
Epoch 00042: val_loss did not improve from 0.93815
Epoch 43/100
Epoch 00043: val_loss did not improve from 0.93815
Epoch 44/100
Epoch 00044: val_loss did not improve from 0.93815
Epoch 45/100
Epoch 00045: val_loss did not improve from 0.93815
Epoch 46/100
Epoch 00046: val_loss did not improve from 0.93815
Epoch 47/100
Epoch 00047: val_loss did not improve from 0.93815
Epoch

Epoch 62/100
Epoch 00062: val_loss did not improve from 0.93815
Epoch 63/100
Epoch 00063: val_loss did not improve from 0.93815
Epoch 64/100
Epoch 00064: val_loss did not improve from 0.93815
Epoch 65/100
Epoch 00065: val_loss did not improve from 0.93815
Epoch 66/100
Epoch 00066: val_loss did not improve from 0.93815
Epoch 67/100
Epoch 00067: val_loss did not improve from 0.93815
Epoch 68/100
Epoch 00068: val_loss did not improve from 0.93815
Epoch 69/100
Epoch 00069: val_loss did not improve from 0.93815
Epoch 70/100
Epoch 00070: val_loss did not improve from 0.93815
Epoch 71/100
Epoch 00071: val_loss did not improve from 0.93815
Epoch 72/100
Epoch 00072: val_loss did not improve from 0.93815
Epoch 73/100
Epoch 00073: val_loss did not improve from 0.93815
Epoch 74/100
Epoch 00074: val_loss did not improve from 0.93815
Epoch 75/100
Epoch 00075: val_loss did not improve from 0.93815
Epoch 76/100
Epoch 00076: val_loss did not improve from 0.93815
Epoch 77/100
Epoch 00077: val_loss did n

Epoch 91/100
Epoch 00091: val_loss did not improve from 0.93815
Epoch 92/100
Epoch 00092: val_loss did not improve from 0.93815
Epoch 93/100
Epoch 00093: val_loss did not improve from 0.93815
Epoch 94/100
Epoch 00094: val_loss did not improve from 0.93815
Epoch 95/100
Epoch 00095: val_loss did not improve from 0.93815
Epoch 96/100
Epoch 00096: val_loss did not improve from 0.93815
Epoch 97/100
Epoch 00097: val_loss did not improve from 0.93815
Epoch 98/100
Epoch 00098: val_loss did not improve from 0.93815
Epoch 99/100
Epoch 00099: val_loss did not improve from 0.93815
Epoch 100/100
Epoch 00100: val_loss did not improve from 0.93815


In [52]:
model.load_weights("best_model.h5")

In [53]:
pred = model.predict_classes(embeddings_matrix_test)

In [54]:
print(pred)

[4 3 3 0 2 2 3 2 4 2 1 2 0 3 1 3 2 2 3 2 0 3 4 0 3 3 3 0 4 2 0 1 0 2 3 3 2
 3 1 2 1 0 0 1 2 3 2 2 3 1 3 0 3 2 2 3]


In [55]:
model.evaluate(embeddings_matrix_test,Y_test)



[1.024472645350865, 0.58928573]

# Stacked LSTM 

In [72]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential

In [73]:
model = Sequential()
model.add(LSTM(64,input_shape=(10,emb_dim),return_sequences=True))
model.add(Dropout(0.6))
model.add(LSTM(64,input_shape=(10,emb_dim),return_sequences=False))
model.add(Dropout(0.6))
model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_13 (LSTM)               (None, 10, 64)            29440     
_________________________________________________________________
dropout_13 (Dropout)         (None, 10, 64)            0         
_________________________________________________________________
lstm_14 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dropout_14 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_9 (Dense)              (None, 5)                 325       
_________________________________________________________________
activation_9 (Activation)    (None, 5)                 0         
Total params: 62,789
Trainable params: 62,789
Non-trainable params: 0
__________________________________________________

In [74]:
from keras.callbacks import EarlyStopping,ModelCheckpoint
checkpoint = ModelCheckpoint("best_model1.h5",monitor='val_loss',verbose=True,save_best_only=True)
earlystop = EarlyStopping(monitor='val_acc',patience=10)
hist = model.fit(embeddings_matrix_train,Y_train,epochs=150,batch_size=64,shuffle=True,validation_split=0.2)

Train on 105 samples, validate on 27 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150


Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150
Epoch 81/150
Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/1

In [66]:
#model.load_weights("best_model1.h5")

In [75]:
pred = model.predict_classes(embeddings_matrix_test)

In [80]:
print(len(pred))

56


In [77]:
model.evaluate(embeddings_matrix_test,Y_test)



[2.590971793447222, 0.60714287]

In [79]:
for i in range(len(pred)):
    print(' '.join(X_test[i]))
    print(emoji.emojize(emoji_dictionary[str(np.argmax(Y_test[i]))]))
    print(emoji.emojize(emoji_dictionary[str(pred[i])]))

I want to eat
🍴
🍴
he did not answer
😞
😞
he got a raise
😃
😞
she got me a present
❤️
😞
ha ha ha it was so funny
😃
😃
he is a good friend
❤️
😃
I am upset
❤️
😞
We had such a lovely dinner tonight
❤️
😃
where is the food
🍴
🍴
Stop making this joke ha ha ha
😃
😃
where is the ball
⚾
⚾
work is hard
😞
😃
This girl is messing with me
😞
😞
are you serious ha ha
😃
😞
Let us go play baseball
⚾
⚾
This stupid grader is not working
😞
😞
work is horrible
😞
😃
Congratulation for having a baby
😃
😃
stop messing around
😞
😞
any suggestions for dinner
🍴
😃
I love taking breaks
❤️
😞
you brighten my day
😃
❤️
I boiled rice
🍴
🍴
she is a bully
😞
😃
Why are you feeling bad
😞
😞
I am upset
😞
😞
I worked during my birthday
😞
😃
My grandmother is the love of my life
❤️
❤️
enjoy your break
😃
⚾
valentine day is near
❤️
😃
I miss you so much
❤️
❤️
throw the ball
⚾
⚾
My life is so boring
😞
😞
she said yes
😃
😃
will you be my valentine
❤️
❤️
he can pitch really well
⚾
⚾
dance with me
😃
😃
I am starving
🍴
🍴
See you at the restaurant
🍴
😃
I l