In [153]:
# Emoji Prediction using Transfer Learning & LSTM

In [154]:
import emoji
import numpy as np
import pandas as pd

In [155]:
#emoji.EMOJI_ALIAS_UNICODE

In [156]:
# Label id (as a string) -> emoji code accepted by emoji.emojize().
# Label "0" uses the raw heart code points rather than the ":heart:" alias,
# because the alias prints a black instead of a red heart depending on the font.
emoji_dictionary = {
    str(label): code
    for label, code in enumerate((
        "\u2764\uFE0F",
        ":baseball:",
        ":grinning_face_with_smiling_eyes:",
        ":disappointed_face:",
        ":fork_and_knife:",
        ":hundred_points:",
        ":fire:",
        ":face_blowing_a_kiss:",
        ":chestnut:",
        ":flexed_biceps:",
    ))
}

In [157]:
# Sanity check: label "4" (":fork_and_knife:") should render as a single emoji.
emoji.emojize(emoji_dictionary["4"])

'🍴'

In [158]:
# Render every entry of the label table, one emoji per line.
print("".join(emoji.emojize(code) + "\n" for code in emoji_dictionary.values()), end="")


❤️
⚾
😄
😞
🍴
💯
🔥
😘
🌰
💪


In [159]:
# Read both splits; the CSV files carry no header row, so columns are
# addressed by position (0, 1, ...).
train, test = (
    pd.read_csv(csv_path, header=None)
    for csv_path in ('dataset/train_emoji.csv', 'dataset/test_emoji.csv')
)

# Raw object array of the training frame, used below for a quick preview.
data = train.to_numpy()
print(data.shape)

(132, 4)


In [160]:
# Peek at the first five training rows.
train.head(n=5)

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [161]:
# Column 0 holds the sentence, column 1 the integer emoji label.
X_train, Y_train = train[0], train[1]
X_test, Y_test = test[0], test[1]

# Show the first ten training sentences next to their rendered emoji.
for row_idx in range(10):
    sentence, label = data[row_idx][0], data[row_idx][1]
    print(sentence, emoji.emojize(emoji_dictionary[str(label)]))

never talk to me again 😞
I am proud of your achievements 😄
It is the worst day in my life 😞
Miss you so much ❤️
food is life 🍴
I love you mum ❤️
Stop saying bullshit 😞
congratulations on your acceptance 😄
The assignment is too long  😞
I want to go play ⚾


In [162]:
# Converting sentences into embeddings using GloVe (glove.6B.50d.txt)

In [163]:
# Open the pre-trained GloVe vectors as UTF-8 text; the next cell iterates
# over this handle and closes it.
f = open('glove.6B.50d.txt', mode='r', encoding='utf-8')

In [164]:
# Parse the GloVe text file into {word: float32 coefficient vector}.
# Every line has the form "<word> <c1> <c2> ...".
embeddings_index = {}
with f:  # closes the handle even if a malformed line raises while parsing
    for line in f:
        values = line.split()
        if not values:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        word, coeffs = values[0], values[1:]
        embeddings_index[word] = np.asarray(coeffs, dtype='float32')
        

In [165]:
# Shape of a single word vector (looked up for the word "eat").
np.shape(embeddings_index["eat"])

(50,)

In [166]:
# Dimensionality of the word vectors, read off the vector stored for "eat".
emb_dim = len(embeddings_index["eat"])

In [167]:
# Converting sentences into vectors (Embedding Layer Output)

In [168]:
def embedding_output(X, maxLen=10, embeddings=None):
    """Turn sentences into a zero-padded tensor of word vectors.

    Parameters
    ----------
    X : indexable with ``.shape[0]`` and integer ``X[ix]`` access
        (e.g. a pandas Series or a NumPy object array). Each entry is either
        a sentence string or an already tokenised list of words.
    maxLen : int, default 10
        Number of time steps per sentence. Longer sentences are truncated,
        shorter ones are left zero-padded at the end.
    embeddings : mapping of word -> vector, optional
        Lookup table to use. Defaults to the notebook-level
        ``embeddings_index`` (with width ``emb_dim``).

    Returns
    -------
    numpy.ndarray of shape ``(X.shape[0], maxLen, dim)``.

    Side effects
    ------------
    String entries of ``X`` are replaced *in place* by their token lists.
    This is kept deliberately: later cells print ``' '.join(X_test[i])`` and
    rely on it. Entries that are already lists are left untouched, so calling
    the function twice on the same data no longer fails.
    """
    if embeddings is None:
        embeddings = embeddings_index
        dim = emb_dim
    else:
        # Infer the vector width from any entry of the supplied table.
        dim = len(next(iter(embeddings.values()), ()))

    n_sentences = X.shape[0]
    embedding_out = np.zeros((n_sentences, maxLen, dim))
    for ix in range(n_sentences):
        sentence = X[ix]
        if isinstance(sentence, str):
            tokens = sentence.split()
            X[ix] = tokens  # in-place tokenisation that downstream cells depend on
        else:
            tokens = sentence  # already tokenised by an earlier call

        # Truncate to maxLen so long sentences cannot index past the tensor.
        for jx, word in enumerate(tokens[:maxLen]):
            vector = embeddings.get(word.lower())
            if vector is not None:
                embedding_out[ix, jx] = vector
            # Out-of-vocabulary words keep the all-zero vector instead of
            # raising KeyError.

    return embedding_out

In [169]:
# Embed the training split first, then the test split.
# Note: embedding_output also replaces each sentence in X_train / X_test with
# its list of tokens.
embedding_matrix_train, embedding_matrix_test = map(embedding_output, (X_train, X_test))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[ix] = X[ix].split()


In [170]:
# Inspect the first (now tokenised) training sentence, its length, and the
# shapes of both embedded splits, one value per line.
first_sentence = X_train[0]
print(
    first_sentence,
    len(first_sentence),
    embedding_matrix_train.shape,
    embedding_matrix_test.shape,
    sep="\n",
)

['never', 'talk', 'to', 'me', 'again']
5
(132, 10, 50)
(56, 10, 50)


In [171]:
from keras.utils import to_categorical

In [172]:
# One-hot encode the integer labels of both splits into 5 columns.
Y_train, Y_test = (
    to_categorical(labels, num_classes=5) for labels in (Y_train, Y_test)
)
print(Y_train.shape)

(132, 5)


In [173]:
# Define the RNN/LSTM Model

In [174]:
from keras.layers import *
from keras.models import Sequential

In [180]:
# Two stacked LSTMs with dropout, followed by a 5-way softmax classifier.
# Input: 10 time steps of 50-dimensional word vectors.
model = Sequential([
    # First LSTM emits the full sequence so the second LSTM can consume it.
    LSTM(64, input_shape=(10, 50), return_sequences=True),
    Dropout(0.5),
    # Second LSTM keeps only its final state. Per the summary below it receives
    # the 64-wide output of the previous layer, so its input_shape argument
    # looks redundant (kept unchanged here).
    LSTM(64, input_shape=(10, 50), return_sequences=False),
    Dropout(0.3),
    Dense(5),
    Activation('softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)
model.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_16 (LSTM)               (None, 10, 64)            29440     
_________________________________________________________________
dropout_16 (Dropout)         (None, 10, 64)            0         
_________________________________________________________________
lstm_17 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dropout_17 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_11 (Dense)             (None, 5)                 325       
_________________________________________________________________
activation_11 (Activation)   (None, 5)                 0         
Total params: 62,789
Trainable params: 62,789
Non-trainable params: 0
_________________________________________________

In [181]:
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

# NOTE: neither callback is passed to fit(), so training always runs all
# 150 epochs and no checkpoint is written.
# Keras holds out the last 20% of the training data for validation.
hist = model.fit(
    x=embedding_matrix_train,
    y=Y_train,
    batch_size=64,
    epochs=150,
    shuffle=True,
    validation_split=0.2,
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150


Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150
Epoch 81/150
Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150


Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


In [182]:
# Sequential.predict_classes() was deprecated and later removed from Keras
# (TensorFlow >= 2.6). For a softmax output the documented replacement is the
# arg-max over the predicted class probabilities, which yields the same ids.
pred = np.argmax(model.predict(embedding_matrix_test), axis=-1)

In [183]:
# Returns [loss, accuracy] on the test split (the model was compiled with
# categorical cross-entropy and the 'acc' metric).
model.evaluate(x=embedding_matrix_test, y=Y_test)



[2.3397772312164307, 0.5892857313156128]

In [184]:
# For up to the first 30 test sentences print three lines each:
# the sentence, the true emoji, and the predicted emoji.
for i in range(min(30, len(pred))):
    sentence = X_test[i]
    # embedding_output() replaces each sentence with its token list in place;
    # accept a raw string as well so this cell does not silently print
    # space-separated characters if that side effect has not happened.
    print(sentence if isinstance(sentence, str) else ' '.join(sentence))
    print(emoji.emojize(emoji_dictionary[str(np.argmax(Y_test[i]))]))
    print(emoji.emojize(emoji_dictionary[str(pred[i])]))

I want to eat
🍴
🍴
he did not answer
😞
😞
he got a raise
😄
😞
she got me a present
❤️
😄
ha ha ha it was so funny
😄
😄
he is a good friend
❤️
😄
I am upset
❤️
⚾
We had such a lovely dinner tonight
❤️
😄
where is the food
🍴
🍴
Stop making this joke ha ha ha
😄
😄
where is the ball
⚾
⚾
work is hard
😞
😄
This girl is messing with me
😞
❤️
are you serious ha ha
😄
❤️
Let us go play baseball
⚾
⚾
This stupid grader is not working
😞
😞
work is horrible
😞
😄
Congratulation for having a baby
😄
😄
stop messing around
😞
😞
any suggestions for dinner
🍴
😄
I love taking breaks
❤️
❤️
you brighten my day
😄
❤️
I boiled rice
🍴
🍴
she is a bully
😞
❤️
Why are you feeling bad
😞
😞
I am upset
😞
⚾
I worked during my birthday
😞
😄
My grandmother is the love of my life
❤️
❤️
enjoy your break
😄
🍴
valentine day is near
❤️
😄
