In [1]:
import emoji
import pandas as pd
import numpy as np
from keras.layers import *
from keras.models import Sequential
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
emoji.EMOJI_UNICODE

{':1st_place_medal:': '🥇',
 ':2nd_place_medal:': '🥈',
 ':3rd_place_medal:': '🥉',
 ':AB_button_(blood_type):': '🆎',
 ':ATM_sign:': '🏧',
 ':A_button_(blood_type):': '🅰',
 ':Afghanistan:': '🇦🇫',
 ':Albania:': '🇦🇱',
 ':Algeria:': '🇩🇿',
 ':American_Samoa:': '🇦🇸',
 ':Andorra:': '🇦🇩',
 ':Angola:': '🇦🇴',
 ':Anguilla:': '🇦🇮',
 ':Antarctica:': '🇦🇶',
 ':Antigua_&_Barbuda:': '🇦🇬',
 ':Aquarius:': '♒',
 ':Argentina:': '🇦🇷',
 ':Aries:': '♈',
 ':Armenia:': '🇦🇲',
 ':Aruba:': '🇦🇼',
 ':Ascension_Island:': '🇦🇨',
 ':Australia:': '🇦🇺',
 ':Austria:': '🇦🇹',
 ':Azerbaijan:': '🇦🇿',
 ':BACK_arrow:': '🔙',
 ':B_button_(blood_type):': '🅱',
 ':Bahamas:': '🇧🇸',
 ':Bahrain:': '🇧🇭',
 ':Bangladesh:': '🇧🇩',
 ':Barbados:': '🇧🇧',
 ':Belarus:': '🇧🇾',
 ':Belgium:': '🇧🇪',
 ':Belize:': '🇧🇿',
 ':Benin:': '🇧🇯',
 ':Bermuda:': '🇧🇲',
 ':Bhutan:': '🇧🇹',
 ':Bolivia:': '🇧🇴',
 ':Bosnia_&_Herzegovina:': '🇧🇦',
 ':Botswana:': '🇧🇼',
 ':Bouvet_Island:': '🇧🇻',
 ':Brazil:': '🇧🇷',
 ':British_Indian_Ocean_Territory:': '🇮🇴',
 ':British_Virgin_Is

In [3]:
emoji_dict = {"0":"\u2764\uFE0F",
             "1":":baseball:",
             "2":":grinning_face_with_big_eyes:",
             "3":":disappointed_face:",
             "4":":fork_and_knife:"}

In [4]:
for e in emoji_dict.values():
    print(emoji.emojize(e))

❤️
⚾
😃
😞
🍴


## Preprocessing data

In [5]:
train = pd.read_csv("dataset/train_emoji.csv", header=None)
test = pd.read_csv("dataset/test_emoji.csv", header=None)

In [6]:
train.head()

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [7]:
x_train = train[0]
y_train = train[1]

x_test = test[0]
y_test = test[1]

In [8]:
for i in range(5):
    print(x_train[i], emoji.emojize(emoji_dict[str(y_train[i])]))

never talk to me again 😞
I am proud of your achievements 😃
It is the worst day in my life 😞
Miss you so much ❤️
food is life 🍴


 ## Converting sentences into embeddings

In [9]:
f = open("glove6b50dtxt/glove.6B.50d.txt")

In [10]:
embedding_index = {}

for line in f:

    sent = line.split()
    word = sent[0]
    values = np.asarray(sent[1:], dtype='float')

    embedding_index[word] = values

f.close()

## Converting senteces into vectors

In [11]:
def out(x):
    
    maxlen = 10
    embedding_dim = 50
    embedding_out = np.zeros((x.shape[0], maxlen, embedding_dim))
    
    for ix in range(x.shape[0]):
        x[ix] = x[ix].split()
        
        for ij in range(len(x[ix])):
            try:
                embedding_out[ix][ij] = embedding_index[x[ix][ij].lower()]
            except:
                embedding_out[ix][ij] = np.zeros((50,))
            
    return embedding_out
        
    

In [12]:
train_emb = out(x_train)
test_emb = out(x_test)
print(train_emb.shape, test_emb.shape)

(132, 10, 50) (56, 10, 50)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


## LSTM architechture

In [13]:
model = Sequential()
model.add(LSTM(64, input_shape=(10, 50), return_sequences=True))
model.add(Dropout(.5))
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(.5))
model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 10, 64)            29440     
_________________________________________________________________
dropout_1 (Dropout)          (None, 10, 64)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 325       
_________________________________________________________________
activation_1 (Activat

In [14]:
y_train = to_categorical(y_train, num_classes=5)
y_test = to_categorical(y_test, num_classes=5)

In [15]:
# checkpoint = ModelCheckpoint("best_model1.h5", monitor="val_loss", verbose=True, save_best_only=True)
earlystop = EarlyStopping(monitor="val_acc", patience=10)

hist = model.fit(train_emb, y_train, epochs=100, batch_size=64, shuffle=True, validation_split=.2)

Instructions for updating:
Use tf.cast instead.
Train on 105 samples, validate on 27 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100


Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [16]:
pred = model.predict_classes(test_emb)

In [17]:
print(pred)

[4 3 2 2 2 2 3 2 2 2 1 2 0 0 1 3 2 2 3 2 0 0 4 2 3 3 2 0 3 2 0 1 3 2 0 3 2
 3 2 2 2 0 0 0 2 0 2 2 3 1 3 0 3 2 2 3]


In [18]:
# model.load_weights("best_model1.h5")

In [19]:
model.evaluate(test_emb, y_test)



[1.9841623306274414, 0.6071428486279079]

In [20]:
for i in range(10):
    print(" ".join(x_test[i]))
    print(emoji.emojize(emoji_dict[str(np.argmax(y_test[i]))]))
    print(emoji.emojize(emoji_dict[str(pred[i])]))

I want to eat
🍴
🍴
he did not answer
😞
😞
he got a raise
😃
😃
she got me a present
❤️
😃
ha ha ha it was so funny
😃
😃
he is a good friend
❤️
😃
I am upset
❤️
😞
We had such a lovely dinner tonight
❤️
😃
where is the food
🍴
😃
Stop making this joke ha ha ha
😃
😃
