## Emoji Predictor

In [41]:
import emoji


In [70]:
#gives dictionary where name of the emoji is key and the value is the emoji
emoji.EMOJI_UNICODE

{':1st_place_medal:': '🥇',
 ':2nd_place_medal:': '🥈',
 ':3rd_place_medal:': '🥉',
 ':AB_button_(blood_type):': '🆎',
 ':ATM_sign:': '🏧',
 ':A_button_(blood_type):': '🅰',
 ':Afghanistan:': '🇦🇫',
 ':Albania:': '🇦🇱',
 ':Algeria:': '🇩🇿',
 ':American_Samoa:': '🇦🇸',
 ':Andorra:': '🇦🇩',
 ':Angola:': '🇦🇴',
 ':Anguilla:': '🇦🇮',
 ':Antarctica:': '🇦🇶',
 ':Antigua_&_Barbuda:': '🇦🇬',
 ':Aquarius:': '♒',
 ':Argentina:': '🇦🇷',
 ':Aries:': '♈',
 ':Armenia:': '🇦🇲',
 ':Aruba:': '🇦🇼',
 ':Ascension_Island:': '🇦🇨',
 ':Australia:': '🇦🇺',
 ':Austria:': '🇦🇹',
 ':Azerbaijan:': '🇦🇿',
 ':BACK_arrow:': '🔙',
 ':B_button_(blood_type):': '🅱',
 ':Bahamas:': '🇧🇸',
 ':Bahrain:': '🇧🇭',
 ':Bangladesh:': '🇧🇩',
 ':Barbados:': '🇧🇧',
 ':Belarus:': '🇧🇾',
 ':Belgium:': '🇧🇪',
 ':Belize:': '🇧🇿',
 ':Benin:': '🇧🇯',
 ':Bermuda:': '🇧🇲',
 ':Bhutan:': '🇧🇹',
 ':Bolivia:': '🇧🇴',
 ':Bosnia_&_Herzegovina:': '🇧🇦',
 ':Botswana:': '🇧🇼',
 ':Bouvet_Island:': '🇧🇻',
 ':Brazil:': '🇧🇷',
 ':British_Indian_Ocean_Territory:': '🇮🇴',
 ':British_Virgin_Is

In [71]:
#emoji dictionary where keep the description of some emojis
emoji_dictionary = {"0": "\u2764\uFE0F",    # :heart: prints a black instead of red heart depending on the font
                    "1": ":baseball:",
                    "2": ":beaming_face_with_smiling_eyes:",
                    "3": ":downcast_face_with_sweat:",
                    "4": ":fork_and_knife:",
                   }

In [72]:
emoji.emojize(emoji_dictionary['2'])

'😁'

In [73]:
for i in emoji_dictionary.values():
    print(emoji.emojize(i))

❤️
⚾
😁
😓
🍴


## Next step : Processing a custom Dataset

In [74]:
import pandas as pd

In [75]:
import numpy as np

In [76]:
#header = none because we didn't give any header and our data is starting from the first line only
train = pd.read_csv('train_emoji.csv',header = None)
test = pd.read_csv('test_emoji.csv',header = None)

In [77]:
train.head(n=5)

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [100]:
Xtrain = train[0]
Ytrain = train[1]

In [101]:
Xtest = test[0]
Ytest = test[1]

In [80]:
print(Xtrain.shape)

(132,)


In [53]:
#checking the data
#printing the emoji based on the emoji dictionary we just created
for i in range(5):
    print(Xtrain[i],emoji.emojize(emoji_dictionary[str(Ytrain[i])]))

never talk to me again 😓
I am proud of your achievements 😁
It is the worst day in my life 😓
Miss you so much ❤️
food is life 🍴


## Next step - Converting sentences into embeddings 

In [54]:
f = open('glove.6B.50d.txt',encoding = 'utf-8')

In [55]:
embeddings = {}
ct = 0
#glove vector has words followed by their embedding vector
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.array(values[1:],dtype = 'float32')
#     print(word,coefs)
    embeddings[word] = coefs
f.close()

## Next step - Converting sentences into vectors(Embedding Layer Output)  

In [81]:
#function to get embeddings
def embedding_output(X):
    #maxlen for each sentence will not be more than 10, after all they are emojis
    maxlen = 10
    output_embed = np.zeros((X.shape[0],maxlen,50)) #since each embedding vector is of length 50 units
    for ix in range(X.shape[0]):
        X[ix] = X[ix].split()
        for ij in range(len(X[ix])):
            output_embed[ix][ij] = embeddings[X[ix][ij].lower()]


    return output_embed


In [82]:
emb_Xtrain = embedding_output(Xtrain)
emb_Xtest = embedding_output(Xtest)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [84]:
print(emb_Xtrain.shape)
print(emb_Xtest.shape)

(132, 10, 50)
(56, 10, 50)


## Next Step - Defining RNN/LSTM Model Architecture 

In [105]:
from keras.layers import *
from keras.models import Sequential
model = Sequential()
#Stacked LSTM layer
#return_sequences is made true so that this LSTM layer can feed data into the next LSTM layer
model.add(LSTM(64,input_shape=(10,50),return_sequences=True))
model.add(Dropout(0.4))
model.add(LSTM(64,input_shape=(10,50)))
model.add(Dropout(0.3))
model.add(Dense(5))
model.add(Activation('softmax'))
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 10, 64)            29440     
_________________________________________________________________
dropout_3 (Dropout)          (None, 10, 64)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_4 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 325       
_________________________________________________________________
activation_2 (Activation)    (None, 5)                 0         
Total params: 62,789
Trainable params: 62,789
Non-trainable params: 0
__________________________________________________

In [106]:
model.compile(optimizer = 'adam',loss = 'categorical_crossentropy',metrics = ['acc'])

In [107]:
#before we move ahead, we should convert y into one hot vector form
from keras.utils import to_categorical

In [108]:
YTrain = to_categorical(Ytrain)
YTest = to_categorical(Ytest)
print(YTrain.shape)

(132, 5)


In [109]:
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping

In [110]:
#training the model
checkpoint = ModelCheckpoint('best_model.h5',monitor = 'val_loss',save_best_only=True)
hist = model.fit(emb_Xtrain,YTrain,batch_size=32,epochs = 100, validation_split = 0.2,callbacks = [checkpoint])

Train on 105 samples, validate on 27 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Ep

In [111]:
model.load_weights('best_model.h5')

In [112]:
model.evaluate(emb_Xtest,YTest)



[1.060029889856066, 0.6785714030265808]

In [113]:
pred = model.predict_classes(emb_Xtest)

In [115]:
# now we see how many emojis we can get right
for i in range(30):
    print(' '.join(Xtest[i]))
    print(emoji.emojize(emoji_dictionary[str(np.argmax(YTest[i]))]))
    print(emoji.emojize(emoji_dictionary[str(pred[i])]))

I want to eat
🍴
🍴
he did not answer
😓
😓
he got a raise
😁
😓
she got me a present
❤️
😓
ha ha ha it was so funny
😁
😁
he is a good friend
❤️
😁
I am upset
❤️
⚾
We had such a lovely dinner tonight
❤️
😁
where is the food
🍴
🍴
Stop making this joke ha ha ha
😁
😁
where is the ball
⚾
⚾
work is hard
😓
😁
This girl is messing with me
😓
❤️
are you serious ha ha
😁
😓
Let us go play baseball
⚾
⚾
This stupid grader is not working
😓
😓
work is horrible
😓
😁
Congratulation for having a baby
😁
😁
stop messing around
😓
😓
any suggestions for dinner
🍴
🍴
I love taking breaks
❤️
❤️
you brighten my day
😁
😁
I boiled rice
🍴
🍴
she is a bully
😓
😓
Why are you feeling bad
😓
😓
I am upset
😓
⚾
I worked during my birthday
😓
😁
My grandmother is the love of my life
❤️
❤️
enjoy your break
😁
😓
valentine day is near
❤️
😁


In [116]:
from keras.models import model_from_json

In [117]:
with open('best_model.json','w') as file:
    file.write(model.to_json())
model.save_weights('model.h5')

In [118]:
with open('best_model.json','r') as file:
    model = model_from_json(file.read())
model.load_weights('model.h5')