In [11]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import emoji
from tensorflow.keras.layers import Dense, Dropout, Input, LSTM, Activation, Embedding
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.initializers import glorot_uniform

In [2]:
def read_glove_vecs(glove_file):
    with open(glove_file, 'r', encoding='utf8') as f:
        words = set()
        word_to_vec_map = {}
        for line in f:
            line = line.strip().split()
            curr_word = line[0]
            words.add(curr_word)
            word_to_vec_map[curr_word] = np.array(line[1:], dtype=np.float64)
        
        i = 1
        words_to_index = {}
        index_to_words = {}
        for w in sorted(words):
            words_to_index[w] = i
            index_to_words[i] = w
            i = i + 1
    return words_to_index, index_to_words, word_to_vec_map

In [3]:
word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('glove.6B.50d.txt')

In [4]:
df = pd.read_csv('train_emoji.csv', header=None, usecols=[0,1])
dataset= np.array(df)
phrase, emoji = list(), list()
for p, e in dataset:
    phrase.append(p)
    emoji.append(e)


In [5]:
def read_csv(filename):
    phrase , emoji = list(), list()
    df = pd.read_csv(filename, header=None, usecols=[0,1])
    dataset= np.array(df)
    phrase, emoji = list(), list()
    for p, e in dataset:
        if '\t' in p:
            p = p[:-1]
        phrase.append(p)
        emoji.append(e)
    X = np.array(phrase)
    y = np.array(emoji, dtype=int)
    return X,y

In [6]:
emoji_dictionary = {"0": "\u2764\uFE0F",    # :heart: prints a black instead of red heart depending on the font
                    "1": ":baseball:",
                    "2": ":smile:",
                    "3": ":disappointed:",
                    "4": ":fork_and_knife:"}

In [7]:
def label_to_emoji(val):
    return emoji.emojize(emoji_dictionary[str(val)], use_aliases=True)

In [8]:
X_train, Y_train = read_csv('train_emoji.csv')
X_test, Y_test = read_csv('tesss.csv')

In [9]:
maxLen = len(max(X_train, key=len).split())
print(maxLen)

10


In [12]:
for idx in range(10):
    print(X_train[idx] , label_to_emoji(Y_train[idx]))

never talk to me again 😞
I am proud of your achievements 😄
It is the worst day in my life 😞
Miss you so much ❤️
food is life 🍴
I love you mum ❤️
Stop saying bullshit 😞
congratulations on your acceptance 😄
The assignment is too long  😞
I want to go play ⚾


In [13]:
def sentences_to_indices(X, word_to_index, max_len):
    m = X.shape[0]
    X_indices = np.zeros((m, max_len))
    
    for i in range(m):
        sentence_words = X[i].lower().split()
        j = 0
        for w in sentence_words:
            X_indices[i,j] = word_to_index[w]
            j+=1
    return X_indices

In [14]:
def convert_to_one_hot(Y,C):
    Y = np.eye(C)[Y.reshape(-1)]
    return Y

In [15]:
def pretrained_embedding_layer(word_to_vec_map, word_to_index):    
    vocab_len = len(word_to_index) + 1                  # adding 1 to fit Keras embedding (requirement)
    emb_dim = word_to_vec_map["cucumber"].shape[0]      # define dimensionality of your GloVe word vectors (= 50)
    emb_matrix = np.zeros((vocab_len, emb_dim))
    
    for word, idx in word_to_index.items():
        emb_matrix[idx, :] = word_to_vec_map[word]

    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)

    embedding_layer.build((None,)) 
    embedding_layer.set_weights([emb_matrix])
    
    return embedding_layer

In [16]:
def define_model(input_shape, word_to_vec_map, word_to_index):
    sentence_indices = Input(shape=input_shape, dtype='int32')
    embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)
    embeddings = embedding_layer(sentence_indices)
    X = LSTM(128, return_sequences=True)(embeddings)
    X = Dropout(rate=0.5)(X)
    X = LSTM(128, return_sequences=False)(X)
    X = Dropout(rate=0.5)(X)
    X = Dense(units=5, activation='softmax')(X)
    X = Activation('softmax')(X)

    model = Model(sentence_indices,X)
    
    return model

In [17]:
model = define_model((maxLen,), word_to_vec_map, word_to_index)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 10)]              0         
_________________________________________________________________
embedding (Embedding)        (None, 10, 50)            20000050  
_________________________________________________________________
lstm (LSTM)                  (None, 10, 128)           91648     
_________________________________________________________________
dropout (Dropout)            (None, 10, 128)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense (Dense)                (None, 5)                 645   

In [18]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [19]:
X_train_indices = sentences_to_indices(X_train, word_to_index, maxLen)
Y_train_oh = convert_to_one_hot(Y_train, C = 5)

In [20]:
history = model.fit(X_train_indices, Y_train_oh, epochs = 60, batch_size = 32, shuffle=True)

Train on 188 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [21]:
X_test_indices = sentences_to_indices(X_test, word_to_index, max_len = maxLen)
Y_test_oh = convert_to_one_hot(Y_test, C = 5)
loss, acc = model.evaluate(X_test_indices, Y_test_oh,verbose=2)
print()
print("Test accuracy = ", acc)

56/1 - 1s - loss: 0.9049 - accuracy: 1.0000

Test accuracy =  1.0


In [67]:
# This code allows you to see the mislabelled examples
C = 5
y_test_oh = np.eye(C)[Y_test.reshape(-1)]
X_test_indices = sentences_to_indices(X_test, word_to_index, maxLen)
pred = model.predict(X_test_indices)
for i in range(10):
    x = X_test_indices
    num = np.argmax(pred[i])
    print('Expected emoji:'+ label_to_emoji(Y_test[i]) + ' prediction: '+ X_test[i] + label_to_emoji(num).strip())

Expected emoji:🍴 prediction: I want to eat🍴
Expected emoji:😞 prediction: he did not answer😞
Expected emoji:😄 prediction: he got a very nice raise😄
Expected emoji:😄 prediction: she got me a nice present😄
Expected emoji:😄 prediction: ha ha ha it was so funny😄
Expected emoji:😄 prediction: he is a good friend😄
Expected emoji:😞 prediction: I am upset😞
Expected emoji:😄 prediction: We had such a lovely dinner tonight😄
Expected emoji:🍴 prediction: where is the food🍴
Expected emoji:😄 prediction: Stop making this joke ha ha ha😄


In [68]:
# Change the sentence below to see your prediction. Make sure all the words are in the Glove embeddings.  
x_test = np.array(['I am good man'])
print(x_test.shape)
X_test_indices = sentences_to_indices(x_test, word_to_index, maxLen)
print(x_test[0] +' '+  label_to_emoji(np.argmax(model.predict(X_test_indices))))

(1,)
I am good man 😄


In [92]:
def emojify(sentences):
    sentences = sentences.split('.')
    out = []
    for s in sentences:
        s_arr = np.array([s])
        s_indices = sentences_to_indices(s_arr, word_to_index, maxLen)
        out.append( s+ ' ' + label_to_emoji(np.argmax(model.predict(s_indices))))
    out = '.'.join(out) + '.'
    return out
    
    

In [93]:
sentences = 'this is so good man. I love You. I do not want to joke. I wanna play right now. I am hungry'
emojify(sentences)

'this is so good man 😄. I love You ❤️. I do not want to joke 😞. I wanna play right now ⚾. I am hungry 🍴.'