In [None]:
# Import the packages and check that the emoji package renders aliases
import numpy as np
import pandas as pd
import emoji
# Smoke test: emojize() should expand the ':thumbs_up:' alias in the message.
sample_message = 'Python is :thumbs_up:'
print(emoji.emojize(sample_message))

In [None]:
# Class label -> emoji lookup. Keys are the labels as strings; label "0" is
# written as raw code points, the other labels as aliases that are passed to
# emoji.emojize() for rendering.
emoji_dictionary = dict([
    ("0", "\u2764\uFE0F"),
    ("1", ":baseball:"),
    ("2", ":beaming_face_with_smiling_eyes:"),
    ("3", ":downcast_face_with_sweat:"),
    ("4", ":fork_and_knife:"),
])

# Render every entry once so a bad alias is visible immediately.
for alias in emoji_dictionary.values():
    rendered = emoji.emojize(alias)
    print(rendered)


In [None]:
# Explore the data.
# Both splits are read with header=None, so columns are labelled 0, 1, ...
train = pd.read_csv('dataset/train_emoji.csv', header=None)
test = pd.read_csv('dataset/test_emoji.csv', header=None)

# Preview the first ten training rows: column 0 next to the emoji that the
# value in column 1 maps to (the dictionary keys are strings, hence str()).
data = train.values
for row_idx in range(10):
    sentence, label = data[row_idx][0], data[row_idx][1]
    print(sentence, emoji.emojize(emoji_dictionary[str(label)]))


In [None]:
# Load the GloVe embedding table into a dict: token -> float32 vector.
# Each line is split on whitespace; the first field is the token and the
# remaining fields are parsed as the vector components.
embeddings = {}
with open('glove.6B.50d.txt', 'r', encoding='utf-8') as glove_file:
    for record in glove_file:
        fields = record.split()
        embeddings[fields[0]] = np.asarray(fields[1:], dtype='float32')

# Sanity check: show the vector stored for 'i' and its length.
sample_vector = embeddings['i']
print(sample_vector)
print(len(sample_vector))

In [None]:
# Create input and output data
from tensorflow.keras.utils import to_categorical

# Column 0 is the text that gets embedded later; column 1 is the class label.
XT = train[0]
Xt = test[0]

# Pin the one-hot width to the number of known emoji classes. Without
# num_classes, to_categorical infers the width from the largest label present
# in each call, so a split that lacks the highest label would produce a
# narrower matrix than the other split and than the model's output layer.
NUM_CLASSES = len(emoji_dictionary)
YT = to_categorical(train[1], num_classes=NUM_CLASSES)
Yt = to_categorical(test[1], num_classes=NUM_CLASSES)

print(XT.shape, Xt.shape, YT.shape, Yt.shape)

In [None]:
# Convert the sentences into vectors using GloVe
def get_embedding_output(X, maxlen=10, embedding_dim=50, embedding_index=None):
    """Convert sentences into a zero-padded array of word vectors.

    Each sentence is split on whitespace, every token is lower-cased and
    looked up in the embedding table, and the resulting vectors are written
    into consecutive time steps. Positions past the end of a sentence, and
    tokens missing from the table, stay all-zero. Tokens beyond ``maxlen``
    are dropped.

    The input is never modified, so the function can be called repeatedly on
    the same data.

    Args:
        X: Iterable of sentence strings (e.g. a pandas Series or a list).
        maxlen: Number of time steps per sentence in the output.
        embedding_dim: Length of each word vector.
        embedding_index: Mapping from lower-cased token to vector. When
            omitted, the notebook-level ``embeddings`` dict is used.

    Returns:
        A float64 numpy array of shape ``(len(X), maxlen, embedding_dim)``.
    """
    if embedding_index is None:
        # Default to the table built by the GloVe-loading cell.
        embedding_index = embeddings

    sentences = list(X)
    embedding_output = np.zeros((len(sentences), maxlen, embedding_dim))
    for ix, sentence in enumerate(sentences):
        # Tokenise into a local list (the caller's data is left untouched) and
        # truncate so long sentences cannot index past the time axis.
        tokens = sentence.split()[:maxlen]
        for ij, token in enumerate(tokens):
            vector = embedding_index.get(token.lower())
            if vector is not None:
                embedding_output[ix, ij] = vector
            # Out-of-vocabulary tokens keep the zero vector from np.zeros.
    return embedding_output
# Embed the training split first, then the test split, and report both shapes.
emb_XT, emb_Xt = get_embedding_output(XT), get_embedding_output(Xt)
print(emb_XT.shape, emb_Xt.shape)

In [None]:
#Create the Model Architecture
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *

# Two stacked LSTM layers over inputs of shape (10, 50), each followed by
# dropout, ending in a 5-unit dense layer with softmax.
# The input shape is declared with an explicit Input object instead of the
# legacy `input_shape=` layer argument; Input is provided by the wildcard
# import from tensorflow.keras.layers above.
model = Sequential([
    Input(shape=(10, 50)),
    # return_sequences=True so the second LSTM receives the full sequence.
    LSTM(64, return_sequences=True),
    Dropout(0.4),
    LSTM(64, return_sequences=False),
    Dropout(0.3),
    Dense(5),
    Activation('softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


In [None]:
# Train the model.
# The compile call repeats the settings used when the model was built.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# `hist` keeps the per-epoch metrics that the plotting cell reads.
hist = model.fit(
    emb_XT,
    YT,
    batch_size=32,
    epochs=50,
    validation_split=0.1,
    shuffle=True,
)

In [None]:
# Evaluate the model on the embedded test split.
# Left as a bare expression so the notebook displays the returned metrics.
model.evaluate(x=emb_Xt, y=Yt)

In [None]:
# Visualize model metrics
import matplotlib.pyplot as plt

# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 in favour of
# 'seaborn-v0_8' and later removed; pick whichever name this installation has.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

# One figure per metric, each showing the training curve next to the
# validation curve recorded in hist.history under the 'val_' prefix.
for title, metric in (('Accuracy', 'accuracy'), ('Loss', 'loss')):
    fig, ax = plt.subplots()
    ax.set_title(title)
    ax.plot(hist.history[metric], label=metric)
    ax.plot(hist.history['val_' + metric], label='val_' + metric)
    ax.set_xlabel('epoch')
    ax.set_ylabel(metric)
    ax.legend()
    plt.show()
