In [44]:
import numpy as np
import emoji
import matplotlib.pyplot as plt
import csv

%matplotlib inline

In [45]:


X_train, y_train, X_test, y_test = ([] for i in range(4))

with open('emoji_train.csv') as c:
    csvReader = csv.reader(c)
    for line in csvReader:
        X_train.append(line[0])
        y_train.append(line[1])

with open('emoji_test.csv') as c:
    csvReader = csv.reader(c)
    for line in csvReader:
        X_test.append(line[0])
        y_test.append(line[1])

X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train, dtype=int)
y_test = np.asarray(y_test, dtype=int)




In [46]:
max_len = len(max(X_train, key=len).split())

emoji_dict = {"0": "\u2764\uFE0F",   
              "1": ":baseball:",
              "2": ":smile:",
              "3": ":disappointed:",
              "4": ":fork_and_knife:"}




In [47]:
#y_train = np.eye(5)[y_train.reshape(-1)]
#y_test = np.eye(5)[y_test.reshape(-1)]





In [48]:

word_vec_dict = {}

with open('glove.6B.50d.txt','r', encoding='utf-8') as f:
    vocab = set()
    
    for line in f:
        line = line.strip().split()
        vocab.add(line[0])
        word_vec_dict[line[0]] = np.array(line[1:], dtype=np.float64)
        
        word_dict = {}
        inverse_word_dict = {}
        
    idx = 1
    for word in sorted(vocab):
            word_dict[idx] = word
            inverse_word_dict[word] = idx
            idx += 1

                    

In [57]:
def model(X, y, word_vec_dict, num_iterations, learning_rate):
    
    np.random.seed(1)
    
    num_examples = y.shape[0]
    ou_size = 5
    h_size = 50
    
    w = np.random.randn(ou_size, h_size) / np.sqrt(h_size)
    b = np.zeros((ou_size,))
    
    y_onehot = np.eye(ou_size)[y.reshape(-1)]
    
    
    for i in range(num_iterations):
        for j in range(num_examples):
            words = X[j].lower().split()
            sent_vec = np.zeros(word_vec_dict[words[0]].shape)
            for word in words:
                sent_vec += word_vec_dict[word]
            sent_vec /= len(words)
            
            z = np.matmul(w, sent_vec) + b
            
            expn = np.exp(z - np.max(z))
            a = expn / expn.sum(axis=0)
            
            cost = -np.sum(y_onehot[j] * np.log(a))
            
            dz = a - y_onehot[j]
            dw = np.dot(dz.reshape(ou_size, 1), sent_vec.reshape(1, h_size))
            db = dz
            
            w = w - learning_rate * dw
            b = b - learning_rate * db
            
            
        if i % 100 == 0:
            print('Cost after iteration %d: %f' % (i, cost))
            y_predict = predict(X, y, w, b, word_vec_dict)

                
            
    
    
    return y_predict, w, b

In [62]:
def predict(X, y, w, b, word_vec_dict):
    
    
    num_examples = X.shape[0]
    y_predict = np.zeros((num_examples, 1))
    for j in range(num_examples):
        words = X[j].lower().split()
        sent_vec = np.zeros(word_vec_dict[words[0]].shape)
        for word in words:
            sent_vec += word_vec_dict[word]
        sent_vec /= len(words)
            
        z = np.matmul(w, sent_vec) + b
            
        expn = np.exp(z - np.max(z))
        a = expn / expn.sum(axis=0)
                    
        y_predict[j] = np.argmax(a)
                    
    print('accuracy' + str(np.mean((y_predict[:] == y.reshape(y.shape[0],1)[:]))))
    return y_predict, w, b

In [68]:
pred, w, b = model(X_train, y_train, word_vec_dict, num_iterations=400, learning_rate=0.01)


Cost after iteration 0: 2.229326
accuracy0.333333333333
Cost after iteration 100: 0.140127
accuracy0.937984496124
Cost after iteration 200: 0.062608
accuracy0.953488372093
Cost after iteration 300: 0.041340
accuracy0.968992248062


In [71]:
y_predict = predict(X_train, y_train, w, b, word_vec_dict)

y_predict = predict(X_test, y_test, w, b, word_vec_dict)


accuracy0.976744186047
accuracy0.607843137255


In [75]:
X_my_sentences = np.array(["i adore you", "i love you", "funny lol", "lets play with a ball", "food is ready", "not feeling happy"])
Y_my_labels = np.array([[0], [0], [2], [1], [4],[3]])

pred = predict(X_my_sentences, Y_my_labels , w, b, word_vec_dict)

for i in range(X_my_sentences.shape[0]):
        print(X_my_sentences[i], emoji.emojize(emoji_dict[str(int(pred[i]))], use_aliases=True))


accuracy1.0
i adore you ❤️
i love you ❤️
funny lol 😄
lets play with a ball ⚾
food is ready 🍴
not feeling happy 😞
