In [1]:
import numpy as np
from numpy import ndarray
import tensorflow as tf
import pandas as pd
from my_utils import read_glove_vec, softmax, xavier_init, read_csv, print_predictions, label_to_emoji
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM, Dropout, Dense, Activation

%load_ext autoreload
%autoreload 2
%matplotlib inline


In [2]:
X, Y = read_csv('./data/train_emoji.csv')
X_test, Y_test = read_csv('./data/tesss.csv')

In [3]:
maxLen = len(max(X, key=lambda x: len(x.split()) ).split())

In [4]:
print(X.shape, Y.shape)

(132,) (132,)


## Emojifier - V1
In the first half of this notebook is to replicate the model below. 

Then we try to train and evaluate the model.

<img src='./images/image_1.png'>

In [5]:
word_to_vec, word_to_index, index_to_word = read_glove_vec('./data/glove.6B.50d.txt')

In [6]:
emb_features = list(word_to_vec.values())[0].shape[0]
print('Number of embedding features:',emb_features)

Number of embedding features: 50


In [7]:
def sentence_to_embeddings(sentence: str, word_to_vec: dict): 
    """_summary_

    Args:
        sentence (str): a target sentence
        word_to_vec (dict of ndarray): a dictionary with keys are words, each value has shape (emb_features,)
        
    Returns:
        embeddings (ndarray, (n_words, emb_features))
    """
    words = sentence.lower().split()
    embeddings = np.array([word_to_vec[w] for w in words])
    
    return embeddings

In [8]:
# unit test 
embeddings = sentence_to_embeddings('Hello friend tram', word_to_vec)
assert embeddings.shape == (3, emb_features), 'Wrong shape for embeddings'

In [9]:
def avg_embeddings(sentence: str, word_to_vec: dict): 
    """_summary_

    Args:
        sentence (str): a target sentence
        word_to_vec (dict of ndarray): a dictionary with keys are words, each value has shape (emb_features,)
        
    Returns:
        avg (ndarray, (1, emb_features))
    """
    embeddings = sentence_to_embeddings(sentence, word_to_vec)
    n_words, emb_features = embeddings.shape
    
    avg = np.zeros((1, emb_features))
    # Sum all e
    for e in embeddings: 
        avg = avg + e
    # Take average
    avg = avg / n_words
    return avg

In [10]:
avg = avg_embeddings('Hello friend tram', word_to_vec)
assert avg.shape == (1, emb_features), 'Wrong shape for avg'

In [11]:
np.random.seed(1)
W, b = xavier_init(5, emb_features)
parameters = {}
parameters['W'] = W
parameters['b'] = b

In [12]:
def predict(sentence: str, word_to_vec: dict, parameters: dict): 
    """_summary_

    Args:
        sentence (str): a target sentence
        word_to_vec (dict of ndarray): a dictionary with keys are words, each value has shape (emb_features,)
        
    Returns:
        a (ndarray, (n_classes, m))
    """
    avg = avg_embeddings(sentence, word_to_vec)
    W, b = parameters['W'], parameters['b']
    z = np.dot(W, avg.T) + b
    
    a = softmax(z)
    return a, avg

In [13]:
a, avg = predict('Hello friend tram', word_to_vec, parameters)
assert a.shape == (b.shape[0], 1), 'Wrong shape for a'

In [14]:
def convert_to_one_hot(Y: ndarray): 
    """_summary_

    Args:
        Y (m,): _description_

    Returns:
        Y (n_classes, m)
    """
    return np.array(tf.one_hot(Y, depth=len(np.unique(Y)))).T 

In [15]:
def train_model(X: ndarray[str], Y: ndarray, n_iters: int, learning_rate: float): 
    """_summary_

    Args:
        X (ndarray, (m,)): senetences
        Y (ndarray, (m,)): chosen emojies indices for every sentences 
        n_iters (int): number of iterations
        learning_rate (float): learning rate
    """
    m = X.shape[0]
    n_y = len(np.unique(Y))
    np.random.seed(1)
    W, b = xavier_init(n_y, emb_features)
    parameters['W'] = W
    parameters['b'] = b
    
    Y_hot = convert_to_one_hot(Y)

    for t in range(n_iters): 
        cost = 0
        dW = 0
        db = 0
        
        for i in range(m): 
            a, avg = predict(X[i], word_to_vec, parameters)
            y_i = np.expand_dims(Y_hot[:, i], axis=-1) 
            cost += -np.sum(y_i  * np.log(a)) 

            # Compute gradients
            dz = a - y_i 
            dW += np.dot(dz, avg)
            db += dz

            # Update parameters with Stochastic Gradient Descent
            parameters['W'] = parameters['W'] - learning_rate * dW
            parameters['b'] = parameters['b'] - learning_rate * db
            
        if t % 100 == 0:
            print("Epoch: " + str(t) + " --- cost = " + str(cost))
        
        
    return parameters

parameters = train_model(X, Y, n_iters=401, learning_rate=0.01)

2023-12-28 18:57:59.216514: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Max
2023-12-28 18:57:59.216533: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2023-12-28 18:57:59.216539: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2023-12-28 18:57:59.216566: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-12-28 18:57:59.216581: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch: 0 --- cost = 410.43365788314725
Epoch: 100 --- cost = 18.682235704620712
Epoch: 200 --- cost = 10.867104727024016
Epoch: 300 --- cost = 0.3004909068288898
Epoch: 400 --- cost = 0.21364235173925147


In [16]:
def compute_accuracy(X, Y): 
    """_summary_

    Args:
        X (m,): list of setences
        Y (m,): list of true labels

    Returns:
        accuracy (float): accuracy on given set
        ypred (nadarray, (m,)): model predictions
    """
    accurate_count = 0  
    m = len(X)
    ypred = []
    for i in range(m): 
        a_i, avg = predict(X[i], word_to_vec, parameters)
        ypred_i = np.argmax(a_i[:, 0])
        ypred.append(ypred_i)
        if ypred_i == int(Y[i]): 
            
            accurate_count += 1
    accuracy = accurate_count / m
    return accuracy, np.array(ypred)

In [17]:
accuracy, ypred = compute_accuracy(X_test, Y_test)

In [18]:
X_my_sentences = np.array(["i treasure you", "i love you", "funny lol", "lets play with a ball", "food is ready", "have you eaten yet"])
Y_my_labels = np.array([0, 0, 2, 1, 4, 4])

acc, pred = compute_accuracy(X_my_sentences, Y_my_labels)

### Model predictions

In [19]:
print_predictions(X_my_sentences, pred)


i treasure you ❣️
i love you ❣️
funny lol 😂
lets play with a ball ⚾
food is ready 🍴
have you eaten yet 🍴


### Actual predictions


In [20]:
print_predictions(X_my_sentences, Y_my_labels)


i treasure you ❣️
i love you ❣️
funny lol 😂
lets play with a ball ⚾
food is ready 🍴
have you eaten yet 🍴


## Keras model
This is the model that we try to replicate by `tensorflow`.

<img src='images/emojifier-v2.png'>

In [21]:
from typing import Dict
def pretrained_embedding_layer(word_to_vec: Dict[str, np.ndarray], word_to_index: Dict[str, int]):
    """_summary_

    Args:
        word_to_vec (Dict[str, np.ndarray(emb_f,)]): map word to its embedding vector
        word_to_index (Dict[str, int]): mapping from words to their indices in vocabulary
    """
    vocab_size = len(word_to_index) + 1 # adding 1 to fit Keras embedding (required)
    any_word = list(word_to_vec.keys())[0]
    emb_f = word_to_vec[any_word].shape[0]
    
    emb_matrix = np.zeros((vocab_size, emb_f))
    
    for w, i in word_to_index.items():
        emb_matrix[i, :] = word_to_vec[w]

    embedding_layer = Embedding(input_dim=vocab_size, 
                                output_dim=emb_f, 
                                trainable=False)
    
    embedding_layer.build((None,))
    
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer
    
    
        

In [22]:
def sentences_to_indices(X, word_to_index, max_len):
    """
    Converts an array of sentences (strings) into an array of indices corresponding to words in the sentences.
    The output shape should be such that it can be given to `Embedding()` (described in Figure 4). 
    
    Arguments:
    X -- array of sentences (strings), of shape (m,)
    word_to_index -- a dictionary containing the each word mapped to its index
    max_len -- maximum number of words in a sentence. You can assume every sentence in X is no longer than this. 
    
    Returns:
    X_indices -- array of indices corresponding to words in the sentences from X, of shape (m, max_len)
    """
    
    m = X.shape[0]                                   # number of training examples
    
    # Initialize X_indices as a numpy matrix of zeros and the correct shape (≈ 1 line)
    X_indices = np.zeros((m, max_len))
    
    for i in range(m):                               # loop over training examples
        
        # Convert the ith training sentence to lower case and split it into words. You should get a list of words.
        sentence_words = X[i].lower().split()
        
        # Initialize j to 0
        j = 0
        
        # Loop over the words of sentence_words
        available_word = word_to_index
        for w in sentence_words:
            # if w exists in the word_to_index dictionary
            if w in available_word:
                # Set the (i,j)th entry of X_indices to the index of the correct word.
                X_indices[i, j] = available_word[w]
                # Increment j to j + 1
                j += 1
            
    
    return X_indices

In [23]:
def Emojify_V2(input_shape, word_to_vec, word_to_index): 
    """
    Function creating the Emojify-v2 model's graph.
    
    Arguments:
    input_shape -- shape of the input, usually (max_len,)
    word_to_vec_map -- dictionary mapping every word in a vocabulary into its 50-dimensional vector representation
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    model -- a model instance in Keras
    """
    
    sentence_indices = tf.keras.Input(input_shape, dtype='int32')
    embedding_layer = pretrained_embedding_layer(word_to_vec, word_to_index)
    embeddings = embedding_layer(sentence_indices)
    
    X = LSTM(128, return_sequences=True)(embeddings)
    
    X = Dropout(0.5)(X)
    
    X = LSTM(128, return_sequences=False)(X)

    X = Dropout(0.5)(X)
    
    X = Dense(5)(X)
    
    X = Activation('softmax')(X)
    model = tf.keras.Model(inputs=sentence_indices, outputs=X)
    return model

In [24]:
model = Emojify_V2((maxLen,), word_to_vec, word_to_index)
model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10)]              0         
                                                                 
 embedding (Embedding)       (None, 10, 50)            20000050  
                                                                 
 lstm (LSTM)                 (None, 10, 128)           91648     
                                                                 
 dropout (Dropout)           (None, 10, 128)           0         
                                                                 
 lstm_1 (LSTM)               (None, 128)               131584    
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense (Dense)               (None, 5)                 645   

In [25]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [26]:
X_train_indices = sentences_to_indices(X, word_to_index, maxLen)
Y_train_oh = convert_to_one_hot(Y).T
print(X_train_indices.shape)
print(Y_train_oh.shape)

(132, 10)
(132, 5)


In [27]:
model.fit(X_train_indices, Y_train_oh, epochs = 50, batch_size = 32, shuffle=True)

Epoch 1/50


2023-12-28 18:58:02.760239: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x2968c2980>

In [28]:
X_test_indices = sentences_to_indices(X_test, word_to_index, max_len = maxLen)
Y_test_oh = convert_to_one_hot(Y_test).T
loss, acc = model.evaluate(X_test_indices, Y_test_oh)
print()
print("Test accuracy = ", acc)


Test accuracy =  0.7857142686843872


In [29]:
acc, ypred = compute_accuracy(X_test, Y_test)
print_predictions(X_test, ypred, Y_test)


Expected emoji: 🍴| I want to eat	 🍴
Expected emoji: 😞| he did not answer	 😞
Expected emoji: 😂| he got a very nice raise	 😂
Expected emoji: 😂| she got me a nice present	 😂
Expected emoji: 😂| ha ha ha it was so funny	 😂
Expected emoji: 😂| he is a good friend	 😂
Expected emoji: 😞| I am upset	 😞
Expected emoji: 😂| We had such a lovely dinner tonight	 😂
Expected emoji: 🍴| where is the food	 🍴
Expected emoji: 😂| Stop making this joke ha ha ha	 😂
Expected emoji: ⚾| where is the ball	 ⚾
Expected emoji: 😞| work is hard	 😂
Expected emoji: 😞| This girl is messing with me	 😞
Expected emoji: 😞| are you serious 😞
Expected emoji: ⚾| Let us go play baseball	 ⚾
Expected emoji: 😞| This stupid grader is not working 	 😞
Expected emoji: 😞| work is horrible	 😞
Expected emoji: 😂| Congratulation for having a baby	 😂
Expected emoji: 😞| stop pissing me off 😞
Expected emoji: 🍴| any suggestions for dinner	 🍴
Expected emoji: ❣️| I love taking breaks	 😞
Expected emoji: 😂| you brighten my day	 😂
Expected emoji: 🍴| 