## Importing Libraries and Downloading GloVe 6B

In [1]:
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip glove.6B.zip
!pip install coremltools

--2024-02-25 17:25:33--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2024-02-25 17:25:33--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2024-02-25 17:25:33--  https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


202

In [2]:
import numpy as np

import tensorflow as tf
import coremltools as ct



In [3]:
# Parse the GloVe text file into a dict mapping each token to its vector,
# stored as a plain list of Python floats.
# Each line has the form "<token> <v1> <v2> ...", separated by whitespace.
embedding_index = {}
# Read as UTF-8 explicitly so parsing does not depend on the platform's default encoding.
with open('glove.6B.50d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if not values:
            # Tolerate blank lines (e.g. a trailing newline) instead of raising IndexError.
            continue
        word = values[0]
        # Parse at float32 precision, then convert to a list; the matrix cell
        # further down rebuilds an array from these lists.
        coefs = np.asarray(values[1:], dtype='float32')
        embedding_index[word] = coefs.tolist()

## Cosine Similarity Calculation Implementation


In [4]:

# Bidirectional lookups between each vocabulary word and its position
# in embedding_index's insertion order.
word_list = embedding_index.keys()
index2word = dict(enumerate(word_list))
word2index = {token: position for position, token in index2word.items()}

In [5]:
# Sanity check: display the vocabulary position assigned to the word 'eat'.
word2index['eat']

3623

In [6]:
# Vocabulary in embedding_index insertion order; row i of the matrix belongs to words[i].
words = list(embedding_index)
embeddings_matrix = np.array(list(embedding_index.values()))
# Scale every row to unit L2 norm so that a dot product between rows is their cosine similarity.
row_norms = np.linalg.norm(embeddings_matrix, axis=1, keepdims=True)
embeddings_matrix = embeddings_matrix / row_norms

In [7]:
# Query word whose nearest neighbour is looked up in the next cell.
input_word = "eat"

In [8]:
# Reference NumPy implementation: find the vocabulary word most similar to input_word.
# Raises KeyError if input_word is not in the vocabulary.
index_of_word = word2index[input_word]

target_vector = embeddings_matrix[index_of_word]
target_vector = target_vector / np.linalg.norm(target_vector)
# Rows of embeddings_matrix are unit-length, so this dot product is the cosine similarity
# between the query word and every vocabulary word.
similarities = np.dot(embeddings_matrix, target_vector)
# word2index and words are both built from embedding_index's key order, so the row index
# found above is also the word's position in `words`; no need for a linear words.index() scan.
self_index = index_of_word
# Exclude the query word itself, which would otherwise always be the best match.
similarities[self_index] = -np.inf
closest_word_index = np.argmax(similarities)


closest_word = words[closest_word_index]
print(closest_word)

eating


In [9]:
# NOTE(review): this looks like a scratch cell exploring how indexing a symbolic
# Keras input behaves; nothing later in the visible notebook reads index_e3.
# It rebinds `index_of_word`, which held an integer row index in the previous cell,
# to a symbolic Keras tensor.
# Creating an Input here presumably also consumes one of Keras's auto-generated
# layer names (input_1, input_2, ...), shifting the name given to Inputs created
# later in the same kernel -- TODO confirm before removing this cell.
index_of_word = tf.keras.Input(shape=(1,))
# Index along the first axis of the symbolic tensor.
index_e3 = index_of_word[1]
index_e3

<KerasTensor: shape=(1,) dtype=float32 (created by layer 'tf.__operators__.getitem')>

## Create a TensorFlow model

In [10]:
import tensorflow as tf
import numpy as np

class ClosestWordIndexLayer(tf.keras.layers.Layer):
    """Score every vocabulary word against the word at a given index.

    Despite the name, the layer returns the full similarity vector rather than
    an index; the caller is expected to take the argmax.

    Args:
        embeddings_matrix: 2-D array-like of word vectors, one row per word.
            It is converted to a float32 constant. The scores are cosine
            similarities only if the rows are already unit-length, because
            only the looked-up query vector is normalised here.
        words: vocabulary list; stored on the layer but not used by ``call``.
        **kwargs: forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, embeddings_matrix, words, **kwargs):
        super().__init__(**kwargs)
        self.embeddings_matrix = tf.constant(embeddings_matrix, dtype=tf.float32)
        self.words = words

    def call(self, input_word_index):
        """Return similarity scores for each queried word index.

        Args:
            input_word_index: integer tensor of row indices into the
                embedding matrix, with a leading batch dimension.

        Returns:
            float32 tensor with one row of scores per queried index and one
            column per vocabulary word. The score of each query word against
            itself is -inf, so an argmax never returns the query word.
        """
        input_word_vector = tf.nn.embedding_lookup(self.embeddings_matrix, input_word_index)
        input_word_vector = tf.math.l2_normalize(input_word_vector, axis=-1)

        # (batch, dim) x (vocab, dim)^T -> (batch, vocab). Keeping the batch axis in front
        # lines the scores up with the one-hot mask below for any batch size; flattening a
        # (vocab, batch) product only lined up with the mask when batch == 1.
        similarities = tf.matmul(input_word_vector, self.embeddings_matrix, transpose_b=True)

        # -inf at each query's own position, 0 elsewhere.
        self_mask = tf.one_hot(input_word_index, depth=tf.shape(self.embeddings_matrix)[0], on_value=-np.inf, off_value=0.0, dtype=tf.float32)
        return similarities + self_mask

# Wrap the layer in a functional Keras model
def create_model(embeddings_matrix, words):
    """Build a Keras model that maps a word index to its similarity scores.

    Args:
        embeddings_matrix: word-vector matrix handed to ClosestWordIndexLayer.
        words: vocabulary list handed to ClosestWordIndexLayer.

    Returns:
        A tf.keras.Model taking one int32 index per batch element and
        returning the output of ClosestWordIndexLayer.
    """
    index_input = tf.keras.Input(shape=(), dtype=tf.int32)
    similarity_layer = ClosestWordIndexLayer(embeddings_matrix, words)
    return tf.keras.Model(inputs=index_input, outputs=similarity_layer(index_input))

model = create_model(embeddings_matrix, words)


In [11]:
# Smoke-test the model on the query word. Look the index up instead of hardcoding
# 3623, so the call stays correct if the vocabulary or the query word changes.
model(np.array([word2index[input_word]]))

<tf.Tensor: shape=(1, 400000), dtype=float32, numpy=
array([[ 0.3161376 ,  0.42795926,  0.4791656 , ..., -0.01428229,
        -0.17280248, -0.60417926]], dtype=float32)>

## Convert the TensorFlow model to a CoreML model



In [12]:
# Convert the Keras model to Core ML.
# The input name is read from the model rather than hardcoded: Keras auto-generates
# it (input_1, input_2, ...) based on how many Input layers the kernel has created
# so far, so a literal such as "input_2" breaks when cells are re-run or reordered.
iOS_find_word = ct.convert(
    model,
    source="tensorflow",
    # One int32 word index per prediction.
    inputs=[ct.TensorType(name=model.inputs[0].name, dtype=np.int32, shape=(1,))],
    outputs=[ct.TensorType(name="Identity", dtype=np.float32)],
    minimum_deployment_target=ct.target.macOS13
)

Running TensorFlow Graph Passes: 100%|██████████| 6/6 [00:01<00:00,  3.62 passes/s]
Converting TF Frontend ==> MIL Ops: 100%|██████████| 20/20 [00:00<00:00, 1572.20 ops/s]
Running MIL frontend_tensorflow2 pipeline: 100%|██████████| 7/7 [00:00<00:00, 3209.11 passes/s]
Running MIL default pipeline: 100%|██████████| 71/71 [00:01<00:00, 51.62 passes/s]
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 3278.93 passes/s]


In [13]:
# Write the converted model to iOS_find_wordV2.mlpackage, relative to the current working directory.
iOS_find_word.save('iOS_find_wordV2.mlpackage')

In [14]:
!zip -r /content/iOS_find_wordV2.mlpackage.zip /content/iOS_find_wordV2.mlpackage

  adding: content/iOS_find_wordV2.mlpackage/ (stored 0%)
  adding: content/iOS_find_wordV2.mlpackage/Manifest.json (deflated 60%)
  adding: content/iOS_find_wordV2.mlpackage/Data/ (stored 0%)
  adding: content/iOS_find_wordV2.mlpackage/Data/com.apple.CoreML/ (stored 0%)
  adding: content/iOS_find_wordV2.mlpackage/Data/com.apple.CoreML/model.mlmodel (deflated 78%)
  adding: content/iOS_find_wordV2.mlpackage/Data/com.apple.CoreML/weights/ (stored 0%)
  adding: content/iOS_find_wordV2.mlpackage/Data/com.apple.CoreML/weights/weight.bin (deflated 8%)
