# Embedding vector arithmetic

In [1]:
from enum import Enum
from typing import List
from sentence_transformers import SentenceTransformer
import numpy as np

# Vocabulary used throughout the notebook. Each member's value is an explicit
# integer; other cells use it as a positional index into the list of
# embedding vectors, so names and values must stay as declared here.
Words = Enum(
    "Words",
    {
        "king": 0,
        "man": 1,
        "woman": 2,
        "queen": 3,
        "philosophy": 4,
    },
)

# Words to embed, taken from the Enum in declaration order so that
# `Words.<member>.value` is also the index of that word's vector in
# `embeddings` (a hand-written copy of the names could drift out of sync).
words: List[str] = [w.name for w in Words]

# One embedding vector (numpy array) per entry of `words`, in the same order.
embeddings: List[np.ndarray] = []

# Load a pre-trained model for embeddings
model = SentenceTransformer('flax-sentence-embeddings/all_datasets_v4_MiniLM-L6') # 65MB in RAM?

for word in words:
    # Encode each word on its own; the result is stored as a numpy array.
    embedding = model.encode(word)
    embeddings.append(np.array(embedding))

# Analogy vector: king - man + woman, computed element-wise on the embeddings.
queen_res = embeddings[Words.king.value] - embeddings[Words.man.value] + embeddings[Words.woman.value]
# Show only the first 25 components to keep the output short.
print(queen_res[:25])

[ 0.00448464  0.01810506 -0.06230397  0.15338692 -0.11522211  0.11418289
  0.07872835  0.0495244   0.10117409  0.00895918  0.00329942 -0.18080738
 -0.03512578 -0.10034303 -0.06474461  0.10942298  0.04074675  0.09414344
 -0.12549578 -0.04378415 -0.011129   -0.11219814 -0.09034315  0.12971823
 -0.16390741]


In [2]:
from numpy import dot
from numpy.linalg import norm

def cosine_similarity(vec_a, vec_b):
    '''Compute the cosine similarity between two 1-D vectors.

    Args:
        vec_a: First vector (array-like of numbers).
        vec_b: Second vector, same length as ``vec_a``.

    Returns:
        ``dot(vec_a, vec_b) / (norm(vec_a) * norm(vec_b))``, a value in
        [-1, 1] up to floating-point rounding. If either vector has zero
        norm (all zeros or empty) the similarity is undefined; ``0.0`` is
        returned instead of dividing by zero and producing ``nan``.
    '''
    denominator = norm(vec_a) * norm(vec_b)
    # Guard against 0/0, which would otherwise yield nan plus a RuntimeWarning.
    if denominator == 0:
        return 0.0
    return dot(vec_a, vec_b) / denominator

In [3]:
# Compare the analogy vector (king - man + woman) with the embedding of "queen".
cosine_similarity(queen_res, embeddings[Words.queen.value])

0.57947886

In [4]:
import numpy as np
import pandas as pd
from numpy.linalg import norm

# New list holding every word embedding followed by the analogy vector;
# the original `embeddings` list is left untouched.
_embeddings = [*embeddings, queen_res]
n = len(_embeddings)

# Fill an n x n table with the cosine similarity of every pair of vectors.
similarity_matrix = np.zeros((n, n))
for i, vec_i in enumerate(_embeddings):
    for j, vec_j in enumerate(_embeddings):
        similarity_matrix[i, j] = cosine_similarity(vec_i, vec_j)

# Display results in a table: one row/column per word, plus the analogy vector
index_labels = [*(w.name for w in Words), "queen bis"]
similarity_df = pd.DataFrame(similarity_matrix, index=index_labels, columns=index_labels)
similarity_df

Unnamed: 0,king,man,woman,queen,philosophy,queen bis
king,1.0,0.321646,0.263995,0.680713,0.17725,0.630572
man,0.321646,1.0,0.325679,0.254117,0.177514,-0.235992
woman,0.263995,0.325679,1.0,0.439399,0.259879,0.627873
queen,0.680713,0.254117,0.439399,1.0,0.161236,0.579479
philosophy,0.17725,0.177514,0.259879,0.161236,1.0,0.173721
queen bis,0.630572,-0.235992,0.627873,0.579479,0.173721,1.0
