# Vector Embeddings demo - partee
Reference article: <https://partee.io/2022/08/11/vector-embeddings/>

In [1]:
import numpy as np

from numpy.linalg import norm
from sentence_transformers import SentenceTransformer

In [2]:

# Name of the pretrained checkpoint to load (fetched automatically if missing)
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Build the sentence encoder used by the cells below
model = SentenceTransformer(MODEL_NAME)

In [3]:

# Small example dataset: the sentences we will search over
sentences = [
    "That is a very happy person",
    "That is a happy dog",
    "Today is a sunny day",
]

In [4]:
# text of the search query, embedded separately from the dataset
query_text = "That is a happy person"

# encode every dataset sentence into its vector embedding
embeddings = model.encode(sentences)

# encode the query into its own vector embedding
query_embedding = model.encode(query_text)

In [5]:
# show the dataset embeddings (the output below has one row per entry in `sentences`)
embeddings

array([[-0.00248317,  0.09151708,  0.04838625, ..., -0.02641121,
        -0.07529832,  0.02803211],
       [ 0.00504994,  0.06316979,  0.01415728, ...,  0.04035438,
         0.07584123,  0.0908735 ],
       [-0.01629126,  0.10406609,  0.09740778, ...,  0.00676727,
        -0.08788458,  0.03404385]], dtype=float32)

In [6]:
# define our similarity metric (cosine similarity: higher means more similar)
def cosine_similarity(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Computed as ``dot(a, b) / (norm(a) * norm(b))``.

    Args:
        a: First vector (anything accepted by ``np.dot`` and ``norm``).
        b: Second vector, compatible with ``a`` for ``np.dot``.

    Returns:
        The similarity as a scalar. When either vector has zero norm the
        cosine is undefined; ``0.0`` is returned in that case instead of
        the ``nan`` (plus RuntimeWarning) that a 0/0 division would yield.
    """
    denominator = norm(a) * norm(b)
    # A zero-length vector has no direction, so guard the division.
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator

In [7]:
# semantic similarity search: score each dataset sentence against the query
print("Query: That is a happy person")
for sentence, embedding in zip(sentences, embeddings):
    score = cosine_similarity(embedding, query_embedding)
    print(sentence, " -> similarity score = ", score)


Query: That is a happy person
That is a very happy person  -> similarity score =  0.942915
That is a happy dog  -> similarity score =  0.6945775
Today is a sunny day  -> similarity score =  0.25687605
