In [1]:
import ollama
import numpy as np

In [2]:
def get_embedding(text, model="llama3"):
    response = ollama.embeddings(model=model, prompt=text)
    return response["embedding"]


In [3]:
documents = [
    "The cat jumped over the dog",
    "The cow jumped over the moon",
    "The turkey ran in circles",
]

embeddings = [get_embedding(doc) for doc in documents]


In [4]:

print(len(embeddings[0]))  # dimension size
print(embeddings[0][:10])  # first 10 values

4096
[-2.2180778980255127, 1.0529645681381226, 1.1047967672348022, -2.0559418201446533, -2.8292582035064697, 0.3583463728427887, -2.9368104934692383, 1.6765278577804565, -4.249149799346924, 0.8415858745574951]


In [5]:
np.array(embeddings[0]).shape

(4096,)

In [6]:
def calculate_cosine_metrics(v1, v2):
    dot_product = np.dot(v1, v2)
    magnitude1 = np.linalg.norm(v1)
    magnitude2 = np.linalg.norm(v2)
    cosine_similarity = dot_product / (magnitude1 * magnitude2)
    cosine_distance = 1 - cosine_similarity
    return int(cosine_similarity * 100), int(cosine_distance * 100)

In [7]:
print(calculate_cosine_metrics(embeddings[0], embeddings[0]))
print(calculate_cosine_metrics(embeddings[0], embeddings[1]))
print(calculate_cosine_metrics(embeddings[0], embeddings[2]))

(100, 0)
(70, 29)
(54, 45)


In [8]:
query_str = "The moose sat by the turkey"
query_embedding = get_embedding(query_str)

for embedding in embeddings:
    print(calculate_cosine_metrics(query_embedding, embedding))

(37, 62)
(28, 71)
(32, 67)


In [10]:
phrase_1_embedding = get_embedding("The dog ate my homework and then burped it up")
phrase_2_embedding = get_embedding("The green dog ate my homework and then burped it up")

In [11]:
calculate_cosine_metrics(phrase_1_embedding, phrase_2_embedding)

(98, 1)