In [1]:
import numpy as np

In [2]:
def normalize(v):
	"""Scale a vector to unit length (Euclidean norm of 1).

	Args:
		v: Array-like of numbers (list, tuple or NumPy array).

	Returns:
		np.ndarray: ``v`` divided by its Euclidean norm.

	Raises:
		ValueError: If the norm of ``v`` is 0 (all-zero or empty input),
			because such a vector has no direction to preserve.
	"""
	vec = np.asarray(v)
	norm = np.linalg.norm(vec)
	# Dividing by a zero norm would silently produce NaNs (plus a
	# RuntimeWarning) that propagate into every later dot product.
	if norm == 0:
		raise ValueError("cannot normalize a vector whose norm is 0")
	return vec / norm

## Why cosine is used rather than sine or tangent
- Cosine similarity depends only on the angle between two vectors, not on their magnitudes, which makes it possible to compare semantic meaning; this is important in `NLP`.
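- Cosine also gives a value between -1 and 1 that maps directly onto similarity: 1 for the same direction, 0 for orthogonal vectors, and -1 for opposite directions. Sine is bounded by -1 and 1 as well but has no such mapping, and tangent is unbounded.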
- If sine were used to compare the angle, it would be largest when the vectors are orthogonal and close to zero when they point in nearly the same direction, which is the exact reverse of what a similarity score requires.
- Tangent, being the ratio sin/cos, is unbounded and is undefined when the two vectors are perpendicular, so it would not work for similarity comparison either.

In [3]:
def cosine_fun(a, b):
	"""Return the cosine similarity of vectors ``a`` and ``b``.

	Each input is passed through ``normalize`` and the dot product of the
	two results is returned.
	"""
	unit_a = normalize(a)
	unit_b = normalize(b)
	return np.dot(unit_a, unit_b)

In [4]:
# Toy 3-D vectors for the similarity demo; vect3 is vec1 with every component negated.
vec1, vect2, vect3 = [1, 2, 3], [2, 3, 4], [-1, -2, -3]

In [5]:
# Report the cosine similarity for every pair of the toy vectors.
for left, right in ((vec1, vect2), (vec1, vect3), (vect2, vect3)):
	print(f"similarity between {left} and {right} is {cosine_fun(left, right)}")

similarity between [1, 2, 3] and [2, 3, 4] is 0.9925833339709303
similarity between [1, 2, 3] and [-1, -2, -3] is -1.0
similarity between [2, 3, 4] and [-1, -2, -3] is -0.9925833339709303


In [6]:
import os

from dotenv import load_dotenv, find_dotenv

# Locate a .env file (find_dotenv) and load its entries as environment
# variables (load_dotenv); the value displayed by this cell is load_dotenv's
# return value.
load_dotenv(find_dotenv())

True

In [7]:
# API key for the embeddings service, read from the environment (None when unset).
API_KEY = os.environ.get("EURI_API_KEY")

In [8]:
import requests

In [9]:
def generate_embeddings(text, timeout=30):
	"""Request an embedding vector for ``text`` from the Euri embeddings API.

	Args:
		text: Value sent as the ``input`` field of the request payload.
		timeout: Seconds to wait for the server before giving up; passed
			straight to ``requests.post`` so a stalled connection cannot
			hang the notebook indefinitely.

	Returns:
		np.ndarray: The ``embedding`` of the first entry in the response's
		``data`` list, converted to a NumPy array for vector operations.

	Raises:
		RuntimeError: If ``API_KEY`` is empty or ``None``.
		requests.HTTPError: If the server answers with a 4xx/5xx status.
		requests.Timeout: If no response arrives within ``timeout`` seconds.
	"""
	# Fail early with a clear message instead of sending "Bearer None".
	if not API_KEY:
		raise RuntimeError(
			"EURI_API_KEY is not set; define it in the environment or a .env file"
		)

	url = "https://api.euron.one/api/v1/euri/embeddings"
	headers = {"Content-Type": "application/json", "Authorization": f"Bearer {API_KEY}"}
	payload = {"input": text, "model": "text-embedding-3-small"}

	response = requests.post(url, headers=headers, json=payload, timeout=timeout)
	# Surface HTTP errors (bad key, rate limit, ...) here rather than as an
	# obscure KeyError when indexing the error body below.
	response.raise_for_status()
	data = response.json()

	# convert to numpy array for vector operations
	return np.array(data["data"][0]["embedding"])

In [14]:
# Sample sentences: the first two say nearly the same thing, the third is unrelated.
text1, text2, text3 = (
	"my name is Tejas",
	"I am Tejas",
	"Gen AI is awesome",
)

In [15]:
# Embed each sample sentence, in order, with one API call per sentence.
embd1, embd2, embd3 = map(generate_embeddings, (text1, text2, text3))

In [13]:
# Cosine similarity of the embeddings of text1 and text2, two sentences that
# state almost the same thing.
print(f"similarity between `{text1}` and `{text2}` is {cosine_fun(embd1, embd2)}")

similarity between `my name is Tejas` and `I am Tejas` is 0.8946471153891769


In [16]:
# Cosine similarity of the embeddings of text1 and text3, two sentences on
# unrelated topics.
print(f"similarity between `{text1}` and `{text3}` is {cosine_fun(embd1, embd3)}")

similarity between `my name is Tejas` and `Gen AI is awesome` is 0.14948278342980978
