# Embeddings - Cosine Similarity

- Simplest example of generating an embedding with a REST call

In [5]:
import common
import requests
import json

"""curl https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/embeddings?api-version=2023-05-15 \
  -H "Content-Type: application/json" \
  -H "api-key: YOUR_API_KEY" \
  -d "{\"input\": \"The food was delicious and the waiter...\"}"""

# Build the client from the key, API version and endpoint held in `common`.
client = common.get_openai_client(
    api_key=common.api_KEY,
    api_version=common.api_version,
    azure_endpoint=common.api_URI,
)

In [6]:
def post_request(url:str, prompt:str, timeout:float=30.0):
    """POST `prompt` as the ``input`` field of a JSON body and return the reply.

    Args:
        url: Full URL of the endpoint to call.
        prompt: Text sent as the ``"input"`` value of the JSON payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests` waits indefinitely on a stalled connection.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within `timeout`.
    """
    headers = {
        "Content-Type": "application/json",
        "api-key": common.api_KEY
    }
    response = requests.post(
        url,
        headers=headers,
        json={"input": prompt},
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of handing an error payload to the
    # caller, who would otherwise trip over a missing key further down.
    response.raise_for_status()
    return response.json()

In [7]:
def get_embedding(prompt:str):
    """Request an embedding for `prompt` and return ``(prompt, embedding)``.

    The request goes to ``common.ada_full_URI`` via `post_request`; the
    embedding is read from the first item of the reply's ``data`` list.
    """
    payload = post_request(common.ada_full_URI, prompt)
    first_item = payload['data'][0]
    return (prompt, first_item['embedding'])

In [8]:
def cosine_similarity(v1, v2):
    """Return the cosine of the angle between vectors `v1` and `v2`.

    Computed as ``dot(v1, v2) / (|v1| * |v2|)``. With numpy the same value is
    ``np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))``.

    Args:
        v1: Sequence of numbers (iterated more than once, so not a generator).
        v2: Sequence of numbers (iterated more than once, so not a generator).

    Returns:
        The similarity as a float, or 0.0 when either vector is empty or has
        zero magnitude (the angle is undefined there, and dividing would
        raise ZeroDivisionError).
    """
    dot_product = sum(a*b for a, b in zip(v1, v2))
    magnitude_A = sum(a*a for a in v1)**0.5
    magnitude_B = sum(b*b for b in v2)**0.5
    denominator = magnitude_A * magnitude_B
    if denominator == 0:
        # Empty or all-zero vector: treat as "no similarity" rather than crash.
        return 0.0
    return dot_product / denominator

### Prepare the mock vector database

In [27]:
# Texts stored in the mock vector database; each becomes one record whose
# embedding starts out empty and is filled in by the loop below.
seed_texts = [
    "The chemical composition of water is H2O.",
    "The speed of light is 300,000 km/s.",
    "Acceleration of gravity on earth is 9.8m/s^2.",
    "The chemical composition of salt or sodium clorida is NaCl.",
    "",
]
vector_database = [{"content": text, "embedding": []} for text in seed_texts]

# Replace each placeholder with the embedding returned for the record's content.
for record in vector_database:
    record["embedding"] = get_embedding(record["content"])[1]


### Embed the question

In [28]:
# p1 is the question text as sent, e1 the embedding returned for it.
p1, e1 = get_embedding("What is the speed of light?")

### Nearest search

In [29]:
limit = 3        # maximum number of matches to keep
relevance = 0.5  # minimum cosine similarity for an entry to count as a match

# Score every stored entry against the question embedding. Entries whose
# embedding is still the empty placeholder are skipped.
scored = [
    {"content": entry["content"],
     "similarity": cosine_similarity(e1, entry["embedding"])}
    for entry in vector_database
    if entry["embedding"]
]

# Keep only sufficiently similar entries, best match first, capped at `limit`.
# The cap is applied to the matches, not to the number of entries examined, so
# a relevant entry late in the database is no longer missed.
local_list = sorted(
    (hit for hit in scored if hit["similarity"] > relevance),
    key=lambda hit: hit["similarity"],
    reverse=True,
)[:limit]
count = len(local_list)  # number of matches kept

{'content': 'The chemical composition of water is H2O.', 'embedding': [0.022812204, 0.033907063, -0.005532173, -0.027023125, -0.006658137, -0.0070243035, -0.019748608, -0.035029978, -0.00016563336, -0.02624197, -0.0045526763, 0.03317473, -0.009642398, 0.007903105, -0.0069815842, -0.006584903, 0.018735545, -0.0050714132, 0.027828693, -0.00016267732, -0.015977086, 0.0049279975, 0.012107923, 0.0044062096, -0.029757172, 0.016294431, 0.032222696, -0.023629978, 0.01129015, -0.029073661, -0.0015958779, -0.014585652, -0.036958456, -0.0063285865, 0.028390149, 0.0031673447, -0.0034907921, -0.00028778438, 0.0059807277, -0.009599678, 0.006014293, 0.01550107, 0.017222054, 0.0081472155, -0.017063383, -0.008232655, 0.0078115626, -0.008897859, -0.014634475, 0.017820127, 0.013731263, -0.01260835, -0.005623715, -0.025924625, 0.010856852, -0.0047174515, 0.009227409, -0.0026302997, 0.005718308, -0.026779015, -0.0016843682, 0.0062736613, -0.013072162, -0.0021191915, -0.018808778, -0.011174196, -0.001313624