# Cosine Similarity - Near Search

- Perform a near search using cosine similarity

### Import packages

In [17]:
import common
import requests

"""curl https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/embeddings?api-version=2023-05-15 \
  -H "Content-Type: application/json" \
  -H "api-key: YOUR_API_KEY" \
  -d "{\"input\": \"The food was delicious and the waiter...\"}"""

# Build a client through the shared helper in `common`, passing the API key,
# API version and endpoint that the `common` module exposes.
# NOTE(review): `client` is not referenced by the other cells visible here;
# the embedding calls below go through `requests` directly.
client = common.get_openai_client(
    api_key=common.api_KEY,
    api_version=common.api_version,
    azure_endpoint=common.api_URI,
)

### Make a POST request

In [18]:
def post_request(url:str, input:str, timeout:float=30.0):
    """POST *input* as a JSON body to *url* and return the decoded JSON reply.

    The request carries a JSON content type and the API key taken from
    ``common.api_KEY`` in the ``api-key`` header; the body is
    ``{"input": input}``.

    Args:
        url: Full endpoint URL to post to.
        input: Text sent under the ``"input"`` key of the JSON body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    headers = {
        "Content-Type": "application/json",
        "api-key": common.api_KEY,
    }
    response = requests.post(
        url,
        headers=headers,
        json={"input": input},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of handing an error payload back to
    # callers that expect a successful result.
    response.raise_for_status()
    return response.json()

### Get an embedding

In [19]:
def get_embedding(input:str):
    """Request an embedding for *input* and pair it with the original text.

    Posts *input* to ``common.ada_full_URI`` via ``post_request`` and reads
    the ``'embedding'`` field of the first item under ``'data'`` in the reply.

    Args:
        input: Text to embed.

    Returns:
        A ``(input, embedding)`` tuple.
    """
    payload = post_request(common.ada_full_URI, input)
    first_item = payload['data'][0]
    return (input, first_item['embedding'])

### Calculate the Cosine Similarity

In [20]:
def cosine_similarity(v1, v2):
    """Return the cosine of the angle between vectors *v1* and *v2*.

    Computed as ``dot(v1, v2) / (|v1| * |v2|)``. The NumPy equivalent is
    ``np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))``.

    Args:
        v1: Sequence of numbers.
        v2: Sequence of numbers of the same length as *v1*.

    Returns:
        The cosine similarity as a float, or ``0.0`` when either vector has
        zero magnitude (its direction is undefined, so it is treated as
        similar to nothing).

    Raises:
        ValueError: If the two vectors differ in length.
    """
    # zip() would silently drop the tail of the longer vector while the
    # magnitudes below still use every component, so reject mismatches.
    if len(v1) != len(v2):
        raise ValueError(
            f"Vectors must have the same length, got {len(v1)} and {len(v2)}"
        )

    dot_product = sum(a * b for a, b in zip(v1, v2))
    magnitude_a = sum(a * a for a in v1) ** 0.5
    magnitude_b = sum(b * b for b in v2) ** 0.5

    denominator = magnitude_a * magnitude_b
    if denominator == 0:
        # Avoid ZeroDivisionError for all-zero (or empty) vectors.
        return 0.0
    return dot_product / denominator

### Prepare the mock vector database

In [21]:
# Source facts for the mock vector database.
# NOTE(review): "clorida" in the last entry looks like a typo for "chloride";
# it is left unchanged here because the text is embedded verbatim.
content = [
    "The chemical composition of water is H2O.",
    "The speed of light is 300,000 km/s.",
    "Acceleration of gravity on earth is 9.8m/s^2.",
    "The chemical composition of salt or sodium clorida is NaCl.",
]

# Embed every fact once, in order, keeping whatever get_embedding returns
# for each one.
vector_database = list(map(get_embedding, content))

### Embed the question

In [22]:
# p1 receives the first element of the returned pair and e1 the second;
# e1 is the vector compared against the database entries.
p1, e1 = get_embedding("What is the speed of light?")

### Perform Nearest search

In [27]:
limit = 3        # maximum number of matches to keep
relevance = 0.5  # a match must score strictly above this similarity

# Score every entry before applying the limit. Stopping after `limit` entries
# have merely been examined would skip later entries that may rank higher.
# Unpacking happens inside the comprehension so the module-level `content`
# list is not overwritten by a loop variable.
scored_entries = [
    (text, cosine_similarity(e1, embedding))
    for text, embedding in vector_database
]

# Keep only sufficiently relevant entries, best first, capped at `limit`.
results_list = sorted(
    (hit for hit in scored_entries if hit[1] > relevance),
    key=lambda hit: hit[1],
    reverse=True,
)[:limit]

### Print Results

In [28]:
# Order the matches in place from highest to lowest similarity, then print
# one line per match.
results_list.sort(key=lambda pair: pair[1], reverse=True)
for text, score in results_list:
    print(f"Similarity: {score}, Content: {text}")

Similarity: 0.9076484494720922, Content: The speed of light is 300,000 km/s.
Similarity: 0.8044927978430234, Content: Acceleration of gravity on earth is 9.8m/s^2.
Similarity: 0.7380432345349796, Content: The chemical composition of water is H2O.
