In [22]:
import os
import numpy as np


from azure.ai.inference import EmbeddingsClient
from azure.core.credentials import AzureKeyCredential


In [None]:

# Base URL of the inference service; passed to EmbeddingsClient further down.
endpoint = "https://models.inference.ai.azure.com"
# Access token is taken from the environment so it is never stored in the
# notebook. A missing GITHUB_TOKEN variable raises KeyError right here.
token = os.environ["GITHUB_TOKEN"]

# Model names offered in the dropdown below; the chosen one is later sent
# as the `model=` argument of `client.embed`.
embedding_models = [
    "text-embedding-3-small",
    "text-embedding-3-large",
    "Cohere-embed-v3-english",
    "Cohere-embed-v3-multilingual",
]

In [5]:
# Widget toolkit and display helper used for the interactive model picker
import ipywidgets as widgets
from IPython.display import display

# Dropdown listing every entry of `embedding_models`, with the first entry
# preselected. A later cell reads `dropdown.value` to decide which model to call.
dropdown = widgets.Dropdown(
    options=embedding_models,
    value=embedding_models[0],
    description='Select:',
)

# Render the picker as this cell's output
display(dropdown)

Dropdown(description='Select:', options=('text-embedding-3-small', 'text-embedding-3-large', 'Cohere-embed-v3-…

In [15]:
# Snapshot the dropdown's current choice. This is read once: after changing the
# dropdown, re-run this cell so that later cells see the new `selected_model`.
# NOTE(review): the value depends on interactive widget state, so downstream
# results are not reproducible from code alone.
selected_model = dropdown.value
print(selected_model)

Cohere-embed-v3-multilingual


In [12]:
# Embeddings client bound to `endpoint`, authenticating with `token` wrapped in
# an AzureKeyCredential. Later cells (and embed_and_compare_similarity) use it.
client = EmbeddingsClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(token)
)


In [13]:
# Request embeddings for three sample phrases in a single call, using the
# model picked in the dropdown. The result is inspected in the next cell.
response = client.embed(
    input=["first phrase", "second phrase", "third phrase"],
    model=selected_model
)


In [14]:

# One summary line per returned embedding: its index, its dimensionality, and
# the first two / last two components (the full vector is too long to print).
for item in response.data:
    vec = item.embedding
    length = len(vec)
    head = f"{vec[0]}, {vec[1]}"
    tail = f"{vec[-2]}, {vec[-1]}"
    print(f"data[{item.index}]: length={length}, [{head}, ..., {tail}]")

# Usage record returned alongside the embeddings
print(response.usage)

data[0]: length=1024, [0.0013399124, -0.01576233, ..., 0.007843018, 0.000238657]
data[1]: length=1024, [0.036590576, -0.0059547424, ..., 0.011405945, 0.004863739]
data[2]: length=1024, [0.04196167, 0.029083252, ..., -0.0027484894, 0.0073127747]
{'prompt_tokens': 6, 'completion_tokens': 0, 'total_tokens': 6}


# Embeddings and Similarity

In [29]:
def embed_and_compare_similarity(input_msgs, selected_model):
    """Embed ``input_msgs`` and print the similarity of the first two results.

    All messages are sent in one ``client.embed`` call (``client`` is the
    notebook-level embeddings client). For every returned embedding a one-line
    summary is printed (index, length, first two and last two components),
    followed by the response's usage record and finally the similarity score.

    The score is the cosine similarity of the first two embeddings: their dot
    product divided by the product of their L2 norms. For unit-length
    embeddings this is the same number as the plain dot product; normalising
    keeps the score independent of vector scale, so it stays comparable when
    a different model is selected.

    Args:
        input_msgs: Sequence of at least two strings to embed. Only the first
            two embeddings are compared; further ones are only summarised.
        selected_model: Model name forwarded as ``model=`` to ``client.embed``.

    Raises:
        ValueError: If fewer than two messages are supplied. Raised before
            any request is sent.
    """
    # Fail fast: comparing needs two embeddings, and checking here avoids a
    # wasted request followed by an opaque IndexError on response.data[1].
    if len(input_msgs) < 2:
        raise ValueError(
            "embed_and_compare_similarity needs at least two input messages, "
            f"got {len(input_msgs)}"
        )

    response = client.embed(
        input=input_msgs,
        model=selected_model
    )

    for item in response.data:
        vector = item.embedding
        print(
            f"data[{item.index}]: length={len(vector)}, "
            f"[{vector[0]}, {vector[1]}, "
            f"..., {vector[-2]}, {vector[-1]}]"
        )
    print(response.usage)

    embedding_a = np.asarray(response.data[0].embedding, dtype=float)
    embedding_b = np.asarray(response.data[1].embedding, dtype=float)

    dot_product = np.dot(embedding_a, embedding_b)
    norm_product = np.linalg.norm(embedding_a) * np.linalg.norm(embedding_b)
    # A zero-length vector has no direction; report 0.0 (which is also what
    # the bare dot product yields) instead of dividing by zero.
    similarity_score = dot_product / norm_product if norm_product else 0.0
    print(f"Similarity Score: {similarity_score:.4f}")

In [30]:
# Compare two short phrases that differ only in their first word.
embed_and_compare_similarity(
    input_msgs=["first phrase", "second phrase"],
    selected_model=selected_model
)

data[0]: length=1024, [0.029876709, 0.050628662, ..., 0.010917664, -0.043762207]
data[1]: length=1024, [-0.0034561157, 0.022415161, ..., -0.0045433044, -0.054626465]
{'prompt_tokens': 4, 'completion_tokens': 0, 'total_tokens': 4}
Similarity Score: 0.8547


In [None]:
# Score a sentence about an AI platform against the name of a landmark.
embed_and_compare_similarity(
    input_msgs=[
        'Azure OpenAI is a cloud-based AI platform.',
        'The Eiffel Tower.',
    ],
    selected_model=selected_model,
)

data[0]: length=1024, [-0.0071868896, -0.009231567, ..., 0.027679443, 0.0026359558]
data[1]: length=1024, [0.034942627, 0.016296387, ..., 0.070129395, -0.06185913]
{'prompt_tokens': 15, 'completion_tokens': 0, 'total_tokens': 15}
Similarity Score: 0.3837


In [None]:
# Score the names of two landmarks against each other.
embed_and_compare_similarity(
    input_msgs=[
        'The Sydney Harbour Bridge.',
        'The Eiffel Tower.',
    ],
    selected_model=selected_model,
)

data[0]: length=1024, [0.044952393, 0.064575195, ..., -0.019424438, -0.04840088]
data[1]: length=1024, [0.034942627, 0.016296387, ..., 0.070129395, -0.06185913]
{'prompt_tokens': 10, 'completion_tokens': 0, 'total_tokens': 10}
Similarity Score: 0.6571
