### Demo for calling an embedding model

This type of model takes one or more texts as input and returns a numerical encoding of each text.
<br>
Typical use cases include assessing the similarity between text snippets, and searching for and retrieving relevant texts by semantics (the "meaning" of the texts).

In [1]:
# Load in the packages and secrets

import os

import numpy as np
from dotenv import load_dotenv
from openai import AzureOpenAI
# Build the path to the secrets file from its components so that os.path.join
# actually does the joining (a single pre-joined string makes the call a no-op).
env_path = os.path.join(".venv", ".env")
# Kept as the last expression on purpose: the cell echoes load_dotenv's boolean
# result (True in the recorded run), which shows whether the file was picked up.
load_dotenv(dotenv_path=env_path)

True

In [2]:
# Name of the model deployment to call.
# Make sure the model has been deployed under this name before using it.
# (The original note lists 'document_summarisation' as an alternative value.)
deployment_name = "embedding_demo"

# Create the client and point it at the right Azure OpenAI resource.
# The key and endpoint are read from environment variables, not hard-coded.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_version="2024-02-01",
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
)

In [3]:
# Request embeddings from the deployed model.
# `input` accepts a single text snippet or a collection of snippets.
# Note the explicit `dimensions` argument: this demo uses 4; larger values are
# more typical in practice, and will be more costly.
response = client.embeddings.create(
    model=deployment_name,
    input=[
        "Your text string goes here",
        "This is my second string.",
    ],
    dimensions=4,
)

In [4]:
# Show the complete response, serialised as indented JSON for readability
response_json = response.model_dump_json(indent=2)
print(response_json)

{
  "data": [
    {
      "embedding": [
        0.07968918234109879,
        -0.22091856598854065,
        -0.06623643636703491,
        0.9697718024253845
      ],
      "index": 0,
      "object": "embedding"
    },
    {
      "embedding": [
        0.8777574300765991,
        -0.22160345315933228,
        -0.3061474859714508,
        0.29446136951446533
      ],
      "index": 1,
      "object": "embedding"
    }
  ],
  "model": "text-embedding-3-large",
  "object": "list",
  "usage": {
    "prompt_tokens": 11,
    "total_tokens": 11
  }
}


In [5]:
# Keep only the embedding vectors from the response and stack them into a
# NumPy array, one row per entry in `response.data`.
# (numpy is imported as `np` in the import cell at the top of the notebook.)
embedding_ = np.array([item.embedding for item in response.data])
embedding_

array([[ 0.07968918, -0.22091857, -0.06623644,  0.9697718 ],
       [ 0.87775743, -0.22160345, -0.30614749,  0.29446137]])

In [6]:
# Sanity check: rows = number of input texts, columns = requested `dimensions`
embedding_.shape

(2, 4)