In [1]:
import os
from openai import AzureOpenAI
import numpy as np
from dotenv import load_dotenv
# Pull settings from a local .env file (if present) into the process environment.
load_dotenv()

# Azure OpenAI client; endpoint, key and API version all come from the environment.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    api_version=os.environ.get("AZURE_OPENAI_API_VERSION"),
)

In [2]:
def cosine_similarity(a, b):
    """Return the cosine similarity between two vectors.

    Computed as ``dot(a, b) / (norm(a) * norm(b))``.

    Args:
        a: First vector (any array-like NumPy accepts, e.g. a list of floats).
        b: Second vector, with the same length as ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b``. When either vector
        has zero magnitude the cosine is undefined; 0.0 is returned in that
        case instead of dividing by zero and producing NaN.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # A zero vector has no direction: avoid 0/0 -> NaN and its RuntimeWarning.
        return 0.0
    return np.dot(a, b) / denominator

In [3]:
# Sample sentence to embed.
text = "the quick brown fox jumped over the lazy dog"

# `model` is the embeddings deployment name, read from the environment;
# fail early with a clear message if it has not been configured.
model = os.environ.get("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT")
if not model:
    raise ValueError(
        "Please set the AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT environment variable to your deployment name."
    )

# Request the embedding; as the cell's last expression, the vector is displayed.
(
    client.embeddings
    .create(model=model, input=[text])
    .data[0]
    .embedding
)

[-0.00457075284793973,
 0.009826643392443657,
 -0.01506032980978489,
 -0.006350206211209297,
 -0.011406265199184418,
 0.015542463399469852,
 -0.024157429113984108,
 -0.015872344374656677,
 -0.015999222174286842,
 -0.029587775468826294,
 0.021543757990002632,
 0.02127731405198574,
 0.01896814815700054,
 0.004006149247288704,
 -0.0007402494666166604,
 -0.007644354365766048,
 0.024893317371606827,
 -0.004237700253725052,
 0.011127134785056114,
 -0.008576902560889721,
 -0.008957534097135067,
 0.02120118774473667,
 -0.00641364511102438,
 -0.00848808791488409,
 0.006423160899430513,
 0.013284048065543175,
 0.007504789158701897,
 -0.003229025984182954,
 -0.00906537938863039,
 0.0013853409327566624,
 0.006591273006051779,
 0.003971257712692022,
 -0.039433449506759644,
 -0.002605741610750556,
 -0.012713100761175156,
 -0.021670633926987648,
 -0.0037270192988216877,
 -0.010461029596626759,
 0.026187464594841003,
 -0.04544743150472641,
 0.009407947771251202,
 0.01572009176015854,
 -0.0224192105233

In [4]:
# Embed several single words (one request per word, in the order listed).
automobile_embedding, vehicle_embedding, dinosaur_embedding, stick_embedding = (
    client.embeddings.create(input=word, model=model).data[0].embedding
    for word in ('automobile', 'vehicle', 'dinosaur', 'stick')
)

# Compare each word against "automobile" using cosine similarity.
# A vector compared with itself scores 1.0 (exactly the same); the other
# words are expected to score lower, i.e. not the same.
print(cosine_similarity(automobile_embedding, automobile_embedding))  # automobile vs itself
print(cosine_similarity(automobile_embedding, vehicle_embedding))     # automobile vs vehicle
print(cosine_similarity(automobile_embedding, dinosaur_embedding))    # automobile vs dinosaur
print(cosine_similarity(automobile_embedding, stick_embedding))       # automobile vs stick

1.0
0.915739416418029
0.8329017718558467
0.7818148774886836
