### Setup

In [4]:
import os

import azure.identity
import dotenv
import openai
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    HnswAlgorithmConfiguration,
    HnswParameters,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SimpleField,
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchProfile,
)
from azure.search.documents.models import VectorizedQuery

dotenv.load_dotenv()

# Initialize Azure search variables
AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"
AZURE_SEARCH_SERVICE_KEY = os.getenv("AZURE_SEARCH_SERVICE_KEY")


# Set up OpenAI client based on environment variables
dotenv.load_dotenv()
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
AZURE_OPENAI_ADA_DEPLOYMENT = os.getenv("AZURE_OPENAI_ADA_DEPLOYMENT")
AZURE_OPENAI_SERVICE_KEY = os.getenv("AZURE_OPENAI_SERVICE_KEY")

azure_credential = azure.identity.DefaultAzureCredential()
azure_search_credential = AzureKeyCredential(AZURE_SEARCH_SERVICE_KEY)

token_provider = azure.identity.get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
print(azure_credential.get_token("https://cognitiveservices.azure.com/.default"))

openai_client = openai.AzureOpenAI(
    api_version="2023-07-01-preview",
    azure_endpoint=f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com",
    api_key=AZURE_OPENAI_SERVICE_KEY)
    #azure_ad_token_provider=token_provider)

def get_embedding(text):
    get_embeddings_response = openai_client.embeddings.create(model=AZURE_OPENAI_ADA_DEPLOYMENT, input=text)
    return get_embeddings_response.data[0].embedding

AccessToken(token='eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IkwxS2ZLRklfam5YYndXYzIyeFp4dzFzVUhIMCIsImtpZCI6IkwxS2ZLRklfam5YYndXYzIyeFp4dzFzVUhIMCJ9.eyJhdWQiOiJodHRwczovL2NvZ25pdGl2ZXNlcnZpY2VzLmF6dXJlLmNvbSIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MubmV0LzQ0NjBkNmM3LTNjZGQtNGQ4NS1iZGE0LTg3Yzg1Yzk4YWYwNC8iLCJpYXQiOjE3MTU5MTQwNDQsIm5iZiI6MTcxNTkxNDA0NCwiZXhwIjoxNzE1OTE3OTQ0LCJhaW8iOiJFMk5nWU5qRi9mRnZuZlFYNzRWWGpTdVBYUG4wRlFBPSIsImFwcGlkIjoiNWVlMWUwZmItNWZjMC00MWI1LTk0N2MtZGUxYTRiOWQxNjE3IiwiYXBwaWRhY3IiOiIxIiwiaWRwIjoiaHR0cHM6Ly9zdHMud2luZG93cy5uZXQvNDQ2MGQ2YzctM2NkZC00ZDg1LWJkYTQtODdjODVjOThhZjA0LyIsIm9pZCI6ImI1ODZlN2Y5LTA2Y2EtNDg0OC05YTJlLWFmYjQ5YTQ0NDEzZCIsInJoIjoiMC5BVmtBeDlaZ1JOMDhoVTI5cElmSVhKaXZCSkFpTVgzSUtEeEhvTzJPVTNTYmJXMmRBQUEuIiwic3ViIjoiYjU4NmU3ZjktMDZjYS00ODQ4LTlhMmUtYWZiNDlhNDQ0MTNkIiwidGlkIjoiNDQ2MGQ2YzctM2NkZC00ZDg1LWJkYTQtODdjODVjOThhZjA0IiwidXRpIjoiZ3JoTE1lREljRVNmTHdGamlwYlZBQSIsInZlciI6IjEuMCJ9.dnw7KBr7fkLvQmlXcVsG_r0d_P_cYyltcXq5UYW7I0koytgNcN_7GwfQzo7gERgyMeGLIgRiAiL

## Azure AI Search: vector search, step by step

### Create a tiny vector index

In [5]:


AZURE_SEARCH_TINY_INDEX = "teenyindex"

index = SearchIndex(
    name=AZURE_SEARCH_TINY_INDEX, 
    fields=[
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        SearchField(name="embedding", 
                    type=SearchFieldDataType.Collection(SearchFieldDataType.Single), 
                    searchable=True, 
                    vector_search_dimensions=3,
                    vector_search_profile_name="embedding_profile")
    ],
    vector_search=VectorSearch(
        algorithms=[HnswAlgorithmConfiguration( # Hierachical Navigable Small World, IVF
                            name="hnsw_config",
                            kind=VectorSearchAlgorithmKind.HNSW,
                            parameters=HnswParameters(metric="cosine"),
                        )],
        profiles=[VectorSearchProfile(name="embedding_profile", algorithm_configuration_name="hnsw_config")]
    )
)

index_client = SearchIndexClient(endpoint=AZURE_SEARCH_ENDPOINT, credential=azure_search_credential)
index_client.create_index(index)

HttpResponseError: (ResourceNameAlreadyInUse) Cannot create index 'teenyindex' because it already exists.
Code: ResourceNameAlreadyInUse
Message: Cannot create index 'teenyindex' because it already exists.
Exception Details:	(CannotCreateExistingIndex) Cannot create index 'teenyindex' because it already exists.
	Code: CannotCreateExistingIndex
	Message: Cannot create index 'teenyindex' because it already exists.

### Insert a few documents with tiny vectors

In [6]:
search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_TINY_INDEX, credential=azure_search_credential)
search_client.upload_documents(documents=[
    {"id": "1", "embedding": [1, 2, 3]},
    {"id": "2", "embedding": [1, 1, 3]},
    {"id": "3", "embedding": [4, 5, 6]}])

[<azure.search.documents._generated.models._models_py3.IndexingResult at 0x73b947168790>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x73b9471d8190>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x73b9471db710>]

### Search using vector similarity

In [7]:
r = search_client.search(search_text=None, vector_queries=[
    VectorizedQuery(vector=[-2, -1, -1], k_nearest_neighbors=3, fields="embedding")])
for doc in r:
    print(f"id: {doc['id']}, score: {doc['@search.score']}")

id: 2, score: 0.36515692
id: 1, score: 0.3618256
id: 3, score: 0.34674543


### Searching on a larger index

In [8]:
AZURE_SEARCH_FULL_INDEX = "vector-1715911082937"
search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=azure_search_credential)

### Application performs vectorization

In [9]:

search_query = "learning about underwater activities"
search_vector = get_embedding(search_query)
vector_query = VectorizedQuery(vector=search_vector, k_nearest_neighbors=5, fields="vector")

r = search_client.search(  
    search_text=None,  
    vector_queries= [vector_query],
    select=["parent_id", "chunk_id", "chunk"],
    top=5
)  
for doc in r:
    content = doc["chunk"].replace("\n", " ")[:150]
    print(f"Score: {doc['@search.score']:.5f}\tContent:{content}")

Score: 0.79683	Content:Yoga and Pilates classes   • Fitness equipment purchases   • Sports team fees   • Health retreats and spas   • Outdoor adventure activities (such as r
Score: 0.78356	Content:to help employees develop and grow in their   roles.      Employees will receive a written summary of their performance review which will be   discuss
Score: 0.78137	Content:with attending clinical trials. Additionally, any experimental treatments   or services that are not part of the clinical trial are not covered.    Wh
Score: 0.78087	Content:treatment of dental injuries,   such as root canals, crowns, fillings, extractions, and periodontal services. This coverage   also includes dental ane
Score: 0.78068	Content:such as nausea, vomiting, and dizziness. Therefore, it is     important to make sure you are informed about the potential risks associated with any   


### Searching with Integrated Vectorization

In [14]:
from azure.search.documents.models import VectorizableTextQuery

search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=azure_search_credential)

search_query = "learning about underwater activities"
vector_query = VectorizableTextQuery(text=search_query, k_nearest_neighbors=3, fields="vector", exhaustive=True)

r = search_client.search(  
    search_text=None,  
    vector_queries= [vector_query],
    select=["parent_id", "chunk_id", "chunk"],
    top=5
)  

for doc in r:
    content = doc["chunk"].replace("\n", " ")[:150]
    print(f"Score: {doc['@search.score']:.5f}\tparent_id: {doc['parent_id']}\tContent:{content}")

Score: 0.79683	parent_id: aHR0cHM6Ly9haTEwMnN0cjM2MTI1MTMzLmJsb2IuY29yZS53aW5kb3dzLm5ldC9kYXRhL1BlcmtzUGx1cy5wZGY1	Content:Yoga and Pilates classes   • Fitness equipment purchases   • Sports team fees   • Health retreats and spas   • Outdoor adventure activities (such as r
Score: 0.78356	parent_id: aHR0cHM6Ly9haTEwMnN0cjM2MTI1MTMzLmJsb2IuY29yZS53aW5kb3dzLm5ldC9kYXRhL2VtcGxveWVlX2hhbmRib29rLnBkZg2	Content:to help employees develop and grow in their   roles.      Employees will receive a written summary of their performance review which will be   discuss
Score: 0.78137	parent_id: aHR0cHM6Ly9haTEwMnN0cjM2MTI1MTMzLmJsb2IuY29yZS53aW5kb3dzLm5ldC9kYXRhL05vcnRod2luZF9TdGFuZGFyZF9CZW5lZml0c19EZXRhaWxzLnBkZg2	Content:with attending clinical trials. Additionally, any experimental treatments   or services that are not part of the clinical trial are not covered.    Wh
