# Azure AI Search: vector search, step by step

## Setup API client


In [1]:
import os

import azure.identity
import dotenv
from azure.search.documents.indexes import SearchIndexClient
from azure.core.credentials import AzureKeyCredential

dotenv.load_dotenv()

AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"
AZURE_SEARCH_SERVICE_KEY = os.getenv("AZURE_SEARCH_SERVICE_KEY")
# azure_credential = azure.identity.AzureDeveloperCliCredential(tenant_id=os.getenv("AZURE_TENANT_ID"))
search_service_cred = AzureKeyCredential(AZURE_SEARCH_SERVICE_KEY)
index_client = SearchIndexClient(endpoint=AZURE_SEARCH_ENDPOINT, credential=search_service_cred)

## Search a tiny index

### Create index

In [2]:
from azure.search.documents.indexes.models import (
    HnswAlgorithmConfiguration,
    HnswParameters,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SimpleField,
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchProfile,
)

AZURE_SEARCH_TINY_INDEX = "teeenytinyindex"

index = SearchIndex(
    name=AZURE_SEARCH_TINY_INDEX, 
    fields=[
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        SearchField(name="embedding", 
                    type=SearchFieldDataType.Collection(SearchFieldDataType.Single), 
                    searchable=True, 
                    vector_search_dimensions=3,
                    vector_search_profile_name="embedding_profile")
    ],
    vector_search=VectorSearch(
        algorithms=[HnswAlgorithmConfiguration( # Hierachical Navigable Small World, IVF
                            name="hnsw_config",
                            kind=VectorSearchAlgorithmKind.HNSW,
                            parameters=HnswParameters(metric="cosine"),
                        )],
        profiles=[VectorSearchProfile(name="embedding_profile", algorithm_configuration_name="hnsw_config")]
    )
)

index_client.create_index(index)

HttpResponseError: (ResourceNameAlreadyInUse) Cannot create index 'teeenytinyindex' because it already exists.
Code: ResourceNameAlreadyInUse
Message: Cannot create index 'teeenytinyindex' because it already exists.
Exception Details:	(CannotCreateExistingIndex) Cannot create index 'teeenytinyindex' because it already exists.
	Code: CannotCreateExistingIndex
	Message: Cannot create index 'teeenytinyindex' because it already exists.

### Insert a few documents with tiny vectors

In [8]:
from azure.search.documents import SearchClient

search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_TINY_INDEX, credential=key_cred)
search_client.upload_documents(documents=[
    {"id": "1", "embedding": [1, 2, 3]},
    {"id": "2", "embedding": [1, 1, 3]},
    {"id": "3", "embedding": [4, 5, 6]}])

[<azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89e142d190>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89e142d890>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89e142dc90>]

### Search using vector similarity

In [9]:
from azure.search.documents.models import VectorizedQuery

r = search_client.search(search_text=None, vector_queries=[
    VectorizedQuery(vector=[-2, -1, -1], k_nearest_neighbors=3, fields="embedding")])
for doc in r:
    print(f"id: {doc['id']}, score: {doc['@search.score']}")

id: 2, score: 0.36515692
id: 1, score: 0.3618256
id: 3, score: 0.34674543


## Search a larger index

In [4]:
import azure.identity
import dotenv
import openai

dotenv.load_dotenv()

# Initialize Azure search variables
AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"

# Set up OpenAI client based on environment variables
dotenv.load_dotenv()
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
AZURE_OPENAI_ADA_DEPLOYMENT = os.getenv("AZURE_OPENAI_ADA_DEPLOYMENT")

token_provider = azure.identity.get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
openai_client = openai.AzureOpenAI(
    api_version="2023-07-01-preview",
    azure_endpoint=f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com",
    azure_ad_token_provider=token_provider)

def get_embedding(text):
    get_embeddings_response = openai_client.embeddings.create(model=AZURE_OPENAI_ADA_DEPLOYMENT, input=text)
    return get_embeddings_response.data[0].embedding

In [30]:
products = []
with open("./data/products.txt", "r") as f:
    for line in f:
        products.append(line)

In [31]:
products

['Dell, $999.00, Dell laptops are known for their durability and performance, making them a popular choice for both personal and professional use.\n',
 'Lenovo, $899.00, Lenovo laptops offer a great balance of performance and price, with a variety of models suitable for different needs.\n',
 'HP, $799.00, HP laptops are versatile and reliable, with a range of options from budget-friendly to high-end models.\n',
 'Apple, $1299.00, Apple laptops are known for their sleek design and powerful performance, ideal for creative professionals.\n',
 'Asus, $699.00, Asus laptops provide excellent value for money, with a focus on gaming and high-performance computing.\n',
 'Dell Laptop, $999.00, Dell laptops are known for their durability and performance, making them a popular choice for both personal and professional use. They come with a variety of features including high-resolution displays, powerful processors, and long battery life, ensuring that users can work efficiently and effectively.\n'

Create new index

In [32]:
from azure.search.documents.indexes.models import SearchableField
AZURE_SEARCH_FULL_INDEX = "gptkbindex"

index = SearchIndex(
    name=AZURE_SEARCH_FULL_INDEX, 
    fields=[
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        SearchableField(name="content", type=SearchFieldDataType.String),
        SearchableField(name="sourcefile", type=SearchFieldDataType.String),
        SearchField(name="embedding", 
                    type=SearchFieldDataType.Collection(SearchFieldDataType.Single), 
                    searchable=True, 
                    vector_search_dimensions=1536,
                    vector_search_profile_name="embedding_profile")
    ],
    vector_search=VectorSearch(
        algorithms=[HnswAlgorithmConfiguration( # Hierachical Navigable Small World, IVF
                            name="hnsw_config",
                            kind=VectorSearchAlgorithmKind.HNSW,
                            parameters=HnswParameters(metric="cosine"),
                        )],
        profiles=[VectorSearchProfile(name="embedding_profile", algorithm_configuration_name="hnsw_config")]
    )
)

index_client.create_index(index)

<azure.search.documents.indexes.models._index.SearchIndex at 0x7f89e1476f10>

Index document

In [33]:
# Create vectors from documents
documents = [{"id": str(i), "content": prod, "embedding": get_embedding(prod), "sourcefile": "products.txt"} for i, prod in enumerate(products)]

In [None]:
documents[0]

Upload to Vector DB

In [34]:

search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=key_cred)
search_client.upload_documents(documents=documents)

[<azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89ba682fd0>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89ba683f90>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89ba682dd0>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89ba683a90>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89ba5d8190>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89ba5db450>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89ba5dba50>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89ba5db310>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89ba5d8850>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89ba5dae50>,
 <azure.search.documents._generated.models._models_py3.IndexingResult at 0x7f89ba5dba10>,
 <azure.se

In [36]:
AZURE_SEARCH_FULL_INDEX = "gptkbindex"
# search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=azure_credential)

search_query = "do you have laptop?"
search_vector = get_embedding(search_query)
r = search_client.search(search_text=None, top=5, vector_queries=[
    VectorizedQuery(vector=search_vector, k_nearest_neighbors=5, fields="embedding")])
for doc in r:
    content = doc["content"].replace("\n", " ")[:150]
    print(f"Score: {doc['@search.score']:.5f}\tContent:{content}")

Score: 0.83161	Content:Dell, $999.00, Dell laptops are known for their durability and performance, making them a popular choice for both personal and professional use. 
Score: 0.83101	Content:Dell Laptop, $999.00, Dell laptops are known for their durability and performance, making them a popular choice for both personal and professional use
Score: 0.82883	Content:HP, $799.00, HP laptops are versatile and reliable, with a range of options from budget-friendly to high-end models. 
Score: 0.82771	Content:Lenovo Laptop, $899.00, Lenovo laptops offer a great balance of performance and price, with a variety of models suitable for different needs. Whether 
Score: 0.82685	Content:Lenovo, $899.00, Lenovo laptops offer a great balance of performance and price, with a variety of models suitable for different needs. 
