#  AI Search - Search index

### Load .env file (Copy .env-sample to .env and update accordingly)

In [1]:
import os

from azure.core.credentials import AzureKeyCredential
from azure.identity import DefaultAzureCredential
from dotenv import load_dotenv

# Load settings from .env; override=True lets values in the file replace
# variables that are already set in the process environment.
load_dotenv(override=True)

# Both of these are required: a missing variable raises KeyError right here.
endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
index_name = os.environ["AZURE_SEARCH_INDEX"]

# Use key-based auth when an admin key is configured; otherwise fall back to
# DefaultAzureCredential. os.getenv is used instead of os.environ[...] so that
# an unset variable selects the fallback rather than raising KeyError.
admin_key = os.getenv("AZURE_SEARCH_ADMIN_KEY", "")
credential = AzureKeyCredential(admin_key) if admin_key else DefaultAzureCredential()

## Perform a vector similarity search

This example shows a pure vector search using a vectorizable text query: you pass in plain text, and the vectorizer configured on the index handles the query vectorization.

If you indexed the health plan PDF file, send queries that ask plan-related questions.

### Dependencies
- https://learn.microsoft.com/en-us/azure/search/search-api-versions

In [2]:
#! pip install -r requirements.txt

### Global variables

In [3]:
from azure.search.documents import SearchClient

# One client, built from the endpoint, index name and credential defined in the
# first cell, is reused by every query cell that follows.
search_client = SearchClient(
    endpoint=endpoint,
    index_name=index_name,
    credential=credential,
)

### Global evaluation helper

In [4]:
def evaluate_results(results):
    """Print a short, numbered summary of each search result.

    For every result a line with its 1-based position, its '@search.score'
    and its 'title' is printed. When the result carries a non-None
    '@search.reranker_score', that score is printed on a second line.

    Args:
        results: Iterable of dict-like search results. Each item must provide
            '@search.score'; 'title' and '@search.reranker_score' are optional.

    Raises:
        KeyError: If a result has no '@search.score' entry.
    """
    for position, result in enumerate(results, start=1):
        # .get() keeps the helper usable for queries whose `select` list does
        # not include 'title' (the title is then shown as None).
        title = result.get('title')
        print(f"Result index: {position}, score: {result['@search.score']}, title: {title}")

        # The reranker score is absent or None unless semantic ranking was requested.
        reranker_score = result.get('@search.reranker_score')
        if reranker_score is not None:
            print(f"Semantic reranker score: {reranker_score}")

In [16]:
# Query text shared by all of the search cells below.
query = "information theory"
# query = "ZYX1"

### Perform a vector similarity search
This example shows a pure vector search using a vectorizable text query: you pass in plain text, and the vectorizer configured on the index handles the query vectorization.

In [17]:
from azure.search.documents.models import VectorizableTextQuery

# API reference:
# https://learn.microsoft.com/en-us/python/api/azure-search-documents/azure.search.documents.models.vectorizabletextquery?view=azure-python-preview
# The query is passed as plain text; no embedding is computed in this notebook.
vector_query = VectorizableTextQuery(
    text=query,
    k_nearest_neighbors=5,
    fields="contentVector",
    exhaustive=True,
)
# Alternative: pass a raw, precomputed vector instead of relying on query vectorization.
# vector_query = RawVectorQuery(vector=generate_embeddings(query), k_nearest_neighbors=3, fields="vector")

# search_text=None means no full-text query is sent alongside the vector query.
results = search_client.search(
    search_text=None,
    vector_queries=[vector_query],
    select=["title", "chunk_id", "content"],
    # filter="type eq 1",
    top=5,
)

evaluate_results(results)

Result index: 1, score: 0.78535944, title: 266 # Christoph Adami on How Information Makes Sense of Biology.txt
Result index: 2, score: 0.7845987, title: 266 # Christoph Adami on How Information Makes Sense of Biology.txt
Result index: 3, score: 0.7844443, title: 266 # Christoph Adami on How Information Makes Sense of Biology.txt
Result index: 4, score: 0.78249663, title: 266 # Christoph Adami on How Information Makes Sense of Biology.txt
Result index: 5, score: 0.78184956, title: 266 # Christoph Adami on How Information Makes Sense of Biology.txt


## Hybrid search using vectors and full text in Azure AI Search

https://learn.microsoft.com/en-us/azure/search/hybrid-search-overview

Hybrid search is a combination of full text and vector queries that execute against a search index that contains both searchable plain text content and generated embeddings. For query purposes, hybrid search is:

- A single query request that includes both search and vectors query parameters
- Executing in parallel
- With merged results in the query response, scored using Reciprocal Rank Fusion (RRF, https://learn.microsoft.com/en-us/azure/search/hybrid-search-ranking)


In [18]:
# NOTE(review): k_nearest_neighbors is 1 here but 5 in the other query cells —
# confirm this is intentional.
vector_query = VectorizableTextQuery(
    text=query,
    k_nearest_neighbors=1,
    fields="contentVector",
    exhaustive=True,
)

# Hybrid request: the same text is sent both as the full-text query
# (search_text) and as the vector query.
results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["title", "chunk_id", "content"],
    top=5,
)

evaluate_results(results)

Result index: 1, score: 0.029487181454896927, title: 266 # Christoph Adami on How Information Makes Sense of Biology.txt
Result index: 2, score: 0.01666666753590107, title: push.txt
Result index: 3, score: 0.016393441706895828, title: 266 # Christoph Adami on How Information Makes Sense of Biology.txt
Result index: 4, score: 0.016129031777381897, title: 266 # Christoph Adami on How Information Makes Sense of Biology.txt
Result index: 5, score: 0.01587301678955555, title: 266 # Christoph Adami on How Information Makes Sense of Biology.txt


## Semantic ranking in Azure AI Search

https://learn.microsoft.com/en-us/azure/search/semantic-search-overview?source=recommendations

In [12]:
from azure.search.documents.models import (
    QueryAnswerType,
    QueryCaptionType,
    QueryType,
)

vector_query = VectorizableTextQuery(
    text=query,
    k_nearest_neighbors=5,
    fields="contentVector",
    exhaustive=True,
)

# Text + vector query with the semantic query type; extractive captions and
# answers are requested in the same call.
results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["title", "chunk_id", "content"],
    query_type=QueryType.SEMANTIC,
    semantic_configuration_name='my-semantic-config',
    query_caption=QueryCaptionType.EXTRACTIVE,
    query_answer=QueryAnswerType.EXTRACTIVE,
    top=4,
)

# Print any semantic answers first; a falsy return value simply skips the loop.
semantic_answers = results.get_answers()
for answer in semantic_answers or []:
    # Prefer the highlighted form of the answer when one is available.
    answer_body = answer.highlights if answer.highlights else answer.text
    print(f"Semantic Answer: {answer_body}")
    print(f"Semantic Answer Score: {answer.score}\n")

evaluate_results(results)

Result index: 1, score: 0.01666666753590107, title: push.txt
Semantic reranker score: 2.292738914489746
Result index: 2, score: 0.016393441706895828, title: 266 # Christoph Adami on How Information Makes Sense of Biology.txt
Semantic reranker score: 1.3575366735458374
Result index: 3, score: 0.016129031777381897, title: 266 # Christoph Adami on How Information Makes Sense of Biology.txt
Semantic reranker score: 1.146905541419983
Result index: 4, score: 0.015625, title: 266 # Christoph Adami on How Information Makes Sense of Biology.txt
Semantic reranker score: 1.109375
