# Azure AI Search: Relevance

## Setup API clients

In [75]:
import os

# import azure.identity
import dotenv
import openai
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery
from azure.core.credentials import AzureKeyCredential
import json

# Load variables from a local .env file (if one exists) into the process
# environment so the API keys read below can be found. One call is enough.
dotenv.load_dotenv()

# Set up the Azure OpenAI client (API-key authentication).
AZURE_OPENAI_SERVICE = "ragsearchpocopenai"
AZURE_OPENAI_ADA_DEPLOYMENT = "text-embedding-ada-002"

# The key is read from the environment so it never appears in the notebook.
azure_openai_key = os.getenv("AZURE_OPENAI_KEY")
openai_client = openai.AzureOpenAI(
    api_version="2024-06-01",
    azure_endpoint=f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com",
    api_key=azure_openai_key,
)


def get_embedding(text):
    """Return the embedding vector for ``text``.

    Sends ``text`` to the embeddings deployment named by
    ``AZURE_OPENAI_ADA_DEPLOYMENT`` through the module-level ``openai_client``
    and returns the ``embedding`` attribute of the first item in the
    response's ``data`` list.
    """
    response = openai_client.embeddings.create(
        input=text,
        model=AZURE_OPENAI_ADA_DEPLOYMENT,
    )
    first_item = response.data[0]
    return first_item.embedding


# Azure AI Search client bound to the index queried by the cells below.
AZURE_SEARCH_SERVICE = "ragsearchpocsch"
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"
AZURE_SEARCH_FULL_INDEX = "ragsearch"

# The API key is read from the AZURE_SEARCH_KEY environment variable.
search_credential = AzureKeyCredential(os.getenv("AZURE_SEARCH_KEY"))
search_client = SearchClient(
    endpoint=AZURE_SEARCH_ENDPOINT,
    index_name=AZURE_SEARCH_FULL_INDEX,
    credential=search_credential,
)

#### Vector search is not always optimal

For example, consider searches for exact strings.

In [76]:
search_query = "$45.00"
search_vector = get_embedding(search_query)

# Pure vector query: no search text is sent, only the query's embedding.
vector_query = VectorizedQuery(
    vector=search_vector, k_nearest_neighbors=50, fields="vector"
)
r = search_client.search(None, top=3, vector_queries=[vector_query])

# For each hit, show the chunk text from the first case-insensitive match of
# the query onward, or "Not found" when the chunk does not contain the query.
needle = search_query.lower()
results = []
for doc in r:
    chunk = doc["chunk"]
    match_at = chunk.lower().find(needle)
    if match_at == -1:
        found_content = "Not found"
    else:
        found_content = chunk[match_at:].replace("\n", " ")
    entry = {
        "score": round(doc["@search.score"], 5),
        "content": found_content,
        "title": doc["title"],
    }
    results.append(entry)

print(json.dumps(results, indent=2))

[
  {
    "score": 0.81123,
    "content": "Not found",
    "title": "Northwind_Health_Plus_Benefits_Details.pdf"
  },
  {
    "score": 0.80838,
    "content": "$45.00</td>\r <td>$55.00</td>\r </tr>\r <tr>\r <td>Employee +1</td>\r <td>$65.00</td>\r <td>$71.00</td>\r </tr>\r <tr>\r <td>Employee +2 or more</td>\r <td>$78.00</td>\r <td>$89.00</td>\r </tr>\r </table>",
    "title": "Benefit_Options.pdf"
  },
  {
    "score": 0.80753,
    "content": "Not found",
    "title": "Northwind_Standard_Benefits_Details.pdf"
  }
]


Compare to a text search for the same query:

In [77]:
# Text-only query: the query string is sent as search text, with no vector.
r = search_client.search(search_query, top=3)

# For each hit, show the chunk text from the first case-insensitive match of
# the query onward, or "Not found" when the chunk does not contain the query.
needle = search_query.lower()
results = []
for doc in r:
    chunk = doc["chunk"]
    match_at = chunk.lower().find(needle)
    if match_at == -1:
        found_content = "Not found"
    else:
        found_content = chunk[match_at:].replace("\n", " ")
    entry = {
        "score": round(doc["@search.score"], 5),
        "content": found_content,
        "title": doc["title"],
    }
    results.append(entry)

print(json.dumps(results, indent=2))

[
  {
    "score": 3.863,
    "content": "$45.00</td>\r <td>$55.00</td>\r </tr>\r <tr>\r <td>Employee +1</td>\r <td>$65.00</td>\r <td>$71.00</td>\r </tr>\r <tr>\r <td>Employee +2 or more</td>\r <td>$78.00</td>\r <td>$89.00</td>\r </tr>\r </table>",
    "title": "Benefit_Options.pdf"
  }
]


#### Hybrid retrieval

Uses Reciprocal Rank Fusion (RRF) to combine vector and text results.

In [78]:
search_vector = get_embedding(search_query)

# Hybrid query: the same request carries both the search text and a vector query.
vector_query = VectorizedQuery(
    vector=search_vector, k_nearest_neighbors=10, fields="vector"
)
r = search_client.search(search_query, top=15, vector_queries=[vector_query])

# For each hit, show the chunk text from the first case-insensitive match of
# the query onward, or "Not found" when the chunk does not contain the query.
needle = search_query.lower()
results = []
for doc in r:
    chunk = doc["chunk"]
    match_at = chunk.lower().find(needle)
    if match_at == -1:
        found_content = "Not found"
    else:
        found_content = chunk[match_at:].replace("\n", " ")
    entry = {
        "score": round(doc["@search.score"], 5),
        "content": found_content,
        "title": doc["title"],
    }
    results.append(entry)

print(json.dumps(results, indent=2))

[
  {
    "score": 0.03306,
    "content": "$45.00</td>\r <td>$55.00</td>\r </tr>\r <tr>\r <td>Employee +1</td>\r <td>$65.00</td>\r <td>$71.00</td>\r </tr>\r <tr>\r <td>Employee +2 or more</td>\r <td>$78.00</td>\r <td>$89.00</td>\r </tr>\r </table>",
    "title": "Benefit_Options.pdf"
  },
  {
    "score": 0.01667,
    "content": "Not found",
    "title": "Northwind_Health_Plus_Benefits_Details.pdf"
  },
  {
    "score": 0.01613,
    "content": "Not found",
    "title": "Northwind_Standard_Benefits_Details.pdf"
  },
  {
    "score": 0.01587,
    "content": "Not found",
    "title": "Northwind_Standard_Benefits_Details.pdf"
  },
  {
    "score": 0.01562,
    "content": "Not found",
    "title": "Northwind_Health_Plus_Benefits_Details.pdf"
  },
  {
    "score": 0.01538,
    "content": "Not found",
    "title": "Northwind_Standard_Benefits_Details.pdf"
  },
  {
    "score": 0.01515,
    "content": "Not found",
    "title": "Northwind_Standard_Benefits_Details.pdf"
  },
  {
    "score": 0.

#### Hybrid ranking is not always optimal

In [79]:
search_query = "learning about underwater activities"
search_vector = get_embedding(search_query)

# Hybrid query: search text plus a vector query in the same request.
vector_query = VectorizedQuery(
    vector=search_vector, k_nearest_neighbors=10, fields="vector"
)
r = search_client.search(search_query, top=5, vector_queries=[vector_query])

# Collect score, chunk text (with "\n" replaced by spaces) and title for each hit.
results = []
for doc in r:
    content = doc["chunk"].replace("\n", " ")
    entry = {
        "score": round(doc["@search.score"], 5),
        "content": content,
        "title": doc["title"],
    }
    results.append(entry)

print(json.dumps(results, indent=2))

[
  {
    "score": 0.032,
    "content": "Introducing PerksPlus - the ultimate benefits program designed to support the health and wellness of\r employees. With PerksPlus, employees have the opportunity to expense up to $1000 for fitness-related\r programs, making it easier and more affordable to maintain a healthy lifestyle. PerksPlus is not only\r designed to support employees' physical health, but also their mental health. Regular exercise has been\r shown to reduce stress, improve mood, and enhance overall well-being. With PerksPlus, employees can\r invest in their health and wellness, while enjoying the peace of mind that comes with knowing they are\r getting the support they need to lead a healthy life.\r \r What is Covered?\r \r PerksPlus covers a wide range of fitness activities, including but not limited to:\r \r \u00b7 Gym memberships\r \r \u00b7 Personal training sessions\r \r \u00b7 Yoga and Pilates classes\r \r \u00b7 Fitness equipment purchases\r \r \u00b7 Sports team fee

#### Hybrid + semantic reranking 🎉

In [80]:
search_query = "learning about underwater activities"
search_vector = get_embedding(search_query)

# Hybrid query (search text + vector) issued with query_type="semantic" and a
# named semantic configuration, so each hit also carries a reranker score.
vector_query = VectorizedQuery(
    vector=search_vector, k_nearest_neighbors=50, fields="vector"
)
r = search_client.search(
    search_query,
    top=5,
    vector_queries=[vector_query],
    query_type="semantic",
    semantic_configuration_name="ragsearch-semantic-config",
)

# Collect both scores, chunk text (with "\n" replaced by spaces) and title
# for each hit.
results = []
for doc in r:
    content = doc["chunk"].replace("\n", " ")
    entry = {
        "score": round(doc["@search.score"], 5),
        "content": content,
        "Reranker": round(doc["@search.reranker_score"], 5),
        "title": doc["title"],
    }
    results.append(entry)

print(json.dumps(results, indent=2))

[
  {
    "score": 0.032,
    "content": "Introducing PerksPlus - the ultimate benefits program designed to support the health and wellness of\r employees. With PerksPlus, employees have the opportunity to expense up to $1000 for fitness-related\r programs, making it easier and more affordable to maintain a healthy lifestyle. PerksPlus is not only\r designed to support employees' physical health, but also their mental health. Regular exercise has been\r shown to reduce stress, improve mood, and enhance overall well-being. With PerksPlus, employees can\r invest in their health and wellness, while enjoying the peace of mind that comes with knowing they are\r getting the support they need to lead a healthy life.\r \r What is Covered?\r \r PerksPlus covers a wide range of fitness activities, including but not limited to:\r \r \u00b7 Gym memberships\r \r \u00b7 Personal training sessions\r \r \u00b7 Yoga and Pilates classes\r \r \u00b7 Fitness equipment purchases\r \r \u00b7 Sports team fee