# BEIR Benchmarking for Azure AI Search (Part2)

In this Part 2 notebook, we show how to use the BEIR package to benchmark Azure AI Search across several search types: simple (keyword), semantic, vector, and hybrid.


## Preparation


### Configuration


In [None]:
import os
import json

# Name of the BEIR dataset to download and benchmark
dataset_name = "scifact"

# When True, query embeddings are generated; when False, they are read from a saved file
vectorize_query = False

### Environment variables


In [None]:
# Load environment variables from a .env file
from dotenv import load_dotenv

load_dotenv()

### Connect to Azure AI Search


In [None]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents import SearchClient

# Search service to benchmark and the index to query (named after the dataset)
service_name = "benchmark-ai-search"
index_name = f"{dataset_name}-vector"

# The admin key must be set in the environment; a KeyError is raised otherwise
admin_key = os.environ["SEARCH_ADMIN_KEY"]
endpoint = f"https://{service_name}.search.windows.net/"

# Client used for every query issued against the index
search_client = SearchClient(
    credential=AzureKeyCredential(admin_key),
    index_name=index_name,
    endpoint=endpoint,
)

### Download BEIR datasets


In [None]:
from beir import util, LoggingHandler
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval.search.lexical import BM25Search as BM25

In [None]:
# Download the dataset archive and unpack it under ./datasets
out_dir = "./datasets"
url = (
    "https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/"
    f"{dataset_name}.zip"
)
data_path = util.download_and_unzip(url, out_dir)

In [None]:
# Load the corpus, the queries, and the relevance judgments of the test split
data_loader = GenericDataLoader(data_path)
corpus, queries, qrels = data_loader.load(split="test")

### Vectorize Queries


In [None]:
from azure.search.documents.models import VectorizedQuery

In [None]:
from openai import AzureOpenAI
from tenacity import retry, wait_random_exponential, stop_after_attempt

# Azure OpenAI client; the endpoint and API key are read from the environment
openai_client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2023-05-15",
)

# Value passed as `model` when requesting embeddings
model = "text-embedding-ada-002-v2"


# Embed one piece of text through the Azure OpenAI client; used here for query embeddings.
# Failed calls are retried with a random exponential wait (min=1, max=20) and
# abandoned after 6 attempts.
@retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=20))
def generate_embeddings(text, model=model):
    """Return the embedding of `text` produced by `model`."""
    response = openai_client.embeddings.create(model=model, input=[text])
    first_item = response.data[0]
    return first_item.embedding

In [None]:
# Query embeddings are cached on disk so the embedding service only has to be
# called once per dataset. The file name is derived from `dataset_name` so that
# switching datasets cannot silently reuse another dataset's vectors (for
# "scifact" this is the same "scifact_query_vector.json" as before).
query_vector_path = f"{dataset_name}_query_vector.json"

if vectorize_query:
    # Embed every query text and persist the {query_id: embedding} mapping
    queriesVector = {
        query_id: generate_embeddings(query_text)
        for query_id, query_text in queries.items()
    }
    with open(query_vector_path, "w", encoding="utf-8") as f:
        json.dump(queriesVector, f)
else:
    # Reuse the previously saved embeddings; `with` closes the file handle
    with open(query_vector_path, encoding="utf-8") as f:
        queriesVector = json.load(f)

## Search for BEIR dataset


### Simple


In [None]:
from beir.retrieval.evaluation import EvaluateRetrieval

# Text search with query_type="simple": request the top 100 hits per query and
# record each hit's search score keyed by its corpus id.
dict_results = {}
for query_id, query_text in queries.items():
    hits = search_client.search(
        search_text=query_text,
        include_total_count=True,
        select="corpusId, title, text",
        top=100,
        query_type="simple",
    )
    dict_results[query_id] = {hit["corpusId"]: hit["@search.score"] for hit in hits}

# Evaluate the run against the relevance judgments at several cutoffs
k_values = [1, 3, 5, 10, 50, 100]
ndcg, _map, recall, precision = EvaluateRetrieval.evaluate(
    qrels, dict_results, k_values
)
print(ndcg, _map, recall, precision)

### Semantic search


In [None]:
from beir.retrieval.evaluation import EvaluateRetrieval

# Semantic query: each query is sent as text with query_type="semantic" and the
# "my-semantic-config" configuration; the reranker score of each hit is kept.
dict_results = {}
for query_id, query_text in queries.items():
    reranked_hits = search_client.search(
        search_text=query_text,
        include_total_count=True,
        select="corpusId, title, text",
        top=50,
        semantic_configuration_name="my-semantic-config",
        query_type="semantic",
    )
    dict_results[query_id] = {
        hit["corpusId"]: hit["@search.reranker_score"] for hit in reranked_hits
    }

# Evaluate the run against the relevance judgments at several cutoffs.
# NOTE(review): only 50 hits are requested per query, so the cutoff of 100
# cannot see more than 50 documents — confirm this is intended.
k_values = [1, 3, 5, 10, 50, 100]
ndcg, _map, recall, precision = EvaluateRetrieval.evaluate(
    qrels, dict_results, k_values
)
print(ndcg, _map, recall, precision)

### Vector Search


#### HNSW


In [None]:
from beir.retrieval.evaluation import EvaluateRetrieval

# Pure vector search: only the precomputed query embedding is sent (no search
# text), matched against the title and text vector fields. `exhaustive` is not
# set here; per this section's heading the index is expected to use HNSW —
# confirm against the index definition.
# The query text itself is not needed, so it is no longer looked up.
dict_results = {}
for query_id in queries:
    vector_query = VectorizedQuery(
        vector=queriesVector[query_id],
        k_nearest_neighbors=50,
        fields="titleVector, textVector",
    )
    hits = search_client.search(
        vector_queries=[vector_query],
        include_total_count=True,
        select="corpusId, title, text",
        top=50,
    )
    dict_results[query_id] = {hit["corpusId"]: hit["@search.score"] for hit in hits}

# Evaluate the run against the relevance judgments at several cutoffs.
# NOTE(review): only 50 hits are requested per query, so the cutoff of 100
# cannot see more than 50 documents — confirm this is intended.
ndcg, _map, recall, precision = EvaluateRetrieval.evaluate(
    qrels, dict_results, [1, 3, 5, 10, 50, 100]
)
print(ndcg, _map, recall, precision)

#### Exhaustive KNN


In [None]:
from beir.retrieval.evaluation import EvaluateRetrieval

# Pure vector search with exhaustive=True: only the precomputed query embedding
# is sent (no search text), matched against the title and text vector fields.
# The query text itself is not needed, so it is no longer looked up.
dict_results = {}
for query_id in queries:
    vector_query = VectorizedQuery(
        vector=queriesVector[query_id],
        k_nearest_neighbors=50,
        fields="titleVector, textVector",
        exhaustive=True,
    )
    hits = search_client.search(
        vector_queries=[vector_query],
        include_total_count=True,
        select="corpusId, title, text",
        top=50,
    )
    dict_results[query_id] = {hit["corpusId"]: hit["@search.score"] for hit in hits}

# Evaluate the run against the relevance judgments at several cutoffs.
# NOTE(review): only 50 hits are requested per query, so the cutoff of 100
# cannot see more than 50 documents — confirm this is intended.
ndcg, _map, recall, precision = EvaluateRetrieval.evaluate(
    qrels, dict_results, [1, 3, 5, 10, 50, 100]
)
print(ndcg, _map, recall, precision)

### Hybrid search


#### HNSW + Reranker


In [None]:
from beir.retrieval.evaluation import EvaluateRetrieval

# Hybrid query: the query text and its precomputed embedding are sent together,
# with query_type="semantic" and the "my-semantic-config" configuration; the
# reranker score of each hit is kept.
dict_results = {}
for query_id, query_text in queries.items():
    embedding_query = VectorizedQuery(
        vector=queriesVector[query_id],
        k_nearest_neighbors=50,
        fields="titleVector, textVector",
    )
    reranked_hits = search_client.search(
        search_text=query_text,
        vector_queries=[embedding_query],
        include_total_count=True,
        select="corpusId, title, text",
        top=50,
        semantic_configuration_name="my-semantic-config",
        query_type="semantic",
    )
    dict_results[query_id] = {
        hit["corpusId"]: hit["@search.reranker_score"] for hit in reranked_hits
    }

# Evaluate the run against the relevance judgments at several cutoffs.
# NOTE(review): only 50 hits are requested per query, so the cutoff of 100
# cannot see more than 50 documents — confirm this is intended.
k_values = [1, 3, 5, 10, 50, 100]
ndcg, _map, recall, precision = EvaluateRetrieval.evaluate(
    qrels, dict_results, k_values
)
print(ndcg, _map, recall, precision)

#### ExhaustiveKNN + Reranker


In [None]:
from beir.retrieval.evaluation import EvaluateRetrieval

# Hybrid query with exhaustive=True on the vector part: the query text and its
# precomputed embedding are sent together, with query_type="semantic" and the
# "my-semantic-config" configuration; the reranker score of each hit is kept.
dict_results = {}
for query_id, query_text in queries.items():
    embedding_query = VectorizedQuery(
        vector=queriesVector[query_id],
        k_nearest_neighbors=50,
        fields="titleVector, textVector",
        exhaustive=True,
    )
    reranked_hits = search_client.search(
        search_text=query_text,
        vector_queries=[embedding_query],
        include_total_count=True,
        select="corpusId, title, text",
        top=50,
        semantic_configuration_name="my-semantic-config",
        query_type="semantic",
    )
    dict_results[query_id] = {
        hit["corpusId"]: hit["@search.reranker_score"] for hit in reranked_hits
    }

# Evaluate the run against the relevance judgments at several cutoffs.
# NOTE(review): only 50 hits are requested per query, so the cutoff of 100
# cannot see more than 50 documents — confirm this is intended.
k_values = [1, 3, 5, 10, 50, 100]
ndcg, _map, recall, precision = EvaluateRetrieval.evaluate(
    qrels, dict_results, k_values
)
print(ndcg, _map, recall, precision)