In [None]:
import os

from openai import AzureOpenAI
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from tenacity import retry, stop_after_attempt, wait_random_exponential
from azure.search.documents.models import (QueryAnswerType, QueryCaptionType,
                                           QueryType, VectorizedQuery)



def printmd(string):
    """Render ``string`` as Markdown in the notebook output area.

    Args:
        string: Markdown-formatted text to display.
    """
    # Imported here because nothing else in the notebook imports ``Markdown``;
    # without this the call below fails with a NameError.
    from IPython.display import Markdown, display

    display(Markdown(string))

In [None]:
# Load service settings from the local env file into the process environment.
load_dotenv("credentials.env")

# --- Azure AI Search settings -------------------------------------------------
# Endpoint, admin key and index name are needed to build the credential and the
# search client, so they are read with os.environ[...]: a missing variable fails
# right here with a KeyError naming it, consistent with the Azure OpenAI
# settings below.
ai_search_service_endpoint = os.environ["AZURE_AI_SEARCH_ENDPOINT"]
ai_search_admin_key = os.environ["AZURE_AI_SEARCH_KEY"]
ai_search_index_name = os.environ["AZURE_AI_SEARCH_INDEX_NAME"]

# These two are not needed to construct any object in this cell; they are left
# as None when the variable is not set.
ai_search_api_version = os.getenv("AZURE_AI_SEARCH_API_VERSION")
ai_search_semantic_config_name = os.getenv("AZURE_AI_SEARCH_SEMANTIC_CONFIG_NAME")

azure_ai_search_credential = AzureKeyCredential(ai_search_admin_key)


# --- Azure OpenAI settings ----------------------------------------------------
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
azure_openai_api_key = os.environ["AZURE_OPENAI_API_KEY"]
azure_openai_api_version = os.environ["AZURE_OPENAI_API_VERSION"]
azure_openai_embedding_deployed_model = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL"]
# NOTE(review): the variable name is spelled "DEPLOYEMENT" here; it is kept
# as-is because it must match the key used in credentials.env.
azure_openai_chat_model_deployment_name = os.environ["AZURE_OPENAI_CHAT_MODEL_DEPLOYEMENT_NAME"]

In [None]:
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def generate_embeddings(text, model, client):
    """Return the embedding vector for ``text``.

    The call is wrapped by ``tenacity.retry``: on failure it is retried with a
    randomized exponential wait (configured with ``min=1, max=20``) and gives
    up after 6 attempts.

    Args:
        text: The text to embed; sent as a single-element input list.
        model: Model (deployment) name passed to the embeddings endpoint.
        client: Client object exposing ``embeddings.create(...)``.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    response = client.embeddings.create(input=[text], model=model)
    # Only one input was sent, so only the first result is read.
    first_item = response.data[0]
    return first_item.embedding


In [None]:
def get_semantic_hybrid_results(query: str,
                                search_client,
                                azure_openai_client,
                                ai_search_semantic_config_name,
                                embeddings_deployed_model_name: str = "text-embedding-ada-002",
                                top: int = 5,
                                vector_field_name: str = "vector",
                                k_nearest_neighbors: int = 3,
                                select_fields=None):
    """Run a hybrid (text + vector) query with semantic ranking and return the hits.

    The query text is embedded with ``generate_embeddings`` and sent together
    with the raw text in one ``search_client.search`` call that requests
    ``QueryType.SEMANTIC`` with extractive captions and answers.

    Args:
        query: The user question; used both as search text and as embedding input.
        search_client: Client exposing ``search(...)`` for the target index.
        azure_openai_client: Client passed to ``generate_embeddings``.
        ai_search_semantic_config_name: Name of the semantic configuration to
            use (see the index definition in the Azure Portal).
        embeddings_deployed_model_name: Embedding model/deployment name used to
            embed ``query``.
        top: Value forwarded as ``top`` to the search call.
        vector_field_name: Name of the index field containing the content
            vector (see the index schema in the Azure Portal). Defaults to
            ``"vector"``.
        k_nearest_neighbors: Value forwarded as ``k_nearest_neighbors`` to the
            vector query. Defaults to 3.
        select_fields: Index fields to return for each hit. Defaults to
            ``["title", "chunk", "chunk_id", "parent_id"]``.

    Returns:
        A list containing every item yielded by ``search_client.search``.
    """
    if select_fields is None:
        select_fields = ["title", "chunk", "chunk_id", "parent_id"]

    query_embedding = generate_embeddings(
        query, model=embeddings_deployed_model_name, client=azure_openai_client)

    vector_query = VectorizedQuery(
        vector=query_embedding,
        k_nearest_neighbors=k_nearest_neighbors,
        fields=vector_field_name,
    )

    # list(...) consumes the returned iterable so callers can use len() and
    # iterate over the hits more than once.
    results = list(search_client.search(
        search_text=query,
        vector_queries=[vector_query],
        select=select_fields,
        query_type=QueryType.SEMANTIC,
        semantic_configuration_name=ai_search_semantic_config_name,
        query_caption=QueryCaptionType.EXTRACTIVE,
        query_answer=QueryAnswerType.EXTRACTIVE,
        top=top,
    ))

    return results

In [None]:

QUESTION = "Can you give me a brief description of Azure OPenAI service?"


# Client for the search index configured in the settings cell.
search_client = SearchClient(
    endpoint=ai_search_service_endpoint,
    index_name=ai_search_index_name,
    credential=azure_ai_search_credential,
)


# Reuse the key already loaded in the settings cell instead of re-reading the
# environment.
azure_openai_client = AzureOpenAI(
    api_key=azure_openai_api_key,
    api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
)

# Pass the configured embedding deployment explicitly; otherwise the function
# falls back to its default name ("text-embedding-ada-002"), which may not
# match the deployment configured in credentials.env.
search_results = get_semantic_hybrid_results(
    query=QUESTION,
    search_client=search_client,
    azure_openai_client=azure_openai_client,
    ai_search_semantic_config_name=ai_search_semantic_config_name,
    embeddings_deployed_model_name=azure_openai_embedding_deployed_model,
    top=5,
)

print(f"Total results: {len(search_results)}")

# Print the scores and the selected fields of each hit.
for result in search_results:
    print(
        f"~~~~~~~~~~~~~~~~~~{result['title']}~~~~~~~~~~~~~~~~~~")
    print(f"Reranker Score: {result['@search.reranker_score']}")
    print(f"Search Score: {result['@search.score']}")
    print(f"Title: {result['title']}")
    print(f"Chunck Id: {result['chunk_id']}")
    print(f"Parent Id: {result['parent_id']}")
    print(f"Content: {result['chunk']}")
