## LlamaIndex + Azure AI Search ##
This notebook demonstrates the integration between Azure AI Search and LlamaIndex. It is adapted from https://docs.llamaindex.ai/en/stable/examples/vector_stores/AzureAISearchIndexDemo/

Prerequisites
- An Azure OpenAI embedding model deployment (this demo uses text-embedding-ada-002).
- An Azure OpenAI chat completions model deployment (for example gpt-4o, gpt-4o-mini, or gpt-3.5).
- An Azure AI Search service with its API key, endpoint, and API version. A helper script to deploy these for you will be provided if you don't already have them deployed.

The following cell installs llama-index and the other packages this notebook imports, in case they are not already installed.

In [None]:
!pip install llama-index
!pip install wget
%pip install llama-index-vector-stores-azureaisearch
%pip install azure-search-documents==11.5.1
%pip install python-dotenv

In [23]:
import logging
import sys
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from IPython.display import Markdown, display
from llama_index.core import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.settings import Settings
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.vector_stores.azureaisearch import AzureAISearchVectorStore
from llama_index.vector_stores.azureaisearch import (
    IndexManagement,
    MetadataIndexFieldType,
)

## Set up Azure OpenAI (this provides our chat completions client) ##

In [None]:
from dotenv import load_dotenv
import os

# Load the environment variables (e.g. from a local .env file)
load_dotenv()

azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_api_key = os.getenv("AZURE_OPENAI_KEY")
chat_completions = os.getenv("CHAT_COMPLETIONS_MODEL")
embed_model_name = os.getenv("EMBEDDING_DEPLOYMENT_NAME")
api_version = "2024-02-15-preview"

# Surface missing configuration by name up front; os.getenv silently returns None
# for unset variables, which otherwise only shows up later as a client error.
_unset_openai_vars = [
    name
    for name in (
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_OPENAI_KEY",
        "CHAT_COMPLETIONS_MODEL",
        "EMBEDDING_DEPLOYMENT_NAME",
    )
    if not os.getenv(name)
]
if _unset_openai_vars:
    print(f"Warning: environment variables not set: {', '.join(_unset_openai_vars)}")

# Chat completions client; `deployment_name` is the name of your own Azure deployment.
llm = AzureOpenAI(
    model="gpt-4o-mini",
    deployment_name=chat_completions,
    api_key=azure_openai_api_key,
    azure_endpoint=azure_openai_endpoint,
    api_version=api_version,
)

# You need to deploy your own embedding model as well as your own chat completion model
embed_model = AzureOpenAIEmbedding(
    model=embed_model_name,
    deployment_name=embed_model_name,
    api_key=azure_openai_api_key,
    azure_endpoint=azure_openai_endpoint,
    api_version=api_version,
)

# Confirm what was configured using non-secret values only. The client objects
# themselves are deliberately not echoed as cell output: their repr may include
# the API key, which would then be saved into the notebook file.
print(f"Chat deployment: {chat_completions}")
print(f"Embedding deployment: {embed_model_name}")

## Setup Azure AI Search ##

In [26]:
from dotenv import load_dotenv
import os

# Load the environment variables
load_dotenv()

search_service_api_key = os.getenv("SEARCH_INDEX_API_KEY")
search_service_endpoint = os.getenv("SEARCH_INDEX_ENDPOINT")
# Read here for reference; it is not passed to the clients created in this cell.
search_service_api_version = os.getenv("SEARCH_SERVICE_API_VERSION")

# Fail fast and name the missing settings; otherwise a None key/endpoint is handed
# straight to the Azure SDK objects below and the resulting error does not say
# which variable was forgotten.
if not search_service_api_key or not search_service_endpoint:
    raise ValueError(
        "SEARCH_INDEX_API_KEY and SEARCH_INDEX_ENDPOINT must both be set "
        "(for example in your .env file) before running this cell."
    )

credential = AzureKeyCredential(search_service_api_key)


# Index name to use
index_name = "llamaindex-vector-demo-1"

# Use index client to demonstrate creating an index
index_client = SearchIndexClient(
    endpoint=search_service_endpoint,
    credential=credential,
)

# Use search client to demonstrate using an existing index
search_client = SearchClient(
    endpoint=search_service_endpoint,
    index_name=index_name,
    credential=credential,
)


## Creating an index (if it doesn't exist) ##
The code below creates the index 'llamaindex-vector-demo-1' if it doesn't already exist. This specific index has the following fields:
id - Edm.String
chunk - Edm.String
embedding - Collection(Edm.Single)
metadata - Edm.String
doc_id - Edm.String
author - Edm.String
topic - Edm.String (populated from the `theme` metadata key)
director - Edm.String

In [27]:
# Metadata keys that should become filterable fields in the search index.
# NOTE(review): a tuple value presumably means (index field name, field type),
# i.e. the "theme" metadata key is stored in an index field named "topic" — confirm
# against the AzureAISearchVectorStore documentation.
metadata_fields = dict(
    author="author",
    theme=("topic", MetadataIndexFieldType.STRING),
    director="director",
)

# Vector store backed by the index client, so the index can be created on demand.
vector_store = AzureAISearchVectorStore(
    # Which service / index to talk to, and what to do if the index is missing
    search_or_index_client=index_client,
    index_name=index_name,
    index_management=IndexManagement.CREATE_IF_NOT_EXISTS,
    # Names of the index fields used for each piece of a stored node
    id_field_key="id",
    doc_id_field_key="doc_id",
    chunk_field_key="chunk",
    embedding_field_key="embedding",
    metadata_string_field_key="metadata",
    filterable_metadata_field_keys=metadata_fields,
    # Embedding size and search behaviour
    embedding_dimensionality=1536,
    vector_algorithm_type="exhaustiveKnn",
    language_analyzer="en.lucene",
    # Optional: compression_type="scalar" or "binary".
    # NOTE: compression is only supported for HNSW.
)

In [28]:
# Read the files found under ./data/ into documents
documents = SimpleDirectoryReader(input_dir="./data/").load_data()

# Make the Azure OpenAI models configured earlier the LlamaIndex-wide defaults
Settings.llm = llm
Settings.embed_model = embed_model

# Build the index on top of the Azure AI Search vector store
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents=documents,
    storage_context=storage_context,
)

## Query the data ##
This uses a query engine, which is created from the index; here it is configured with the `similarity_top_k` parameter.

In [None]:
# Build a query engine on the index (top 3 most similar results) and ask a question
query_engine = index.as_query_engine(similarity_top_k=3)
response = query_engine.query("What is Prompt Injection?")

# Show the answer rendered as bold Markdown
display(Markdown("<b>{}</b>".format(response)))

In [None]:
# Ask a second question with the same query engine and render the answer as Markdown
response = query_engine.query("What is Excessive Agency?")
display(Markdown(str(response)))

## Performing a vector search ##
Four query modes are supported: Default (vector search), Sparse, Hybrid, and Semantic_Hybrid. For the default retriever, notice that we pass DEFAULT.

In [None]:
from llama_index.core.vector_stores.types import VectorStoreQueryMode


# DEFAULT query mode, i.e. plain vector search
default_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.DEFAULT
)
# Stored under its own name: the retriever returns a list of scored nodes, which is
# a different kind of object than the query-engine `response` used in earlier cells.
retrieved_nodes = default_retriever.retrieve("What is Model Theft about?")

# Loop through each NodeWithScore in the results
for node_with_score in retrieved_nodes:
    node = node_with_score.node  # The TextNode object
    score = node_with_score.score  # The similarity score
    chunk_id = node.id_  # The chunk ID

    # Extract the relevant metadata from the node
    file_name = node.metadata.get("file_name", "Unknown")

    # Extract the text content from the node, with a placeholder for empty text
    text_content = node.text or "No content available"

    # Print the results in a user-friendly format
    print(f"Score: {score}")
    print(f"File Name: {file_name}")
    print(f"Id: {chunk_id}")
    print("\nExtracted Content:")
    print(text_content)
    print("\n" + "=" * 40 + " End of Result " + "=" * 40 + "\n")

## Now trying this out with Hybrid Search ##

In [None]:
from llama_index.core.vector_stores.types import VectorStoreQueryMode

# Same index, but the retriever is created with the HYBRID query mode
hybrid_retriever = index.as_retriever(vector_store_query_mode=VectorStoreQueryMode.HYBRID)

# Left as the final expression so the retrieved nodes are shown as the cell output
hybrid_retriever.retrieve("What is Prompt Injection?")

## Perform a Hybrid Search with Semantic Reranking ##
- Semantic search is covered in more detail in the docs at https://learn.microsoft.com/azure/search/semantic-search-overview
- It is only available on SKUs above the free tier, so it did not work on my end.

In [None]:
from dotenv import load_dotenv
import os
# Imported under an alias so this cell does not rebind the name `AzureOpenAI`,
# which the Azure OpenAI setup cell uses for the LlamaIndex LLM class. Without the
# alias, re-running that setup cell after this one would construct the wrong class.
from openai import AzureOpenAI as AzureOpenAIClient
import json
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import AzureError

# Load the environment variables
load_dotenv()

search_service_api_key = os.getenv("SEARCH_INDEX_API_KEY")
search_service_endpoint = os.getenv("SEARCH_INDEX_ENDPOINT")
search_service_api_version = os.getenv("SEARCH_SERVICE_API_VERSION")
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_api_key = os.getenv("AZURE_OPENAI_KEY")
# API_VERSION / DEPLOYMENT_NAME are honoured when set; otherwise fall back to the
# values the Azure OpenAI setup cell uses, so one .env file works for both cells.
api_version = os.getenv("API_VERSION") or "2024-02-15-preview"
deployment_name = os.getenv("DEPLOYMENT_NAME") or os.getenv("CHAT_COMPLETIONS_MODEL")
embed_model_name = os.getenv("EMBEDDING_DEPLOYMENT_NAME")
credential = AzureKeyCredential(search_service_api_key)

# Index name to use
index_name = "llamaindex-vector-demo-1"

# The question to answer from the indexed documents
user_question = "What is Prompt Injection?"

try:
    # Native AzureOpenAI client (from the `openai` package, not LlamaIndex)
    client = AzureOpenAIClient(
        azure_endpoint=azure_openai_endpoint,
        api_key=azure_openai_api_key,
        api_version=api_version,
    )

    completion = client.chat.completions.create(
        model=deployment_name,
        messages=[
            {
                "role": "system",
                "content": "You are an AI assistant that helps people find information.",
            },
            # A user turn is required for there to be a question to answer;
            # the original request only carried the system message.
            {
                "role": "user",
                "content": user_question,
            },
        ],
        max_tokens=800,
        temperature=0.7,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
        # Point the chat completion at the Azure AI Search index as a data source
        extra_body={
            "data_sources": [
                {
                    "type": "azure_search",
                    "parameters": {
                        "endpoint": search_service_endpoint,
                        "index_name": index_name,
                        "authentication": {
                            "type": "api_key",
                            "api_key": search_service_api_key,
                        },
                    },
                }
            ]
        },
    )

    print(completion.model_dump_json(indent=2))

except AzureError as e:
    print(f"An error occurred: {e}")
except Exception as e:
    # Best-effort demo cell: report the failure instead of stopping the notebook
    print(f"An unexpected error occurred: {e}")