# Azure Cognitive Search

# Basic Example

In this basic example, we take a Paul Graham essay, split it into chunks, embed it using an OpenAI embedding model, load it into an Azure Cognitive Search index, and then query it.

In [None]:
import logging
import sys
from IPython.display import Markdown, display

# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# logger = logging.getLogger(__name__)

In [None]:
# Install the notebook's dependencies using the interpreter that runs this kernel
# (sys.executable), so the packages are installed for the same Python the notebook uses.
# azure-search-documents is pinned to the 11.4.0b8 pre-release; the other packages are unpinned.
!{sys.executable} -m pip install openai
!{sys.executable} -m pip install llama-index
!{sys.executable} -m pip install azure-search-documents==11.4.0b8
!{sys.executable} -m pip install azure-identity

# Get AAD Auth Token

The default Azure AAD credential is used. Ensure `az login` has been run and that the logged-in user has access to the Azure OpenAI and ACS instances used.

In [None]:
from azure.identity import DefaultAzureCredential

# Scope for which the AAD access token is requested
cognitive_services_scope = "https://cognitiveservices.azure.com/.default"

# Request credential; later cells read the raw token string via `token.token`
default_credential = DefaultAzureCredential()
token = default_credential.get_token(cognitive_services_scope)

# Configure Azure OpenAI LLM and Embedding

In [None]:
# Endpoint of the Azure OpenAI resource.
# NOTE(review): this looks like a specific demo resource - point it at your own endpoint.
aoai_base = "https://demoaoai002.openai.azure.com/"

# Connection settings shared by the LLM and embedding clients configured below
azure_kwargs = dict(
    api_type="azure_ad",
    api_version="2023-03-15-preview",
    api_base=aoai_base,
)

## LLM Setup

The next step assumes a deployment name of "text-davinci-003". Update this name if you have used a different name for your deployment of the "text-davinci-003" model.

In [None]:
from langchain.llms import AzureOpenAI
from llama_index import LLMPredictor

# Settings for the Azure OpenAI completion model. The AAD access token is
# supplied through the `openai_api_key` argument, and the shared Azure
# connection settings are forwarded via `model_kwargs`.
llm_settings = dict(
    deployment_name="text-davinci-003",  # Update deployment name if necessary
    model_name="text-davinci-003",
    temperature=0.9,
    openai_api_version=azure_kwargs["api_version"],
    openai_api_key=token.token,  # Authenticate using AAD token
    model_kwargs=azure_kwargs,
)
llm = AzureOpenAI(**llm_settings)

# define LLM: wrap the langchain model so llama_index can use it
llm_predictor = LLMPredictor(llm)

## Embedding Setup

The next step assumes a deployment name of "text-embedding-ada-002". Update this name if you have used a different name for your deployment of the "text-embedding-ada-002" model.

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding

# load in AOAI embedding model from langchain
embedding_settings = {
    "model": "text-embedding-ada-002",
    "deployment": "text-embedding-ada-002",  # Update deployment name if necessary
    "openai_api_key": token.token,  # Authenticate using AAD token
    "openai_api_type": azure_kwargs["api_type"],
    "openai_api_base": azure_kwargs["api_base"],
    "openai_api_version": azure_kwargs["api_version"],
    # NOTE(review): presumably limits each embedding request to a single input - confirm
    "chunk_size": 1,
}
oai_embeddings = OpenAIEmbeddings(**embedding_settings)

In [None]:
# set up Azure Cognitive Search
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential
import getpass

# Both values are read interactively; getpass prompts without echoing the input
search_service_name = getpass.getpass("Azure Cognitive Search Service Name")
key = getpass.getpass("Azure Cognitive Search Key")

service_endpoint = f"https://{search_service_name}.search.windows.net"
cognitive_search_credential = AzureKeyCredential(key)

# Index name to use
index_name = "quickstart"

# Connection settings common to both clients
client_settings = {
    "endpoint": service_endpoint,
    "credential": cognitive_search_credential,
}

# Use index client to demonstrate creating an index
index_client = SearchIndexClient(**client_settings)

# Use search client to demonstrate using an existing index
search_client = SearchClient(index_name=index_name, **client_settings)

# Create Index (if it does not exist)

Demonstrates creating a vector index named `quickstart` (the value of `index_name`) if one doesn't exist. The index has the following fields:
- id (Edm.String)
- content (Edm.String)
- embedding (Collection(Edm.Single))
- li_jsonMetadata (Edm.String)
- li_doc_id (Edm.String)
- author (Edm.String)
- topic (Edm.String), populated from the `theme` metadata field
- director (Edm.String)

In [None]:
from azure.search.documents import SearchClient
from llama_index.vector_stores.cogsearch import (
    CognitiveSearchVectorStore,
    IndexManagement,
    MetadataIndexFieldType,
)

# Maps node metadata keys to index fields that can be filtered on.
# Example of a complex mapping: metadata field 'theme' is mapped to a differently
# named index field 'topic' with its type explicitly set.
metadata_fields = {
    "author": "author",
    "theme": ("topic", MetadataIndexFieldType.STRING),
    "director": "director",
}

# A simplified metadata specification is available if all metadata and index fields are similarly named
# metadata_fields = {"author", "theme", "director"}


# The index client is passed together with an explicit index name and
# CREATE_IF_NOT_EXISTS, so the store is configured to create the index when it is missing.
# The *_field_key arguments name the index fields used for each piece of node data.
vector_store = CognitiveSearchVectorStore(
    search_or_index_client=index_client,
    index_name=index_name,
    filterable_metadata_field_keys=metadata_fields,
    index_management=IndexManagement.CREATE_IF_NOT_EXISTS,
    id_field_key="id",
    chunk_field_key="content",
    embedding_field_key="embedding",
    metadata_string_field_key="li_jsonMetadata",
    doc_id_field_key="li_doc_id",
)

In [None]:
# Load the sample essay and build a vector index on top of `vector_store`
from llama_index.embeddings import OpenAIEmbedding
from llama_index import (
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)


# Use the Azure OpenAI LLM and embedding model configured earlier in the notebook
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=oai_embeddings,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# load documents
essay_dir = "./doc_samples/paul_graham_essay/data"
documents = SimpleDirectoryReader(essay_dir).load_data()

index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
    storage_context=storage_context,
)

In [None]:
# Query Data
question = "What did the author do growing up?"

# Create a query engine with similarity_top_k=3 and render the answer in bold
query_engine = index.as_query_engine(similarity_top_k=3)
response = query_engine.query(question)
display(Markdown(f"<b>{response}</b>"))

In [None]:
# Ask a second question with the already-created `query_engine`
question = "What did the author learn?"
response = query_engine.query(question)
display(Markdown(f"<b>{response}</b>"))

# Use Existing Index

In [None]:
from llama_index.vector_stores.cogsearch import (
    CognitiveSearchVectorStore,
    IndexManagement,
    MetadataIndexFieldType,
)


# Name of the existing index. It is not passed to the vector store below:
# `search_client` was already created for a specific index name.
index_name = "quickstart"

# Same metadata-to-index-field mapping that was used when the index was created
metadata_fields = {
    "author": "author",
    "theme": ("topic", MetadataIndexFieldType.STRING),
    "director": "director",
}

# A search client (rather than an index client) is supplied, together with
# IndexManagement.NO_VALIDATION.
# NOTE(review): presumably this skips any index creation/validation - confirm.
vector_store = CognitiveSearchVectorStore(
    search_or_index_client=search_client,
    filterable_metadata_field_keys=metadata_fields,
    index_management=IndexManagement.NO_VALIDATION,
    id_field_key="id",
    chunk_field_key="content",
    embedding_field_key="embedding",
    metadata_string_field_key="li_jsonMetadata",
    doc_id_field_key="li_doc_id",
)

In [None]:
# Build an index object on top of the existing vector store
from llama_index.embeddings import OpenAIEmbedding
from llama_index import (
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)


service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=oai_embeddings,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# An empty document list is passed: nothing new is added here, the index is
# simply attached to `vector_store`.
index = VectorStoreIndex.from_documents(
    [],
    service_context=service_context,
    storage_context=storage_context,
)

In [None]:
# Query the index with a default query engine
question = "What was a hard moment for the author?"
query_engine = index.as_query_engine()
response = query_engine.query(question)
display(Markdown(f"<b>{response}</b>"))

In [None]:
# Ask another question with the already-created `query_engine`
question = "Who is the author?"
response = query_engine.query(question)
display(Markdown(f"<b>{response}</b>"))

In [None]:
import time

# Create a streaming query engine; the response exposes a generator
# (`response_gen`) that yields pieces of the answer as they arrive.
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("What happened at interleaf?")

# The timer starts after query() has returned, so only the consumption of the
# response generator is measured. perf_counter() is used because it is a
# monotonic clock intended for measuring intervals.
start_time = time.perf_counter()

# NOTE: the loop variable is deliberately not named `token`. That name holds the
# AAD access token created earlier in the notebook; rebinding it to a string
# would break any later re-run of the cells that read `token.token`.
token_count = 0
for text_chunk in response.response_gen:
    print(text_chunk, end="")
    token_count += 1

time_elapsed = time.perf_counter() - start_time
# Guard against a zero interval (e.g. nothing was streamed)
tokens_per_second = token_count / time_elapsed if time_elapsed > 0 else 0.0

print(f"\n\nStreamed output at {tokens_per_second} tokens/s")

# Adding a document to existing index

In [None]:
# Baseline question asked before the extra document is inserted.
# `query_engine` is whichever engine was created last (the streaming one when
# the notebook is run top to bottom).
question = "What colour is the sky?"
response = query_engine.query(question)
display(Markdown(f"<b>{response}</b>"))

In [None]:
from llama_index import Document

# Wrap the new fact in a Document and add it to the existing index
sky_document = Document(text="The sky is indigo today")
index.insert_nodes([sky_document])

In [None]:
# Same question as before the insert, to compare the answers
question = "What colour is the sky?"
response = query_engine.query(question)
display(Markdown(f"<b>{response}</b>"))

# Filtering

In [None]:
from llama_index.schema import TextNode

# (text, metadata) pairs for the filtering example. The entries intentionally
# carry different metadata keys: not every node has an author, director or theme.
movie_specs = [
    (
        "The Shawshank Redemption",
        {"author": "Stephen King", "theme": "Friendship"},
    ),
    (
        "The Godfather",
        {"director": "Francis Ford Coppola", "theme": "Mafia"},
    ),
    (
        "Inception",
        {"director": "Christopher Nolan"},
    ),
]

nodes = [TextNode(text=title, metadata=meta) for title, meta in movie_specs]

In [None]:
# Add the example movie nodes (with their metadata) to the index
index.insert_nodes(nodes)

In [None]:
from llama_index.vector_stores.types import ExactMatchFilter, MetadataFilters


# Exact-match filter on the "theme" metadata key with value "Mafia"
theme_filter = ExactMatchFilter(key="theme", value="Mafia")
filters = MetadataFilters(filters=[theme_filter])

# The retriever is created with the filter applied; the last expression is the
# cell output, so the retrieved nodes are displayed.
retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception about?")