# Azure Cognitive Search

# Basic Example

In this basic example, we take a Paul Graham essay, split it into chunks, embed it using an OpenAI embedding model, load it into an Azure Cognitive Search index, and then query it.

In [1]:
import logging
import sys

# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# logger = logging.getLogger(__name__)

In [None]:
#!{sys.executable} -m pip install llama-index
#!{sys.executable} -m pip install azure-search-documents==11.4.0b8
#!{sys.executable} -m pip install azure-identity

In [2]:
# import
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    Document,
)
from llama_index.storage.storage_context import StorageContext
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings import LangchainEmbedding
from IPython.display import Markdown, display
from llama_index.vector_stores import CognitiveSearchVectorStore

In [3]:
# set up OpenAI
import os
import getpass

# Only prompt when no key is configured yet, so the notebook can be re-run
# (or executed non-interactively) without typing the secret again.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
import openai

openai.api_key = os.environ["OPENAI_API_KEY"]

In [4]:
# set up Azure Cognitive Search

from azure.core.credentials import AzureKeyCredential

# Strip surrounding whitespace from the entered values: a stray space pasted
# along with the service name would otherwise end up inside the endpoint URL,
# and one pasted with the key would become part of the credential.
search_service_name = getpass.getpass("Azure Cognitive Search Service Name").strip()

key = getpass.getpass("Azure Cognitive Search Key").strip()

cognitive_search_credential = AzureKeyCredential(key)

service_endpoint = f"https://{search_service_name}.search.windows.net"

index_name = "quickstart"

In [5]:
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    SearchFieldDataType,
    SimpleField,
    SearchableField,
    SearchIndex,
    SemanticConfiguration,
    PrioritizedFields,
    SemanticField,
    SearchField,
    SemanticSettings,
    VectorSearch,
    HnswVectorSearchAlgorithmConfiguration,
)

from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents import SearchClient

from azure.core.credentials import AzureKeyCredential

from typing import Any

In [6]:
def drop_and_create_index(index_name: str, service_endpoint: str, credential: Any):
    """Recreate the search index from scratch.

    If ``index_name`` is among the index names listed by the service at
    ``service_endpoint``, it is deleted first. Creation is then delegated to
    ``create_search_index`` with the same three arguments.
    """
    client = SearchIndexClient(endpoint=service_endpoint, credential=credential)
    already_present = index_name in client.list_index_names()
    if already_present:
        print(f"Index {index_name} exists, dropping index")
        client.delete_index(index_name)

    create_search_index(index_name, service_endpoint, credential)


def create_search_index(
    index_name: str,
    service_endpoint: str,
    credential: Any,
    embedding_dimensions: int = 1536,
):
    """Create the vector-enabled search index unless it already exists.

    The index has a string key field ``id``, a searchable ``content`` field,
    a vector field ``embedding``, and two string fields ``li_jsonMetadata``
    and ``li_doc_id`` (the latter filterable). It also defines a semantic
    configuration and an HNSW vector search configuration, both named
    ``"default"``.

    Args:
        index_name: Name of the index to create.
        service_endpoint: Endpoint URL of the search service.
        credential: Credential handed to ``SearchIndexClient``.
        embedding_dimensions: Number of dimensions of the ``embedding`` vector
            field. Defaults to 1536, the value that was previously hard-coded;
            it should match the output size of the embedding model in use.

    Prints which action was taken; nothing is created when the index exists.
    """
    index_client = SearchIndexClient(endpoint=service_endpoint, credential=credential)
    if index_name in index_client.list_index_names():
        print(f"Search index {index_name} already exists")
        return

    index = SearchIndex(
        name=index_name,
        fields=[
            SimpleField(name="id", type="Edm.String", key=True),
            SearchableField(
                name="content", type="Edm.String", analyzer_name="en.microsoft"
            ),
            SearchField(
                name="embedding",
                type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                hidden=False,
                searchable=True,
                filterable=False,
                sortable=False,
                facetable=False,
                vector_search_dimensions=embedding_dimensions,
                # Refers to the algorithm configuration named "default" below.
                vector_search_configuration="default",
            ),
            SimpleField(name="li_jsonMetadata", type="Edm.String"),
            SimpleField(name="li_doc_id", type="Edm.String", filterable=True),
        ],
        semantic_settings=SemanticSettings(
            configurations=[
                SemanticConfiguration(
                    name="default",
                    prioritized_fields=PrioritizedFields(
                        title_field=None,
                        prioritized_content_fields=[
                            SemanticField(field_name="content")
                        ],
                    ),
                )
            ]
        ),
        vector_search=VectorSearch(
            algorithm_configurations=[
                HnswVectorSearchAlgorithmConfiguration(
                    name="default",
                    kind="hnsw",
                    parameters={
                        "m": 4,
                        "efConstruction": 400,
                        "efSearch": 1000,
                        "metric": "cosine",
                    },
                )
            ]
        ),
    )
    print(f"Creating {index_name} search index")
    index_client.create_index(index)

In [7]:
# Create the index only if it is missing. To rebuild it from scratch instead,
# swap in the commented-out drop_and_create_index call below.
# drop_and_create_index(index_name=index_name, service_endpoint=service_endpoint, credential=cognitive_search_credential)
create_search_index(
    index_name=index_name,
    service_endpoint=service_endpoint,
    credential=cognitive_search_credential,
)

Index quickstart exists, dropping index
Creating quickstart search index


In [8]:
from llama_index.embeddings import OpenAIEmbedding

# --- Azure Cognitive Search side -------------------------------------------
# Client bound to the index configured above; the vector store is built on it.
search_client = SearchClient(
    endpoint=service_endpoint, index_name=index_name, credential=cognitive_search_credential
)

# Tell the vector store which index field holds each piece of node data.
vector_store = CognitiveSearchVectorStore(
    search_client,
    id_field_key="id",
    chunk_field_key="content",
    embedding_field_key="embedding",
    metadata_field_key="li_jsonMetadata",
    doc_id_field_key="li_doc_id",
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# --- Embedding side ----------------------------------------------------------
embed_model = OpenAIEmbedding()
service_context = ServiceContext.from_defaults(embed_model=embed_model)

# --- Data --------------------------------------------------------------------
# Read every file found in the essay directory (path is relative to the notebook).
documents = SimpleDirectoryReader("../../../examples/paul_graham_essay/data").load_data()

# Build the index from the documents using the storage and service contexts above.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
)

In [9]:
# Query Data
# Build a query engine from the index, ask a question, and render the
# response as bold Markdown.
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
display(Markdown(f"<b>{response}</b>"))

<b>The author worked on writing and programming outside of school before college. They wrote short stories and tried writing programs on an IBM 1401 computer. They also built a microcomputer kit and started programming on it, writing simple games and a word processor.</b>

In [10]:
# Second question, using the `query_engine` defined in an earlier cell.
response = query_engine.query(
    "What did the author learn?",
)
display(Markdown(f"<b>{response}</b>"))

<b>The author learned several things during their time at Interleaf. They learned that it's better for technology companies to be run by product people than sales people, that code edited by too many people leads to bugs, that cheap office space can be depressing, that planned meetings are inferior to corridor conversations, that big bureaucratic customers can be a dangerous source of money, and that there's not much overlap between conventional office hours and the optimal time for hacking.</b>

In [11]:
# Third question, using the `query_engine` defined in an earlier cell.
response = query_engine.query("What was a hard moment for the author?")
display(Markdown(f"<b>{response}</b>"))

<b>The author experienced a difficult moment when their mother had a stroke caused by colon cancer. They were determined to help their mother recover and spent a lot of time flying to Oregon to visit her. This event made the author reflect on their life and eventually led them to consider handing over Y Combinator to someone else.</b>

In [12]:
# Fourth question, using the `query_engine` defined in an earlier cell.
response = query_engine.query("Who is the author?")
display(Markdown(f"<b>{response}</b>"))

<b>The author of the given context information is Paul Graham.</b>

In [13]:
import time

# Use a dedicated name for the streaming engine so the non-streaming
# `query_engine` defined earlier is not silently replaced for later cells.
streaming_query_engine = index.as_query_engine(streaming=True)
response = streaming_query_engine.query("What happened at interleaf?")

# perf_counter is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() can. Timing starts after
# query() returns, so only consumption of the token stream is measured.
start_time = time.perf_counter()

token_count = 0
for token in response.response_gen:
    print(token, end="")
    token_count += 1

time_elapsed = time.perf_counter() - start_time
tokens_per_second = token_count / time_elapsed

print(f"\n\nStreamed output at {tokens_per_second} tokens/s")

At Interleaf, there was a group called Release Engineering that seemed to be as big as the group that actually wrote the software. The software at Interleaf had to be updated on the server, and there was a lot of emphasis on high production values to make the online store builders look legitimate.

Streamed output at 20.981856143887274 tokens/s


# Adding a document

In [14]:
# Ask about the sky before the "sky is indigo" document is inserted, so the
# answer can be compared with the one obtained after insertion.
response = query_engine.query("What colour is the sky?")
display(Markdown(f"<b>{response}</b>"))

<b>The color of the sky can vary depending on various factors such as time of day, weather conditions, and location. It can range from shades of blue during the day to hues of orange, pink, and purple during sunrise or sunset.</b>

In [15]:
# Add one extra document to the existing index.
# NOTE(review): a Document is passed to insert_nodes here; confirm whether
# index.insert(document) is the intended API for whole documents.
index.insert_nodes([Document(text="The sky is indigo today")])

In [16]:
# Same question as before the insertion, now that the "sky is indigo"
# document has been added to the index.
response = query_engine.query("What colour is the sky?")
display(Markdown(f"<b>{response}</b>"))

<b>The colour of the sky is indigo.</b>

# Filters are not supported

In [18]:
from llama_index.schema import TextNode

# Each (text, metadata) pair below becomes one TextNode. Note that the
# metadata keys are not uniform across entries.
nodes = [
    TextNode(text=node_text, metadata=node_metadata)
    for node_text, node_metadata in [
        (
            "The Shawshank Redemption",
            {"author": "Stephen King", "theme": "Friendship"},
        ),
        (
            "The Godfather",
            {"director": "Francis Ford Coppola", "theme": "Mafia"},
        ),
        (
            "Inception",
            {"director": "Christopher Nolan"},
        ),
    ]
]

In [19]:
# Add the three movie nodes defined above to the index.
index.insert_nodes(nodes)

In [None]:
from llama_index.vector_stores.types import ExactMatchFilter, MetadataFilters


# Request an exact match on the "theme" metadata key with value "Mafia".
# NOTE(review): the section heading states filters are not supported and this
# cell has no recorded output; confirm whether the call raises or the filter
# is ignored.
filters = MetadataFilters(filters=[ExactMatchFilter(key="theme", value="Mafia")])

retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception about?")