The goal of this notebook is to try out Azure AI Search as a LangChain vector store.

See: https://python.langchain.com/v0.2/docs/integrations/vectorstores/azuresearch/

In [None]:
import os

from dotenv import find_dotenv, load_dotenv

# Load .env BEFORE importing the Azure Search integration: that module reads the
# AZURESEARCH_FIELDS_* overrides once, when it is first imported, so values that
# live only in .env must already be in the process environment at this point.
# Loading again later is harmless (already-set variables are not overridden).
load_dotenv(find_dotenv(filename='.env'))

from langchain_community.vectorstores.azuresearch import AzureSearch  # noqa: E402
from langchain_openai import OpenAIEmbeddings  # noqa: E402

In [None]:
# Locate the .env file and load its variables into the process environment.
# The result is bound to `_` so the cell does not display it.
env_file = find_dotenv(filename='.env')
_ = load_dotenv(env_file)

In [None]:
# OpenAI embedding settings.
# The API key is read from the environment (None when the variable is not set).
openai_api_key = os.environ.get("OPENAI_API_KEY")
openai_api_version = "2023-05-15"
model = "text-embedding-ada-002"

In [None]:
# Azure AI Search connection settings.
# The admin key is read from the environment (None when the variable is not set).
vector_store_address = "https://ai-search-1-sn.search.windows.net"
vector_store_password = os.environ.get("AZURE_SEARCH_ADMIN_KEY")

In [None]:
# Embedding client built from the OpenAI settings defined above.
embedding_settings = {
    "openai_api_key": openai_api_key,
    "openai_api_version": openai_api_version,
    "model": model,
}
embeddings = OpenAIEmbeddings(**embedding_settings)

In [None]:
# Name of the Azure AI Search index this notebook queries.
index_name = "vector-1723032578840"

# Vector store wrapper around that index; queries are embedded with
# `embeddings.embed_query`.
vector_store = AzureSearch(
    embedding_function=embeddings.embed_query,
    index_name=index_name,
    azure_search_key=vector_store_password,
    azure_search_endpoint=vector_store_address,
)

In [None]:
# Reference implementation:
# https://api.python.langchain.com/en/latest/_modules/langchain_community/vectorstores/azuresearch.html#AzureSearch.similarity_search
# The index field names need to be parametrized; show what the related
# environment variables are currently set to (None when a variable is absent).
for field_env_var in (
    "AZURESEARCH_FIELDS_CONTENT_VECTOR",
    "AZURESEARCH_FIELDS_ID",
    "AZURESEARCH_FIELDS_CONTENT",
):
    print(os.environ.get(field_env_var))

Source code for langchain_community.vectorstores.azuresearch
https://api.python.langchain.com/en/latest/_modules/langchain_community/vectorstores/azuresearch.html#AzureSearch.similarity_search

In [None]:
# Question used by the retrieval and answer cells that follow.
# Alternative questions tried earlier:
#   "What are the fully local agents?"
#   "What is Nvidia NIM API?"
query = "What is a generator function?"

In [None]:
# Vector (similarity) search: ask for the 3 closest chunks to `query`.
# NOTE(review): `score_threshold` looks like it is only honoured by
# `similarity_search_with_relevance_scores`; `similarity_search` may silently
# ignore it — confirm against the installed langchain_community version.
similarity_docs = vector_store.similarity_search(
    query=query,
    k=3,
    search_type="similarity",
    score_threshold=0.79,
)

# Guard the preview: indexing [0] on an empty result list raises IndexError.
if similarity_docs:
    print(similarity_docs[0])
else:
    print("Similarity search returned no documents.")

# Concatenate the retrieved chunks into one context string for the prompt
# (an empty result yields an empty string).
similarity_doc_string = ' '.join(doc.page_content for doc in similarity_docs[:3])
print(len(similarity_doc_string))

In [None]:
# Hybrid search via `hybrid_search`: ask for the 3 best chunks for `query`.
hybrid_docs = vector_store.hybrid_search(
    query=query,
    k=3
)

# Guard the preview: indexing [0] on an empty result list raises IndexError.
if hybrid_docs:
    print(hybrid_docs[0])
else:
    print("Hybrid search returned no documents.")

# Concatenate the retrieved chunks into one context string for the prompt
# (an empty result yields an empty string).
hybrid_doc_string = ' '.join(doc.page_content for doc in hybrid_docs[:3])
print(len(hybrid_doc_string))

In [None]:
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

In [None]:
# Prompt with two placeholders: {query} is the question, {info} is the retrieved
# context, which the template wraps between "####" markers.
prompt_template = (
    "Answer this single question {query} basing only on the information below between four hashes. "
    "If the information contains other questions ignore them. "
    "If possible, give an example related to the answer: #### {info} ####"
)
prompt = ChatPromptTemplate.from_template(prompt_template)
print(prompt)

In [None]:
# Requires a local Ollama model ("llama3.1").
llm = ChatOllama(model="llama3.1")

# Pipeline: prompt -> chat model -> string output parser.
output_parser = StrOutputParser()
chain = prompt | llm | output_parser
print(chain)

In [None]:
# Answer the question using the context retrieved by the similarity search.
print("Similarity Search")
similarity_answer = chain.invoke({"query": query, "info": similarity_doc_string})
print(similarity_answer)

In [None]:
# Answer the question using the context retrieved by the hybrid search.
print("Hybrid Search")
hybrid_answer = chain.invoke({"query": query, "info": hybrid_doc_string})
print(hybrid_answer)