In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    ComplexField,
    CorsOptions,
    SearchIndex,
    SearchFieldDataType,
    SimpleField,
    SearchableField
)

In [2]:
# Locate the application's .env file and load its variables into the process
# environment. Later cells read SEARCH_ENDPOINT and SEARCH_API_KEY from
# os.environ (presumably defined in that file — verify).
load_dotenv(find_dotenv('../application/.env'))

True

In [3]:
# Search service endpoint and API key come from the environment; indexing
# os.environ with [] raises KeyError when a variable is missing.
endpoint = os.environ["SEARCH_ENDPOINT"]
key = os.environ["SEARCH_API_KEY"]

# Do not print the key here: cell outputs are saved together with the notebook.

# Index-management client used below to create the search index.
client = SearchIndexClient(endpoint, AzureKeyCredential(key))

In [4]:
# Schema of the index created in the next cell: a string key, a numeric
# average cost, a searchable description and a nested address object.
name = "restaurant"

# Sub-fields of the complex "address" field (both plain strings).
address_subfields = [
    SimpleField(name=sub_name, type=SearchFieldDataType.String)
    for sub_name in ("streetAddress", "city")
]

fields = [
    SimpleField(name="restaurantId", type=SearchFieldDataType.String, key=True),
    SimpleField(name="averageCost", type=SearchFieldDataType.Double),
    SearchableField(name="description", type=SearchFieldDataType.String),
    ComplexField(name="address", fields=address_subfields),
]

In [5]:
from azure.core.exceptions import HttpResponseError

# CORS: accept requests from any origin; browsers may cache the preflight
# response for 60 seconds.
cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)
scoring_profiles = []

index = SearchIndex(
    name=name,
    fields=fields,
    scoring_profiles=scoring_profiles,
    cors_options=cors_options)

# Create the index. A failed request is only reported as "already exists"
# after confirming that the index really is present on the service; any other
# failure (bad credentials, invalid schema, throttling, ...) is shown and
# re-raised instead of being silently mislabelled.
try:
    client.create_index(index)
    print(f"Index '{name}' created.")
except HttpResponseError as err:
    if name in client.list_index_names():
        print(f"Index '{name}' already exists.")
    else:
        print(f"Creating index '{name}' failed: {err.message}")
        raise


Index 'restaurant' already exists.


### Add documents to the index

In [6]:
import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient

# Target index and connection settings for the document upload.
index_name = "restaurant"
endpoint = os.environ["SEARCH_ENDPOINT"]
key = os.environ["SEARCH_API_KEY"]

# One row per restaurant:
# (restaurantId, averageCost, description, streetAddress, city)
restaurant_rows = [
    ('1', 50.0, 'Traditional Italian cuisine with a modern twist.', '123 Via Roma', 'Rome'),
    ('2', 70.0, 'Family-friendly Italian restaurant with classic dishes.', '456 Via Milano', 'Milan'),
    ('3', 35.0, 'Cozy trattoria offering regional specialties.', '789 Via Napoli', 'Naples'),
]

# Expand each row into a document shaped like the index schema.
documents = [
    {
        'restaurantId': restaurant_id,
        'averageCost': average_cost,
        'description': description,
        'address': {
            'streetAddress': street_address,
            'city': city,
        },
    }
    for restaurant_id, average_cost, description, street_address, city in restaurant_rows
]

# Upload the documents; `result` holds one outcome entry per document.
search_client = SearchClient(endpoint, index_name, AzureKeyCredential(key))
result = search_client.upload_documents(documents=documents)


In [7]:
# Report the outcome of each uploaded document (its key and success flag).
for outcome in result:
    status_line = f"Upload of document with ID '{outcome.key}' succeeded: {outcome.succeeded}"
    print(status_line)

Upload of document with ID '1' succeeded: True
Upload of document with ID '2' succeeded: True
Upload of document with ID '3' succeeded: True


Now we can retrieve documents from Azure Cognitive Search (ACS).

In [8]:
# Full-text query against the index that `search_client` is bound to.
results = search_client.search(search_text="Family friendly?")

# Use a dedicated loop name: `result` still holds the upload response from the
# indexing cell, and overwriting it would break re-running the upload report.
for hit in results:
    print(hit)

{'restaurantId': '2', 'averageCost': 70.0, 'description': 'Family-friendly Italian restaurant with classic dishes.', 'address': {'streetAddress': '456 Via Milano', 'city': 'Milan'}, '@search.score': 1.8678205, '@search.reranker_score': None, '@search.highlights': None, '@search.captions': None}


### Using LangChain with ACS

In [9]:
import os
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch

In [10]:
# Embedding model used to vectorise both the indexed chunks and the queries.
# NOTE(review): chunk_size=1 sends one text per embedding request — presumably
# a workaround for a batch-size limit; confirm it is still required.
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(deployment="text-embedding-ada-002", chunk_size=1)
index_name: str = "langchain-example"

# Read the credentials with os.environ[...] (as the earlier cells do) so a
# missing variable fails immediately with KeyError instead of passing None on
# to the vector store.
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=os.environ["SEARCH_ENDPOINT"],
    azure_search_key=os.environ["SEARCH_API_KEY"],
    index_name=index_name,
    embedding_function=embeddings.embed_query,
)

In [11]:
from langchain_community.document_loaders import DirectoryLoader, TextLoader

# Load every .txt file found under ./restaurant (recursive glob) as text
# documents and report how many were read.
loader = DirectoryLoader(
    './restaurant',
    loader_cls=TextLoader,
    glob="**/*.txt",
)
data = loader.load()
print(len(data))

2


In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Split the loaded documents into chunks of at most 120 characters (measured
# with len) that overlap by 20 characters; separators are treated as plain
# strings, not regular expressions.
text_splitter = RecursiveCharacterTextSplitter(
    is_separator_regex=False,
    length_function=len,
    chunk_overlap=20,
    chunk_size=120,
)

# `docs` holds the resulting chunks; print how many were produced.
docs = text_splitter.split_documents(data)
print(len(docs))

39


In [13]:
# Add the split chunks to the vector store; the displayed value is the list of
# IDs returned for the added documents.
# NOTE(review): re-running this cell presumably uploads the same chunks again
# (the similarity search further down returns the same chunk twice) — confirm
# and clear the index before re-running if so.
vector_store.add_documents(documents=docs)

['Njg1MDUyMjItOWQ0MS00NzY5LTk0ZjAtYjNmMWM3ZTEwZmFl',
 'ZmJiNjZmOGUtMzYxYi00OWFiLTk4MTgtZDJlYTU1ZTVjNDE3',
 'NjQ1NTQyOWQtMjcyMi00YzBiLWFlNDgtYTdmNTgzZmJmZmIw',
 'NTQ2ZDYyYWMtNjJiMC00NTYzLTk3NzQtZDY0MGUxY2Q3ZDll',
 'OTk5OTkxODUtZDg0Ni00YzU0LWE5ZGQtYzQyMDk3MDZlYTcx',
 'N2RiZGUwOTAtYzhlMC00YmZiLWI1NTctODcyNDdmODgwMWUx',
 'YWUwZTY5ZjQtOWQ3ZS00ODAzLThjYTAtOTM3NzM1YTNkMjA4',
 'MWZhMTg4NjYtNjEyNi00MTIzLTg0ZGYtYTMyNDI0ZDYxNmY5',
 'ZDAxZTZmMDUtZDZjNC00ODg0LTljZDMtNzc3ZTliYzQ4MjE3',
 'MzY1OGQ3OGQtNzJhYi00MzkzLWI3NmUtMTdjNGJkMzgwMDMy',
 'NTllMTBiYWUtOGU4Zi00NWZmLTgwNzYtOTg3MjczNDQwZDlh',
 'MzQ1NjIwZGMtMjIyZC00YzUwLWFlNTAtYzI5ZTkxMTU1OTlh',
 'MDJmMzRmZmItMGFkZC00NThmLWIyMTQtMjU2NzZjMjMyNzQ5',
 'ODk3NGU2MjctNjI5Ni00ZTExLWE0MmMtNTkxZWQwMWQ3ODE3',
 'MTUwMTdhY2MtODA0My00OWIwLTllMmEtZDZmNWI3ZmI2OWYw',
 'ZDhjOWMwNjAtZWUxMS00N2U1LWE3MzUtNzQwMGFkMGFkZjdj',
 'YjFjMDc4M2ItY2RkOC00Y2E0LWJlZjYtN2Y0MjJlY2VjMzFl',
 'NjUzMDdmOGItMTc1OS00MTdmLTk0Y2ItZjZjZGUyNjQyYmM5',
 'Nzg5NjYxY2ItODYyMi00NTZjLTgzY2UtMWEwNTc5MzU1

In [14]:

# Retrieve the three stored chunks most similar to the question.
# NOTE(review): this rebinds `docs`, which previously held the split chunks
# that were added to the vector store; the next cell relies on the new value.
docs = vector_store.similarity_search(
    search_type="similarity",
    k=3,
    query="When are the opening hours of the restaurant?",
)
print(docs)


[Document(page_content='Restaurant Opening Hours:', metadata={'source': 'restaurant\\opening_hours.txt'}), Document(page_content='Restaurant Opening Hours:', metadata={'source': 'restaurant\\opening_hours.txt'}), Document(page_content='Special Hours: Our kitchen closes 30 minutes before the restaurant closing time.', metadata={'source': 'restaurant\\opening_hours.txt'})]


In [15]:
print(len(docs))

3


In [16]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

# Retrieval-augmented QA chain: the retriever fetches chunks from the vector
# store and the "stuff" chain type places them all into a single prompt.
# temperature=0 makes the answer as repeatable as possible; with the default
# sampling temperature the same question produced different wording on
# consecutive runs.
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(temperature=0),
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
)
qa.invoke("When are the opening hours of the restaurant?")

{'query': 'When are the opening hours of the restaurant?',
 'result': "I don't know the exact opening hours of the restaurant as they were not provided."}

In [17]:
# Ask the same question again, this time keeping the returned dict for the
# following cells.
response = qa.invoke("When are the opening hours of the restaurant?")

In [18]:
# Show the whole response dict (it carries the 'query' and the 'result').
print(response)

{'query': 'When are the opening hours of the restaurant?', 'result': "I don't know."}


In [19]:
# Show only the answer text produced by the chain.
print(response['result'])

I don't know.
