In [4]:
# Private Preview edition
# ! pip install --index-url=https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/ azure-search-documents==11.4.0a20230509004
! pip show azure-search-documents

Name: azure-search-documents
Version: 11.4.0a20230509004
Summary: Microsoft Azure Cognitive Search Client Library for Python
Home-page: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/search/azure-search-documents
Author: Microsoft Corporation
Author-email: ascl@microsoft.com
License: MIT License
Location: C:\Users\shchitt\AppData\Local\anaconda3\envs\aoai\Lib\site-packages
Requires: azure-common, azure-core, isodate
Required-by: 


In [5]:
# Imports: standard library first, then third-party packages.
import json
import os

import openai
from dotenv import find_dotenv, load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch

# Load the API keys and endpoints from the .env file located by find_dotenv() into
# os.environ. override=True lets values from the file replace variables that are
# already set in the process; the call's return value is displayed as the cell output.
load_dotenv(find_dotenv(), override=True)

True

In [6]:
# Configure the module-level openai client from environment variables.
# Indexing os.environ (instead of .get) raises KeyError right here when a setting
# is missing, rather than failing later on the first API call.
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_base = os.environ["OPENAI_API_BASE"]
openai.api_type = os.environ["OPENAI_API_TYPE"]
openai.api_version = os.environ["OPENAI_API_VERSION"]

# Names of the text, chat and embedding models, also read from the environment.
# chat_model and embedding_model are consumed by later cells; text_model is not
# referenced in the visible part of the notebook.
text_model = os.environ["TEXT_DAVINCI_MODEL_NAME"]
chat_model = os.environ["CHAT_MODEL_NAME"]
embedding_model = os.environ["EMBEDDING_MODEL_NAME"]

In [7]:
# Connection settings for the Azure Cognitive Search service used as the vector store.
vector_store_endpoint: str = os.environ["AZURE_COGNITIVE_SEARCH_ENDPOINT"]
# Despite the "password" name, this holds the search service API key.
vector_store_password: str = os.environ["AZURE_COGNITIVE_SEARCH_KEY"]
# Name of the search index the vector store is bound to.
index_name: str = "langchain-vector-demo"

In [8]:
# Embedding client; its embed_query method is later handed to the vector store.
# NOTE(review): chunk_size=1 presumably limits each embeddings request to a single
# input text, which Azure OpenAI required at the time — confirm for the API version in use.
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
    model=embedding_model,
    chunk_size=1,
)
# Display the concrete class as a quick sanity check that construction succeeded.
type(embeddings)

langchain.embeddings.openai.OpenAIEmbeddings

In [9]:
# Bind a LangChain vector store to the Azure Cognitive Search index named above.
# embedding_function receives the bound embed_query method, i.e. a callable that
# the store can invoke on text it needs to vectorize.
vector_store: AzureSearch = AzureSearch(
    index_name=index_name,
    azure_search_endpoint=vector_store_endpoint,
    azure_search_key=vector_store_password,
    embedding_function=embeddings.embed_query,
)

In [11]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Tunable inputs for this step, named so they are easy to find and change.
SPEECH_PATH = "./churchill_speech.txt"
CHUNK_SIZE = 300     # upper bound on the size of each chunk
CHUNK_OVERLAP = 20   # amount shared between consecutive chunks

# Read the speech as UTF-8 text.
loader = TextLoader(SPEECH_PATH, encoding="utf-8")
documents = loader.load()

# Split the loaded text into small overlapping chunks for indexing.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
docs = text_splitter.split_documents(documents)

# Number of chunks produced (shown as the cell output).
len(docs)

84

In [10]:
# Upload the chunks in `docs` to the vector store. The value displayed below the cell
# is the list of string ids returned by the call.
# NOTE(review): re-running this cell presumably uploads the same chunks again under
# new ids — confirm, and guard the call if duplicates in the index matter.
vector_store.add_documents(documents=docs)

['ZmJiZGIzNzYtNWUzYS00YWIwLTlhZDgtMzlkMGI2YmFlZjNi',
 'NTU5YmIyZTgtMTZiZS00ZTAwLWJmYjYtNTkzZTVmYjFmMDZi',
 'ZGNmOTY4OGItYzlhOS00OTY3LWI5ZDAtNjc1Y2RmZGJiMzE3',
 'NzEzZmU1NTYtZTliNy00NzIwLWI3N2QtNzlkODQwYzc4ZjQ1',
 'ZGU0ZDUyNTktM2YwYy00Yjk4LTk2YjEtYTQxOGRiOTYzY2Zm',
 'OTBiOGNjN2ItOGIyNy00NGM5LTk2Y2EtYzYzNGE1MjcyOThi',
 'ZmM5NDNhNGItYWExOC00YzUwLTg5ZjktZmRhY2E5Mzc2MWFi',
 'ZWRkMTAyYmMtODVjYi00NTM1LTlkN2MtZWZmMWU2NjdiMmI4',
 'ZWVjMWRjZTItYTExNi00NzU3LTk2YTUtZWVmYjgyNjgyZmIy',
 'ZDFiODYyMmMtNjY5OS00MzA4LTk4MmItNWVmMjUxM2Q2YzE3',
 'ODYyNDBmMDItYjE0Ni00ZTVlLTgxNTItNmRkOTdiZjdhYTBm',
 'ZGZhZjJiOWUtMGYxMi00MjM4LWE4MmMtMDRiZGU1OGQwNDJi',
 'ZDU1NGFlMDUtNjlhNy00MWU1LTliN2YtNzdmYjZjOTg4N2U2',
 'YjYwZjNlZWMtMjYxNi00ZWRiLWFjZWEtODVlNDg4Yzg1NTJm',
 'ZmU0NDRjYmQtZTI1YS00ZGEyLWI0MzktMDhlYmVlMGU5ZmU0',
 'ZjUwMmYwMzMtZmQ0YS00ZDMxLThmYzctYzY3NzNmY2I2Njlk',
 'MjEyNjM0MWQtMTMwZi00Nzg0LWEzZDgtMjdmYTlmZTMyODI5',
 'MTBkMmRkZWQtZjRmMy00NGIxLThkM2ItYjQ5ZDFkYjQ2MWQ0',
 'ZjZhMzdhNTgtMDI5Yi00NGQ3LTk1ZWUtZDhkZjQ5ZjJk

In [12]:
# Perform a similarity search (search_type is passed explicitly) and keep the top 3 hits.
# The hits get their own name so that `docs` — the chunks produced by the text
# splitter — is not overwritten; re-running the upload cell afterwards would otherwise
# index these search results instead of the original chunks.
similarity_hits = vector_store.similarity_search(
    query="Where should we fight?",
    k=3,
    search_type="similarity",
)
# Show the text of the best match.
print(similarity_hits[0].page_content)

shall fight on the beaches, we shall fight on the landing grounds, we shall fight in the fields and in the
streets, we shall fight in the hills; we shall never surrender, and even if, which I do not for a moment


In [13]:
# Perform a hybrid search - search_type is "hybrid" by default
# The hits get their own name so that `docs` — the chunks produced by the text
# splitter — is not overwritten by search results.
# NOTE(review): the query does not relate to the loaded speech; presumably it was
# carried over from another example — confirm that this is intentional.
hybrid_hits = vector_store.similarity_search(
    query="What did the president say about Ketanji Brown Jackson", k=3
)
# Show the text of the best match.
print(hybrid_hits[0].page_content)

The President of the Board of Trade [Sir Andrew Duncan] is not here today. His son has been killed,
and many in the House have felt the pangs of affliction in the sharpest form. But I will say this about


In [14]:

from langchain.chat_models import AzureChatOpenAI
from langchain.schema import AIMessage, HumanMessage, SystemMessage

# Chat model bound to the deployment named by `chat_model`. The connection settings
# are the values already configured on the `openai` module in an earlier cell.
chat = AzureChatOpenAI(
    deployment_name=chat_model,
    openai_api_base=openai.api_base,
    openai_api_version=openai.api_version,
    openai_api_key=openai.api_key,
    openai_api_type=openai.api_type,
    temperature=0.2,
    max_tokens=500,
)

In [17]:
from langchain.chains import RetrievalQA

# Retriever over the vector store, asking for the top 3 matches per query.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

# Question-answering chain: the chat model answers using what the retriever returns.
chain = RetrievalQA.from_chain_type(
    llm=chat,
    chain_type="stuff",
    retriever=retriever,
)

# Ask a question about the indexed speech and print the model's answer.
query = "Where should we fight?"
answer = chain.run(query)
print(answer)

According to the given context, we should fight on the beaches, landing grounds, fields, streets, hills, in France, on the seas and oceans, and in the air to defend our Island.
