In [None]:
import os
import openai
from dotenv import load_dotenv
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import AzureSearch
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import TokenTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

# Load environment variables (python-dotenv) before anything reads them
load_dotenv()

# Route the openai SDK to Azure OpenAI; endpoint, key and API version all
# come from the environment and are None when the variable is unset.
openai.api_type = "azure"
openai.api_base = os.environ.get("OPENAI_API_BASE")
openai.api_key = os.environ.get("OPENAI_API_KEY")
openai.api_version = os.environ.get("OPENAI_API_VERSION")

# Chat model and embedding model, addressed by their deployment names
# ("gptchat" and "embedding"), which must match the target Azure resource.
llm = AzureChatOpenAI(deployment_name="gptchat")
embeddings = OpenAIEmbeddings(deployment_id="embedding", chunk_size=1)

# Vector store backed by Azure Cognitive Search; queries are embedded with
# the embedding model configured above.
# NOTE(review): the endpoint is read from AZURE_COGNITIVE_SEARCH_SERVICE_NAME;
# confirm that variable holds the full endpoint URL rather than a bare name.
acs = AzureSearch(
    azure_search_endpoint=os.environ.get("AZURE_COGNITIVE_SEARCH_SERVICE_NAME"),
    azure_search_key=os.environ.get("AZURE_COGNITIVE_SEARCH_API_KEY"),
    index_name=os.environ.get("AZURE_COGNITIVE_SEARCH_INDEX_NAME"),
    embedding_function=embeddings.embed_query,
)


In [None]:
# Folder whose files are ingested.
# NOTE(review): this cell was originally described as loading PDF files, but
# no glob or loader class is passed, so which files are picked up depends on
# DirectoryLoader's defaults - confirm against the contents of ./data.
directory = "./data"


def load_docs(directory):
    """Load the documents found under `directory`.

    Args:
        directory: Path of the folder handed to DirectoryLoader.

    Returns:
        Whatever DirectoryLoader(directory).load() returns (the loaded documents).
    """
    return DirectoryLoader(directory).load()


documents = load_docs(directory)
print(len(documents))

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split loaded documents into smaller chunks.

    Args:
        documents: Documents to split (as produced by load_docs).
        chunk_size: Forwarded to RecursiveCharacterTextSplitter as chunk_size.
        chunk_overlap: Forwarded to RecursiveCharacterTextSplitter as chunk_overlap.

    Returns:
        The chunks returned by the splitter's split_documents().
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)


# Chunk everything that was loaded, using the default size and overlap
docs = split_docs(documents)
print(len(docs))

In [None]:
# Display the first chunk as a quick sanity check of the split
# (raises IndexError if the split produced no chunks).
docs[0]

In [None]:
# Add the chunked documents to the Azure Search index behind `acs`.
# This is the last expression of the cell, so its return value is displayed.
# NOTE(review): re-running this cell calls add_documents again with the same
# chunks - confirm whether that creates duplicate entries in the index.
acs.add_documents(documents=docs)

In [None]:
# Adapt if needed
from langchain import PromptTemplate
from langchain.chains import ConversationalRetrievalChain


# Prompt that rewrites a follow-up question into a standalone question using
# the chat history. The instruction is kept on a single line on purpose: inside
# a triple-quoted string, source-code indentation and line breaks become part
# of the text sent to the model.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(
    """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
)

# Conversational retrieval chain: the chat model answers from documents fetched
# through the Azure Search retriever, and the retrieved source documents are
# requested alongside the answer.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=acs.as_retriever(),
    condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    return_source_documents=True,
    verbose=False,
)


In [None]:
# First turn: start from an empty history, so there is nothing to condense yet
chat_history = list()
query = "What are the airbag feature of the Mercedes-Benz S-Class Maybach as described in the manual?"
result = qa(dict(question=query, chat_history=chat_history))

# Echo the exchange
print("Question:", query)
print("Answer:", result["answer"])


In [None]:
# Extend the running history with the previous exchange instead of replacing
# it, so the chain can condense this follow-up against the whole conversation.
# A new list is built (rather than appending in place) so the history object
# passed to the previous call is left untouched.
# Requires the earlier chat cell to have run (chat_history, query, result).
chat_history = chat_history + [(query, result["answer"])]
query = "What is the fuel efficiency of the Maybach Mercedes car?"
result = qa({"question": query, "chat_history": chat_history})

print("Question:", query)
print("Answer:", result["answer"])


In [None]:
# Extend the running history with the previous exchange instead of replacing
# it, so the chain can condense this follow-up against the whole conversation.
# A new list is built (rather than appending in place) so the history object
# passed to the previous call is left untouched.
# Requires the earlier chat cells to have run (chat_history, query, result).
chat_history = chat_history + [(query, result["answer"])]
query = "How can I turn S Maybach signal light on?"
result = qa({"question": query, "chat_history": chat_history})

print("Question:", query)
print("Answer:", result["answer"])

In [None]:
# Extend the running history with the previous exchange instead of replacing
# it, so the chain can condense this follow-up against the whole conversation.
# A new list is built (rather than appending in place) so the history object
# passed to the previous call is left untouched.
# Requires the earlier chat cells to have run (chat_history, query, result).
chat_history = chat_history + [(query, result["answer"])]
query = "How can I turn S-Sedan signal light on?"
result = qa({"question": query, "chat_history": chat_history})

print("Question:", query)
print("Answer:", result["answer"])

In [None]:
# Extend the running history with the previous exchange instead of replacing
# it, so the chain can condense this follow-up against the whole conversation.
# A new list is built (rather than appending in place) so the history object
# passed to the previous call is left untouched.
# Requires the earlier chat cells to have run (chat_history, query, result).
chat_history = chat_history + [(query, result["answer"])]
query = "In which situations, I can receive haptic feedback in s-sedan?"
result = qa({"question": query, "chat_history": chat_history})

print("Question:", query)
print("Answer:", result["answer"])

In [None]:
# Pure vector similarity search: request the 3 best matches and print the
# content of the top one.
# NOTE: this rebinds `docs`, which earlier held the chunks that were uploaded.
docs = acs.similarity_search(
    query="In which situations, I can receive haptic feedback in s-sedan?",
    k=3,
    search_type="similarity",
)
# Guard against an empty result so the cell reports it instead of failing
# with a bare IndexError on docs[0].
if docs:
    print(docs[0].page_content)
else:
    print("No documents matched the query.")

In [None]:
# Hybrid search (search_type="hybrid"): request the 3 best matches and print
# the content of the top one.
# NOTE: this rebinds `docs`, which earlier held the chunks that were uploaded.
docs = acs.similarity_search(
    query="In which situations, I can receive haptic feedback in s-sedan?",
    k=3,
    search_type="hybrid",
)
# Guard against an empty result so the cell reports it instead of failing
# with a bare IndexError on docs[0].
if docs:
    print(docs[0].page_content)
else:
    print("No documents matched the query.")