In [1]:
'''
This code works on GPT-3.5 and embeddings model.

This Python script showcases a workflow for performing conversational retrieval tasks. It leverages Azure OpenAI's GPT-3.5 Turbo model and Azure Cognitive Search to seamlessly handle various steps in the process.

First, it configures the OpenAI API and initializes essential components, such as language models and embeddings. It then efficiently loads, splits, and indexes text documents, making them easily searchable through Azure Cognitive Search.

This code serves as a practical example of how to integrate state-of-the-art natural language processing models and search capabilities to build powerful conversational retrieval systems, which can be invaluable for information retrieval and question-answering applications.


'''

"\nThis code works on GPT-3.5 and embeddings model.\n\nThis Python script showcases a workflow for performing conversational retrieval tasks. It leverages Azure OpenAI's GPT-3.5 Turbo model and Azure Cognitive Search to seamlessly handle various steps in the process.\n\nFirst, it configures the OpenAI API and initializes essential components, such as language models and embeddings. It then efficiently loads, splits, and indexes text documents, making them easily searchable through Azure Cognitive Search.\n\nThis code serves as a practical example of how to integrate state-of-the-art natural language processing models and search capabilities to build powerful conversational retrieval systems, which can be invaluable for information retrieval and question-answering applications.\n\n\n"

In [2]:
#importing the libraries

import os
import openai
from dotenv import load_dotenv
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import AzureSearch
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import TokenTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate


In [3]:
# Load environment variables
load_dotenv('.env')


# Configure OpenAI API
openai.api_type = "azure"
openai.api_base = os.getenv('OPENAI_API_BASE')
openai.api_key = os.getenv('OPENAI_API_KEY')
openai.api_version = os.getenv('OPENAI_API_VERSION')


# Initialize gpt-35-turbo and our embedding model
llm = AzureChatOpenAI(deployment_name="htiOaiDEP")
embeddings = OpenAIEmbeddings(deployment_id="htiOaiDEPte", chunk_size=1)




                    deployment_id was transferred to model_kwargs.
                    Please confirm that deployment_id is what you intended.


In [8]:
# Connect to Azure Cognitive Search and create index
acs = AzureSearch(azure_search_endpoint=os.getenv('AZURE_COGNITIVE_SEARCH_SERVICE_NAME'),
                 azure_search_key=os.getenv('AZURE_COGNITIVE_SEARCH_API_KEY'),
                 index_name=os.getenv('AZURE_COGNITIVE_SEARCH_INDEX_NAME'),
                 embedding_function=embeddings.embed_query)

In [5]:
loader = DirectoryLoader("C:\\Users\\acer\\OneDrive\\Desktop\\update\\code", glob="*.txt", loader_cls=TextLoader, loader_kwargs={'autodetect_encoding': True})
documents = loader.load()
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Add documents to Azure Search
acs.add_documents(documents=docs)

['NzI3MjBlMmEtMDk1ZC00NTliLTkzNDMtYmUwODhhYTQ2OGM4',
 'YjVjMmFkMjEtZDJmOC00YWNmLTg5MDQtMjgwNmZiNjVkM2Rm',
 'MzJiNjQ1ZTMtYzJlOC00NTM5LWE2ODAtYmQ0NDhjODAxOTYw']

In [6]:

CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:""")

qa = ConversationalRetrievalChain.from_llm(llm=llm,
                                           retriever=acs.as_retriever(),
                                           condense_question_prompt=CONDENSE_QUESTION_PROMPT,
                                           return_source_documents=True,
                                           verbose=False)


In [7]:
chat_history = []
query = "who is Aditya Koul?"
result = qa({"question": query, "chat_history": chat_history})

print("Question:", query)
print("Answer:", result["answer"])


Question: who is Aditya Koul?
Answer: I do not have enough information to answer that question. The context provided only states that Aditya Koul belongs to Dehradun, Uttarakhand, but it does not provide any further information about who they are.
