In [16]:
import os
import openai
from dotenv import load_dotenv
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import AzureSearch
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import TokenTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from azure.search.documents.indexes.models import SearchableField, SimpleField, SearchFieldDataType

In [17]:
# Read settings from the local .env file into the process environment.
load_dotenv('.env')


# Point the openai SDK at Azure. Endpoint, key and API version all come from
# the environment; each is None when its variable is not set.
openai.api_type = "azure"
openai.api_version = os.environ.get('OPENAI_API_VERSION')
openai.api_base = os.environ.get('OPENAI_API_BASE')
openai.api_key = os.environ.get('OPENAI_API_KEY')


# Chat model (gpt-35-turbo, per the original note) and embedding model, each
# addressed by its Azure deployment name.
llm = AzureChatOpenAI(deployment_name="htiOaiDEP")
# NOTE(review): chunk_size=1 presumably limits each embeddings request to a
# single text to satisfy an Azure batch-size limit — confirm before raising it.
embeddings = OpenAIEmbeddings(chunk_size=1, deployment_id="htiOaiDEPte")




In [18]:
# Vector store backed by an Azure Cognitive Search index. Endpoint, API key and
# index name are read from the environment; queries are embedded with
# `embeddings.embed_query`.
# NOTE(review): the variable is named ..._SERVICE_NAME but is passed as the
# endpoint argument — presumably it must hold the full service URL; confirm.
acs = AzureSearch(
    embedding_function=embeddings.embed_query,
    index_name=os.environ.get('AZURE_COGNITIVE_SEARCH_INDEX_NAME'),
    azure_search_key=os.environ.get('AZURE_COGNITIVE_SEARCH_API_KEY'),
    azure_search_endpoint=os.environ.get('AZURE_COGNITIVE_SEARCH_SERVICE_NAME'),
)

In [19]:
# Folder holding the .txt source documents. Set SOURCE_DOCS_DIR (in the shell
# or in .env) to point somewhere else; when it is unset, the original author's
# local path is used so existing runs behave exactly as before.
docs_dir = os.getenv("SOURCE_DOCS_DIR", "C:\\Users\\acer\\OneDrive\\Desktop\\SelfMade")

# Load every file matching *.txt in that folder, letting TextLoader detect
# each file's encoding instead of assuming one.
loader = DirectoryLoader(
    docs_dir,
    glob="*.txt",
    loader_cls=TextLoader,
    loader_kwargs={'autodetect_encoding': True},
)
documents = loader.load()

# Split the loaded documents into 1000-token chunks with no overlap.
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Add documents to Azure Search. The call is the cell's last expression, so
# its return value (the list shown in the cell output) is displayed.
# NOTE(review): nothing here checks for already-indexed content, so re-running
# this cell presumably uploads the same chunks again — confirm before re-running.
acs.add_documents(documents=docs)



['YmJkNmUxZDctMzJkZi00OTJiLWFmZWQtMTkzZDliNTM4MTlk',
 'ZmQwMGQxM2EtOWU1ZC00MDlkLTlkZjQtNjI2OGRiM2RkNWRi',
 'OGNiNDhkY2EtOGFiNi00MTNiLTkwOGMtYjU2ZWRhYmE5ZWYz']

In [20]:
# Prompt that asks the model to rewrite a follow-up question, given the chat
# history, as a standalone question. Adapt the wording if needed.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(
    "Given the following conversation and a follow up question, "
    "rephrase the follow up question to be a standalone question.\n"
    "\n"
    "Chat History:\n"
    "{chat_history}\n"
    "Follow Up Input: {question}\n"
    "Standalone question:"
)

# Conversational QA chain: answers with `llm`, retrieves through the Azure
# Search vector store, and returns the source documents alongside the answer.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=acs.as_retriever(),
    condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    verbose=False,
    return_source_documents=True,
)


In [21]:
# Start from an empty history so this cell runs on a fresh kernel. The previous
# version built the history from `query` and `result` before either existed,
# which raises NameError unless stale kernel state happens to be present.
chat_history = []
query = "What is cricket?"
result = qa({"question": query, "chat_history": chat_history})

# Record this turn as a (question, answer) pair so a follow-up cell can pass
# `chat_history` back in. A new list is bound rather than appending in place,
# so the list object that was handed to `qa` is left unmodified.
chat_history = chat_history + [(query, result["answer"])]

print("Question:", query)
print("Answer:", result["answer"])


Question: What is cricket?
Answer: Sure! Cricket is a bat-and-ball game played between two teams of eleven players on a field at the center of which is a 22-yard (20-meter) pitch with a wicket at each end, each comprising two bails balanced on three stumps. The objective of the game is to score as many runs as possible while also trying to dismiss the opposing team. 

The batting side scores runs by striking the ball bowled at one of the wickets with the bat and then running between the wickets. The fielding side tries to prevent this by preventing the ball from leaving the field and getting the ball to either wicket to dismiss each batter. Means of dismissal include being bowled, when the ball hits the stumps and dislodges the bails, and by the fielding side either catching the ball after it is hit by the bat, but before it hits the ground, or hitting a wicket with the ball before a batter can cross the crease in front of the wicket. 

When ten batters have been dismissed, the innings