In [1]:
import os
import openai
from dotenv import load_dotenv
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import AzureSearch
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import TokenTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from azure.search.documents.indexes.models import SearchableField, SimpleField, SearchFieldDataType

In [24]:
# Read settings from the local .env file into the process environment so the
# os.environ lookups below (and in later cells) can see them.
load_dotenv('.env')

# Deployment names of the chat model and the embedding model.
CHAT_DEPLOYMENT = "htiOaiDEP"
EMBEDDING_DEPLOYMENT = "htiOaiDEPte"

# Configure the module-level openai settings for Azure. Endpoint, key and API
# version come from the environment; a missing variable yields None here.
openai.api_type = "azure"
openai.api_version = os.environ.get('OPENAI_API_VERSION')
openai.api_base = os.environ.get('OPENAI_API_BASE')
openai.api_key = os.environ.get('OPENAI_API_KEY')

# Chat model (gpt-35-turbo according to the original note) and embedding model.
llm = AzureChatOpenAI(deployment_name=CHAT_DEPLOYMENT)
# NOTE(review): chunk_size=10 presumably caps how many texts are embedded per
# request for this deployment -- confirm against the OpenAIEmbeddings docs.
embeddings = OpenAIEmbeddings(deployment_id=EMBEDDING_DEPLOYMENT, chunk_size=10)




In [25]:
# Build the Azure Cognitive Search vector store from environment settings.
# NOTE(review): the variable is called ..._SERVICE_NAME but its value is passed
# as the search *endpoint* -- confirm the .env entry holds what AzureSearch expects.
acs_settings = {
    "azure_search_endpoint": os.environ.get('AZURE_COGNITIVE_SEARCH_SERVICE_NAME'),
    "azure_search_key": os.environ.get('AZURE_COGNITIVE_SEARCH_API_KEY'),
    "index_name": os.environ.get('AZURE_COGNITIVE_SEARCH_INDEX_NAME'),
    # Queries are embedded with the same embedding model configured earlier.
    "embedding_function": embeddings.embed_query,
}
acs = AzureSearch(**acs_settings)

In [26]:
# Folder holding the .txt source documents to index.
# The location can be overridden with the CHATBOT_DATA_DIR environment variable
# (e.g. in .env) so the notebook is not tied to one machine; when the variable
# is unset the original local path is used, so existing behaviour is unchanged.
DATA_DIR = os.getenv(
    "CHATBOT_DATA_DIR",
    "C:\\Users\\acer\\OneDrive\\Desktop\\Final\\chatbot-llm\\data\\App",
)

# Load every *.txt file in DATA_DIR, letting TextLoader detect the encoding.
loader = DirectoryLoader(
    DATA_DIR,
    glob="*.txt",
    loader_cls=TextLoader,
    loader_kwargs={'autodetect_encoding': True},
)
documents = loader.load()

# Split the loaded documents into chunks of up to 1000 tokens with no overlap.
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Add documents to Azure Search. Kept as the last expression so the cell
# displays the returned list of document ids.
# NOTE(review): re-running this cell presumably uploads the same chunks again
# under new ids -- confirm before executing it more than once per index.
acs.add_documents(documents=docs)



['YTA4YTVkZDEtZGNjYi00MWEyLTk5YTktYzYzYjVhNDc3MTAz',
 'NzU2ZTgxODEtMzcyOC00ODRlLThmYWMtMTAwZGUzMjI3YWE1',
 'YjBhYmQwMzMtY2ZlMC00MmUwLWE5MDktNjFjYTE4NjIyZTRk',
 'Y2NiODQ2ZjQtMjUzZC00ZTZlLWFhYzEtODA2MjBkOTQ5NDVk',
 'ZjBhZGY3ODMtODM3OS00ZjhjLTg4NzAtNzdlOTVkYzUzNmU3']

In [31]:
# Template that asks the model to rewrite a follow-up question as a standalone
# question, given the chat history. Adapt the wording if needed.
CONDENSE_QUESTION_TEMPLATE = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(CONDENSE_QUESTION_TEMPLATE)

# Retriever over the Azure Cognitive Search vector store.
retriever = acs.as_retriever()

# Conversational retrieval chain; source documents are returned with each answer.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    return_source_documents=True,
    verbose=False,
)


In [36]:
# First turn of the conversation: there is no earlier exchange, so the history
# passed to the chain is empty.
query = "what is cricket?"
chat_history = []
result = qa(dict(question=query, chat_history=chat_history))

# Prints the whole result dict (question, history, answer and source documents).
print(result)

{'question': 'what is cricket?', 'chat_history': [], 'answer': 'Cricket is a team sport played between two teams of eleven players on a large oval-shaped field with a 22-yard long rectangular strip in the center, known as the pitch. The objective of the game is for one team to score more runs than the other team while the other team tries to prevent this and dismiss the opposing team. This is done by hitting a ball with a bat and running back and forth between the wickets, while the opposing team tries to field the ball and get the batter out. Cricket is a popular sport in many countries, particularly in the Commonwealth.', 'source_documents': [Document(page_content='\nThis article is about the sport. For the insect, see Cricket (insect). For other uses, see Cricket (disambiguation).\n"Cricketer" redirects here. For other uses, see Cricketer (disambiguation).\nCricket\n\nShaun Pollock of South Africa bowls to Michael Hussey of Australia during the 2005 Boxing Day Test match at the Melb

In [37]:

# Second turn: carry the previous (question, answer) pair forward as history.
# This must run before `query` is reassigned below.
previous_turn = (query, result["answer"])
chat_history = [previous_turn]

query = "What is hockey?"
result = qa(dict(question=query, chat_history=chat_history))

# Show only the question and the generated answer for this turn.
print("Question:", query)
print("Answer:", result["answer"])


Question: What is hockey?
Answer: Hockey is a family of various types of both summer and winter team sports which originated on either an outdoor field, sheet of ice, or dry floor such as in a gymnasium. While these sports vary in specific rules, numbers of players, apparel, and playing surface, they share broad characteristics of two opposing teams using a stick to propel a ball or disk into a goal. Some games make the use of skates, either wheeled, or bladed while others do not. In order to help make the distinction between these various games, the word hockey is often preceded by another word i.e. field hockey, ice hockey, roller hockey, rink hockey, or floor hockey.
