# Pre-requisites
- Python has been installed
- Jupyter environment set up in PyCharm or Visual Studio Code

# Setup environment
Install the required packages.


In [None]:
! pip install --upgrade pip
! pip install langchain langchain_community langchain_openai openai python-dotenv pypdf chromadb pysqlite3-binary

# Init variables
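Set the value of `OPENAI_API_KEY` (provided by the training team) in the `.env` file. The same file must also define `AZURE_OPENAI_SERVICE`, `AZURE_OPENAI_DEPLOYMENT_NAME`, `AZURE_OPENAI_EMBEDDING_DEPLOYMENT` and `AZURE_OPENAI_API_VERSION`, which are read in the next cell.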

In [None]:
import os

import openai
from dotenv import load_dotenv

# Load the key/value pairs from the local .env file into the process environment.
load_dotenv()

# Name of the Azure OpenAI resource; it becomes the host part of the endpoint URL below.
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
if not AZURE_OPENAI_SERVICE:
    # Without this check the endpoint would be built from the text "None"
    # ("https://None.openai.azure.com") and the mistake would only surface on the first request.
    raise RuntimeError(
        "AZURE_OPENAI_SERVICE is not set; add it to the .env file before running this notebook."
    )

# Deployment names (embedding model and chat model) and the REST API version to call.
AZURE_OPENAI_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")
AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
AZURE_OPENAI_API_ENDPOINT = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com"
# The key is stored under OPENAI_API_KEY in the .env file.
AZURE_OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [None]:
# Azure OpenAI SDK client shared by the rest of the notebook (used for the embedding requests).
azureOpenAPIClient = openai.AzureOpenAI(
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_API_ENDPOINT,
    api_version=AZURE_OPENAI_API_VERSION,
)

# Overview
The BonBon FAQ.pdf file contains frequently asked questions and answers for a customer support scenario. The topics cover troubleshooting of IT-related issues such as networking, software and hardware. Your task is to build, with LangChain, a chatbot capable of answering user questions based on this file.

## Assignment 1: Document Indexing (mandatory)
- The content of BonBon FAQ.pdf should be indexed into a local Chroma vector DB, from which the chatbot can look up the appropriate information to answer questions.
- Use an embedding model such as Azure OpenAI text-embedding-ada-002 to create the vectors; feel free to use any other open-source embedding model if it works.

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configuration: chunk_size=1000 with chunk_overlap=200, so neighbouring chunks share
# some text and an answer that straddles a boundary is less likely to be cut apart.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)

# Load the FAQ PDF and cut the loaded documents into chunks for indexing.
file_path = "./data/BonBon FAQ.pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()
text_chunks = text_splitter.split_documents(docs)

In [None]:
# Work around Chroma's "Your system has an unsupported version of sqlite3" error:
# when the pysqlite3 package is available, register it under the name "sqlite3"
# so that every later `import sqlite3` (including Chroma's) picks it up.
import sys
import warnings

try:
    import pysqlite3  # noqa: F401  (imported only so it can be re-registered below)
except ImportError:
    # pysqlite3-binary may not be installable on every platform; keep the standard-library
    # sqlite3 in that case instead of aborting the notebook here.
    warnings.warn(
        "pysqlite3 is not installed; falling back to the standard-library sqlite3 module.",
        stacklevel=1,
    )
else:
    sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

In [None]:
from langchain.vectorstores import Chroma
from typing import List

class AzureOpenAIEmbeddings:
    """Embedding adapter backed by the module-level ``azureOpenAPIClient``.

    Provides the ``embed_documents`` / ``embed_query`` pair that the vector
    store in this notebook is given. Every request targets the deployment
    named by ``AZURE_OPENAI_EMBEDDING_DEPLOYMENT``.
    """

    # Number of texts sent per embeddings request. Deliberately conservative so a
    # request stays within per-request input limits of the service.
    BATCH_SIZE = 16

    def _embed(self, inputs: List[str]) -> List[List[float]]:
        """Embed *inputs* with a single request and return vectors in input order."""
        response = azureOpenAPIClient.embeddings.create(
            input=inputs,
            model=AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
        )
        # Each returned item carries the index of the input it belongs to; sort on
        # it so the result lines up with *inputs* regardless of response order.
        ordered = sorted(response.data, key=lambda item: item.index)
        return [item.embedding for item in ordered]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding per entry of *texts*, in the same order.

        The texts are sent in batches of ``BATCH_SIZE`` instead of one request
        per text. An empty *texts* yields an empty list without any request.
        """
        texts = list(texts)
        embeddings: List[List[float]] = []
        for start in range(0, len(texts), self.BATCH_SIZE):
            embeddings.extend(self._embed(texts[start:start + self.BATCH_SIZE]))
        return embeddings

    def embed_query(self, query: str) -> List[float]:
        """Return the embedding of the single string *query*."""
        return self._embed([query])[0]

# Embed the chunks, index them in Chroma, and expose the index as a retriever
# for the chain and the agent built in the following cells.
db = Chroma.from_documents(
    embedding=AzureOpenAIEmbeddings(),
    documents=text_chunks,
)
retriever = db.as_retriever()

In [None]:
from langchain_openai import AzureChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Flag read both by the chat model below and by the agent chat loop later in the notebook.
streaming = False

# Azure-hosted chat model used as the reasoning engine; temperature 0 is requested.
llm = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_API_ENDPOINT,
    azure_deployment=AZURE_OPENAI_DEPLOYMENT_NAME,
    api_version=AZURE_OPENAI_API_VERSION,
    streaming=streaming,
    temperature=0,
    max_retries=2,
    max_tokens=None,
    timeout=None,
)

# Prompt for the retrieval chain. The retrieved FAQ chunks are substituted for {context};
# they are separated from the instructions by blank lines and introduced explicitly so the
# model can tell the instructions and the retrieved text apart.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. The topics are around IT related issue troubleshooting such as networking, software, hardware."
            "\n\nUse the following context to answer the question:\n\n"
            "{context}",
        ),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

# Stuff the retrieved documents into the prompt, then put the retriever in front of that chain.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
# Interactive question/answer loop over the retrieval chain; type "exit" to leave it.
while True:
    question = input("Human: ")
    if question == "exit":
        break
    print(f"Human: {question}")

    result = rag_chain.invoke({"input": question})
    print(f"AI: {result['answer']}")

    # The retriever may return nothing; only cite a source when there is one.
    context_docs = result["context"]
    if not context_docs:
        print("Source: (no matching document found)")
        continue

    metadata = context_docs[0].metadata
    filename = os.path.basename(metadata["source"])
    # PyPDFLoader stores the page as a 0-based index; show the 1-based page number.
    page = metadata["page"] + 1
    print(f"Source: {filename} (Page {page})")

## Assignment 2: Building Chatbot (mandatory)
- You are requested to build a chatbot solution for a customer support scenario using the Conversational ReAct agent supported in LangChain.
- The chatbot should be able to answer the FAQs in the sample BonBon FAQ.pdf file.
- The chatbot should use the Azure OpenAI GPT-3.5 LLM as the reasoning engine.
- The chatbot should be context-aware, meaning that it should be able to chat with users in a conversational manner.
- The agent is equipped with the following tools:
  - Internet Search: helps the chatbot automatically find out more about something using DuckDuckGo internet search
  - Knowledge Base Search: helps the chatbot look up information in the private knowledge base
- If the user asks for information related to topics in the BonBon FAQ.pdf file, such as internet connection, printer or malware issues, the chatbot must use the private knowledge base; otherwise it should search the internet to answer the question.
- In its answer, the chatbot should mention the source file and the page that the answer comes from, for example "BonBon FAQ.pdf (page 2)".

In [None]:
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.tools import DuckDuckGoSearchRun
from langchain.memory import ConversationBufferMemory
from langchain.tools.retriever import create_retriever_tool

from langchain.chains import LLMMathChain
from langchain.tools import Tool

# Math chain that backs the "Calculator" tool.
llm_math = LLMMathChain.from_llm(llm, verbose=True)


def _search_knowledge_base(query: str) -> str:
    """Return the FAQ passages retrieved for *query*, each tagged with its source.

    Every passage is followed by a "[Source: <file> (page <n>)]" line so the
    agent has the file name and page it is asked to cite in its answer.
    """
    passages = []
    for doc in retriever.invoke(query):
        filename = os.path.basename(doc.metadata.get("source", "unknown source"))
        page = doc.metadata.get("page")
        # PyPDFLoader stores the page as a 0-based index; report it 1-based.
        location = filename if page is None else f"{filename} (page {page + 1})"
        passages.append(f"{doc.page_content}\n[Source: {location}]")
    if not passages:
        return "No matching entry was found in the knowledge base."
    return "\n\n".join(passages)


tools = [
    # Private knowledge base lookup; returns passages together with file name and page.
    Tool(
        name="HelpDesk",
        func=_search_knowledge_base,
        description=(
            "Use this tool first to answer questions about IT troubleshooting topics covered by "
            "the BonBon FAQ, such as internet connection, printer, malware, software or hardware "
            "issues. Each result ends with its source file and page."
        ),
    ),
    DuckDuckGoSearchRun(description="Use this tool to search information in the Internet"),
    Tool(
        name="Calculator",
        func=llm_math.run,
        description="Use this tool for math calculating",
    ),
]

# The system message tells the agent when to use which tool and asks it to cite the source
# reported by the HelpDesk tool.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful customer support assistant. "
            "For questions about IT troubleshooting topics such as internet connection, printer "
            "or malware issues, use the HelpDesk tool first; for other topics, search the "
            "Internet. When your answer is based on the HelpDesk tool, mention the source file "
            "and page it reports, for example: BonBon FAQ.pdf (page 2).",
        ),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

agent = create_tool_calling_agent(llm, tools, prompt)

# Conversation memory feeds the {chat_history} placeholder so follow-up questions keep context.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory, verbose=True)

In [None]:
# Interactive loop over the agent; type "exit" to leave it.
while True:
    question = input("Human: ")
    if question == "exit":
        break

    if not streaming:
        result = agent_executor.invoke({"input": question})
        print(f"AI: {result['output']}")
    else:
        # AgentExecutor.stream also yields intermediate chunks (tool actions and
        # observations) that have no "output" key; only the chunks carrying the
        # answer are collected here.
        output_parts = [
            chunk["output"]
            for chunk in agent_executor.stream({"input": question})
            if "output" in chunk
        ]
        final_response = "".join(output_parts)
        print(f"AI: {final_response}")

## Assignment 3: Build a new assistant based on BonBon source code (optional)
Objectives:
- Run the code and index the sample BonBon FAQ.pdf file to Azure Cognitive Search
- Explore the code and implement a new assistant that has the same behavior as above
- Explore other features such as RBAC and the features of the admin portal

Please contact the training team in case you need to get the source code of BonBon.