# Chatbot

## Import Libraries

In [1]:
import openai
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.vectorstores import Chroma, DocArrayInMemorySearch
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
import panel as pn
import param
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


## Setting Up Model and Embeddings

In [2]:
# Read the OpenAI key from a file kept outside the notebook directory.
# The context manager closes the handle; strip() removes the trailing newline
# that editors usually append, which would otherwise be sent as part of the key.
with open('../api_key.txt') as key_file:
    api_key = key_file.read().strip()

In [3]:
# Chat model used by the chains defined further down; temperature is pinned to 0.
llm = ChatOpenAI(temperature=0, openai_api_key=api_key)

In [4]:
# Embedding client handed to the Chroma store as its embedding function.
embedding = OpenAIEmbeddings(
    openai_api_key=api_key,
)

## Retrieval

In [5]:
# Directory passed to Chroma as `persist_directory` (relative to the notebook).
persist_directory = 'db/chroma/'

In [6]:
# Chroma vector store configured with the persist directory and the
# OpenAI embedding client defined above.
vector_db = Chroma(embedding_function=embedding, persist_directory=persist_directory)

In [7]:
# Sample query used to try out the similarity search.
question = "What are the major topics for this class?"

In [8]:
# Ask the store for the 3 entries most similar to the question.
docs = vector_db.similarity_search(query=question, k=3)

In [9]:
# Number of documents the search returned (requested k=3).
len(docs)

3

## Prompt

In [10]:
# Prompt text with two placeholders, {context} and {question}.
# The backslashes join the first three source lines into one line of prompt.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, \
just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise \
as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""

# Wrap the text in a PromptTemplate, declaring both placeholders as inputs.
QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template
)

In [11]:
# Rebinds `question` for the RetrievalQA example below.
question = "Is probability a class topic?"

In [12]:
# Single-turn QA chain over the vector store. It uses the custom prompt and
# is asked to return the source documents together with the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vector_db.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
)

In [13]:
# Run the chain; the answer text is read from the "result" key.
result = qa_chain({"query": question})
result["result"]

'Yes, probability is a topic that will be covered in the class. Thanks for asking!'

## Memory

In [14]:
# Conversation buffer exposed to the chain under the "chat_history" key,
# returning message objects rather than a single string.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

## Conversational Retrieval Chain

In [15]:
# Conversational chain: same model and retriever as before, plus the memory
# object so earlier turns are available to later questions.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=memory,
    retriever=vector_db.as_retriever(),
)

In [16]:
# First turn of the conversation; the reply is read from the "answer" key.
question = "Is probability a class topic?"
result = qa({"question": question})
result["answer"]

'Yes, probability is a topic that will be covered in this class. The instructor assumes familiarity with basic probability and statistics, so it is expected that students have prior knowledge in this area.'

In [17]:
# Follow-up turn: "those" refers back to the previous answer, so this only
# works if the chain's memory supplies the earlier exchange.
question = "Why are those prerequesites needed?"
result = qa({"question": question})
result["answer"]

'The reason for needing those prerequisites is that the class assumes a basic knowledge of computer science, computer skills and principles, as well as familiarity with probability and statistics, and basic linear algebra. These prerequisites are necessary to understand and apply the concepts and techniques taught in the class effectively.'

## Creating a Chatbot

In [18]:
def load_db(file, chain_type, k, chunk_size=1000, chunk_overlap=150):
    """Build a conversational retrieval chain over a single PDF.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping chunks,
    embedded with OpenAI embeddings and indexed in a
    ``DocArrayInMemorySearch`` store.

    Args:
        file: Path of the PDF, passed to ``PyPDFLoader``.
        chain_type: Forwarded to ``ConversationalRetrievalChain.from_llm``
            as ``chain_type`` (the notebook uses ``"stuff"``).
        k: Number of chunks the retriever is asked for per query.
        chunk_size: ``chunk_size`` given to the text splitter.
        chunk_overlap: ``chunk_overlap`` given to the text splitter.

    Returns:
        A ``ConversationalRetrievalChain`` created without a memory object:
        callers pass ``chat_history`` on each call. The chain is configured
        to return the source documents and the generated question.
    """
    # load documents
    pages = PyPDFLoader(file).load()

    # split documents
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_documents(documents=pages)

    # create vector database from data
    db = DocArrayInMemorySearch.from_documents(
        documents=chunks,
        embedding=OpenAIEmbeddings(openai_api_key=api_key),
    )

    # define retriever
    retriever = db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )

    # create a chatbot chain. Memory is managed externally.
    return ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(openai_api_key=api_key, temperature=0),
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )

### Panel Chatbot

In [19]:
class cbfs(param.Parameterized):
    """State and callbacks behind the Panel "Chat with Your Data" dashboard.

    The instance owns the retrieval chain (``self.qa``), the running chat
    history and the list of rendered conversation rows. Several methods read
    module-level widgets (``file_input``, ``button_load``, ``inp``) that must
    exist by the time the callbacks fire.
    """

    # (query, answer) tuples passed to the chain as ``chat_history``.
    chat_history = param.List([])
    # Answer from the most recent chain call.
    answer = param.String("")
    # ``generated_question`` from the most recent chain call.
    db_query = param.String("")
    # ``source_documents`` from the most recent chain call.
    db_response = param.List([])

    def __init__(self, **params):
        """Create the chain for the default lecture PDF."""
        super().__init__(**params)
        self.panels = []
        self.loaded_file = "documents/MachineLearning-Lecture01.pdf"
        self.qa = load_db(self.loaded_file, "stuff", 4)

    def call_load_db(self, count):
        """Rebuild the chain from the uploaded PDF and report the loaded file.

        ``count`` is the click count of the load button. On the initial call
        (``count == 0``) or when no file has been chosen, nothing is reloaded.
        Otherwise the upload is saved to ``temp.pdf``, indexed, and the chat
        history is cleared.
        """
        if count == 0 or file_input.value is None:  # init or no file specified
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

        file_input.save("temp.pdf")  # local copy
        self.loaded_file = file_input.filename
        # Switch the button to "outline" while the database is rebuilt.
        button_load.button_style = "outline"
        self.qa = load_db("temp.pdf", "stuff", 4)
        button_load.button_style = "solid"
        self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        """Send ``query`` to the chain and return the rendered conversation.

        An empty query returns a placeholder box without calling the chain.
        Otherwise the chain is called with the current history, the exchange
        is recorded, and the user/bot rows are appended to ``self.panels``.
        """
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)

        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.append((query, result["answer"]))
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer']
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            # `styles` (not `style`) matches every other pane in this class
            # and is the keyword Panel 1.x uses for CSS properties.
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, styles={'background-color': '#F6F6F6'}))
        ])
        inp.value = ''  # clears loading indicator when cleared
        return pn.WidgetBox(*self.panels, scroll=True)

    @param.depends('db_query')
    def get_lquest(self):
        """Render the last question sent to the database, or a placeholder."""
        if not self.db_query:
            return pn.Column(
                pn.Row(pn.pane.Markdown("Last question to DB:", styles={'background-color': '#F6F6F6'})),
                pn.Row(pn.pane.Str("no DB accesses so far"))
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown("DB query:", styles={'background-color': '#F6F6F6'})),
            pn.pane.Str(self.db_query)
        )

    @param.depends('db_response')
    def get_sources(self):
        """Render the source documents of the last lookup; ``None`` if there are none."""
        if not self.db_response:
            return None
        rows = [pn.Row(pn.pane.Markdown("Result of DB lookup:", styles={'background-color': '#F6F6F6'}))]
        rows.extend(pn.Row(pn.pane.Str(doc)) for doc in self.db_response)
        return pn.WidgetBox(*rows, width=600, scroll=True)

    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        """Render the current ``chat_history`` list, or a placeholder when empty."""
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rows = [pn.Row(pn.pane.Markdown("Current Chat History variable", styles={'background-color': '#F6F6F6'}))]
        rows.extend(pn.Row(pn.pane.Str(exchange)) for exchange in self.chat_history)
        return pn.WidgetBox(*rows, width=600, scroll=True)

    def clr_history(self, count=0):
        """Reset ``chat_history``. ``count`` is accepted so this can serve as a button callback."""
        self.chat_history = []
        return

In [20]:
# Controller instance; constructing it builds the chain for the default PDF.
cb = cbfs()

# Widgets. The cbfs callbacks look these names up at module level.
file_input = pn.widgets.FileInput(accept='.pdf')
button_load = pn.widgets.Button(name="Load DB", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')
button_clearhistory.on_click(cb.clr_history)
inp = pn.widgets.TextInput( placeholder='Enter text here…')

# Bind callbacks to widget values: load on button clicks, chat on text input.
bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp) 

jpg_pane = pn.pane.Image( './img/convchain.jpg')

# Tab 1: text input and the running conversation.
tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation,  loading_indicator=True, height=300),
    pn.layout.Divider(),
)

# Tab 2: last generated DB query and the source documents it returned.
tab2= pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources ),
)

# Tab 3: contents of the chat history list.
tab3= pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)

# Tab 4: upload and load a different PDF, or clear the chat history.
tab4=pn.Column(
    pn.Row( file_input, button_load, bound_button_load),
    pn.Row( button_clearhistory, pn.pane.Markdown("Clears chat history. Can use to start a new topic" )),
    pn.layout.Divider(),
    pn.Row(jpg_pane.clone(width=400))
)

# Top-level layout: title plus the four tabs.
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# Chat with Your Data')),
    pn.Tabs(('Conversation', tab1), ('Database', tab2), ('Chat History', tab3),('Configure', tab4))
)

In [21]:
# dashboard.show()

### Gradio Chatbot

In [22]:
def _history_to_turns(history):
    """Convert a Gradio chat history into ``(human, ai)`` tuples.

    Accepts the pair format (``[[user, bot], ...]``) as well as the
    "messages" format (``[{"role": ..., "content": ...}, ...]``). ``None``
    or an empty history yields an empty list.
    """
    turns = []
    pending_user = None
    for entry in history or []:
        if isinstance(entry, dict):
            content = entry.get("content") or ""
            if entry.get("role") == "user":
                pending_user = content
            elif entry.get("role") == "assistant" and pending_user is not None:
                turns.append((str(pending_user), str(content)))
                pending_user = None
        else:
            user_msg, bot_msg = entry
            turns.append((str(user_msg or ""), str(bot_msg or "")))
    return turns


def qa_bot(question, context=None):
    """Answer ``question`` from the persisted Chroma store.

    Args:
        question: The user's new message.
        context: Chat history supplied by the caller (Gradio passes it as
            the second argument). It is forwarded to the chain as
            ``chat_history`` so follow-up questions can refer to earlier
            turns. Defaults to ``None`` (no history), which also allows
            single-argument calls.

    Returns:
        The chain's answer text.
    """
    store = Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding,
    )

    # No memory object here: one created inside this function would start
    # empty on every call, so the history is passed in explicitly instead.
    qa = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=store.as_retriever(),
    )

    response = qa({
        "question": question,
        "chat_history": _history_to_turns(context),
    })

    return response['answer']

In [23]:
# question = "Is Probability a class topic?"
# response = qa_bot(question)
# print(response)

In [24]:
# question = "Why are those prerequesites needed?"
# response = qa_bot(question)
# print(response)

In [25]:
# Wrap `qa_bot` in Gradio's chat UI.
chatbot = gr.ChatInterface(fn=qa_bot)

# Launch the Gradio app
chatbot.launch()