
## Q&A ChatBot with LangChain

In [1]:
#%pip install langchain langchain-openai openai chromadb tiktoken pypdf panel

#### Needed Libraries

In [2]:
import getpass
import hashlib
import os
import tempfile

import openai
import panel as pn
import param

from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
# Imported after langchain.embeddings.openai on purpose: this OpenAIEmbeddings
# is the one that stays bound to the name.
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
# Configure Panel once for the whole notebook: request the 'texteditor'
# extension, use the "bootstrap" template and default every component to
# sizing_mode='stretch_width'.
pn.extension('texteditor', template="bootstrap", sizing_mode='stretch_width')

#### Hard-coded values

In [None]:
# Never hard-code the API key: keep the value already exported in the
# environment and only prompt (without echoing) when it is missing.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')
openai.api_key = os.environ['OPENAI_API_KEY']

# Chat model name; read by q_and_a() when it builds the ChatOpenAI instance.
llm_name = "gpt-3.5-turbo"
# Directory where q_and_a() stores the Chroma vector database.
persist_directory = 'docs/chroma/'

# NOTE(review): `file`, `chain_type` and `k` are not referenced by the cells
# visible below -- ChatWithYourPDF() is instantiated with its own defaults.
# They are kept so that any other cell relying on them keeps working.
file = 'machinelearning-lecture01.pdf'
chain_type = "map_reduce"
k = 5

#### ChatBot

In [4]:
def q_and_a(file, chain_type, k):
    """Build a conversational retrieval chain over one PDF document.

    The PDF is loaded, split into overlapping chunks, embedded with
    ``OpenAIEmbeddings`` and stored in a Chroma collection located under the
    module-level ``persist_directory``. The chat model name is taken from the
    module-level ``llm_name``.

    Args:
        file: Location of the PDF, handed to ``PyPDFLoader``.
        chain_type: Forwarded as ``chain_type`` to
            ``ConversationalRetrievalChain.from_llm``.
        k: Number of chunks the similarity retriever returns per query.

    Returns:
        The ``ConversationalRetrievalChain`` created by ``from_llm``. It is
        configured to also return the source documents and the generated
        question.

    Raises:
        ValueError: If no text chunks could be extracted from ``file``.
    """
    # Load the PDF and cut it into overlapping chunks.
    documents = PyPDFLoader(file).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = splitter.split_documents(documents)
    if not chunks:
        raise ValueError(f"No text could be extracted from {file!r}")

    embeddings = OpenAIEmbeddings()

    # Key the collection on the chunk contents. Each distinct document gets
    # its own collection, so loading a second PDF never mixes its chunks with
    # the first one, and re-loading the same PDF reuses the stored embeddings
    # instead of appending duplicates.
    digest = hashlib.sha256()
    for chunk in chunks:
        digest.update(chunk.page_content.encode("utf-8", errors="replace"))
        digest.update(b"\x00")  # separator so chunk boundaries affect the hash
    collection_name = f"pdf-{digest.hexdigest()[:32]}"

    db = Chroma(
        collection_name=collection_name,
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )
    if not db.get(limit=1)["ids"]:
        # First time this content is seen: embed and store it.
        db.add_documents(chunks)
        db.persist()

    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})

    # The memory records each question/answer pair under the key "history".
    memory = ConversationBufferMemory(
        memory_key="history",
        input_key='question',
        output_key='answer',
        return_messages=False,
    )

    qa = ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(temperature=0.2, model_name=llm_name),
        chain_type=chain_type,
        retriever=retriever,
        # Pass the caller-supplied chat history through unchanged.
        get_chat_history=lambda o: o,
        memory=memory,
        return_source_documents=True,
        return_generated_question=True,
        verbose=False,
    )
    return qa

#### User Interface with Panel

In [5]:
class ChatWithYourPDF(param.Parameterized):
    """State holder and callback provider for the PDF question-answering UI.

    The instance owns the retrieval chain (``self.qa``), the list of rendered
    conversation rows (``self.panels``) and the reactive parameters below.

    Several methods read module-level widgets that are defined in a later
    cell: ``file_input`` and ``button_load`` (``call_llm``) and ``inp``
    (``convchain``). Those widgets must exist before the methods are invoked.
    """

    # (question, answer) tuples forwarded to the chain as "chat_history".
    chat_history = param.List([])
    # Most recent answer returned by the chain.
    answer = param.String("")
    # "generated_question" returned by the chain for the last query.
    db_query = param.String("")
    # "source_documents" returned by the chain for the last query.
    db_response = param.List([])

    def __init__(self, loaded_file="MachineLearning-Lecture01.pdf", chain_type="stuff", retrieved_k=5, **params):
        """Create the chain for ``loaded_file``.

        Args:
            loaded_file: PDF used to build the initial chain.
            chain_type: Forwarded to ``q_and_a``.
            retrieved_k: Number of chunks to retrieve, forwarded to ``q_and_a``.
            **params: Forwarded to ``param.Parameterized.__init__``.
        """
        super().__init__(**params)
        self.panels = []
        self.loaded_file = loaded_file
        self.chain_type = chain_type
        self.k = retrieved_k
        self.qa = q_and_a(self.loaded_file, self.chain_type, self.k)

    def call_llm(self, count):
        """Rebuild the chain from the uploaded file and report the loaded file.

        Args:
            count: Click count of the load button; ``0`` means the callback is
                running at initialisation time.

        Returns:
            A Markdown pane naming the currently loaded file.
        """
        if count == 0 or file_input.value is None:  # init or no file specified
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

        file_input.save("temp.pdf")  # local copy
        button_load.button_style = "outline"
        try:
            self.qa = q_and_a("temp.pdf", self.chain_type, self.k)
        finally:
            # Restore the button even when building the chain fails.
            button_load.button_style = "solid"
        # Only advertise the new file once its chain was built successfully.
        self.loaded_file = file_input.filename
        self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        """Send ``query`` to the chain and return the rendered conversation.

        Args:
            query: Text entered by the user; an empty value yields an empty
                "User:" row without calling the chain.

        Returns:
            A scrollable ``pn.WidgetBox`` with all exchanges so far.
        """
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)

        result = self.qa.invoke({"question": query, "chat_history": self.chat_history})
        self.chat_history.append((query, result["answer"]))
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer']
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            # `styles` (not `style`) matches every other pane in this class.
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, styles={'background-color': '#F6F6F6'}))
        ])
        inp.value = ''  # clears loading indicator when cleared
        return pn.WidgetBox(*self.panels, scroll=True)

    @param.depends('db_query')
    def get_lquest(self):
        """Render the last question that was sent to the vector database."""
        if not self.db_query:
            return pn.Column(
                pn.Row(pn.pane.Markdown("Last question to DB:", styles={'background-color': '#F6F6F6'})),
                pn.Row(pn.pane.Str("no DB accesses so far"))
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown("DB query:", styles={'background-color': '#F6F6F6'})),
            pn.pane.Str(self.db_query)
        )

    @param.depends('db_response')
    def get_sources(self):
        """Render the source documents of the last lookup, or ``None`` if there are none."""
        if not self.db_response:
            return
        rlist = [pn.Row(pn.pane.Markdown("Result of DB lookup:", styles={'background-color': '#F6F6F6'}))]
        rlist.extend(pn.Row(pn.pane.Str(doc)) for doc in self.db_response)
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    # Declared as depending on the two methods that modify the chat history.
    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        """Render the current content of ``chat_history``."""
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist = [pn.Row(pn.pane.Markdown("Current Chat History variable", styles={'background-color': '#F6F6F6'}))]
        rlist.extend(pn.Row(pn.pane.Str(exchange)) for exchange in self.chat_history)
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self, count=0):
        """Reset ``chat_history`` to an empty list.

        Args:
            count: Unused; accepts the argument a button click callback passes.
        """
        self.chat_history = []
        return

#### Interact with the ChatBot

In [6]:
# --- Widgets -----------------------------------------------------------------
# `inp`, `file_input` and `button_load` are read as globals by the
# ChatWithYourPDF callbacks, so their names must not change.
inp = pn.widgets.TextInput(placeholder='Enter text here…')
file_input = pn.widgets.FileInput(accept='.pdf')
button_load = pn.widgets.Button(name="Load DB", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')

# --- Chatbot and callbacks ---------------------------------------------------
cb = ChatWithYourPDF()
button_clearhistory.on_click(cb.clr_history)

# Re-run the callbacks whenever the load button is clicked / text is entered.
bound_button_load = pn.bind(cb.call_llm, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp)

# --- Tabs --------------------------------------------------------------------
# Conversation: input box above the running dialogue.
tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation, loading_indicator=True, height=600),
    pn.layout.Divider(),
)

# Database: last generated query and the documents it retrieved.
tab2 = pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources),
)

# Chat History: raw content of the history variable.
tab3 = pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)

# Configure: upload another PDF or reset the history.
clear_hint = pn.pane.Markdown("Clears chat history. Can use to start a new topic")
tab4 = pn.Column(
    pn.Row(file_input, button_load, bound_button_load),
    pn.Row(button_clearhistory, clear_hint),
    pn.layout.Divider(),
)

# --- Dashboard ---------------------------------------------------------------
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# ChatWithYourPDF')),
    pn.Tabs(
        ('Conversation', tab1),
        ('Database', tab2),
        ('Chat History', tab3),
        ('Configure', tab4),
    ),
)
dashboard

#### Acknowledgments

Panel-based chatbot inspired by Sophia Yang (see [here](https://github.com/sophiamyang/tutorials-LangChain)), LangChain (see [here](https://python.langchain.com/docs/get_started/introduction)) and DeepLearning.AI (see [here](https://www.deeplearning.ai/)). Note that some of the PDF files come from MIT OpenCourseWare (see [here](https://ocw.mit.edu/)).