# ConversationalRetrievalChain using Memory
Implementation inspired by:
> https://python.langchain.com/docs/use_cases/chatbots

Parts of the implementation are based on this workaround:
> https://github.com/langchain-ai/langchain/issues/2303#issuecomment-1677280257 

which deals with *Memory* and *LLMChain* components in *ConversationalRetrievalChain*.

The retriever with a score threshold is taken from SQLBasedChains.ipynb.

In [None]:
#### Load retriever vectorstore for similar questions and queries

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from chromadb.config import Settings
import chromadb


# Embedding model name and on-disk location of the persisted Chroma store.
embedding_model = "uer/sbert-base-chinese-nli"
persist_directory = "../vectorstore/"

# Retrieval tuning: forwarded to the retriever as "score_threshold" and "k".
score_threshold = 0.8
top_k = 3

CHROMA_SETTINGS = Settings(
    persist_directory=persist_directory,
    anonymized_telemetry=False,
)

embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
chroma_client = chromadb.PersistentClient(
    settings=CHROMA_SETTINGS,
    path=persist_directory,
)
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings,
    client_settings=CHROMA_SETTINGS,
    client=chroma_client,
)

# Retriever using the "similarity_score_threshold" search type with the
# k / score_threshold values configured above.
retriever = vectordb.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": top_k, "score_threshold": score_threshold},
)

In [None]:
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


# Path to the local GPT4All model weights.
model_path = "../models/ggml-model-gpt4all-falcon-q4_0.bin"

# Local LLM whose tokens are streamed to stdout by the callback handler.
# NOTE(review): backend is "gptj" although the file name says "falcon" --
# confirm this value is what the installed gpt4all bindings expect.
llm = GPT4All(
    model=model_path,
    backend="gptj",
    max_tokens=1000,
    n_batch=8,
    verbose=False,
    callbacks=[StreamingStdOutCallbackHandler()],
)

In [None]:
from langchain.chains import LLMChain
from langchain.llms import FakeListLLM
from langchain.prompts import (
    PromptTemplate,
    ChatPromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    )
from langchain.memory import ConversationBufferMemory
from langchain.memory import ConversationSummaryBufferMemory


class NoOpLLMChain(LLMChain):
    """LLMChain stand-in that hands the incoming question back unchanged.

    Intended as a drop-in replacement for a question-rewriting chain: the
    wrapped fake LLM and empty prompt exist only to satisfy ``LLMChain``'s
    constructor and are never consulted by ``run`` / ``arun``.
    """

    def __init__(self):
        super().__init__(
            llm=FakeListLLM(responses=["FakeListLLM response"]),
            prompt=PromptTemplate(template="", input_variables=[]),
        )

    def run(self, question: str, *args, **kwargs) -> str:
        """Return ``question`` as-is; every other argument is ignored."""
        return question

    async def arun(self, question: str, *args, **kwargs) -> str:
        """Async counterpart of ``run``.

        Without this override the inherited ``arun`` would execute the fake
        LLM and return its canned response instead of the question.
        """
        return question


# Chat prompt: system persona, then the running history (filled from the
# "chat_history" variable), then the current user question.
# The system text is sent to the model verbatim, so its wording matters: the
# fallback reply is spelled `I don't know` exactly as the bot should emit it.
prompt = ChatPromptTemplate(messages=[
    SystemMessagePromptTemplate.from_template(
        "You are a conversational chatbot named Aida. "
        "Try to be friendly, and answer questions to the best of your ability, "
        "and give `I don't know` as the response if the question is outside your knowledge base."),
    MessagesPlaceholder(variable_name="chat_history"),
    HumanMessagePromptTemplate.from_template("{question}"),
])

# Alternative kept for reference (summarising memory backed by the LLM):
# memory = ConversationSummaryBufferMemory(
#     llm=llm,
#     max_token_limit=2000,
#     memory_key="chat_history",
#     output_key='answer',
#     return_messages=True,
#     verbose=True
# )

# Plain buffer memory. History is exposed under "chat_history" (the name used
# by the prompt's MessagesPlaceholder) as message objects, and the reply to
# store is read from the chain output key "answer".
memory = ConversationBufferMemory(
    return_messages=True,
    output_key="answer",
    memory_key="chat_history",
)

---
## LLMChain
A gentle transition to ConversationalRetrievalChain.

In [None]:
from langchain.chains import LLMChain


# Plain LLMChain wired to the chat prompt and the shared conversation memory.
# `memory` is configured with output_key="answer", so the chain has to emit
# its result under that same key; with LLMChain's default key ("text") the
# memory cannot find the reply when saving the turn and raises KeyError.
conversation = LLMChain(
    llm=llm,
    prompt=prompt,
    memory=memory,
    output_key="answer",
    verbose=True,
)

In [None]:
# First turn of the LLMChain conversation; the returned dict is the cell output.
conversation.invoke({"question": "What is your name, my dear?"})

In [None]:
# Follow-up turn; uses .invoke like the first turn instead of calling the chain object directly.
conversation.invoke({"question": "What's the meaning of that?"})

In [None]:
# Follow-up turn; uses .invoke like the first turn instead of calling the chain object directly.
conversation.invoke({"question": "What are some of these cultures?"})

In [None]:
# Correction turn; uses .invoke like the first turn instead of calling the chain object directly.
conversation.invoke({"question": "Your name is Aida, not Aidan."})

---
## ConversationalRetrievalChain

In [None]:
from langchain.chains import ConversationalRetrievalChain


# Answering prompt: retrieved context, the question, and the chat history.
modified_template = """
Use the following pieces of context to answer the users question. If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
{context}
Question:
{question}

Chat History:
{chat_history}
"""

# "chat_history" is declared as an input variable so the history reaches the
# answering prompt alongside the retrieved context and the question.
qa_prompt = PromptTemplate(
    template=modified_template,
    input_variables=["context", "question", "chat_history"],
)

# The prompt is handed to from_llm via combine_docs_chain_kwargs so the
# document-combining chain is built with it directly, rather than rewriting
# the prompt attributes of an already-built chain afterwards.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    chain_type="stuff",
    memory=memory,
    retriever=retriever,
    combine_docs_chain_kwargs={"prompt": qa_prompt},
    # get_chat_history=lambda h: h,
    verbose=True,
    return_source_documents=True,
)

# Replace the question generator with a pass-through so the user's question
# is used verbatim instead of being rewritten by the LLM.
qa.question_generator = NoOpLLMChain()

In [None]:
# Opening turn for the retrieval chain; .invoke matches the call style used
# for the LLMChain conversation.
question0 = "Hi, how are you my friend?"
result0 = qa.invoke({"question": question0})
result0

In [None]:
# Second turn for the retrieval chain; .invoke matches the call style used
# for the LLMChain conversation.
question = "What is your name and what is its meaning?"
result = qa.invoke({"question": question})
result

In [None]:
# Show the "chat_history" entry of the previous result.
result.get("chat_history")

In [None]:
# Third turn for the retrieval chain; .invoke matches the call style used
# for the LLMChain conversation.
question2 = "Help me get an as beautiful and meaingful of a name as yours, please."
result2 = qa.invoke({"question": question2})
result2

In [None]:
# Show the "chat_history" entry of the latest result.
result2.get("chat_history")