# ConversationalRetrievalChain using Memory
Implementation inspired from:
> https://python.langchain.com/docs/use_cases/chatbots

Some implementations based on this workaround:
> https://github.com/langchain-ai/langchain/issues/2303#issuecomment-1677280257 

which deals with *Memory* and *LLMChain* components in *ConversationalRetrievalChain*.

Retriever with threshold from SQLBasedChains.ipynb.

In [1]:
#### Load retriever vectorstore for similar questions and queries

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from chromadb.config import Settings
import chromadb


embedding_model = "uer/sbert-base-chinese-nli" 
persist_directory = "../vectorstore/"
score_threshold = 0.8
top_k = 3
CHROMA_SETTINGS = Settings(persist_directory=persist_directory, anonymized_telemetry=False)


embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
chroma_client = chromadb.PersistentClient(settings=CHROMA_SETTINGS, path=persist_directory)
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS, client=chroma_client)


retriever = vectordb.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={
        "k": top_k, 
        "score_threshold" : score_threshold
    }
)

In [2]:
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


model_path = "../models/ggml-model-gpt4all-falcon-q4_0.bin"


llm = GPT4All(
    model=model_path, 
    max_tokens=1000,
    backend='gptj',
    n_batch=8, 
    callbacks=[StreamingStdOutCallbackHandler()], 
    verbose=False
)

Found model file at  ../models/ggml-model-gpt4all-falcon-q4_0.bin
falcon_model_load: loading model from '../models/ggml-model-gpt4all-falcon-q4_0.bin' - please wait ...
falcon_model_load: n_vocab   = 65024
falcon_model_load: n_embd    = 4544
falcon_model_load: n_head    = 71
falcon_model_load: n_head_kv = 1
falcon_model_load: n_layer   = 32
falcon_model_load: ftype     = 2
falcon_model_load: qntvr     = 0
falcon_model_load: ggml ctx size = 3872.64 MB
falcon_model_load: memory_size =    32.00 MB, n_mem = 65536
falcon_model_load: .

objc[57744]: Class GGMLMetalClass is implemented in both /opt/miniconda3/envs/gnn/lib/python3.11/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x28bcf4228) and /opt/miniconda3/envs/gnn/lib/python3.11/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x28bbbc228). One of the two will be used. Which one is undefined.


....................... done
falcon_model_load: model size =  3872.59 MB / num tensors = 196


In [3]:
from langchain.chains import LLMChain
from langchain.llms import FakeListLLM
from langchain.prompts import (
    PromptTemplate,
    ChatPromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    )
from langchain.memory import ConversationBufferMemory
from langchain.memory import ConversationSummaryBufferMemory


class NoOpLLMChain(LLMChain):
    def __init__(self):
        super().__init__(llm=FakeListLLM(responses=["FakeListLLM response"]), prompt=PromptTemplate(template="", input_variables=[]))
    
    def run(self, question: str, *args, **kwargs) -> str:
        return question


prompt = ChatPromptTemplate(messages=[
    SystemMessagePromptTemplate.from_template(
        "You are a converstional chatbot named Aida. "
        "Try to be friendly, and answer questions to be best ability as you can, "
        "and gives `I'don't know` as response if the question is out of your knowledge base." ),
    MessagesPlaceholder(variable_name="chat_history"),
    HumanMessagePromptTemplate.from_template("{question}"),
])

# memory = ConversationSummaryBufferMemory(
#     llm=llm, 
#     max_token_limit=2000, 
#     memory_key="chat_history",
#     output_key='answer',
#     return_messages=True,
#     verbose=True
# )

memory = ConversationBufferMemory(
    memory_key="chat_history",
    output_key="answer",
    return_messages=True,
)

---
## LLMChain
A gentle transition to ConversationalRetrievalChain.

In [None]:
from langchain.chains import LLMChain


conversation = LLMChain(
    llm=llm,
    prompt=prompt,
    memory=memory,
    verbose=True,
)

In [None]:
conversation.invoke({"question": "What is your name, my dear?"})

In [None]:
conversation({"question": "What's the meaning of that?"})

In [None]:
conversation({"question": "What are some of these cultures?"})

In [None]:
conversation({"question": "Your name is Aida, not Aidan."})

---
## ConversationalRetrievalChain

In [11]:
from langchain.chains import ConversationalRetrievalChain


qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    chain_type="stuff",
    memory=memory,
    retriever=retriever,
    # get_chat_history=lambda h: h,
    verbose=True,
    return_source_documents=True,
)

qa.question_generator = NoOpLLMChain()

modified_template = """
Use the following pieces of context to answer the users question. If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
{context}
Question:
{question}

Chat History:
{chat_history}
"""
system_message_prompt = SystemMessagePromptTemplate.from_template(modified_template)
qa.combine_docs_chain.llm_chain.prompt.template = modified_template

# add chat_history as a variable to the llm_chain's ChatPromptTemplate object
qa.combine_docs_chain.llm_chain.prompt.input_variables = ['context', 'question', 'chat_history']

In [9]:
print(qa.combine_docs_chain.llm_chain.prompt.template)


Use the following pieces of context to answer the users question. If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
{context}

Chat History:
{chat_history}



In [12]:
question0 = "Hi, how are you my friend?"
result0 = qa({"question": question0})
result0



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Use the following pieces of context to answer the users question. If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------

Question:
Hi, how are you my friend?

Chat History:

[0m

Answer:
I am good thank you! How are you today?
[1m> Finished chain.[0m

[1m> Finished chain.[0m


{'question': 'Hi, how are you my friend?',
 'chat_history': [HumanMessage(content='Hi, how are you my friend?', additional_kwargs={}, example=False),
  AIMessage(content='\nAnswer:\nI am good thank you! How are you today?', additional_kwargs={}, example=False)],
 'answer': '\nAnswer:\nI am good thank you! How are you today?',
 'source_documents': []}

In [13]:
question = "What is your name and what is its meaning?"
result = qa({"question": question})
result





[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Use the following pieces of context to answer the users question. If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------

Question:
What is your name and what is its meaning?

Chat History:

Human: Hi, how are you my friend?
Assistant: 
Answer:
I am good thank you! How are you today?
[0m

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Assistant:

Ass

{'question': 'What is your name and what is its meaning?',
 'chat_history': [HumanMessage(content='Hi, how are you my friend?', additional_kwargs={}, example=False),
  AIMessage(content='\nAnswer:\nI am good thank you! How are you today?', additional_kwargs={}, example=False),
  HumanMessage(content='What is your name and what is its meaning?', additional_kwargs={}, example=False),
  AIMessage(content='\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssi

In [14]:
result.get("chat_history")

[HumanMessage(content='Hi, how are you my friend?', additional_kwargs={}, example=False),
 AIMessage(content='\nAnswer:\nI am good thank you! How are you today?', additional_kwargs={}, example=False),
 HumanMessage(content='What is your name and what is its meaning?', additional_kwargs={}, example=False),
 AIMessage(content='\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\nAssistant:\n\

In [None]:
question2 = "Help me get an as beautiful and meaingful of a name as yours, please."
result2 = qa({"question": question2})
result2

In [None]:
result2.get("chat_history")