In [1]:
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import (
    StreamlitChatMessageHistory,
)
from langchain.memory import ConversationSummaryMemory

from langchain_core.prompts import ChatPromptTemplate
from langchain.retrievers import MultiVectorRetriever
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains import create_history_aware_retriever

from langchain.storage._lc_store import create_kv_docstore
from langchain.storage import InMemoryStore, LocalFileStore


from langchain_community.vectorstores import Chroma
from langchain.docstore.document import Document



import sys
import os
from typing import List
from utils import language_detection_chain, text_embedding_3large, azure_openai
from dotenv import load_dotenv, find_dotenv
# Import pysqlite3 and re-register it in sys.modules under the name "sqlite3",
# so any later `import sqlite3` resolves to pysqlite3 instead of the stdlib module.
# NOTE(review): presumably needed because the vector store requires a newer
# SQLite than the system one — confirm. The swap must run before anything
# imports sqlite3.
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

# Load environment variables from the ".env" file located by find_dotenv.
load_dotenv(find_dotenv(".env"))
# Azure OpenAI settings read from the environment (None when unset).
# They are not referenced again in the cells visible here.
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")


  warn_deprecated(
  warn_deprecated(


In [2]:
# Answer-generation prompt template.
# - `{context}` is a template placeholder that must be supplied when the
#   prompt is formatted.
# - `{{Answer here}}` uses doubled braces so that it is kept as literal text
#   rather than treated as a placeholder.
# NOTE(review): the text contains typos ("deliminated", "serivec", "usefull").
# They are left untouched here because this string is sent to the model
# verbatim and editing it changes runtime behaviour.
system_prompt = """
As an AI assistant specializing in student support, your task is to provide concise and comprehensive answers to specific questions based on the provided context. 
The context is a list of sources. Each source includes source name and information.
You MUST follow instruction deliminated by ###.

###
Instructions:

1. Begin by reading the context carefully.
2. Answer the question based on the information in the context.
3. If you don’t know the answer, say "Sorry, the documents do not mention about this information. Please contact the Student Information Office via studentservice@buv.edu.vn for further support. Thank you". Do not fabricate responses. And Do not make up references
4. Keep your answer as succinct as possible, but ensure it includes all relevant information from the context. For examples: 
    - if students ask about a department or services, you should answer not only department name or serivec name, but also service link and department contact such as email, phone, ... if those information have in the context. 
    - if context does not have specific answer, but contain reference information such as reference link, reference contact point, support contact point and so on. Then you should show it up.
    - if context contains advices for specific student's action, you should show it up.
5. Always include the source name from the context for each fact you use in the response in the following format: 
```
{{Answer here}} 

Sources:
- Source name 1
- Source name 2
....
- Source name n
```
### 

--- Start Context:
{context}
--- End Context

Note that if the previous conversations contains usefull information, you can response based on provided context and those information too. 
Only answer in English.
"""


# Instructions for rewriting the latest student message into a standalone
# question. The text tells the model to return the message unchanged unless a
# rewrite is strictly necessary, and never to answer it.
# The string has no template placeholders of its own.
contextualize_q_system_prompt = (
    """
As an expert in natural language processing, your task is to transform a given student's question, which may reference prior chat history, into a standalone question that can be understood without any context from the chat history.
Do not answer the question, simply reformulate it if necessary.
Because If you change the question a little bit, It can lead the question to have the different meaning and lead to bot answer incorrectly.
So You Must Prioritize returning the latest question as it is, and only reformulate it if absolutely necessary.
Sometimes if students just say somethings and can be understood without context, not change it to the question, just keep it as it is.
"""
)


In [3]:
# Module-level chat history registered under the "su_follow_up_memory" key
# (presumably a Streamlit session-state key — confirm against the app).
# This is the object that trim_messages reads and rewrites.
demo_ephemeral_chat_history = StreamlitChatMessageHistory(
    key="su_follow_up_memory")

def trim_messages(chain_input):
    """Keep only the two most recent messages in the module-level chat history.

    Args:
        chain_input: The chain's input payload. It is not used; the parameter
            exists so the function can be used as a step that receives the
            chain input.

    Returns:
        ``False`` when the history holds two messages or fewer and was left
        untouched; ``True`` when older messages were dropped.
    """
    # Snapshot the messages before clearing. ``messages`` may hand back the
    # live backing list rather than a copy; if ``clear()`` then empties that
    # list in place, an un-copied reference would be emptied as well and the
    # loop below would have nothing to re-add, wiping the whole history.
    stored_messages = list(demo_ephemeral_chat_history.messages)
    if len(stored_messages) <= 2:
        return False

    demo_ephemeral_chat_history.clear()

    # Re-add the last two messages in their original order.
    for message in stored_messages[-2:]:
        demo_ephemeral_chat_history.add_message(message)

    return True


def format_docs_with_sources(docs: List[Document]) -> str:
    """Render documents as labelled source blocks joined by blank lines.

    Args:
        docs: Documents whose ``metadata`` provides ``file_name`` and ``page``
            entries and whose ``page_content`` is the text to show.

    Returns:
        One block per document (a "Source Name" line followed by an
        "Information" line), with consecutive blocks separated by a blank
        line. An empty ``docs`` yields an empty string.

    Raises:
        KeyError: If a document's metadata mapping lacks ``file_name`` or
            ``page``.
    """

    def _render(doc):
        # The leading spaces inside the literal are part of the emitted text.
        meta = doc.metadata
        return f"""\
        Source Name: {meta['file_name']} - Page {meta['page']}
        Information: {doc.page_content}
        """

    return "\n\n".join(_render(doc) for doc in docs)

# Chroma vector store opened from a persisted directory, using the
# text_embedding_3large embedding function imported from utils.
# NOTE(review): "zie" in the variable name looks like a typo for "size"; the
# directory name suggests 400-sized chunks — confirm before renaming.
vectorstore_chunk_zie_400 = Chroma(
    persist_directory="./processed_data/chroma_db/su_embedding_400_large_with_source", embedding_function=text_embedding_3large
)

# File-backed key-value store, wrapped so it can serve as a document store.
fs = LocalFileStore(
    "./processed_data/parent_document_store/su_embedding_large_with_source")
store = create_kv_docstore(fs)
# Retriever built from the vector store plus the document store; k=2 is
# passed through as a search argument.
# presumably vector hits are mapped to the parent documents kept in `store`
# and at most two results are fetched per query — verify against the library.
parent_document_retriever = MultiVectorRetriever(
    vectorstore=vectorstore_chunk_zie_400,
    docstore=store,
    search_kwargs={"k": 2},
)


# Prompt used to rewrite a follow-up into a standalone question: the rewrite
# instructions, then the prior turns ("chat_history"), then the latest user
# message ("input").
# NOTE(review): the instructions are sent with the "human" role although the
# variable is named *_system_prompt — confirm this is intentional.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)



# Retriever that uses the azure_openai model and the prompt above together
# with parent_document_retriever.
parent_document_with_history_aware_retriever = create_history_aware_retriever(
    azure_openai, parent_document_retriever, contextualize_q_prompt
)

# Pipe the retrieved documents into format_docs_with_sources so the chain
# yields a single formatted string.
custom_retriever_chain = parent_document_with_history_aware_retriever | format_docs_with_sources


# Answering prompt: the instruction text (which contains the {context}
# placeholder), then the prior turns ("chat_history"), then the latest user
# message ("input").
# NOTE(review): system_prompt is sent with the "human" role, not "system" —
# confirm this is intentional.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)


# Full RAG pipeline: add a "context" entry produced by custom_retriever_chain
# to the input, fill qa_prompt, call the azure_openai model, and parse the
# model output to a plain string.
rag_chain_with_parent_retriever_with_sources = (
    RunnablePassthrough.assign(context=custom_retriever_chain)
    | qa_prompt
    | azure_openai
    | StrOutputParser()
)



  warn_deprecated(


In [4]:
def chain_with_follow_up_function(message_history):
    """Assemble the follow-up chain: a trimming step followed by the RAG chain with history.

    Args:
        message_history: Chat history object handed back for every session id.

    Returns:
        A runnable that first stores the result of ``trim_messages`` under the
        ``messages_trimmed`` key of the input and then invokes the RAG chain
        wrapped with message history (input key ``"input"``, history key
        ``"chat_history"``).
    """

    def _history_for(session_id):
        # Every session id resolves to the single history passed in.
        return message_history

    # NOTE(review): trim_messages operates on the module-level
    # demo_ephemeral_chat_history, not on ``message_history``. If callers pass
    # a different history object it is never trimmed here — confirm intent.
    trimming_step = RunnablePassthrough.assign(messages_trimmed=trim_messages)

    history_bound_chain = RunnableWithMessageHistory(
        rag_chain_with_parent_retriever_with_sources,
        _history_for,
        input_messages_key="input",
        history_messages_key="chat_history",
    )

    return trimming_step | history_bound_chain