# ***🦜 Chatbot - RAG OIC 🦜***


In [None]:
%%capture
!pip install pandas openpyxl langchain openai langchain-openai langchain-community langchain-core langchain-text-splitters
!pip install streamlit chromadb pypdf

In [None]:
%%writefile app_final_proyect.py
import os
import shutil
import streamlit as st
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate
)

# Prefer a key that is already exported in the environment. The placeholder is only a fallback and
# must be replaced with a real key (or OPENAI_API_KEY exported) before launching the app.
# Never commit a real key to version control.
os.environ.setdefault("OPENAI_API_KEY", "<YOUR_API_KEY>")
# Directory where the uploaded PDFs are stored and from which the retriever is built.
save_rag_directory = "RAG"


def save_uploaded_files(uploaded_files):
    """
    Save the uploaded files into the RAG directory and build a document retriever from them.

    When at least one file is given, every file is written into the module-level
    ``save_rag_directory`` (created if it does not exist), a retriever is built with
    ``create_retriever_documents`` and stored in ``st.session_state["retriever"]``, and
    ``st.session_state["rag"]`` is set to ``True``. In every case the function ends by
    calling ``st.rerun()``.

    :param uploaded_files: list of files uploaded by the user; each item must expose
        ``name`` and ``read()``. An empty list (or ``None``) saves nothing.

    Returns: None
    """
    if uploaded_files:
        os.makedirs(save_rag_directory, exist_ok=True)
        for uploaded_file in uploaded_files:
            # Keep only the last path component so a crafted file name cannot
            # point outside the RAG directory.
            safe_name = os.path.basename(uploaded_file.name)
            path_file = os.path.join(save_rag_directory, safe_name)
            with open(path_file, "wb") as f:
                f.write(uploaded_file.read())
            st.write(f"Archivo {uploaded_file.name} guardado")

        st.session_state["retriever"] = create_retriever_documents()
        st.session_state["rag"] = True

    # Both the "files saved" and the "nothing uploaded" paths finish with a rerun.
    st.rerun()

def chatbot_template(system_message, user_question):
    """
    Build the chat prompt used to send custom queries to the LLM.

    :param system_message: Template text for the system message (the role given to the model).
    :param user_question: Template text for the human message.
    :return: A ChatPromptTemplate made of the system message, a ``chat_history``
        messages placeholder and the human message, in that order.
    """
    system_part = SystemMessagePromptTemplate.from_template(system_message)
    history_part = MessagesPlaceholder(variable_name="chat_history")
    human_part = HumanMessagePromptTemplate.from_template(user_question)
    return ChatPromptTemplate(messages=[system_part, history_part, human_part])


def create_retriever_documents():
    """
    Build a retriever over the PDF files stored in ``save_rag_directory``.

    The PDFs are loaded with ``PyPDFDirectoryLoader``, split into chunks
    (``chunk_size=1000``, ``chunk_overlap=100``), embedded with the OpenAI
    ``text-embedding-ada-002`` model and indexed in a Chroma vector store.

    :return: The retriever obtained from the Chroma vector store.
    """
    loaded_docs = PyPDFDirectoryLoader(save_rag_directory).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    embedder = OpenAIEmbeddings(model="text-embedding-ada-002")
    chunks = splitter.split_documents(loaded_docs)
    return Chroma.from_documents(chunks, embedding=embedder).as_retriever()


def stream_response_with_memory_openai(model_name, query, chat_history):
    """
    Stream the answer of a plain (non-RAG) chat turn from an OpenAI chat model.

    The prompt is made of a fixed system message, the given chat history and
    the current question; the model output is parsed to plain text.

    :param model_name: Name of the OpenAI chat model to use.
    :param query: The current user question.
    :param chat_history: Messages of the conversation to include as context.
    :return: The iterator returned by ``chain.stream(query)``, yielding the answer piece by piece.
    """
    system_prompt = """You are a friendly chatbot having a conversation with a human and giving answers only in \
    Spanish."""
    prompt = chatbot_template(system_message=system_prompt, user_question="{user_question}")
    llm = ChatOpenAI(model=model_name)

    # The history does not depend on the chain input, so it is injected by a constant function;
    # the raw query is passed through unchanged as the question.
    chain_inputs = {
        "chat_history": lambda _: chat_history,
        "user_question": RunnablePassthrough(),
    }
    chain = chain_inputs | prompt | llm | StrOutputParser()

    return chain.stream(query)


def stream_rag_with_memory_openai(model_name, query, chat_history, retriever):
    """
    Stream the answer of a RAG chat turn from an OpenAI chat model.

    The query is sent to the retriever, the retrieved documents are joined into
    a single context string that fills the system message, and the chat history
    plus the current question complete the prompt. The model runs with
    ``temperature=0`` and its output is parsed to plain text.

    :param model_name: Name of the OpenAI chat model to use.
    :param query: The current user question.
    :param chat_history: Messages of the conversation to include as context.
    :param retriever: Retriever used to fetch the documents for the context.
    :return: The iterator returned by ``rag_chain.stream(query)``, yielding the answer piece by piece.
    """
    llm = ChatOpenAI(model=model_name, temperature=0)

    system_rag_prompt_template = """You are a specialist in cadastre in the context of the country Colombia. \
    Answer the questions based only on the following context:
    {context}
    """
    prompt = chatbot_template(system_message=system_rag_prompt_template, user_question="{user_question}")

    def format_docs(docs):
        # Concatenate the text of every retrieved document, separated by a blank line.
        page_texts = [doc.page_content for doc in docs]
        return "\n\n".join(page_texts)

    # The history does not depend on the chain input, so it is injected by a constant function;
    # the raw query feeds both the retriever and the question slot.
    chain_inputs = {
        "context": retriever | format_docs,
        "chat_history": lambda _: chat_history,
        "user_question": RunnablePassthrough(),
    }
    rag_chain = chain_inputs | prompt | llm | StrOutputParser()

    return rag_chain.stream(query)


# User interface #
st.title("🦜 Chatbot with RAG - OIC 🦜")

# Setup panel: rendered only while the chat has not been started yet.
if "chat_active" not in st.session_state:
    with st.sidebar.expander("Seleccione modelo y cargue sus archivos."):
        model_openai = st.radio(
            "Seleccione su ChatModel:",
            ["gpt-3.5-turbo-0125", "gpt-4-0125-preview"],
            key="openai_chat_model"
        )
        # Copy the selection to its own key; it is read later when answering questions.
        st.session_state["openai_model"] = model_openai
        if os.path.exists(save_rag_directory):
            delete_dir = st.button(f"Borrar carpeta {save_rag_directory}")
            if delete_dir:
                shutil.rmtree(save_rag_directory)
                st.rerun()

        uploaded_files = st.file_uploader("Elige los archivos PDF para el RAG", accept_multiple_files=True,
                                          type=["pdf"])

        if st.button("Finalizar carga de archivos y Chatear"):
            st.session_state["chat_active"] = True
            save_uploaded_files(uploaded_files)

# =================================================================================== #
# ====   Chatbot section; the memory is kept in Streamlit's session_state.     ====== #
# =================================================================================== #
# List the loaded files; the directory check avoids an error if the folder is missing.
if (
    "chat_active" in st.session_state
    and "rag" in st.session_state
    and os.path.isdir(save_rag_directory)
):
    st.text(f"Archivos cargados para chatear:\n {', '.join(os.listdir(save_rag_directory))}")

# Start the chat history; session_state works as a buffer-style memory #
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Replay the stored conversation on every rerun.
for message in st.session_state.chat_history:
    role = "Human" if isinstance(message, HumanMessage) else "AI"
    with st.chat_message(role):
        st.markdown(message.content)

user_query = st.chat_input("Your message")

if user_query:
    # Snapshot the history BEFORE storing the new question: the prompt template already
    # appends the question as its final human message, so passing the updated list
    # would send the same question to the model twice.
    previous_messages = list(st.session_state.chat_history)
    st.session_state.chat_history.append(HumanMessage(user_query))

    with st.chat_message("Human"):
        st.markdown(user_query)

    with st.chat_message("AI"):
        # Use the RAG chain only when the documents folder exists and a retriever was built.
        if os.path.exists(save_rag_directory) and "retriever" in st.session_state:
            llm_response = st.write_stream(
                stream_rag_with_memory_openai(
                    model_name=st.session_state["openai_model"],
                    query=user_query,
                    chat_history=previous_messages,
                    retriever=st.session_state["retriever"]
                )
            )

        else:
            llm_response = st.write_stream(
                stream_response_with_memory_openai(
                    model_name=st.session_state["openai_model"],
                    query=user_query,
                    chat_history=previous_messages)
            )

    st.session_state.chat_history.append(AIMessage(llm_response))

In [None]:
!npm install localtunnel

In [None]:
!streamlit run /content/app_final_proyect.py &>/content/logs.txt &

In [None]:
!npx localtunnel --port 8501