# Build a Document Chat Bot using LangChain (RAG technique) and Streamlit (UI)

## Ingesting Data into the Vector DB (Pinecone)

In [None]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import ReadTheDocsLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore

load_dotenv()

# The splitter tries the separators ["\n\n", "\n", " ", ""] in order, recursively, until each chunk fits chunk_size.
def ingest_docs():
    """Load the local docs dump, split it into chunks and store them in Pinecone.

    Reads the ReadTheDocs export under ``./data/langchain-docs-old``, splits
    every document into chunks of 1000 characters with a 100-character
    overlap, embeds the chunks with ``OpenAIEmbeddings`` and writes them to
    the Pinecone index named by the ``INDEX_NAME`` environment variable.

    ``INDEX_NAME`` is read with ``os.environ.get``, so a missing variable is
    passed on to Pinecone as ``None`` rather than raising here.
    """
    docs_dir = os.path.abspath("./data/langchain-docs-old")
    embedder = OpenAIEmbeddings()
    target_index = os.environ.get("INDEX_NAME")

    # Load every page of the documentation export.
    raw_pages = ReadTheDocsLoader(path=docs_dir, encoding="UTF-8").load()

    # Split on paragraph, line and word boundaries before falling back to
    # splitting between characters.
    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", " ", ""],
        chunk_size=1000,
        chunk_overlap=100,
    )
    chunks = splitter.split_documents(documents=raw_pages)
    chunk_count = len(chunks)
    print(f"Splitted into {chunk_count} chunks")

    # Embed and upload in one call.
    print(f"Inserting {chunk_count} chunks to pinecone...")
    PineconeVectorStore.from_documents(
        chunks,
        index_name=target_index,
        embedding=embedder,
    )
    print(f"########---Embedded {chunk_count} chunks to pinecone---#######")




# ingest_docs()

    # llm = ChatOpenAI(temperature=0,max_tokens="50")
    # llm.invoke("what is langchain? explain in simple terms!")

## Core LLM Function Call with query and Chat History

![image1](./images/conversational_retrieval_chain.png)

In [1]:
# filePath --> package(backend/core.py)

import os
import pprint as pp
from typing import Any, List, Optional, Tuple

from dotenv import load_dotenv
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.retrieval import create_retrieval_chain
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore


load_dotenv()
def run_llm(query: str, chat_history: Optional[List[Tuple]] = None) -> Any:
    """Answer ``query`` from the Pinecone index, taking chat history into account.

    Args:
        query: The user's latest question.
        chat_history: Earlier turns of the conversation as ``(role, text)``
            tuples such as ``("human", "...")`` / ``("ai", "...")``.
            ``None`` (the default) means no prior conversation.

    Returns:
        The result of invoking the retrieval chain. The sample output in this
        notebook shows a dict that includes ``"answer"``, ``"chat_history"``
        and ``"context"`` keys.

    Raises:
        KeyError: If ``CHAT_ASSISTANT_PINECONE_INDEX_NAME`` is not set in the
            environment.
    """
    # Use a fresh list per call: a mutable default (``chat_history=[]``) would
    # be one shared object across every call of this function.
    history = [] if chat_history is None else chat_history

    # Models and prompts
    llm = ChatOpenAI(temperature=0)
    embedding = OpenAIEmbeddings()
    retrieval_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
    rephrase_prompt = hub.pull("langchain-ai/chat-langchain-rephrase")

    # Vector DB holding the previously ingested chunks
    vector_store = PineconeVectorStore.from_existing_index(
        index_name=os.environ["CHAT_ASSISTANT_PINECONE_INDEX_NAME"],
        embedding=embedding,
    )

    # Chain that answers from the retrieved documents
    combine_docs_chain = create_stuff_documents_chain(llm, retrieval_prompt)

    # The LLM first rewrites (query + chat history) into a single query, which
    # is then used to retrieve documents. This takes the place of passing the
    # plain ``vector_store.as_retriever()`` to the retrieval chain.
    history_aware_retriever = create_history_aware_retriever(
        llm, vector_store.as_retriever(), rephrase_prompt
    )

    retrieval_chain = create_retrieval_chain(
        history_aware_retriever,
        combine_docs_chain=combine_docs_chain,
    )

    return retrieval_chain.invoke({"input": query, "chat_history": history})



# ----- Example calls
# chat_history = [("human","who created langchain?"),("ai","harrison chase created it!")]
# run_llm("tell me more about him", chat_history)

# Ask a single question with no chat history and pretty-print the chain's result.
pp.pprint(run_llm(query="who built the langchain?"))
    

{'answer': 'LangChain was built by Harrison Chase.',
 'chat_history': [],
 'context': [Document(page_content='Get Started with LangChain in Node.js by Developers Digest\nLangChain + OpenAI tutorial: Building a Q&A system w/ own text data by Samuel Chan\nLangchain + Zapier Agent by Merk\nConnecting the Internet with ChatGPT (LLMs) using Langchain And Answers Your Questions by Kamalraj M M\nBuild More Powerful LLM Applications for Business’s with LangChain (Beginners Guide) by No Code Blackbox\nprevious\nTracing\n Contents\n  \nIntroduction to LangChain with Harrison Chase, creator of LangChain\nTutorials\nVideos (sorted by views)\nBy Harrison Chase\n    \n      © Copyright 2023, Harrison Chase.\n      \n  Last updated on Apr 25, 2023.', metadata={'source': 'D:\\Python\\LLM\\doc-assistant\\data\\langchain-docs-old\\langchain.readthedocs.io\\en\\latest\\youtube.html'}),
             Document(page_content="by Sam, a successful entrepreneur, who is working on a hackathon project with Deven 

## Connecting the LLM with streamlit UI  (streamlit run main.py)

In [None]:
# filePath: main.py
# run using streamlit run main.py --> serves a webpage

import streamlit as st
from streamlit_chat import message

# own import
# from backend.core import run_llm


# Streamlit re-runs this whole script on every interaction; st.session_state
# keeps the conversation data alive between those re-renders.
for _state_key in ("user_prompt_history", "chat_answer_history", "chat_history"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = []


st.header("Langchain(v1) -- Documentation Helper Bot")
prompt = st.text_input("Prompt", placeholder="Enter your prompt here..")

if prompt:
    with st.spinner("Generating responses..."):
        # NOTE: run_llm must be in scope here. When this runs as main.py,
        # enable the `from backend.core import run_llm` import above.
        generated_response = run_llm(
            query=prompt, chat_history=st.session_state["chat_history"]
        )
        # Single quotes inside the f-string: reusing the outer double quotes
        # is a SyntaxError on Python versions before 3.12.
        formatted_answer = f"\n{generated_response['answer']}"
        st.session_state["user_prompt_history"].append(prompt)
        st.session_state["chat_answer_history"].append(formatted_answer)
        # chat_history is what gets passed back to run_llm on the next prompt.
        st.session_state["chat_history"].extend(
            [("human", prompt), ("ai", formatted_answer)]
        )
        print(generated_response)

# Replay the whole conversation. Each bubble gets an explicit, unique key so
# that two messages with identical text do not collide as duplicate widgets.
for turn, (user_query, answer) in enumerate(
    zip(
        st.session_state["user_prompt_history"],
        st.session_state["chat_answer_history"],
    )
):
    message(user_query, is_user=True, key=f"user_{turn}")
    message(answer, key=f"ai_{turn}")







#### Initializing Streamlit inside Jupyter -- not working; run it from the command line with the correct virtual environment activated instead

In [None]:
# pip install streamlit_jupyter

import streamlit as st

from streamlit_jupyter import StreamlitPatcher, tqdm

# Register streamlit with jupyter-compatible wrappers before calling it.
patcher = StreamlitPatcher()
patcher.jupyter()

st.header("Title from streamlit")