In [1]:
import os
import dotenv
# Load variables from a .env file (if one is found) into the process environment,
# then read the Groq key from it. `groq_key` is None when the variable is unset.
dotenv.load_dotenv()
groq_key = os.environ.get("GROQ_API_KEY")
# open_api_key = os.getenv("OPENAI_API_KEY")

# Initialize vector database

In [17]:
import langchain_groq
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
# from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain import hub 
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
# Chat model shared by every chain built in this notebook.
# NOTE(review): no API key is passed here; presumably ChatGroq picks up
# GROQ_API_KEY from the environment populated by load_dotenv() — confirm.
llm = langchain_groq.ChatGroq(model="llama3-8b-8192") # type: ignore

In [3]:
doc_path = "./documents/"

# Loader class for each supported file extension. Order matters: all PDFs are
# loaded before any CSV.
loaders_by_suffix = (('.pdf', PyPDFLoader), ('.csv', CSVLoader))

# `docs` collects one `.load()` result per source file (it is NOT flattened here;
# the splitting step iterates over it file by file).
docs = []
for suffix, loader_cls in loaders_by_suffix:
    for filename in os.listdir(doc_path):
        if str(filename).endswith(suffix):
            docs.append(loader_cls(doc_path + filename).load())

In [4]:
# Split every loaded file into overlapping chunks of at most 1000 characters
# (200 characters of overlap between neighbouring chunks).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Build the flat chunk list directly. The previous `sum(list_of_lists, [])`
# re-copied the accumulated list on every addition (quadratic in the number of
# chunks) and reused the name `splits` for two different shapes.
splits = []
for doc in docs:
    # `doc` is the list of documents loaded from one source file.
    splits.extend(text_splitter.split_documents(doc))

In [5]:
# Sentence-transformers model used to embed both the chunks and, later, the queries.
# NOTE(review): the saved cell output shows a deprecation warning; presumably it
# comes from SentenceTransformerEmbeddings — confirm and migrate when convenient.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed every chunk and index it in a Chroma collection.
vectorstore = Chroma.from_documents(splits, embedding=embedding_function)

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


In [6]:
# Prompt template fetched from the LangChain hub; the chains below feed it the
# keys "context" and "question".
prompt = hub.pull("rlm/rag-prompt")

# Retriever over the Chroma index, using its default search settings.
retriever = vectorstore.as_retriever()

In [7]:
def format_docs(docs):
    """Join the ``page_content`` of each item in ``docs`` into one string.

    Consecutive contents are separated by a blank line; an empty ``docs``
    yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)

# Creating response chains

## Simple response chain

In [18]:
# Plain RAG chain: takes the question string, retrieves documents, formats them
# into one text block, fills the prompt, calls the model and returns the answer
# as a string.
rag_chain = (
    RunnableParallel(
        context=retriever | format_docs,
        question=RunnablePassthrough(),
    )
    | prompt
    | llm
    | StrOutputParser()
)

# Answering step that expects a dict already holding the retrieved documents
# under "context"; it replaces that entry with the formatted text before prompting.
rag_chain_docs = (
    RunnablePassthrough.assign(
        context=lambda inputs: format_docs(inputs["context"])
    )
    | prompt
    | llm
    | StrOutputParser()
)

# Same question-answering flow, but the result keeps the raw retrieved documents:
# the output dict has "context", "question" and "answer".
rag_chain_source = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
).assign(answer=rag_chain_docs)

## Response chain with history

In [49]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
# System instruction for the question-rewriting step. It is a single line of text
# (no embedded newlines), assembled here from adjacent string literals.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

In [50]:
# Prompt for the rewriting step: system instruction, then the prior conversation
# (injected under "chat_history"), then the latest user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Retriever that is given the model and the rewriting prompt, so follow-up
# questions can be made standalone before the vector store is queried.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [51]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.messages import HumanMessage, AIMessage

# System instruction for the answering step. The instruction sentences form one
# line; a single newline separates them from the "{context}" placeholder that the
# documents chain fills in.
qa_system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n"
    "{context}"
)
# Answering prompt: system instruction (with the retrieved context), the prior
# conversation under "chat_history", then the latest user input.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Chain that places the retrieved documents into the prompt and asks the model.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full conversational RAG chain: history-aware retrieval followed by answering.
# It is invoked with "input" and "chat_history"; the reply is read from "answer".
rag_chain_chat = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

# Chat Bots

## Simple response chat bot

In [44]:
import gradio as gr
def ask(prompt: str):
    """Run one question through ``rag_chain_source``.

    Returns a 2-tuple ``(answer, context)`` taken from the chain's result dict;
    the two values feed the two text outputs of the interface below.
    """
    result = rag_chain_source.invoke(prompt)
    answer, context = result["answer"], result["context"]
    return answer, context


# One text input, two text outputs (answer and retrieved context).
demo = gr.Interface(fn=ask, inputs=["text"], outputs=["text", "text"])
demo.launch()

## Chat bot with history

In [53]:
def chat(message, history):
    """Answer ``message`` with the history-aware RAG chain.

    Parameters
    ----------
    message:
        The latest user message.
    history:
        Earlier turns as handed over by ``gr.ChatInterface``. Two layouts are
        accepted:

        * pair style - each item is ``(user_text, assistant_text)``;
        * message style - each item is a dict with ``"role"`` and ``"content"``.

    Returns
    -------
    The ``"answer"`` entry of the chain result.
    """
    chat_history = []
    for turn in history:
        if isinstance(turn, dict):
            # Message-style history: one dict per message. Unpacking such a dict
            # as a pair would iterate its keys, not the conversation.
            content = turn.get("content")
            if content is None:
                continue
            role = turn.get("role")
            if role == "user":
                chat_history.append(HumanMessage(content=content))
            elif role == "assistant":
                chat_history.append(AIMessage(content=content))
            continue

        # Pair-style history. Either side may be None (e.g. a turn with no reply
        # yet); skip it instead of passing None as message content.
        human, ai = turn
        if human is not None:
            chat_history.append(HumanMessage(content=human))
        if ai is not None:
            chat_history.append(AIMessage(content=ai))

    resp = rag_chain_chat.invoke({"input": message, "chat_history": chat_history})
    return resp["answer"]


gr.ChatInterface(chat).launch()

Running on local URL:  http://127.0.0.1:7870

To create a public link, set `share=True` in `launch()`.


