In [1]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community import embeddings
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings

# Load the documents from the local "data" directory.
loader = PyPDFDirectoryLoader("data")
the_text = loader.load()

# Break the loaded documents into overlapping chunks (size 1000, overlap 200).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
docs = text_splitter.split_documents(the_text)

# Embed the chunks with the Ollama 'nomic-embed-text' model and index them
# in a Chroma collection.
chunk_embedder = OllamaEmbeddings(model='nomic-embed-text')
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=chunk_embedder,
    collection_name="ollama_embeds",
)

# Retriever handle consumed by the RAG chain built in a later cell.
retriever = vectorstore.as_retriever()

In [2]:
from langchain_groq import ChatGroq
from env import groq_api_key
# Chat model served through Groq; the API key is imported from the local env module.
# NOTE(review): confirm this model id is still offered by Groq before relying on it.
llm = ChatGroq(model_name='mixtral-8x7b-32768', groq_api_key=groq_api_key)


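# Alternative: use a local model through Ollama instead of Groq
# (see https://github.com/ollama/ollama). Uncomment the lines below to try it.

# from langchain_community.llms import Ollama

# llm = Ollama(model="llama2")

# llm.invoke("Tell me a joke")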

In [3]:
def format_docs(documents):
    """Join the text of the retrieved documents into one context string.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string,
            i.e. what the retriever hands to the next step of the chain.

    Returns:
        The page contents concatenated with a blank line between passages.
    """
    return "\n\n".join(doc.page_content for doc in documents)


# Prompt that restricts the model to the retrieved context.
rag_template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
rag_prompt = ChatPromptTemplate.from_template(rag_template)

# The incoming question is sent both to the retriever (to fetch context) and,
# unchanged, to the prompt's {question} slot.
rag_chain = (
    # Pipe the retrieved documents through format_docs so {context} receives
    # plain passage text instead of the repr of a list of Document objects.
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [4]:
import os
import time
import textwrap
import gradio as gr

# Smoke-test the chain with one fixed question before building the UI.
response = rag_chain.invoke("What is this document about")
wrapped_response = textwrap.fill(response, width=80)  # wrap for readable cell output
print(wrapped_response)

The document is about the Indian Space Research Organization's (ISRO)
Chandrayaan 3 mission, which is a follow-on mission to Chandrayaan 2. The
primary objective of Chandrayaan 3 is to land a lander and rover in the
highlands near the south pole of the Moon and demonstrate end-to-end landing and
roving capabilities. The mission comprises a lander/rover and a propulsion
module. The lander/rover is similar to the Vikram rover on Chandrayaan 2, with
improvements to ensure a safe landing. The propulsion module carries the lander
and rover configuration and will remain in orbit around the Moon while acting as
a communications relay satellite. Chandrayaan 3 will make several scientific
measurements on the lunar surface and from orbit, and it consists of an
indigenous propulsion module (PM), a lander module (LM), and a rover. The PM
carries the lander and rover from injection orbit to 100 km lunar orbit and
separates the LM from the PM. The LM and the Rover have scientific payloads to
carry o

In [5]:
# Make the questions dynamic using a chat interface. Let's use gradio for this.
def process_question(user_question):
    """Answer a question with the RAG chain and append the elapsed time.

    Args:
        user_question: Question text typed into the Gradio textbox.

    Returns:
        A string holding the chain's answer, a blank line, and a
        "Response time: N.NN seconds." line. When the input is empty or
        only whitespace, a short message asking for a question is returned
        and the chain is not invoked.
    """
    # Skip the retrieval and LLM round-trip when there is nothing to ask.
    if not user_question or not user_question.strip():
        return "Please enter a question."

    # perf_counter is monotonic, so the measured interval cannot go negative
    # or jump if the system clock is adjusted while the request is running.
    started = time.perf_counter()
    response = rag_chain.invoke(user_question)
    elapsed = time.perf_counter() - started

    return f"{response}\n\nResponse time: {elapsed:.2f} seconds."

# Build the Gradio UI: one question box in, one answer box out.
question_box = gr.Textbox(lines=2, placeholder="Type your question here...")
answer_box = gr.Textbox()
iface = gr.Interface(
    fn=process_question,
    inputs=question_box,
    outputs=answer_box,
    title="Personal Knowledge Chat App",
    description="Ask any question about your document, and get an answer along with the response time.",
)

# Start the app. share=True also publishes a temporary public gradio.live
# link, so anyone holding that URL can query the document.
iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://4809b1dba260f61560.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


