In [5]:
!pip install groq langchain langchain-core langchain-groq chromadb pypdf gradio





In [6]:
# One-time setup: uncomment the line below to install Ollama with its install script.
# It pipes a downloaded script straight into `sh`, so review the script first if in doubt.
# !curl https://ollama.ai/install.sh | sh

After installing Ollama with the cell above (uncomment the command first), open a terminal in the notebook environment and start the server by running `ollama serve`. The cells below that call `ollama` ***will only work*** while the Ollama server is running!

In [7]:
# Download the embedding model that the vector-store cell passes to OllamaEmbeddings.
# Needs the Ollama server (`ollama serve`) to be running.
!ollama pull nomic-embed-text

In [8]:
from langchain_groq import ChatGroq
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community import embeddings
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFDirectoryLoader
# from google.colab import userdata
import os
import time
import textwrap
import gradio as gr

In [9]:
# Ingestion settings, grouped so they are easy to find and tune.
SOURCE_DIR = "source_data"  # directory of PDFs, relative to the working directory
CHUNK_SIZE = 1000           # chunk_size handed to the splitter
CHUNK_OVERLAP = 200         # chunk_overlap handed to the splitter

# Load everything the PDF directory loader finds under SOURCE_DIR.
# NOTE(review): each loaded item appears to be a single PDF page rather than a
# whole file (retrieved metadata carries a page number) — confirm.
loader = PyPDFDirectoryLoader(SOURCE_DIR)
the_text = loader.load()

# Fail fast with a clear message: with nothing loaded, every later cell
# (embedding, retrieval, chat) would have no content to work with.
if not the_text:
    raise FileNotFoundError(
        f"No PDF content was loaded from '{SOURCE_DIR}'. "
        "Check that the directory exists and contains PDF files."
    )

# Split the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = text_splitter.split_documents(the_text)

print(f"Loaded {len(the_text)} documents.")
print(f"Split into {len(chunks)} chunks.")

Loaded 352 documents.
Split into 1606 chunks.


In [10]:
# Embedding model served by the local Ollama instance (the one fetched with `ollama pull`).
EMBEDDING_MODEL = "nomic-embed-text"

# Embed every chunk and index it in a Chroma collection.
# - OllamaEmbeddings is used through its direct import instead of reaching
#   through the `embeddings.ollama` attribute chain.
# - Deterministic ids make the cell safe to re-run: without them each run gets
#   fresh random ids, which presumably appends duplicate chunks to the same
#   collection within one kernel session — TODO confirm against the Chroma client in use.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=OllamaEmbeddings(model=EMBEDDING_MODEL),
    ids=[f"chunk-{position}" for position in range(len(chunks))],
    collection_name="ollama_embeds",
)

# Retriever view over the collection, used by the RAG chain to fetch context.
retriever = vectorstore.as_retriever()

  embedding=embeddings.ollama.OllamaEmbeddings(model='nomic-embed-text'),


In [11]:
from dotenv import load_dotenv

# Load variables from a local .env file (if one is present) into the process environment.
load_dotenv()

# Read the Groq API key used to construct the chat model.
# A missing or empty key raises KeyError with instructions, rather than a bare
# KeyError that only names the variable.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise KeyError(
        "GROQ_API_KEY is not set. Add it to a .env file next to this notebook "
        "or export it in the environment before starting the kernel."
    )

In [12]:
# Identifier of the Groq-hosted chat model that answers the RAG prompt.
# NOTE(review): hosted model ids can be retired over time — confirm this one is still served.
GROQ_MODEL_NAME = 'mixtral-8x7b-32768'

# Chat model client, authenticated with the key read from the environment.
llm = ChatGroq(groq_api_key=groq_api_key, model_name=GROQ_MODEL_NAME)

In [13]:
rag_template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
rag_prompt = ChatPromptTemplate.from_template(rag_template)


def _format_docs(docs):
    """Render retrieved documents as plain prompt text.

    Without this step the list of Document objects is interpolated into the
    prompt as its Python repr (escaped newlines, raw metadata dicts), which
    wastes tokens and is harder for the model to read.

    Args:
        docs: Documents returned by the retriever.

    Returns:
        One string with a "[source: ..., page: ...]" header followed by the
        text of each document, separated by blank lines.
    """
    sections = []
    for doc in docs:
        # presumably the PDF loader fills in "source" and "page"; fall back
        # gracefully if a document lacks them.
        meta = doc.metadata or {}
        header = f"[source: {meta.get('source', 'unknown')}, page: {meta.get('page', 'n/a')}]"
        sections.append(f"{header}\n{doc.page_content}")
    return "\n\n".join(sections)


# The incoming question is routed twice: through the retriever (then formatted)
# to build the context, and unchanged into the prompt's {question} slot.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [14]:
# Smoke-test the whole pipeline with one fixed question before wiring up the UI.
smoke_question = "What are these documents about"
response = rag_chain.invoke(smoke_question)

# Wrap the answer at 80 columns so it is readable in the cell output.
wrapped_answer = textwrap.fill(response, width=80)
print(wrapped_answer)

The first document (source\_data\\aapl 10k 2024.pdf, page 95) appears to be a
section of a legal agreement related to a participant's award under a plan,
which may involve the holding of shares on the participant's behalf by a broker
or other third party. The document also includes information about notice
requirements for communication between the parties.  The second document
(source\_data\\tsla-20231231-gen.pdf, page 96) includes references to various
items that will be included and incorporated by reference in the company's 2024
Proxy Statement, including information about certain relationships and related
transactions, director independence, and principal accountant fees and services.
The third document (source\_data\\aapl 10k 2024.pdf, page 72) defines terms
related to the 3.050% 2029 Notes and the 2042 Notes, including what is meant by
"U.S. government obligations" in this context.  The fourth document
(source\_data\\tsla-20231231-gen.pdf, page 2) appears to be a table of conten

In [15]:
# Make the questions dynamic using a chat interface. Let's use gradio for this.
def process_question(user_question):
    """Answer a user's question through the RAG chain and report the latency.

    Args:
        user_question: Question text typed into the chat interface.

    Returns:
        The chain's answer followed by a blank line and a
        "Response time: N.NN seconds." line. If the question is empty or
        only whitespace, a short prompt asking for a question is returned
        and the chain is not invoked.
    """
    # Skip the retriever/LLM round trip when there is nothing to ask.
    if not user_question or not user_question.strip():
        return "Please enter a question."

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    response = rag_chain.invoke(user_question)
    elapsed = time.perf_counter() - start_time

    return f"{response}\n\nResponse time: {elapsed:.2f} seconds."

# Build the Gradio UI: a two-line question box in, a plain text box out.
question_box = gr.Textbox(lines=2, placeholder="Type your question here...")
answer_box = gr.Textbox()

iface = gr.Interface(
    fn=process_question,
    inputs=question_box,
    outputs=answer_box,
    title="GROQ CHAT",
    description="Ask any question about your document, and get an answer along with the response time.",
)

# Start the app. share=True additionally requests a public share link from
# Gradio; the local URL is served either way.
iface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7861

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


