In [1]:
!pip install groq langchain langchain-core langchain-groq chromadb pypdf gradio sentence-transformers

Collecting groq
  Downloading groq-0.4.2-py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.7/65.7 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.1.14-py3-none-any.whl (812 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m812.8/812.8 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core
  Downloading langchain_core-0.1.37-py3-none-any.whl (274 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.6/274.6 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-groq
  Downloading langchain_groq-0.0.1-py3-none-any.whl (7.8 kB)
Collecting chromadb
  Downloading chromadb-0.4.24-py3-none-any.whl (525 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m525.5/525.5 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pypdf
  Downloading pypdf-4.1.0-py3-none-any.whl (286 kB)
[2K     [90m━━━━━━━━━━━

In [4]:
from langchain_groq import ChatGroq
from langchain_community.document_loaders import TextLoader, PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
# from langchain_community import embeddings
from langchain_core.prompts import ChatPromptTemplate
# from langchain_core.runnables import RunnablePassthrough
# from langchain_core.output_parsers import StrOutputParser
from google.colab import userdata
import os
import time
import textwrap
import gradio as gr

In [5]:
# Folder on the mounted Google Drive that holds the PDFs to index.
PDF_DIR = "/content/drive/MyDrive/PDF"

# Fail early with a clear message: no visible cell mounts Drive, and a missing
# folder would otherwise likely surface only later, while building the vector store.
if not os.path.isdir(PDF_DIR):
    raise FileNotFoundError(
        f"PDF folder not found: {PDF_DIR!r}. Mount Google Drive (or fix the path) and re-run."
    )

loader = PyPDFDirectoryLoader(PDF_DIR)
text = loader.load()  # documents returned by the loader (not a plain string, despite the name)

if not text:
    raise ValueError(f"No PDF content was loaded from {PDF_DIR!r}.")

In [6]:
# Cut the loaded documents into chunks of up to 1000 characters, with a
# 200-character overlap so context is not lost at chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(text)

In [9]:
from transformers import AutoModel  # NOTE(review): not used in any visible cell — confirm before removing
# Imported from langchain_community, like the loaders and vector store in the
# import cell; the `langchain.embeddings` path is a deprecated alias.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Sentence-transformers model used to embed the chunks; inference runs on CPU.
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cpu"}

embeddings_hf = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

In [10]:
# Embed every chunk with the HuggingFace model and index it in a Chroma
# collection named "groq_embeds".
# NOTE(review): re-running this cell in the same kernel may add the chunks to the
# existing collection a second time — confirm, and restart the kernel if so.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings_hf,
    collection_name="groq_embeds",
)

# Retriever over the collection, using the vector store's default search settings.
retriever = vectorstore.as_retriever()

In [11]:
import os

from google.colab import userdata
from langchain_groq import ChatGroq

# Pull the Groq API key from Colab's secret store (so it is never hard-coded in
# the notebook) and publish it as an environment variable — presumably where
# ChatGroq looks for it; confirm against the langchain-groq docs.
os.environ["GROQ_API_KEY"] = userdata.get("groq_api_key")

# Chat model served by Groq, with temperature fixed at 0.
llm = ChatGroq(model_name="mixtral-8x7b-32768", temperature=0)

In [12]:
from langchain.chains import RetrievalQA

# Prompt for the QA chain: {context} and {question} are the template's two
# input variables, filled in by the chain at query time.
rag_template = """Answer the question based only on the following context:
{context}
Question: {question}
"""

rag_prompt = ChatPromptTemplate.from_template(rag_template)

# Reuse the retriever created alongside the vector store instead of building a
# second, identical one here.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": rag_prompt},
)

In [17]:
# Smoke-test the chain with a sample question before wiring up the UI.
sample_question = "Who is the captian of Chennai superking?"
response = qa_chain.invoke(sample_question)

In [18]:
# Show the whole response dict: the original 'query' plus the generated 'result'.
print(response)

{'query': 'Who is the captian of Chennai superking?', 'result': 'The captain of Chennai Super Kings is MS Dhoni. This is indirectly indicated in the context in the list of IPL winners where it mentions "Chennai Super King s 4 time s 2010, 2011, 2018, 2021" which implies that MS Dhoni, the captain of Chennai Super Kings, has led the team to four IPL titles in the years 2010, 2011, 2018, and 2021.'}


In [19]:
# Only the answer text — this is the field the UI callback returns.
response['result']

'The captain of Chennai Super Kings is MS Dhoni. This is indirectly indicated in the context in the list of IPL winners where it mentions "Chennai Super King s 4 time s 2010, 2011, 2018, 2021" which implies that MS Dhoni, the captain of Chennai Super Kings, has led the team to four IPL titles in the years 2010, 2011, 2018, and 2021.'

In [20]:
def process_question(user_question: str) -> str:
  """Answer a question submitted from the UI using the retrieval QA chain.

  Args:
    user_question: Text typed by the user.

  Returns:
    The 'result' field of the chain's response, or a short prompt to enter
    a question when the input is empty, whitespace-only or None.
  """
  # Guard: don't spend a retrieval + LLM call on an empty submission.
  question = (user_question or "").strip()
  if not question:
    return "Please enter a question."

  response = qa_chain.invoke(question)
  return response['result']

In [21]:
# Single-question UI: a two-line input box wired to process_question, with the
# answer shown in a text box. (Fixes the "questio" typo in the description.)
interface = gr.Interface(
    fn=process_question,
    inputs=gr.Textbox(lines=2, placeholder="Type your question here"),
    outputs=gr.Textbox(),
    title="Groq Chatbot",
    description="Ask any question about your documents",
)

In [22]:
# Start the app. In Colab this enables sharing and serves a temporary public URL
# (see the recorded output below); pass debug=True to surface errors in the notebook.
interface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://dcd767f24c2789def6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


