## RAG using ChromaDB and Hugging Face

In [None]:
!pip install -q chromadb langchain pypdf tiktoken
!pip install -q -U langchain-huggingface
!pip install -q -U langchain-community
!pip install -q langchain-chroma
!pip install -q sentence_transformers
!pip install PyPDF2

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.6/67.6 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m141.1/141.1 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.9/71.9 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.6/53.6 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m8.0 MB/s[0m eta [36

In [None]:
import os
import textwrap

import numpy as np
from PyPDF2 import PdfReader
from langchain.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.llms import HuggingFaceEndpoint
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain.chains import RetrievalQA
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [None]:
from google.colab import userdata

### Main

**Edit document filepath as needed**

In [None]:
def process_documents(file_path):
    """
    Process a PDF document to extract text chunks, compute embeddings, and create a retriever.

    The PDF is loaded with PyPDFLoader, split into overlapping character
    chunks, embedded with the "all-MiniLM-L6-v2" sentence-transformer model,
    and stored in a Chroma vector store built from those chunks.

    Args:
    - file_path (str): Path to the PDF file to be processed.

    Returns:
    - retriever: A retriever over the Chroma store that performs similarity
      search and returns the top-3 results.

    Raises:
    - ValueError: If no text chunks could be extracted from the PDF.
    """
    # Load the PDF into LangChain documents.
    loader = PyPDFLoader(file_path)
    pages = loader.load()

    # Split the loaded documents into chunks of at most 700 characters,
    # overlapping by 50 characters so context is not cut off at chunk edges.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=700,
        chunk_overlap=50,
    )
    texts = text_splitter.split_documents(pages)

    # Fail early with a clear message instead of an opaque vector-store error
    # (e.g. for a scanned PDF that contains no extractable text).
    if not texts:
        raise ValueError(f"No text could be extracted from {file_path!r}")

    # Embedding function used to vectorise each chunk.
    embedding_fn = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    # Index the *chunks* (not the unsplit pages) so retrieval works at chunk granularity.
    db = Chroma.from_documents(texts, embedding_fn)

    # Retriever returning the 3 most similar chunks for a query.
    retrieved_search = db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 3},
    )

    return retrieved_search

In [None]:
# Build the module-level retriever; llm_query reads this global when it assembles its chain.
retriever = process_documents(
    file_path="/content/warranty-w11655299-revA.pdf",
)



In [None]:
def llm_query(query):
    """
    Answer a question with a retrieval-augmented Hugging Face language model.

    Builds a "stuff" RetrievalQA chain from a HuggingFaceEndpoint LLM and the
    module-level `retriever`, runs the query through it, and returns the
    answer text.

    Args:
    - query (str): The query string to send to the language model.

    Returns:
    - wrapped_text (str): The chain's 'result' text, re-wrapped by
      textwrap.fill to lines of at most 500 characters.
    """
    repo_id = 'mistralai/Mistral-7B-Instruct-v0.2'

    # Set the Hugging Face API token from Colab user data
    os.environ["HF_TOKEN"] = userdata.get("HF_TOKEN")

    # Initialize the Hugging Face endpoint with specified parameters
    llm = HuggingFaceEndpoint(
        repo_id=repo_id,
        temperature=0.5,
        huggingfacehub_api_token=os.environ["HF_TOKEN"],
    )

    # Initialize the RetrievalQA chain with the language model and retriever
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        return_source_documents=True,
        retriever=retriever,
    )

    # Query the chain; `invoke` replaces the deprecated direct `chain(...)` call.
    result = chain.invoke({"query": query}, return_only_outputs=True)
    wrapped_text = textwrap.fill(result['result'], width=500)

    return wrapped_text

In [None]:
# Example question for the indexed document; edit as needed.
# Defined here so the cell runs on a fresh kernel.
query = "What does the warranty cover?"
response = llm_query(query)
response

In [None]:
# from langchain.vectorstores import Chroma

### Gradio

Run the demo below to ask questions about the document interactively.


In [None]:
!pip install -q gradio

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m46.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.2/318.2 kB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m74.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.9/129.9 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for ffmpy (setup.py) ... [?25l[?25hdone


In [None]:
import gradio as gr

In [None]:
# Wrap llm_query in a minimal text-in / text-out Gradio interface.
demo = gr.Interface(
    title="RAG demo",
    fn=llm_query,
    inputs=gr.Textbox(placeholder="Enter your question here...", lines=2),
    outputs="text",
)

# Start serving the interface
demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://157700d59dc1db858e.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


