# Build a RAG pipeline with Gemma 2b-it, Chroma and Haystack 2.x


<img src="https://huggingface.co/blog/assets/gemma/Gemma-logo-small.png" alt="gemma" width="200" style="display:inline;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<img src="https://haystack.deepset.ai/images/haystack-ogimage.png" alt="haystack logo" width="300" style="display:inline;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<img src="https://www.trychroma.com/chroma-logo.png" alt="chroma logo" width="200" style="display:inline;">


We will see what we can build with the new [Google Gemma open models](https://blog.google/technology/developers/gemma-open-models/) and the [Haystack LLM framework](https://haystack.deepset.ai/).

In [1]:
!pip install -q transformers==4.38.0 sentence-transformers
!pip install -q haystack-ai chroma-haystack PyPDF2

In [2]:
import pandas as pd

In [3]:
# Disable column-width truncation so long text cells are displayed in full.
pd.set_option('display.max_colwidth', None)

## Initialize the LLM

In [4]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

We will load gemma-2b-it locally using the `HuggingFaceLocalGenerator`. Note that the cell below passes only the model name and generation settings; no quantization options are configured. For simplicity, we could also just call the model using the free Hugging Face Inference API with the `HuggingFaceTGIGenerator`.

In [None]:
from haystack.components.generators import HuggingFaceLocalGenerator
from haystack.components.embedders import SentenceTransformersTextEmbedder


# Local generator for the instruction-tuned Gemma 2B checkpoint.
# Only the model id and a cap on generated tokens are configured here.
gemma_gen = HuggingFaceLocalGenerator(
    model="google/gemma-2b-it",
    generation_kwargs={"max_new_tokens": 350}
    )

# NOTE(review): warm_up() presumably loads the model before first use — confirm against the Haystack docs.
gemma_gen.warm_up()

## Prepare documents

In [None]:
!wget --user-agent "Case" "https://cases.justia.com/federal/district-courts/california/candce/3:2020cv06754/366520/813/0.pdf" -O "Google_V_Sonos.pdf"

In [None]:
import PyPDF2

pdf_file_path = "Google_V_Sonos.pdf"

def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF and return it as one string.

    Pages are joined with a form-feed character ('\\f') instead of being
    concatenated back to back. This keeps the last word of one page from
    fusing with the first word of the next, and preserves page boundaries
    in the text (form feed is the conventional page-break marker, which
    Haystack's DocumentSplitter counts to derive a page number).

    Args:
        pdf_path: Path to the PDF file to read.

    Returns:
        The extracted text of all pages, separated by '\\f'. Pages with no
        extractable text contribute an empty string.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # `or ""` guards against pages for which no text could be extracted.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

    # Single join instead of repeated string concatenation.
    return "\f".join(page_texts)

pdf_text = extract_text_from_pdf(pdf_file_path)

In [None]:
from haystack import Document

# Wrap the whole extracted PDF text in a single Haystack Document.
# The only metadata attached is the path of the source PDF.
doc = Document(
    content=pdf_text,
    meta={"pdf_path": pdf_file_path}
)

## Build the indexing Pipeline

In [None]:
from haystack.components.fetchers import LinkContentFetcher
from haystack_integrations.document_stores.chroma import ChromaDocumentStore
from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever

In [None]:
from haystack import Pipeline
#from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.writers import DocumentWriter
from haystack.document_stores.types import DuplicatePolicy
from haystack.components.embedders import SentenceTransformersDocumentEmbedder

In [None]:
# Chroma-backed store for the indexed chunks, created with default arguments.
document_store = ChromaDocumentStore()
# Embeds documents at indexing time. The query pipeline below embeds questions
# with the same model name ("BAAI/bge-large-en-v1.5"); keep the two in sync.
document_embedder = SentenceTransformersDocumentEmbedder("BAAI/bge-large-en-v1.5")

In [None]:
# Indexing pipeline: cleaner -> splitter -> embedder -> writer.
indexing = Pipeline()


# Register the components under the names used by connect() and run().
indexing.add_component("cleaner", DocumentCleaner())
# Split into chunks of 300 words each.
indexing.add_component("splitter", DocumentSplitter(split_by="word", split_length=300))
indexing.add_component("embedder", document_embedder)
# Write the embedded chunks into the Chroma store, overwriting duplicates.
indexing.add_component("writer", DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE))

# Wire each component's output into the next stage.
indexing.connect("cleaner", "splitter")
indexing.connect("splitter", "embedder")
indexing.connect("embedder", "writer")

In [None]:
# Feed the single Document to the "cleaner" component, the first stage of the indexing pipeline.
indexing.run({"cleaner":{"documents":[doc]}})

In [None]:
# Sanity check: show the text of the first document returned by the store.
document_store.filter_documents()[0].content

In [None]:
# Sanity check: number of documents returned by the store when no filter is given.
len(document_store.filter_documents())

## Build the RAG Pipeline

In [None]:
from haystack.components.builders import PromptBuilder

# Prompt in Gemma's chat format (<start_of_turn>user ... <start_of_turn>model).
# Each retrieved chunk is rendered together with its page so the model can cite it.
#
# The page is read from meta['page_number'], the key DocumentSplitter writes on
# every chunk. The previous key, 'page', is never set anywhere in this notebook,
# so it rendered as an empty string and the model had no page to report.
# NOTE(review): DocumentSplitter derives page_number from form-feed ('\f')
# characters in the document text; text without form feeds yields page 1 for
# every chunk.
# The `default` filter makes missing metadata visible instead of silently blank.
prompt_template = """
<start_of_turn>user
Using the information contained in the context, give a comprehensive answer to the question.
If the answer is contained in the context, also report the source PAGE.
If the answer cannot be deduced from the context, do not give an answer.

Context:
  {% for doc in documents %}
  {{ doc.content }} PAGE:{{ doc.meta['page_number'] | default('unknown') }}
  {% endfor %};
  Question: {{question}}<end_of_turn>

<start_of_turn>model
"""
prompt_builder = PromptBuilder(template=prompt_template)

In [None]:
# Query pipeline: text_embedder -> retriever -> prompt_builder -> llm.
gemma_rag = Pipeline()
# Same embedding model name as the document embedder used for indexing.
gemma_rag.add_component("text_embedder",SentenceTransformersTextEmbedder("BAAI/bge-large-en-v1.5"))
gemma_rag.add_component("prompt_builder", prompt_builder)
# Retrieves from the Chroma store that the indexing pipeline wrote to.
gemma_rag.add_component("retriever", ChromaEmbeddingRetriever(document_store=document_store))
gemma_rag.add_component("llm", gemma_gen)

# The question embedding drives retrieval, the retrieved documents fill the
# prompt template, and the rendered prompt is sent to the generator.
gemma_rag.connect("text_embedder.embedding", "retriever.query_embedding")
gemma_rag.connect("retriever.documents", "prompt_builder.documents")
gemma_rag.connect("prompt_builder.prompt", "llm.prompt")

In [None]:
def run_query_pipeline(question, llm, top_k=3, max_new_tokens=350):
    """Run one question through a RAG pipeline and return the first reply.

    Args:
        question: The user question. It is sent both to the text embedder
            (for retrieval) and to the prompt builder (for the prompt).
        llm: Despite the name, this is the whole pipeline object, not a bare
            generator. It must expose ``run(data)`` and contain components
            named ``text_embedder``, ``retriever``, ``prompt_builder`` and
            ``llm``.
        top_k: Number of documents requested from the retriever.
            Defaults to 3, the value that was previously hard-coded.
        max_new_tokens: Generation cap passed to the ``llm`` component.
            Defaults to 350, the value that was previously hard-coded.

    Returns:
        The first element of ``result['llm']['replies']``.

    Raises:
        IndexError: If the pipeline returns an empty list of replies.
    """
    data = {
        "text_embedder": {"text": question},
        "retriever": {"top_k": top_k},
        "prompt_builder": {"question": question},
        "llm": {"generation_kwargs": {"max_new_tokens": max_new_tokens}}
    }
    result = llm.run(data)
    return result['llm']['replies'][0]

In [None]:
# Smoke test: run a single question through the Gemma RAG pipeline and print the reply.
print(run_query_pipeline("Summarize what happened in Google v. Sonos", gemma_rag))

## Answer questions

In [None]:
def run_queries_on_multiple_llms(queries, llms):
    """Ask every query to every pipeline and tabulate the replies.

    Args:
        queries: Iterable of question strings.
        llms: Sequence of pipelines accepted by ``run_query_pipeline``.

    Returns:
        A pandas DataFrame with one row per query: a ``Query`` column followed
        by one ``LLM_<n>_Response`` column per pipeline (n starts at 1).
    """
    def response_column(position):
        # Column header for the pipeline at the given 0-based position.
        return f'LLM_{position+1}_Response'

    rows = []
    for question in queries:
        # One record per question, filled with each pipeline's reply in order.
        record = {'Query': question}
        record.update(
            (response_column(position), run_query_pipeline(question, pipeline))
            for position, pipeline in enumerate(llms)
        )
        rows.append(record)

    # Fix the column order explicitly so it is stable even with no rows.
    header = ['Query'] + [response_column(position) for position in range(len(llms))]
    return pd.DataFrame(rows, columns=header)

In [None]:
# Questions to put to each RAG pipeline, one list entry per question.
questions = [
    "What specific patents does Sonos claim Google has infringed upon in their multi-room audio technology?",
    "How has Google responded to the allegations of patent infringement made by Sonos?",
    "What are the key legal arguments presented by Sonos in their lawsuit against Google?",
    "Has the International Trade Commission (ITC) made any preliminary rulings or decisions regarding the patent infringement claims between Google and Sonos?",
    "What are the potential implications for both Google and Sonos depending on the outcome of the legal dispute?",
]

In [None]:
# Pipelines to compare. Each entry must be a pipeline accepted by
# run_query_pipeline; every entry adds one LLM_<n>_Response column.
rag_pipelines = [gemma_rag]

# Get the DataFrame with responses
results_df = run_queries_on_multiple_llms(questions, rag_pipelines)

In [None]:
results_df