# Your first RAG application

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm 
import ollama 

In [2]:
# Obtain your embedding & LLM models (execute here or go to terminal)
# mxbai-embed-large is the model used for embeddings later in this notebook;
# mistral is the model used for chat completions.
!ollama pull mxbai-embed-large
!ollama pull mistral  

[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 819c2adf5ce6: 100% ▕██████████████████▏ 669 MB                         [K
pulling c71d239df917: 100% ▕██████████████████▏  11 KB                         [K
pulling b837481ff855: 100% ▕██████████████████▏   16 B                         [K
pulling 38badd946f91: 100% ▕██████████████████▏  408 B                         [K
verifying sha256 digest [K
writing manifest [K
success [K[?25h[?2026l
[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l

In [3]:
# Check model availability: list the models present in the local Ollama store
!ollama list

NAME                        ID              SIZE      MODIFIED               
mistral:latest              f974a74358d6    4.1 GB    Less than a second ago    
mxbai-embed-large:latest    468836162de7    669 MB    Less than a second ago    


In [4]:
# Smoke-test the chat model with a single user turn; the bare `res` on the
# last line makes the notebook display the full response object.
res = ollama.chat(
    model="mistral",
    messages=[
        {"role": "user", "content": "Tell me a joke about Data Scientists"},
    ],
)
res

ChatResponse(model='mistral', created_at='2025-05-02T15:47:33.237839Z', done=True, done_reason='stop', total_duration=5486703250, load_duration=12943084, prompt_eval_count=13, prompt_eval_duration=3082628125, eval_count=49, eval_duration=2390351875, message=Message(role='assistant', content=" Why don't Data Scientists play hide and seek with their data?\n\nBecause they always use find() function to search for it!\n\n(Apologies for the lame joke, I'll work on my humor.)", images=None, tool_calls=None))

In [5]:
# Print only the reply text, i.e. the `content` field of the response's message.
print(res['message']['content'])

 Why don't Data Scientists play hide and seek with their data?

Because they always use find() function to search for it!

(Apologies for the lame joke, I'll work on my humor.)


# Set up your first RAG pipeline

In [6]:
import os
import ollama
import chromadb
from chromadb.utils.embedding_functions import DefaultEmbeddingFunction

In [7]:
# === Step 1: Setup ChromaDB ===
# NOTE(review): Client() is called with no arguments, which presumably gives an
# in-memory (non-persistent) store -- confirm against the chromadb docs.
chroma_client = chromadb.Client()
# get_or_create_collection lets this cell be re-run without failing when the
# "rag-docs" collection already exists in the client.
collection    = chroma_client.get_or_create_collection(name="rag-docs")

In [8]:
# === Step 2: Load and Embed Documents ===
def embed_text(text: str, model: str = "mxbai-embed-large"):
    """Return the embedding vector for a single piece of text.

    Parameters
    ----------
    text : str
        The text to embed. Must not be empty or whitespace-only.
    model : str, optional
        Name of the Ollama embedding model to use. Defaults to
        "mxbai-embed-large". Documents and queries must be embedded with the
        same model for their vectors to be comparable.

    Returns
    -------
    The first entry of the ``"embeddings"`` list in the Ollama response.

    Raises
    ------
    ValueError
        If ``text`` is an empty or whitespace-only string.
    """
    # Fail early with a clear message instead of sending blank text to the
    # embedding service: a blank string has nothing meaningful to embed.
    if isinstance(text, str) and not text.strip():
        raise ValueError("embed_text() requires non-empty text")

    response = ollama.embed(model=model, input=text)
    # "embeddings" is a list; only one text was submitted, so take its first entry.
    return response["embeddings"][0]

In [9]:
# Sample docs (could also read from files)
documents = [
    "Jurgen Klopp was born in Germany in 1974. He has been a successful coach in the UK",
    "You can contact Sky customer support through the help portal or live chat.",
    "An apple a day keeps the doctor away"
]

In [10]:
# Embed each document and store it in the collection under the id "doc-<index>".
# - `upsert` (rather than `add`) keeps this cell idempotent: re-running it
#   overwrites "doc-0", "doc-1", ... instead of trying to add IDs that are
#   already present in the collection.
# - tqdm wraps the list itself (not the `enumerate` generator) so it knows the
#   total number of items and can draw a real progress bar.
for i, doc in enumerate(tqdm(documents, desc="Embedding documents")):
    embedding = embed_text(doc)
    collection.upsert(
        documents=[doc],
        embeddings=[embedding],
        ids=[f"doc-{i}"],
    )

3it [00:00,  6.75it/s]


In [11]:
# === Step 3: Accept User Query and Retrieve Relevant Docs ===
query = "Who was Jurgen Klopp?"

# The question goes through the same embedding helper as the documents, then
# the collection is asked for the 3 entries closest to that vector.
query_embedding = embed_text(query)
results = collection.query(
    query_embeddings=[query_embedding],
    n_results=3,
)

# Display the raw query result.
results

{'ids': [['doc-0', 'doc-1', 'doc-2']],
 'embeddings': None,
 'documents': [['Jurgen Klopp was born in Germany in 1974. He has been a successful coach in the UK',
   'You can contact Sky customer support through the help portal or live chat.',
   'An apple a day keeps the doctor away']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[None, None, None]],
 'distances': [[0.3875125050544739, 1.4115504026412964, 1.4422112703323364]]}

In [12]:
# results["documents"] is a list of lists; index 0 selects the documents
# returned for the single query that was submitted.
retrieved_docs = results["documents"][0]
# Join the retrieved documents, one per line, into the context block of the prompt.
context = "\n".join(retrieved_docs)

In [13]:
# === Step 4: Run RAG Prompt through Ollama LLM ===
# Assemble the prompt piece by piece: the instruction, the retrieved context,
# the user's question, and a trailing "Answer:" cue.
prompt = (
    "Answer the question using only the context below.\n"
    "\n"
    "Context:\n"
    f"{context}\n"
    "\n"
    f"Question: {query}\n"
    "\n"
    "Answer:"
)

print(prompt)

Answer the question using only the context below.

Context:
Jurgen Klopp was born in Germany in 1974. He has been a successful coach in the UK
You can contact Sky customer support through the help portal or live chat.
An apple a day keeps the doctor away

Question: Who was Jurgen Klopp?

Answer:


In [14]:
# Send the assembled prompt to the chat model as a single user message,
# then print only the text of the reply.
response = ollama.chat(
    model="mistral",
    messages=[{"role": "user", "content": prompt}],
)

print("RAG Answer:\n", response["message"]["content"])

RAG Answer:
  Jurgen Klopp is a German born individual who has been a successful coach in the UK.
