# Your first RAG application

In [17]:
import pandas as pd
import numpy as np
from tqdm import tqdm 
import ollama 
import rich

In [None]:
# Obtain your embedding & LLM models (execute here or run the same commands in a terminal).
# - mxbai-embed-large: the model passed to ollama.embed in this notebook
# - mistral:           the model passed to ollama.chat in this notebook
!ollama pull mxbai-embed-large
!ollama pull mistral  

In [None]:
# Check model availability: the two models pulled above (mxbai-embed-large, mistral)
# are expected to show up in this listing before the cells below are run.
!ollama list

In [36]:
# Baseline: ask the LLM directly, with no retrieved context in the prompt.
baseline_messages = [
    {"role": "user", "content": "What team did Jurgen Klopp coach?"},
]
res = ollama.chat(model="mistral", messages=baseline_messages)
res

ChatResponse(model='mistral', created_at='2025-05-03T06:36:13.155425Z', done=True, done_reason='stop', total_duration=3834306875, load_duration=15186333, prompt_eval_count=14, prompt_eval_duration=335340416, eval_count=73, eval_duration=3483269542, message=Message(role='assistant', content=' Jurgen Klopp currently coaches Liverpool Football Club in the English Premier League. Prior to his tenure at Liverpool, he coached Borussia Dortmund, where he led the team to two Bundesliga titles and a UEFA Champions League title. He has also managed Mainz 05 and Borussia Mönchengladbach in the German Bundesliga.', images=None, tool_calls=None))

In [37]:
# Show the same response object again, this time formatted by rich.
rich.print(res)

In [38]:
# Second direct call to the LLM (still no retrieval); note that `res` is overwritten.
joke_messages = [
    {"role": "user", "content": "Tell me a joke about Data Science"},
]
res = ollama.chat(model="mistral", messages=joke_messages)
rich.print(res)

# Set up your first RAG pipeline

In [8]:
import os
import ollama
import chromadb
from chromadb.utils.embedding_functions import DefaultEmbeddingFunction

In [9]:
# === Step 1: Setup ChromaDB ===
# Create a client, then fetch the "rag-docs" collection (creating it if it
# does not exist yet) so this cell can be re-run safely.
chroma_client = chromadb.Client()
collection = chroma_client.get_or_create_collection(
    name="rag-docs",
)

In [10]:
# === Step 2: Load and Embed Documents ===
def embed_text(text, model="mxbai-embed-large"):
    """Return the embedding vector for a single piece of text.

    Args:
        text: The string to embed.
        model: Name of the Ollama embedding model to use. Defaults to
            "mxbai-embed-large", so existing ``embed_text(text)`` calls
            behave exactly as before.

    Returns:
        The first entry of the ``"embeddings"`` field of the Ollama response,
        i.e. the vector for ``text``.
    """
    response = ollama.embed(model=model, input=text)
    # The response carries a list of embeddings; one input string was sent,
    # so the vector of interest is the first entry.
    return response["embeddings"][0]

In [11]:
# Sample docs (could also read from files).
# A tiny toy corpus: one passage about Jurgen Klopp and two unrelated
# passages, so retrieval distances can be compared by eye.
documents = [
    "Jurgen Klopp was born in Germany in 1974. He has been a successful coach in the UK",
    "You can contact Sky customer support through the help portal or live chat.",
    "An apple a day keeps the doctor away"
]

In [12]:
# Embed every document and store its text + vector under a stable id.
# tqdm wraps the list itself (not the enumerate() generator) so it can read
# len(documents) and render a real progress bar with a total.
for i, doc in enumerate(tqdm(documents, desc="Embedding documents")):
    embedding = embed_text(doc)
    # upsert rather than add: re-running this cell overwrites "doc-{i}"
    # instead of trying to insert ids that already exist in the collection.
    collection.upsert(
        ids=[f"doc-{i}"],
        documents=[doc],
        embeddings=[embedding],
    )

3it [00:00,  3.11it/s]


In [13]:
# === Step 3: Accept User Query and Retrieve Relevant Docs ===
query = "Who was Jurgen Klopp?"

# Embed the question with the same helper used for the documents, then ask
# the collection for its 3 closest entries.
query_embedding = embed_text(query)
results = collection.query(
    n_results=3,
    query_embeddings=[query_embedding],
)

results

{'ids': [['doc-0', 'doc-1', 'doc-2']],
 'embeddings': None,
 'documents': [['Jurgen Klopp was born in Germany in 1974. He has been a successful coach in the UK',
   'You can contact Sky customer support through the help portal or live chat.',
   'An apple a day keeps the doctor away']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[None, None, None]],
 'distances': [[0.3875125050544739, 1.4115504026412964, 1.4422112703323364]]}

In [14]:
# results["documents"] holds one list of documents per query embedding;
# a single query was sent, so the hits are at index 0.
documents_per_query = results["documents"]
retrieved_docs = documents_per_query[0]

# One retrieved document per line.
context = "\n".join(retrieved_docs)

In [32]:
# === Step 4: Run RAG Prompt through Ollama LLM ===
# The prompt is assembled line by line so the trailing spaces on the first
# two instruction lines are explicit rather than hidden at line ends.
prompt_lines = [
    "You're a personal assistant. Your task is to answer questions using only the provided context. ",
    "If you can not explicitly extract the answer from the context, your answer must be I cannot help with that. ",
    "",
    "Context:",
    f"{context}",
    "",
    f"Question: {query}",
    "",
    "Answer:",
]
prompt = "\n".join(prompt_lines)

rich.print(prompt)

In [34]:
query = 'What team did Jurgen Klopp coach?'

# `context` and `prompt` were built from the previous value of `query`.
# Re-run retrieval and rebuild the prompt here, otherwise the chat call
# below would still send the old question to the model.
query_embedding = embed_text(query)
results = collection.query(query_embeddings=[query_embedding], n_results=3)
context = "\n".join(results["documents"][0])

prompt = (
    "You're a personal assistant. Your task is to answer questions using only the provided context. \n"
    "If you can not explicitly extract the answer from the context, your answer must be I cannot help with that. \n"
    "\n"
    "Context:\n"
    f"{context}\n"
    "\n"
    f"Question: {query}\n"
    "\n"
    "Answer:"
)

In [35]:
# Send the assembled RAG prompt as a single user message.
rag_messages = [{"role": "user", "content": prompt}]
response = ollama.chat(model="mistral", messages=rag_messages)

# Only the assistant's text is shown, not the full response object.
answer_text = response["message"]["content"]
rich.print("RAG Answer:\n", answer_text)