# Lab 2.2: Enterprise RAG with watsonx.ai - Complete Solution

This is the complete working solution for Lab 2.2.

In [None]:
# Complete implementation
import os
import time
from typing import List, Dict, Any
from dotenv import load_dotenv
from ibm_watsonx_ai import Credentials
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.foundation_models.embeddings import Embeddings
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings.base import Embeddings as LangChainEmbeddings

# Load credentials
load_dotenv()
credentials = Credentials(
    url=os.getenv("WATSONX_URL", "https://us-south.ml.cloud.ibm.com"),
    api_key=os.getenv("WATSONX_APIKEY")
)
PROJECT_ID = os.getenv("WATSONX_PROJECT_ID")

# Initialize embedding model
embedding_model = Embeddings(
    model_id="ibm/slate-30m-english-rtrvr",
    credentials=credentials,
    project_id=PROJECT_ID
)

# Initialize Granite LLM
granite_model = ModelInference(
    model_id="ibm/granite-13b-chat-v2",
    credentials=credentials,
    project_id=PROJECT_ID,
    params={
        GenParams.DECODING_METHOD: "greedy",
        GenParams.MAX_NEW_TOKENS: 300,
        GenParams.TEMPERATURE: 0.7
    }
)

print("✅ All components initialized!")

In [None]:
# Sample documents
documents = [
    Document(
        page_content="IBM watsonx.ai is an enterprise AI platform with Granite models.",
        metadata={"source": "watsonx.txt"}
    ),
    Document(
        page_content="RAG enhances LLMs by retrieving relevant context before generation.",
        metadata={"source": "rag.txt"}
    ),
    Document(
        page_content="Vector databases enable semantic search using embeddings.",
        metadata={"source": "vectors.txt"}
    )
]

# Chunk documents
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = splitter.split_documents(documents)

print(f"✅ Created {len(chunks)} chunks")

In [None]:
# Embedding wrapper
class WatsonxEmbeddingsWrapper(LangChainEmbeddings):
    def __init__(self, watsonx_embeddings):
        self.watsonx_embeddings = watsonx_embeddings
    
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self.watsonx_embeddings.embed_query(text) for text in texts]
    
    def embed_query(self, text: str) -> List[float]:
        return self.watsonx_embeddings.embed_query(text)

# Create vector store
embeddings_wrapper = WatsonxEmbeddingsWrapper(embedding_model)
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings_wrapper,
    persist_directory="./watsonx_rag_db"
)

print("✅ Vector store created!")

In [None]:
# RAG function
def rag_answer(question: str, k: int = 3) -> Dict[str, Any]:
    # Retrieve
    relevant_chunks = vectorstore.similarity_search(question, k=k)
    context = "\n\n".join([chunk.page_content for chunk in relevant_chunks])
    
    # Generate
    prompt = f"""<|system|>
You are a helpful assistant. Use the context to answer questions.
<|endofsystem|>

<|user|>
Context: {context}

Question: {question}
<|endofuser|>

<|assistant|>
"""
    answer = granite_model.generate_text(prompt=prompt)
    
    return {
        "question": question,
        "answer": answer.strip(),
        "sources": [chunk.metadata for chunk in relevant_chunks]
    }

# Test
result = rag_answer("What is watsonx.ai?")
print(f"Q: {result['question']}")
print(f"A: {result['answer']}")
print(f"Sources: {result['sources']}")

## ✅ Complete!

You've built a production-ready RAG system with watsonx.ai!

**Next**: Lab 2.3 - Compare this with Ollama RAG