# Lab 4: RAG Pipeline Implementation - SOLUTIONS

**Module 4 - Retrieval-Augmented Generation**

In [None]:
import os
import numpy as np
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.schema import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Fall back to the placeholder only when no key is configured, so a real key
# already exported in the environment is never overwritten.
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = "your-api-key-here"

# Shared clients used by the exercises below: one embedding model and one
# chat model (temperature=0).
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
llm = ChatOpenAI(model="gpt-4", temperature=0)

# Small in-memory corpus of raw texts used as the knowledge base for the lab.
documents = [
    """Machine Learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed. There are three main types: supervised learning, unsupervised learning, and reinforcement learning.""",
    """Deep Learning is part of machine learning based on artificial neural networks. Architectures include CNNs, RNNs, and Transformers, applied to computer vision, NLP, and more."""
]

## Exercise 1: Document Chunking - SOLUTION

In [None]:
def create_chunks(texts: list, chunk_size: int, chunk_overlap: int) -> list:
    """Split each text into overlapping chunks wrapped as ``Document`` objects.

    Args:
        texts: Raw strings to split.
        chunk_size: Chunk size handed to the splitter; measured with ``len``.
        chunk_overlap: Overlap between consecutive chunks, handed to the
            splitter.

    Returns:
        A flat list of ``Document`` objects in input order. Each one's
        metadata holds ``"source"`` (``doc_<index of the text>``) and
        ``"chunk"`` (position of the chunk within that text).
    """
    # Separators listed from coarsest to finest: blank line, newline,
    # sentence end, space, empty string.
    separator_priority = ["\n\n", "\n", ". ", " ", ""]
    text_splitter = RecursiveCharacterTextSplitter(
        separators=separator_priority,
        length_function=len,
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )

    return [
        Document(
            page_content=piece,
            metadata={"source": f"doc_{text_idx}", "chunk": piece_idx},
        )
        for text_idx, text in enumerate(texts)
        for piece_idx, piece in enumerate(text_splitter.split_text(text))
    ]

# Test: chunk the sample corpus with chunk_size=100 and chunk_overlap=20,
# report how many chunks were produced, and preview the first 50 characters
# of up to three of them.
chunks = create_chunks(documents, chunk_size=100, chunk_overlap=20)
print(f"Created {len(chunks)} chunks")
for c in chunks[:3]:
    print(f"  - {c.page_content[:50]}...")

## Exercise 2: Vector Store - SOLUTION

In [None]:
def create_vector_store(
    documents: list,
    chunk_size: int = 200,
    chunk_overlap: int = 40,
) -> FAISS:
    """Chunk the raw texts, embed the chunks, and index them in FAISS.

    Args:
        documents: Raw strings to index.
        chunk_size: Chunk size passed to ``create_chunks`` (default 200).
        chunk_overlap: Chunk overlap passed to ``create_chunks`` (default 40).

    Returns:
        A FAISS vector store built from the chunks with the module-level
        ``embeddings`` model.

    Raises:
        ValueError: If chunking yields no chunks (for example, ``documents``
            is empty).
    """
    chunks = create_chunks(
        documents, chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    # Fail early with a clear message: with no chunks there is nothing to
    # embed or index, and the failure deeper in the stack is hard to read.
    if not chunks:
        raise ValueError(
            "Cannot build a vector store: no chunks were produced from `documents`."
        )
    return FAISS.from_documents(documents=chunks, embedding=embeddings)

def compare_similarities(texts: list) -> None:
    """Print the pairwise cosine-similarity matrix of the given texts.

    Each text is embedded with the module-level ``embeddings`` model via
    ``embed_query``. One row is printed per text, with values formatted to
    three decimals and separated by ``" | "``.

    Args:
        texts: Strings to embed and compare.
    """
    # Convert each embedding to an array once and compute each norm once,
    # instead of redoing both for every (i, j) pair inside the nested loop.
    vectors = [np.asarray(embeddings.embed_query(t), dtype=float) for t in texts]
    norms = [np.linalg.norm(vec) for vec in vectors]

    print("\nSimilarity Matrix:")
    for i, (vec_i, norm_i) in enumerate(zip(vectors, norms)):
        row = [
            f"{np.dot(vec_i, vec_j) / (norm_i * norm_j):.3f}"
            for vec_j, norm_j in zip(vectors, norms)
        ]
        print(f"Text {i}: {' | '.join(row)}")

# Test: build the vector store from the sample corpus and report how many
# vectors its underlying index holds.
vector_store = create_vector_store(documents)
print(f"Vector store created with {vector_store.index.ntotal} vectors")

## Exercise 3: Semantic Search - SOLUTION

In [None]:
def semantic_search(vector_store, query: str, k: int = 3) -> list:
    """Return the ``k`` chunks closest to ``query`` with a similarity score.

    ``similarity_search_with_score`` reports a distance (lower means closer),
    not a similarity. This function assumes the store is a LangChain FAISS
    store on its default flat L2 index, whose score is the *squared*
    Euclidean distance, and that the embeddings are unit-length (TODO:
    confirm for the configured embedding model). Under those assumptions
    ``d = 2 - 2 * cos``, so cosine similarity is ``1 - d / 2``. The previous
    ``1 - d`` mapped orthogonal vectors to -1 instead of 0.

    Args:
        vector_store: Object exposing ``similarity_search_with_score(query, k=...)``
            that returns ``(document, score)`` pairs.
        query: Natural-language search text.
        k: Maximum number of results to request.

    Returns:
        A list of dicts, in the order the store returned them, with keys
        ``"content"``, ``"metadata"``, ``"distance"`` (the raw score from the
        store) and ``"similarity_score"`` (``1 - distance / 2``).
    """
    hits = vector_store.similarity_search_with_score(query, k=k)
    return [
        {
            "content": doc.page_content,
            "metadata": doc.metadata,
            # Raw score kept so callers with a different metric can use it.
            "distance": float(score),
            "similarity_score": 1.0 - float(score) / 2.0,
        }
        for doc, score in hits
    ]

# Test: run a sample query with the default k=3 and print each hit's score
# next to the first 50 characters of its content.
results = semantic_search(vector_store, "What is machine learning?")
for r in results:
    print(f"Score: {r['similarity_score']:.3f} - {r['content'][:50]}...")

## Exercise 4: Complete RAG Pipeline - SOLUTION

In [None]:
class RAGPipeline:
    """Retrieve-then-generate question answering over a vector store.

    For each question the pipeline fetches the ``k`` closest passages,
    numbers them into one context string, and asks the LLM to answer from
    that context using a fixed prompt.
    """

    def __init__(self, vector_store, llm, k: int = 3):
        """Store the collaborators and build the answer prompt.

        Args:
            vector_store: Object exposing ``similarity_search(query, k=...)``.
            llm: Model piped after the prompt when generating an answer.
            k: Number of passages to retrieve per question.
        """
        template_text = """
Use the following context to answer the question.
If the context doesn't contain relevant information, say so.

Context:
{context}

Question: {question}

Answer:"""
        self.k = k
        self.llm = llm
        self.vector_store = vector_store
        self.prompt = ChatPromptTemplate.from_template(template_text)

    def retrieve(self, query: str) -> str:
        """Return the top ``k`` passages as one string.

        Passages are prefixed ``[1]``, ``[2]``, ... and separated by a blank
        line.
        """
        hits = self.vector_store.similarity_search(query, k=self.k)
        numbered = []
        for position, hit in enumerate(hits, start=1):
            numbered.append(f"[{position}] {hit.page_content}")
        return "\n\n".join(numbered)

    def generate(self, question: str, context: str) -> str:
        """Fill the prompt with the context and question and return the LLM output as text."""
        pipeline = self.prompt | self.llm | StrOutputParser()
        inputs = {"context": context, "question": question}
        return pipeline.invoke(inputs)

    def query(self, question: str) -> dict:
        """Run retrieval then generation for one question.

        Returns:
            A dict with the keys ``"question"``, ``"context"`` (the numbered
            passages) and ``"answer"``.
        """
        retrieved = self.retrieve(question)
        return {
            "question": question,
            "context": retrieved,
            "answer": self.generate(question, retrieved),
        }

# Test: build the pipeline from the vector store and LLM (default k=3), ask a
# sample question, and print only the generated answer.
rag = RAGPipeline(vector_store, llm)
result = rag.query("What are the types of machine learning?")
print(f"Answer: {result['answer']}")

## Checkpoint

Lab 4 complete! **Next:** Lab 5 - LoRA Fine-tuning