# 02 - LangChain RAG Pipeline

Build a Retrieval-Augmented Generation (RAG) system using Weaviate and Ollama.

In [None]:
import os
from dotenv import load_dotenv
import weaviate
from langchain_community.llms import Ollama
from langchain.embeddings.base import Embeddings
from ollama import Client

load_dotenv()

## 1. Connect to Services

In [None]:
# Weaviate
# Parse WEAVIATE_URL with the standard library instead of string surgery so
# that "http://host:port", "https://host:port", "host:port" and URLs without
# an explicit port are all accepted.
from urllib.parse import urlparse

weaviate_url = os.getenv("WEAVIATE_URL")
if not weaviate_url:
    raise RuntimeError(
        "WEAVIATE_URL is not set; define it in the environment or the .env file"
    )

# urlparse only fills in hostname/port when a scheme is present.
parsed_url = urlparse(weaviate_url if "://" in weaviate_url else f"http://{weaviate_url}")
host = parsed_url.hostname
if host is None:
    raise ValueError(f"Cannot determine the Weaviate host from WEAVIATE_URL={weaviate_url!r}")
port = parsed_url.port or 8080  # 8080 is Weaviate's default HTTP port
secure = parsed_url.scheme == "https"

# connect_to_custom() requires the gRPC endpoint in addition to the HTTP one.
# NOTE(review): assumes gRPC is served on the same host as HTTP, on Weaviate's
# default port 50051 unless WEAVIATE_GRPC_PORT overrides it — confirm against
# the stack's compose file.
grpc_port = int(os.getenv("WEAVIATE_GRPC_PORT", "50051"))

wv_client = weaviate.connect_to_custom(
    http_host=host,
    http_port=port,
    http_secure=secure,
    grpc_host=host,
    grpc_port=grpc_port,
    grpc_secure=secure,
)

# Ollama
# `llm` is the LangChain wrapper used for text generation; `ollama_client` is
# the native client used further down to compute embeddings.
llm = Ollama(model="llama3.2", base_url=os.getenv("OLLAMA_BASE_URL"))
ollama_client = Client(host=os.getenv("OLLAMA_BASE_URL"))

print(f"✅ Connected to Weaviate: {wv_client.is_ready()}")

## 2. Create Custom Ollama Embeddings

In [None]:
class OllamaEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter that delegates to an Ollama client.

    Every text is sent to ``client.embeddings(model=..., prompt=...)`` and the
    value stored under the ``"embedding"`` key of the response is returned.
    """

    def __init__(self, client, model="llama3.2"):
        """Store the Ollama client and the name of the model to embed with."""
        self.model = model
        self.client = client

    def _embed(self, text):
        """Request the embedding for a single piece of text."""
        response = self.client.embeddings(model=self.model, prompt=text)
        return response["embedding"]

    def embed_documents(self, texts):
        """Return one embedding per entry of ``texts``, in the same order."""
        return list(map(self._embed, texts))

    def embed_query(self, text):
        """Return the embedding for a single query string."""
        return self._embed(text)


# Shared embedder instance used by the ingestion and query cells.
embeddings = OllamaEmbeddings(ollama_client)

## 3. Sample Documents

In [None]:
# Tiny in-memory corpus: one short sentence per component, each of which is
# embedded and stored as its own object in the vector store.
documents = [
    "GenAI Vanilla Stack is a modular AI development platform.",
    "Ollama provides local LLM inference without cloud dependencies.",
    "Weaviate is a vector database optimized for semantic search.",
    "JupyterHub enables interactive data science workflows.",
    "Neo4j stores data as graphs with nodes and relationships.",
]

# Quick sanity check on the corpus size.
print(f"Sample documents: {len(documents)}")

## 4. Create Embeddings and Store

In [None]:
# Create collection if not exists
from weaviate.classes.config import Configure, Property, DataType
from weaviate.util import generate_uuid5

if not wv_client.collections.exists("Document"):
    wv_client.collections.create(
        name="Document",
        properties=[Property(name="content", data_type=DataType.TEXT)],
    )

collection = wv_client.collections.get("Document")

# Add documents
# Each object gets a deterministic UUIDv5 derived from its text, so running
# this cell again skips documents that are already stored instead of inserting
# duplicates (which would otherwise show up as repeated search hits).
for doc in documents:
    doc_id = generate_uuid5(doc)
    if collection.data.exists(doc_id):
        continue
    # The embedding is only computed for documents that still need inserting.
    embedding = embeddings.embed_documents([doc])[0]
    collection.data.insert(properties={"content": doc}, vector=embedding, uuid=doc_id)

print("✅ Documents stored in Weaviate")

## 5. RAG Query

In [None]:
query = "What is a vector database?"

# Retrieve: embed the question and fetch the two nearest stored objects.
query_embedding = embeddings.embed_query(query)
results = collection.query.near_vector(
    near_vector=query_embedding,
    limit=2,
)

# Augment: concatenate the text of the hits, one per line, into the context.
context = "\n".join(obj.properties["content"] for obj in results.objects)

# Generate: hand the context and the question to the LLM.
prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
answer = llm.invoke(prompt)

# Show the question, the retrieved context and the generated answer.
print(f"Query: {query}")
print(f"\nContext: {context}")
print(f"\nAnswer: {answer}")