# Part 3: "The Librarian" (Advanced RAG System)

**Objective:** Build a robust, non-parametric memory system to retrieve exact information from the 2024 Annual Report.

**Key Architecture:**
- **Vector DB:** Weaviate (Embedded)
- **Retrieval:** Hybrid Search (Dense Vectors + BM25 Keyword Search)
- **Refinement:** Reciprocal Rank Fusion (RRF) + Cross-Encoder Reranking
- **Generator:** Llama-3-8B (or compatible LLM)

## 1. Setup & Dependencies

In [10]:
# Install Weaviate and RAG dependencies
# !pip install -q -U weaviate-client langchain-weaviate langchain-community sentence-transformers rank_bm25 python-dotenv

In [11]:
import os
import sys
import json
import weaviate
from weaviate.embedded import EmbeddedOptions
from sentence_transformers import CrossEncoder
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_weaviate.vectorstores import WeaviateVectorStore
from langchain_core.documents import Document

print("Libraries loaded.")

Libraries loaded.


## 2. Path Configuration (Hybrid Local/Colab)

In [12]:
# 1. Define the User's Local Path
USER_LOCAL_ROOT = r"C:/Development/financial-intelligence-engine"
USER_LOCAL_DATA = os.path.join(USER_LOCAL_ROOT, "data/interim")

# 2. Check Environment
if os.path.exists(USER_LOCAL_ROOT):
    # Running Locally (Windows/VS Code Local Kernel)
    print("Local Windows Environment Detected.")
    DATA_PATH = USER_LOCAL_DATA
    CHUNKS_PATH = os.path.join(DATA_PATH, "chunks.json")
    
elif 'google.colab' in sys.modules:
    # Running in Colab
    print("Google Colab Environment Detected.")
    from google.colab import drive
    drive.mount('/content/drive')
    
    # Drive Path
    DRIVE_ROOT = "/content/drive/MyDrive/Financial_Intern_Project"
    DATA_PATH = f"{DRIVE_ROOT}/data/interim"
    CHUNKS_PATH = f"{DATA_PATH}/chunks.json"
    
    os.makedirs(DATA_PATH, exist_ok=True)
    print(f"Google Drive Mounted.\nExpected Data Path: {CHUNKS_PATH}")
    
else:
    # Generic Fallback
    print("Generic Environment.")
    DATA_PATH = "../data/interim"
    CHUNKS_PATH = "../data/interim/chunks.json"

# Validation
if not os.path.exists(CHUNKS_PATH):
    print(f"ERROR: 'chunks.json' not found at {CHUNKS_PATH}")
    print("Colab Users: Upload 'chunks.json' to MyDrive/Financial_Intern_Project/data/interim/")
    print("Local Users: Check your data generation step.")
else:
    print(f"Found Data: {CHUNKS_PATH}")

Local Windows Environment Detected.
Found Data: C:/Development/financial-intelligence-engine\data/interim\chunks.json


## 3. Vector Database Setup (Weaviate)

In [14]:
# Weaviate v4 Connection Logic
import weaviate
import os
from urllib.parse import urlparse
from weaviate.classes.init import Auth
import weaviate.classes.config as wn

print("Attempting to connect to Weaviate (v4)...")

headers = {
    "X-HuggingFace-Api-Key": os.getenv("HF_TOKEN", "")
}

client = None

# 1. Try Embedded
try:
    print("Trying Embedded...")
    client = weaviate.connect_to_embedded(
        headers=headers,
    )
    print("Weaviate Embedded Started!")
except Exception as e:
    print(f"Embedded failed (or not supported in this env): {e}")
    client = None

# 2. Fallback to Local/External if Embedded failed
if not client:
    print("Attempting to connect to external instance...")
    wcd_url = os.getenv("WEAVIATE_URL", "http://localhost:8080")
    wcd_api_key = os.getenv("WEAVIATE_API_KEY", "")

    try:
        parsed = urlparse(wcd_url)
        host = parsed.hostname or "localhost"
        port = parsed.port or (443 if parsed.scheme == 'https' else 80)
        secure = parsed.scheme == 'https'
        
        auth_config = Auth.api_key(wcd_api_key) if wcd_api_key else None
        
        if "localhost" in host or "127.0.0.1" in host:
             client = weaviate.connect_to_local(port=port, headers=headers)
        else:
             client = weaviate.connect_to_custom(
                http_host=host,
                http_port=port,
                http_secure=secure,
                headers=headers,
                auth_credentials=auth_config
             )
        print(f"Connected to Weaviate at {wcd_url}")
    except Exception as e_ext:
        print(f"CRITICAL: Could not connect to Weaviate. Error: {e_ext}")
        raise e_ext

if client and client.is_ready():
    print("Client Ready.")
    # Define Schema
    class_name = "FinancialReport"
    if client.collections.exists(class_name):
        client.collections.delete(class_name)
    
    client.collections.create(
        name=class_name,
        vectorizer_config=wn.Configure.Vectorizer.none(),
        properties=[
            wn.Property(name="text", data_type=wn.DataType.TEXT),
            wn.Property(name="source", data_type=wn.DataType.TEXT),
            wn.Property(name="chunk_id", data_type=wn.DataType.INT),
        ]
    )
    print(f"Schema '{class_name}' created/reset.")


Attempting to connect to Weaviate (v4)...
Trying Embedded...
Embedded failed (or not supported in this env): Windows is not supported with EmbeddedDB. Please upvote this feature request if you want
                 this: https://github.com/weaviate/weaviate/issues/3315
Attempting to connect to external instance...
Connected to Weaviate at http://localhost:8080
Client Ready.


            Use the `vector_config` argument instead.
            


Schema 'FinancialReport' created/reset.


## 4. Data Ingestion

In [15]:
# Load Embeddings Model (Local)
embedding_model_name = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
print(f"Loaded Embeddings: {embedding_model_name}")

# Load Data
with open(CHUNKS_PATH, 'r') as f:
    raw_chunks = json.load(f)

collection = client.collections.get(class_name)
print(f"Loading {len(raw_chunks)} chunks to Weaviate (Collection: {class_name})...")

with collection.batch.fixed_size(batch_size=100) as batch:
    for i, chunk in enumerate(raw_chunks):
        text = chunk.get("chunk_content", "")
        source = chunk.get("source", "Unknown")
        if not text: continue
        
        vector = embeddings.embed_query(text)
        
        batch.add_object(
            properties={
                "text": text,
                "source": source,
                "chunk_id": i
            },
            vector=vector
        )
        if i % 100 == 0:
            print(f"Imported {i} chunks...")

print(f"Ingestion Complete. Total Objects: {collection.aggregate.over_all(total_count=True).total_count}")


  embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)


Loaded Embeddings: all-MiniLM-L6-v2
Loading 654 chunks to Weaviate (Collection: FinancialReport)...
Ingestion Complete. Total Objects: 0


## 5. Retrieval Strategies (Hybrid + RRF)

In [16]:
import weaviate.classes.query as wq

def hybrid_search(query, limit=20):
    """
    Performs Hybrid Search: Dense Vector + BM25 (Keyword)
    """
    collection = client.collections.get(class_name)
    query_vector = embeddings.embed_query(query)
    
    response = collection.query.hybrid(
        query=query,
        vector=query_vector,
        alpha=0.5,
        limit=limit,
        fusion_type=wq.HybridFusion.RELATIVE_SCORE,
        return_metadata=wq.MetadataQuery(score=True)
    )
    
    results = []
    for o in response.objects:
        res = o.properties
        # Add score for compatibility with reranker if needed
        if o.metadata and o.metadata.score is not None:
            res['_additional'] = {'score': o.metadata.score}
        results.append(res)
        
    return results


## 6. Refinement: Cross-Encoder Reranking

In [17]:
# Load Cross-Encoder (Reranker)
rerank_model_name = "cross-encoder/ms-marco-MiniLM-L-6-v2"
reranker = CrossEncoder(rerank_model_name)
print(f"Loaded Reranker: {rerank_model_name}")

def rerank_results(query, retrieved_docs, top_k=5):
    """
    Reranks the hybrid search results using a Cross-Encoder
    """
    if not retrieved_docs:
        return []
        
    # Prepare pairs for Cross-Encoder
    chunk_texts = [doc['text'] for doc in retrieved_docs]
    pairs = [[query, text] for text in chunk_texts]
    
    # Score pairs
    scores = reranker.predict(pairs)
    
    # Attach scores and sort
    ranked_results = []
    for doc, score in zip(retrieved_docs, scores):
        doc['rerank_score'] = score
        ranked_results.append(doc)
        
    # Sort by score descending
    ranked_results = sorted(ranked_results, key=lambda x: x['rerank_score'], reverse=True)
    
    return ranked_results[:top_k]

Loaded Reranker: cross-encoder/ms-marco-MiniLM-L-6-v2


## 7. The Librarian (Inference)

In [None]:
# Setup LLM (Can use the Fine-tuned one if loaded, or a lightweight one)

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

# Load Model (Optional: Reuse Part 2 logic/paths if unified)
model_id = "unsloth/Meta-Llama-3.1-8B-Instruct"
try:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
        load_in_4bit=True
    )
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
    print("LLM Loaded for Librarian")
except Exception as e:
    print(f"LLM Load Warning: {e}. Ensure you have GPU/Space. You can also mock this for retrieval testing.")

def query_librarian(question):
    # 1. Retrieval
    raw_results = hybrid_search(question, limit=20)
    
    # 2. Refinement
    refined_results = rerank_results(question, raw_results, top_k=5)
    
    # 3. Context Construction
    context_text = "\n---\n".join([doc['text'] for doc in refined_results])
    
    # 4. Generation
    prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are The Librarian, a precise financial assistant. Answer the question specifically using ONLY the provided context below. If the answer is not in the context, say 'Information not found'.

Context:
{context_text}
<|eot_id|><|start_header_id|>user<|end_header_id|>

{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
    
    try:
        outputs = pipe(
            prompt,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.1,
            top_p=0.9
        )
        answer = outputs[0]["generated_text"].split("assistant")[-1].strip()
    except NameError:
        answer = "[LLM Not Loaded - Retrieval Only Mode]"
        
    return {
        "answer": answer,
        "context": refined_results
    }

## 8. Evaluation & Test

In [None]:
question = "What is the total revenue for 2024?"
result = query_librarian(question)

print(f"Question: {question}")
print(f"Answer: {result['answer']}")
print("\nSource Contexts:")
for i, doc in enumerate(result['context']):
    print(f"[{i+1}] (Score: {doc['rerank_score']:.4f}) {doc['text'][:150]}...")