In [None]:
# @title # **0) Install deps (2025-safe)**
!pip install -q --upgrade llama-index-core llama-index \
         llama-index-vector-stores-faiss faiss-cpu pypdf \
         sentence-transformers llama-index-embeddings-huggingface \
         llama-index-llms-huggingface

In [2]:
# @title # **1) Imports**

# Core LlamaIndex
from llama_index.core import (
    Settings,
    VectorStoreIndex,
    SimpleDirectoryReader,
    get_response_synthesizer,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.storage import StorageContext
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler

# Hugging Face LLM + Embeddings
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# FAISS vector store
from llama_index.vector_stores.faiss import FaissVectorStore
import faiss

In [3]:
# @title # **2) Paths & runtime knobs**
DATA_DIR = "/content/data"             # your PDFs
PERSIST_DIR = "/content/faiss_store"   # FAISS index storage
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 100
TOP_K = 5

In [16]:
# @title # **3) Configure LLM + Embeddings + Settings**
from transformers import AutoTokenizer
llm = HuggingFaceLLM(
    model_name="facebook/opt-350m",
    tokenizer=AutoTokenizer.from_pretrained("facebook/opt-350m")
)

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")


dbg = LlamaDebugHandler(print_trace_on_end=True)


Settings.callback_manager = CallbackManager([dbg])
Settings.llm = llm
Settings.embed_model = embed_model
Settings.node_parser = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)


tokenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]



In [17]:
# @title # **4) Create FAISS index + Build LlamaIndex**
# initialize FAISS index
embed_dim = len(embed_model.get_text_embedding("test"))
faiss_index = faiss.IndexFlatL2(embed_dim)
vector_store = FaissVectorStore(faiss_index=faiss_index)

storage_context = StorageContext.from_defaults(vector_store=vector_store)

# load PDFs
reader = SimpleDirectoryReader(input_dir=DATA_DIR, recursive=True)
documents = reader.load_data()
print(f"Loaded {len(documents)} document(s) from {DATA_DIR}")

# build vector index
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    show_progress=True,
)


Loaded 176 document(s) from /content/data


Parsing nodes:   0%|          | 0/176 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/176 [00:00<?, ?it/s]

**********
Trace: index_construction
    |_CBEventType.EMBEDDING -> 2.360265 seconds
    |_CBEventType.EMBEDDING -> 2.593183 seconds
    |_CBEventType.EMBEDDING -> 1.219504 seconds
    |_CBEventType.EMBEDDING -> 3.492537 seconds
    |_CBEventType.EMBEDDING -> 1.920979 seconds
    |_CBEventType.EMBEDDING -> 1.601589 seconds
    |_CBEventType.EMBEDDING -> 1.43399 seconds
    |_CBEventType.EMBEDDING -> 0.734126 seconds
    |_CBEventType.EMBEDDING -> 1.836512 seconds
    |_CBEventType.EMBEDDING -> 2.937422 seconds
    |_CBEventType.EMBEDDING -> 3.397503 seconds
    |_CBEventType.EMBEDDING -> 2.463823 seconds
    |_CBEventType.EMBEDDING -> 0.870543 seconds
    |_CBEventType.EMBEDDING -> 1.563879 seconds
    |_CBEventType.EMBEDDING -> 4.045609 seconds
    |_CBEventType.EMBEDDING -> 3.57953 seconds
    |_CBEventType.EMBEDDING -> 2.543255 seconds
    |_CBEventType.EMBEDDING -> 1.390232 seconds
**********


In [18]:
# @title # **5) Query Engine**
query_engine = index.as_query_engine(
    similarity_top_k=TOP_K,
    response_mode="compact",
)


In [19]:
# @title # **6) Try some questions**

questions = [
    "Give me a concise overview of the PDFs in 3 bullets.",
    "What are the main topics discussed?",
]

import asyncio

for q in questions:
    print(f"\nQ: {q}")
    resp = await query_engine.aquery(q)
    print("\nA:", str(resp))

    print("\nSources:")
    if resp.source_nodes:
        for i, node in enumerate(resp.source_nodes[:3], start=1):
            print(f"{i}. {node.metadata.get('file_name', 'unknown')} | score={node.score:.3f}")
    else:
        print("No sources found")


Q: Give me a concise overview of the PDFs in 3 bullets.
**********
Trace: query
    |_CBEventType.QUERY -> 53.815125 seconds
      |_CBEventType.RETRIEVE -> 0.030008 seconds
        |_CBEventType.EMBEDDING -> 0.027951 seconds
      |_CBEventType.SYNTHESIZE -> 53.784302 seconds
        |_CBEventType.TEMPLATING -> 2.5e-05 seconds
        |_CBEventType.LLM -> 53.780509 seconds
        |_CBEventType.LLM -> 53.780003 seconds
**********

A:                                                                                                                                                                                                                                                                 

Sources:
1. Data structures (Binary Search Tree).pdf | score=1.308
2. Data structures (Array 1D).pdf | score=1.319
3. Data structures (Array 1D).pdf | score=1.326

Q: What are the main topics discussed?
**********
Trace: query
    |_CBEventType.QUERY -> 69.571506 seconds
      |_CBEventType.RETRIEVE -

In [22]:
# @title # **7) Save FAISS index**

# save the raw FAISS index
faiss.write_index(faiss_index, f"{PERSIST_DIR}/faiss.index")

# save storage context metadata
storage_context.persist(persist_dir=PERSIST_DIR)

In [24]:
# @title # **8) Reload FAISS index without embedding again**

from llama_index.core import load_index_from_storage

# reload FAISS
faiss_index = faiss.read_index(f"{PERSIST_DIR}/faiss.index")
vector_store = FaissVectorStore(faiss_index=faiss_index)

storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    persist_dir=PERSIST_DIR,
)

index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine(similarity_top_k=TOP_K)


Loading llama_index.core.storage.kvstore.simple_kvstore from /content/faiss_store/docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from /content/faiss_store/index_store.json.
**********
Trace: index_construction
**********
