In [28]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings, StorageContext, Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from dotenv import load_dotenv
import os
import pypdf
import chromadb

In [10]:
# Read key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key used when the
# query engine calls api.openai.com further down — confirm against the .env file.
load_dotenv()

True

In [None]:
def load_documents_with_metadata_included(data_path):
    """Load every PDF in ``data_path`` as one ``Document`` per page, with metadata.

    Args:
        data_path: Directory holding the course PDFs. Only entries whose name
            ends in ``.pdf`` (case-insensitive) are read; sub-directories are
            not traversed.

    Returns:
        list[Document]: One document per page that has extractable text,
        ordered by file name and then by page. Each document carries the
        metadata keys ``file_name``, ``page_num`` (0-based index of the page
        in its file), ``doc_type`` (``"lecture"`` when the file name contains
        "lecture", otherwise ``"textbook"``) and ``course``. The list is empty
        when the directory contains no PDFs.
    """
    all_docs = []
    # os.listdir order is arbitrary; sort so the document order is reproducible.
    for filename in sorted(os.listdir(data_path)):
        if not filename.lower().endswith('.pdf'):
            continue
        file_path = os.path.join(data_path, filename)
        reader = pypdf.PdfReader(file_path)

        # Single conditional so doc_type is always bound exactly once.
        doc_type = "lecture" if "lecture" in filename.lower() else "textbook"

        for page_num, page in enumerate(reader.pages):
            # reader.pages yields page objects, not strings: extract the text.
            text = page.extract_text() or ""
            if not text.strip():
                # Blank or image-only page: nothing to embed.
                continue
            all_docs.append(
                Document(
                    text=text,
                    # The page text is deliberately not repeated in the
                    # metadata: LlamaIndex sends metadata along with the text
                    # to the embedding model by default, so a full copy would
                    # double every page and can exceed the chunk size.
                    metadata={
                        "file_name": filename,
                        "page_num": page_num,
                        "doc_type": doc_type,
                        "course": "Machine Learning",
                    },
                )
            )
    return all_docs
            

In [11]:
# Only ingest PDF and LaTeX sources from the data directory.
required_exts = [".pdf", ".tex"]
reader = SimpleDirectoryReader(
    input_dir="../data",
    required_exts=required_exts,
)

In [12]:
# Read every matching file under the reader's input directory into Document objects.
# NOTE(review): the count printed below (1789) and the short first document
# suggest PDFs are split into one Document per page — confirm.
docs = reader.load_data()



In [13]:
# Sanity check: how many Document objects the reader produced.
print(f"Loaded {len(docs)} documents")

Loaded 1789 documents


In [14]:
# Peek at the first loaded Document: its length in characters and up to its
# first 500 characters. `docs` holds the reader's Documents; the index is
# only built further down, so this is not yet an index chunk.
print(f"Chunk length: {len(docs[0].text)} characters")
print(f"Preview: {docs[0].text[:500]}")

Chunk length: 147 characters
Preview: Gareth James • Daniela Witten •
Trevor Hastie • Robert Tibshirani
An Introduction to Statistical
Learning
with Applications in R
Second Edition
123


We switch the default embedding model from OpenAI's to the open-source, free sentence-transformers model `all-MiniLM-L6-v2` from HuggingFace. Only the embeddings change: answer generation later in the notebook still calls the OpenAI chat API.

In [15]:
# Use the canonical repo id (lowercase "v2"). The "L6-V2" spelling only
# resolved through an HTTP 307 redirect on every file request and may not
# match a locally cached copy of the model.
Settings.embed_model = HuggingFaceEmbedding(model_name = 'sentence-transformers/all-MiniLM-L6-v2')

2026-02-12 21:24:41,227 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-V2
2026-02-12 21:24:41,379 - INFO - HTTP Request: HEAD https://huggingface.co/sentence-transformers/all-MiniLM-L6-V2/resolve/main/modules.json "HTTP/1.1 307 Temporary Redirect"
2026-02-12 21:24:41,481 - INFO - HTTP Request: HEAD https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/modules.json "HTTP/1.1 307 Temporary Redirect"
2026-02-12 21:24:41,501 - INFO - HTTP Request: HEAD https://huggingface.co/api/resolve-cache/models/sentence-transformers/all-MiniLM-L6-v2/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/modules.json "HTTP/1.1 200 OK"
2026-02-12 21:24:41,607 - INFO - HTTP Request: HEAD https://huggingface.co/sentence-transformers/all-MiniLM-L6-V2/resolve/main/config_sentence_transformers.json "HTTP/1.1 307 Temporary Redirect"
2026-02-12 21:24:41,717 - INFO - HTTP Request: HEAD https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/config_sen

In [20]:
# Vectors are persisted on disk under ./chroma, so they outlive the kernel.
chroma_client = chromadb.PersistentClient(path = './chroma')
# create_collection raises once 'ml_textbooks' exists on disk, which made this
# cell fail on every re-run; get_or_create_collection is idempotent.
# NOTE: re-indexing into a non-empty collection adds the same documents again,
# so check chroma_collection.count() before embedding a second time.
chroma_collection = chroma_client.get_or_create_collection('ml_textbooks')

In [21]:
# Wrap the Chroma collection as a LlamaIndex vector store and make it the
# storage backend used when the index is built.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [22]:
# Embedding all loaded documents is the expensive step, and the Chroma
# collection persists on disk. Embed only when the collection is empty;
# otherwise reuse the stored vectors so that re-running this cell does not
# insert every document a second time.
if chroma_collection.count() == 0:
    index = VectorStoreIndex.from_documents(docs, storage_context = storage_context)
else:
    index = VectorStoreIndex.from_vector_store(vector_store)

In [25]:
# Query engine over the index: fetch the 5 most similar nodes and build the
# answer with the 'tree_summarize' response mode (verbose logging enabled).
query_engine = index.as_query_engine(
    similarity_top_k=5,
    response_mode='tree_summarize',
    verbose=True,
)

response = query_engine.query(
    "What is gradient descent? Return answers only based off of the books given to you."
)

1 text chunks after repacking


2026-02-12 21:27:18,345 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [26]:
# Show the generated answer, then each retrieved source node with its
# similarity score, originating file name and the first 200 characters of text.
print(f"Answer : {response.response}")
print("\nSources: ")
# Number the sources from 1 for display.
for i, node in enumerate(response.source_nodes, 1):
    # NOTE(review): if a retriever ever returns a node with score None, the
    # :.3f format below would raise — confirm scores are always set here.
    print(f"\n{i}. Score: {node.score:.3f}")
    # Fall back to 'Unknown' when the node has no file_name metadata.
    print(f"   File: {node.metadata.get('file_name', 'Unknown')}")
    print(f"   Text: {node.text[:200]}...")

Answer : Gradient descent is a first-order optimization algorithm used to find a local minimum of a function by taking steps proportional to the negative of the gradient of the function at the current point. The gradient points in the direction of steepest descent, moving towards the minimum value of the function. By iteratively updating the parameters in the direction opposite to the gradient, gradient descent aims to converge towards a local minimum of the function being optimized.

Sources: 

1. Score: 0.485
   File: mml-book.pdf
   Text: 228 Continuous Optimization
where f : Rd → R is an objective function that captures the machine
learning problem at hand. We assume that our functionf is differentiable,
and we are unable to analytica...

2. Score: 0.414
   File: mml-book.pdf
   Text: 230 Continuous Optimization
Although the “undo” step seems to be a waste of resources, using this
heuristic guarantees monotonic convergence.
Example 7.2 (Solving a Linear Equation System)
When we sol