# In [None]:
# notebooks/01_data_prep.ipynb

# 1. Install deps (only needed in Colab/Jupyter)
# !pip install sentence-transformers faiss-cpu langchain

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import os

# 2. Placeholder corpus: one short object-oriented-programming definition per
#    entry. Swap these strings for the real lecture/rubric text later.
documents = [
    # Polymorphism
    "Polymorphism allows objects to take many forms in object-oriented programming.",
    # Encapsulation
    "Encapsulation is the concept of bundling data and methods together.",
    # Inheritance
    "Inheritance allows one class to acquire properties of another class.",
]

# 3. Chunk the raw strings. The splitter is configured with a chunk size of
#    200 and an overlap of 20 between neighbouring chunks; create_documents
#    wraps the resulting pieces in document objects.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=200,
)
docs = splitter.create_documents(texts=documents)

# 4. Embedding model used to vectorise every chunk.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# 5. Embed the chunks and load them into a FAISS vector store.
db = FAISS.from_documents(documents=docs, embedding=embeddings)

# Persist the index to disk.
# NOTE: "../data" is a relative path, so it resolves against the current
# working directory of the process running this script/notebook.
# exist_ok=True creates the directory when it is missing and is a no-op when
# it already exists, avoiding the check-then-create race of a separate
# os.path.exists() test.
os.makedirs("../data", exist_ok=True)
db.save_local("../data/lecture_index")

print("✅ Embeddings built and FAISS index saved at ../data/lecture_index")
