In [1]:
!pip install sentence-transformers faiss-cpu openai



Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m77.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.2


In [2]:
# Sample corpus: four one-sentence documents. Later cells embed this list and
# map search hits back to it by list position, so the order matters.
documents = [
    "Academic stress negatively impacts student mental health and performance.",
    "Machine learning models can detect depression risk from behavioral data.",
    "Semantic search uses embeddings to retrieve meaningfully similar documents.",
    "RAG systems combine retrieval and generation to reduce hallucinations."
]


In [3]:
#generate embeddings
from sentence_transformers import SentenceTransformer
import numpy as np

# Load the "all-MiniLM-L6-v2" sentence-transformer and encode every document.
# `model` is reused by the search functions below to encode queries.
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(documents)

# Sanity check: one row per document (the recorded run printed (4, 384)).
print(embeddings.shape)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

(4, 384)


In [4]:
#Vector Database Layer
# Temporary Backend: FAISS
#Future Backend: Endee
import faiss

# Vector width is the second axis of the embedding matrix.
dimension = embeddings.shape[1]
# FAISS IndexFlatL2 index for `dimension`-wide vectors.
index = faiss.IndexFlatL2(dimension)
# Rows are added in `documents` order, so index position i refers to documents[i].
index.add(np.array(embeddings))


In [5]:
#semantic search
def semantic_search(query, top_k=2):
    """Return the documents whose embeddings best match ``query``.

    Encodes ``query`` with the notebook-level ``model``, searches the
    notebook-level FAISS ``index`` and maps the hit positions back to
    ``documents``.

    Args:
        query: Text to search for.
        top_k: Maximum number of documents to return.

    Returns:
        A list of at most ``top_k`` document strings, in the order the index
        returned them. Shorter than ``top_k`` when the index holds fewer
        vectors.
    """
    q_emb = model.encode([query])
    _distances, indices = index.search(np.array(q_emb), top_k)
    # FAISS pads the result with -1 when fewer than top_k vectors are stored.
    # documents[-1] would silently return the LAST document, so drop the padding.
    return [documents[i] for i in indices[0] if i >= 0]


In [6]:
# Smoke-test the retriever with a sample question; the cell displays the
# returned list of documents (default top_k=2).
semantic_search("How does stress affect students?")


['Academic stress negatively impacts student mental health and performance.',
 'Machine learning models can detect depression risk from behavioral data.']

In [7]:
#RAG
import os

from openai import OpenAI

# Read the key from the OPENAI_API_KEY environment variable instead of pasting
# it into the notebook, where it would be saved and shared with the file.
# The "KEY" fallback is only a placeholder that keeps this cell runnable when
# no key is configured; it is not a usable credential.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "KEY"))

def rag_answer(query):
    """Answer ``query`` with the chat model, using retrieved documents as context.

    The documents returned by ``semantic_search`` are joined with single
    spaces and placed, together with the question, in one user message.

    Args:
        query: The question to answer.

    Returns:
        The message content of the first choice in the chat completion.
    """
    retrieved = semantic_search(query)
    context = " ".join(retrieved)
    prompt = f"""
    Use the context below to answer the question.

    Context:
    {context}

    Question:
    {query}
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


In [8]:
#Abstract the Vector DB
class VectorDB:
    """Minimal interface shared by the vector-store backends in this notebook.

    Backends override both methods. The base versions raise so that a missing
    override fails loudly instead of silently returning ``None``.
    """

    def add(self, vectors):
        """Store ``vectors`` in the backend.

        Raises:
            NotImplementedError: Always, unless overridden by a backend.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement add()")

    def search(self, query_vector, k):
        """Look up the ``k`` stored vectors that best match ``query_vector``.

        Raises:
            NotImplementedError: Always, unless overridden by a backend.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement search()")


In [9]:
#faiss_implementation
class FaissVectorDB(VectorDB):
    """VectorDB backend that stores vectors in a FAISS ``IndexFlatL2``."""

    def __init__(self, dimension):
        """Create an empty index for vectors of width ``dimension``."""
        self.index = faiss.IndexFlatL2(dimension)

    def add(self, vectors):
        """Append ``vectors`` (array-like, one row per vector) to the index."""
        self.index.add(np.array(vectors))

    def search(self, query_vector, k):
        """Return the index positions of up to ``k`` matches for ``query_vector``.

        Args:
            query_vector: A single query embedding; it is wrapped in a
                one-row batch before being passed to FAISS.
            k: Maximum number of neighbours to return.

        Returns:
            A 1-D array of positions in the order FAISS returned them. It is
            shorter than ``k`` when the index holds fewer than ``k`` vectors.
        """
        _distances, ids = self.index.search(np.array([query_vector]), k)
        hits = ids[0]
        # FAISS fills missing neighbours with -1. Handing those to callers
        # makes documents[-1] silently resolve to the last document.
        return hits[hits >= 0]


In [10]:
#endee_implementation
class EndeeVectorDB(VectorDB):
    """Placeholder for a future Endee-backed VectorDB; nothing is implemented yet."""

    def __init__(self, api_key):
        """Accept ``api_key`` without using it yet."""
        # TODO: create the Endee client here.
        return None

    def add(self, vectors):
        """Stub: ignores ``vectors`` and returns ``None``."""
        return None

    def search(self, query_vector, k):
        """Stub: performs no search and returns ``None``."""
        return None


In [11]:
# Instantiate the vector store behind the VectorDB interface.
# FAISS is the backend for now; the index is sized to the embedding width.
vector_db = FaissVectorDB(dimension=embeddings.shape[1])

# Add the document embeddings in `documents` order, so the positions returned
# by vector_db.search() can be used to index `documents`.
vector_db.add(embeddings)


In [12]:
#Perform Semantic Search
def semantic_search_pipeline(query, top_k=2):
    """Retrieve the documents that best match ``query`` via ``vector_db``.

    Encodes ``query`` with the notebook-level ``model``, asks the
    notebook-level ``vector_db`` for up to ``top_k`` positions and maps them
    back to ``documents``.

    Args:
        query: Text to search for.
        top_k: Maximum number of documents to return.

    Returns:
        A list of at most ``top_k`` document strings, in the order the backend
        returned their positions.
    """
    query_embedding = model.encode(query)
    indices = vector_db.search(query_embedding, top_k)
    # A backend may report "no neighbour" as -1 (FAISS does so when fewer than
    # top_k vectors are stored). documents[-1] would silently return the last
    # document, so negative positions are skipped.
    return [documents[i] for i in indices if i >= 0]


In [13]:
# Run the pipeline on a sample question and print the hits as a 1-based list.
results = semantic_search_pipeline(
    "How does academic stress affect students?", top_k=2
)

for rank, text in enumerate(results, start=1):
    print(f"{rank}. {text}")


1. Academic stress negatively impacts student mental health and performance.
2. Machine learning models can detect depression risk from behavioral data.


In [18]:
#RAG Output
def simple_rag(query):
    """Simulate a RAG answer by echoing the retrieved context.

    Prints the retrieved context followed by a "Generated Answer:" header,
    then returns a templated string built from that context. No LLM is called.

    Args:
        query: The question passed to ``semantic_search_pipeline``.

    Returns:
        "Based on the retrieved documents, " followed by the space-joined
        retrieved documents.
    """
    context = " ".join(semantic_search_pipeline(query))

    # One print call, same output: header line, context line, blank line,
    # answer header.
    print(" Retrieved Context:", context, "\nGenerated Answer:", sep="\n")

    # Stand-in for an LLM response (the API call is optional).
    return "Based on the retrieved documents, " + context




In [19]:

    print(simple_rag("Explain student stress impacts"))

 Retrieved Context:
Academic stress negatively impacts student mental health and performance. Machine learning models can detect depression risk from behavioral data.

Generated Answer:
Based on the retrieved documents, Academic stress negatively impacts student mental health and performance. Machine learning models can detect depression risk from behavioral data.
