In [1]:
# To ensure a clean environment for dependency resolution
# Restart runtime after running this cell once to apply changes
import os
os.environ["COLAB_VM_RESTART_AFTER_EXECUTION"] = "1"

# Install dependencies
# Upgrade pip and related tools to mitigate build errors
!pip install --upgrade pip setuptools wheel

# Clean up existing conflicting packages
!pip uninstall -y numpy transformers # Remove problematic numpy and old transformers

# Install core deep learning framework (PyTorch) which often comes with compatible numpy
!pip install -q torch numpy==1.23.5 # Explicitly install numpy version compatible with older torch

# Set environment variable to force transformers to use PyTorch
os.environ["TRANSFORMERS_BACKEND"] = "pytorch"

# Install other dependencies, now that numpy and torch are in a good state
!pip install -q langchain==0.0.267 sentence-transformers faiss-cpu transformers

# (Optionally) Silence warnings
import logging
logging.getLogger("transformers").setLevel(logging.ERROR)

# Imports
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from transformers import pipeline

# -----------------------------
# 1) Prepare "document" data
# -----------------------------
docs = [
    "RAG stands for Retrieval Augmented Generation.",
    "Embeddings convert text into numerical vectors for semantic similarity.",
    "LangChain helps orchestrate components in a real RAG system.",
    "FAISS is a local vector store library for fast similarity search."
]

# Wrap docs in LangChain Document objects
lc_docs = [Document(page_content=d) for d in docs]

# -----------------------------
# 2) Create embeddings
# -----------------------------
print("Creating embeddings (this may take a moment)...")

embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# -----------------------------
# 3) Build FAISS vector store
# -----------------------------
vectorstore = FAISS.from_documents(lc_docs, embeddings_model)

print("Vectorstore built with docs:", len(docs))

# -----------------------------
# 4) Define retrieval function
# -----------------------------
def simple_retrieval(query, k=2):
    results = vectorstore.similarity_search(query, k=k)
    return [r.page_content for r in results]

# -----------------------------
# 5) Optional: LLM setup
# -----------------------------
print("Loading small LLM (GPT-2 — free)...")
llm = pipeline(
    "text-generation",
    model="gpt2",
    max_new_tokens=100
)

print("\n=== Mini RAG Ready ===\n")

# -----------------------------
# 6) Ask queries interactively
# -----------------------------
def ask_rag(query):
    retrieved = simple_retrieval(query)
    print("Retrieved Context:")
    for i, c in enumerate(retrieved):
        print(f"{i+1}.", c)

    prompt = f"""
Answer the question using the context below:

Context:
{retrieved}

Question:
{query}

Answer:
"""

    resp = llm(prompt)[0]["generated_text"]
    print("\nGenerated Answer:\n", resp)


Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: transformers 4.57.3
Uninstalling transformers-4.57.3:
  Successfully uninstalled transformers-4.57.3
  Installing build dependencies ... [?25l[?25hdone
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mGetting requirements to build wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m No available output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Getting requirements to build wheel ... [?25l[?25herror
[31mERROR: Failed to build 'numpy' when getting requirements to build wheel[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-decision-forests 1.12.0 requires tensorflow==2.19



Creating embeddings (this may take a moment)...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Vectorstore built with docs: 4
Loading small LLM (GPT-2 — free)...


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu



=== Mini RAG Ready ===



In [2]:
ask_rag("What is RAG?")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Retrieved Context:
1. RAG stands for Retrieval Augmented Generation.
2. LangChain helps orchestrate components in a real RAG system.

Generated Answer:
 
Answer the question using the context below:

Context:
['RAG stands for Retrieval Augmented Generation.', 'LangChain helps orchestrate components in a real RAG system.']

Question:
What is RAG?

Answer:

Rag is a C# project that provides a way to build and maintain a system of interconnected components. It provides a way to create a system that is the result of the interaction between components in a real RAG system.

It can be used in a variety of ways.

In the following examples I will provide a single point-to-point solution to a simple question.

Question:

A single point-to-point solution to a simple question.

