In [2]:
from sentence_transformers import SentenceTransformer

Goal: See how text is turned into numbers (embedding vectors). These vectors are the input to a vector store such as FAISS or Chroma.

In [3]:
# Step 1: fetch the pretrained Sentence Transformer checkpoint
model = SentenceTransformer('all-MiniLM-L6-v2')

# Input sentences that will be embedded
sentences = [
    'The weather is lovely today.',
    "It's so sunny outside!",
    'He drove to the stadium.',
]

# Step 2: model.encode() maps every sentence to one embedding vector
embeddings = model.encode(sentences)
print(embeddings.shape)
# Expected: (3, 384)

# Step 3: pairwise similarity of every embedding against every other one
similarities = model.similarity(embeddings, embeddings)
print(similarities)
# Expected:
# tensor([[1.0000, 0.6660, 0.1046],
#         [0.6660, 1.0000, 0.1411],
#         [0.1046, 0.1411, 1.0000]])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

(3, 384)
tensor([[1.0000, 0.6660, 0.1046],
        [0.6660, 1.0000, 0.1411],
        [0.1046, 0.1411, 1.0000]])


In [4]:
pip install faiss-cpu sentence-transformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

Try local vector search with FAISS

In [1]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Tiny corpus to index
texts = ["capital of italy", "capital of france", "capital of USA"]

# Embed the corpus; convert_to_numpy=True asks encode() for a NumPy array
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(texts, convert_to_numpy=True)
print(embeddings)
print("Shape: ", embeddings.shape)
print("Max value", embeddings.max())
print("Min value", embeddings.min())

# Build a flat L2 index sized to the embedding width, then load the vectors
# (cast to float32 before they are handed to FAISS)
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(embeddings.astype("float32"))
print(index)

# Query the index and ask for as many neighbours as there are texts,
# so every entry comes back ranked by distance
l = len(texts)
query = model.encode(["Rome"], convert_to_numpy=True).astype("float32")
D, I = index.search(query, k=l)

# Row 0 belongs to the single query; column 0 is its closest hit
best_idx, best_dist = I[0][0], D[0][0]
print("Best match:", texts[best_idx])
print("Distance:", best_dist)
print("Index:", best_idx)

# Full ranking
print(f"Top {l} matches:")
for i in range(l):
    print(f"{i+1}. {texts[I[0][i]]} (distance: {D[0][i]:.4f})")

#NICE :)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


[[-0.0050227   0.05689619 -0.01852141 ... -0.01030463  0.00571991
  -0.03225598]
 [ 0.06731305  0.0148501   0.00798476 ...  0.05302893  0.07453572
  -0.03802058]
 [ 0.11803867 -0.03280355 -0.0135524  ... -0.05184991  0.08668697
  -0.01345322]]
Shape:  (3, 384)
Max value 0.16379766
Min value -0.14781478
<faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x7d4b285dda70> >
Best match: capital of italy
Distance: 0.9608064
Index: 0
Top 3 matches:
1. capital of italy (distance: 0.9608)
2. capital of USA (distance: 1.3828)
3. capital of france (distance: 1.3973)


Add RAG (Retrieval-Augmented Generation: retrieval + generation)

In [4]:
pip install transformers accelerate



In [9]:
# RAG step: pass the best FAISS hit to a Hugging Face text-generation pipeline as context.
from transformers import pipeline

# Build the generator only if this kernel has not created it yet. Without this
# the cell raises NameError on `rag_pipeline` after Restart & Run All; the guard
# also avoids reloading the model every time the cell is re-run.
if "rag_pipeline" not in globals():
    rag_pipeline = pipeline("text-generation", model="tiiuae/falcon-7b-instruct", device_map="auto")

# `texts` and `I` are produced by the FAISS search cell; I[0][0] is the position
# in `texts` of the closest match for the query.
context = texts[I[0][0]]
question = "What is the capital of Italy?"
prompt = f"Context: {context}\n\nQuestion: {question}\nAnswer:"

# The tokenizer's EOS id is passed as the pad token id for generation.
response = rag_pipeline(prompt, max_new_tokens=100, pad_token_id=rag_pipeline.tokenizer.eos_token_id)[0]['generated_text']
# Avoid surrogate characters in Jupyter: drop anything that cannot be encoded as UTF-8.
safe_response = response.encode('utf-8', 'ignore').decode('utf-8')

print("\n\U0001F50E Generated Answer:\n", safe_response)



🔎 Generated Answer:
 Context: capital of italy

Question: What is the capital of Puglia?
Answer: The capital of Puglia is Bari.
