In [1]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model once for the whole notebook.
# "all-MiniLM-L6-v2" is a lightweight checkpoint, picked here for speed.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")



In [2]:

# Sample Diverse Documents
# A small in-memory corpus of short English sentences (statements and questions)
# on unrelated topics; it is the collection that gets embedded and searched.
# List order matters: the position of a sentence in this list is the row id
# that the vector index hands back, and results are looked up as documents[id].
documents = [
    "I love programming in Python.",  
    "Machine learning is fascinating.",  
    "I enjoy hiking in the mountains.",  
    "Artificial intelligence is changing the world.",  
    "I like to eat oranges, apples, and grapes.",  
    "What is the capital of France?",  
    "The Eiffel Tower is located in Paris.",  
    "Do you enjoy listening to music?",  
    "The sun rises in the east and sets in the west.",  
    "I prefer tea over coffee.",  
    "How does a neural network work?",  
    "The Great Wall of China is visible from space.",  
    "I find astronomy incredibly interesting.",  
    "What are the benefits of exercising regularly?",  
    "Reading books helps improve vocabulary and knowledge.",  
    "I love watching sci-fi movies.",  
    "The Pacific Ocean is the largest ocean on Earth.",  
    "Why do people enjoy traveling?",  
    "I think dogs are better pets than cats.",  
    "The Mona Lisa was painted by Leonardo da Vinci.",
    "Climate change is one of the biggest challenges facing humanity.",
    "The speed of light is approximately 299,792 kilometers per second.",
    "I enjoy playing chess in my free time.",
    "The human brain contains around 86 billion neurons.",
    "Mount Everest is the tallest mountain in the world.",
    "I prefer rock music over classical music.",
    "The Statue of Liberty was a gift from France to the United States.",
    "The Pythagorean theorem is fundamental in geometry.",
    "Venus is the hottest planet in our solar system.",
    "I love coding in JavaScript for web development.",
    "How do black holes form in space?",
    "What are the main causes of inflation in an economy?",
    "The Amazon rainforest is home to diverse wildlife.",
    "The theory of relativity was developed by Albert Einstein.",
    "The Sahara Desert is the largest hot desert in the world.",
    "SpaceX is developing reusable rocket technology.",
    "How does photosynthesis work in plants?",
    "The Great Barrier Reef is the world's largest coral reef system.",
    "I enjoy solving Sudoku puzzles to keep my mind sharp.",
    "What is the difference between machine learning and deep learning?",
    "Leonardo da Vinci was not only a painter but also an engineer and scientist.",
    "The Wright brothers invented the first successful airplane.",
    "I find quantum mechanics incredibly complex yet fascinating.",
    "What are the benefits of learning a second language?",
    "The internet has revolutionized the way we communicate and work.",
    "Sharks have been around for more than 400 million years.",
    "The human heart beats about 100,000 times a day.",
    "Why do some people experience lucid dreaming?",
    "Pluto was reclassified as a dwarf planet in 2006.",
    "Cryptography plays a vital role in securing online transactions.",
]

# Convert documents into embeddings: one row per document, i.e. shape
# (len(documents), embedding_dim) -- (50, 384) for this corpus and model.
# FAISS only accepts C-contiguous float32 matrices, so normalise the layout
# once here instead of relying on whatever the encoder happens to return
# (this is a no-op, not a copy, when the array is already in that form).
document_embeddings = np.ascontiguousarray(
    model.encode(documents), dtype=np.float32
)

# Create a FAISS index
dimension = document_embeddings.shape[1]  # Embedding size (384)
index = faiss.IndexFlatL2(dimension)  # Exact (brute-force) L2 similarity search
index.add(document_embeddings)  # Row i of the index <-> documents[i]

# Search results are mapped back to text by position, so the index must hold
# exactly one vector per document.
assert index.ntotal == len(documents), "index rows are out of sync with documents"

print(f"Added {len(documents)} documents to the FAISS index!")

Added 50 documents to the FAISS index!


In [6]:

# Query the Collection
query = "which animal is preferred?"
# encode() works on batches, so the single query is wrapped in a list and comes
# back as a one-row matrix; FAISS needs it as C-contiguous float32.
query_embedding = np.ascontiguousarray(model.encode([query]), dtype=np.float32)

# Perform similarity search (find top 5 closest vectors)
n_results = 5
# Never request more neighbours than the index holds: FAISS pads missing hits
# with id -1, and documents[-1] would silently show the last document as a match.
k = min(n_results, index.ntotal)
distances, indices = index.search(query_embedding, k)

# Display Results
# Note: IndexFlatL2 reports *squared* L2 distances; smaller means more similar.
print("\nQuery:", query)
print("Most similar documents:")
for rank, (doc_id, distance) in enumerate(zip(indices[0], distances[0]), start=1):
    if doc_id < 0:  # padding entry, not a real hit
        continue
    print(f"{rank}. {documents[doc_id]} (Distance: {distance:.4f})")



Query: which animal is preferred?
Most similar documents:
1. I think dogs are better pets than cats. (Distance: 1.1566)
2. The Amazon rainforest is home to diverse wildlife. (Distance: 1.2531)
3. I prefer rock music over classical music. (Distance: 1.5937)
4. I like to eat oranges, apples, and grapes. (Distance: 1.6012)
5. What are the benefits of learning a second language? (Distance: 1.6038)
