In [24]:
# Prerequisite (run once in the kernel's environment): %pip install sentence-transformers faiss-cpu

In [3]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Step 1: Embeddings
# Load the pretrained sentence-embedding model; later cells encode all text through it.
model = SentenceTransformer(model_name_or_path='all-MiniLM-L6-v2')


In [25]:
# Display the model's module stack (Transformer -> Pooling -> Normalize) as the cell output.
model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

In [26]:
# Encode one plain string to inspect what a raw embedding vector looks like.
# NOTE(review): the bare expression displays the whole array; slicing it
# (or showing only its shape) would keep the notebook output short.
S1 = "This is a sample sentence."
model.encode(S1)


array([ 7.78064057e-02,  7.64624849e-02,  3.77088003e-02,  6.09390028e-02,
        4.88075577e-02,  7.11164670e-03,  2.06367783e-02,  2.86463443e-02,
        6.35214821e-02,  1.49655379e-02,  7.55234212e-02, -5.63334301e-02,
       -4.15603456e-04, -2.09397394e-02,  6.40889108e-02,  2.22409181e-02,
        4.88932133e-02, -5.74298166e-02, -2.99322661e-02,  4.03885916e-02,
        3.07553131e-02,  3.82084623e-02,  4.59011197e-02,  7.84999505e-03,
        4.37707594e-03,  3.39148380e-02, -1.33846076e-02,  5.27541786e-02,
        1.02368459e-01,  2.79691652e-04, -5.75644895e-02,  4.39352132e-02,
        8.33893791e-02,  1.62850209e-02,  7.24605173e-02,  7.32023083e-03,
       -2.33295336e-02,  5.43991886e-02, -5.93922427e-03,  2.92547196e-02,
        4.84408885e-02, -3.95350419e-02,  3.56571004e-02,  7.58862169e-03,
       -1.70144904e-02, -3.67177278e-02, -3.15289497e-02,  9.60673112e-03,
       -1.58553664e-02,  4.69975471e-02, -7.87201971e-02, -3.92243117e-02,
       -1.03012383e-01, -

In [30]:

# Toy corpus: two sentences about AI and productivity plus one unrelated sentence.
docs = [
    "AI automation tools help productivity.",
    "Pizza recipes for beginners.",
    "Optimize workflows using AI systems.",
]

# One embedding row per entry of `docs`; later cells map result ids back to
# `docs` by position, so the order must be kept.
doc_embeddings = model.encode(sentences=docs)

In [31]:

# Dimension of embeddings: length of the last axis of the embedding matrix.
dim = doc_embeddings.shape[-1]
dim

384

In [32]:

# Step 2: Build FAISS index
# Create an L2-distance index sized to the embedding dimension, then add the
# document vectors. Later cells use the ids returned by search as positions
# in `docs`, so row i of the index must correspond to docs[i].
# NOTE(review): the model ends with a Normalize() module, so the vectors are
# presumably unit-length and L2 ranking should match cosine ranking -- confirm.
index = faiss.IndexFlatL2(dim)
index.add(doc_embeddings)


In [8]:
# Step 3: Query
query = "How to increase work productivity using AI?"
# Wrap the query in a list so the encoder returns a batch containing one embedding.
query_emb = model.encode(sentences=[query])

In [33]:
# Step 4: Search top 2 similar documents
# `distances` and `ids` each hold one row per query; row 0 belongs to the
# single query encoded above, and the ids are positions in `docs`.
# NOTE(review): FAISS documents IndexFlatL2 scores as squared L2 distances --
# confirm before interpreting their magnitude.
distances, ids = index.search(query_emb, k=2)

In [34]:
print("Results:")
# Map each returned id back to its document text (ids are positions in `docs`).
for i in ids[0]:
    # FAISS pads the id row with -1 when it finds fewer than k neighbours;
    # docs[-1] would silently print the last document, so skip those slots.
    if i < 0:
        continue
    print(docs[i])


Results:
AI automation tools help productivity.
Optimize workflows using AI systems.


In [35]:
import os

# Persist the index to disk. faiss.write_index does not create missing parent
# folders, so make sure the target directory exists first; otherwise this
# cell fails on a fresh checkout.
os.makedirs("./faiss_index", exist_ok=True)
faiss.write_index(index, "./faiss_index/my_faiss.index")

In [36]:
import pickle
# Persist the document texts next to the index file. Positions in this list
# are the ids the index returns, so the two files must be kept in sync.
with open("./faiss_index/docs_metadata.pkl", mode="wb") as metadata_file:
    metadata_file.write(pickle.dumps(docs))

print("Index + metadata saved!")

Index + metadata saved!


## Re-index

### New Documents

In [37]:
# Second batch: 20 short sentences on mixed topics, used below to demonstrate
# appending documents to an index that was already saved to disk.
docs_new = [
    "AI can automate repetitive tasks.",
    "Machine learning models are improving healthcare outcomes.",
    "The stock market crashed yesterday due to global tension.",
    "Cats are very playful animals.",
    "Python is widely used in data science.",
    "Electric vehicles are becoming more popular.",
    "The weather today is sunny with mild winds.",
    "Football is the most popular sport in the world.",
    "Quantum computing is the future of cryptography.",
    "Deep learning helps in image recognition.",
    "The restaurant serves Italian and Mexican food.",
    "Traveling to Japan is on my wishlist.",
    "Reinforcement learning trains agents via rewards.",
    "Natural Language Processing enables chatbots.",
    "The new iPhone was launched with major upgrades.",
    "Meditation improves mental health.",
    "Blockchain technology ensures secure transactions.",
    "Yoga enhances flexibility and strength.",
    "Cloud computing allows scalable applications.",
    "Climate change impacts global ecosystems."
]

### Existing Index

In [38]:
# Reload index
loaded_index = faiss.read_index("./faiss_index/my_faiss.index")

# Reload metadata
# NOTE: pickle.load can execute arbitrary code from the file; only load
# metadata files that this notebook (or another trusted source) wrote.
with open("./faiss_index/docs_metadata.pkl", "rb") as f:
    metadata = pickle.load(f)

# Search ids are used as positions in `metadata`, so the two files must hold
# the same number of entries. Fail fast here rather than return wrong texts.
assert loaded_index.ntotal == len(metadata), (
    f"index has {loaded_index.ntotal} vectors but metadata has {len(metadata)} entries"
)

print("Loaded index size:", loaded_index.ntotal)
print("Loaded metadata items:", len(metadata))

Loaded index size: 3
Loaded metadata items: 3


In [39]:
print("New docs:", len(docs_new))

# Encode new docs
emb_new = model.encode(docs_new)

New docs: 20


In [40]:
# Add to existing index
# index.add() appends unconditionally, so re-running this cell would store the
# same vectors twice and shift later ids away from their metadata entries.
# Append only while the batch is still pending: the index has not outgrown the
# metadata list, and the metadata list does not already contain the new docs.
batch_pending = (
    loaded_index.ntotal <= len(metadata)
    and not all(doc in metadata for doc in docs_new)
)
if batch_pending:
    loaded_index.add(emb_new)
else:
    print("Skipped: the new documents were already added to the index.")

In [41]:
# Update metadata
# Extend only when the index is ahead of the metadata by exactly this batch,
# so re-running the cell cannot append the same texts a second time.
if loaded_index.ntotal - len(metadata) == len(docs_new):
    metadata.extend(docs_new)

# Search ids are used as positions in `metadata`; stop here if the two have
# drifted apart instead of silently returning the wrong texts later.
assert loaded_index.ntotal == len(metadata), (
    f"index has {loaded_index.ntotal} vectors but metadata has {len(metadata)} entries"
)

print("Updated index size:", loaded_index.ntotal)
print("Updated metadata:", len(metadata))

Updated index size: 23
Updated metadata: 23


In [44]:
# Query the enlarged index and print the k nearest documents with their scores.
query = "How can AI help in healthcare?"
query_emb = model.encode([query])

k = 3
distances, ids = loaded_index.search(query_emb, k)

print("\nTop Results:")
# Row 0 of `ids` / `distances` belongs to the single query encoded above.
for rank, idx in enumerate(ids[0]):
    # FAISS fills unused result slots with -1 when the index holds fewer than
    # k vectors; metadata[-1] would silently print the last document instead.
    if idx < 0:
        continue
    print(f"{rank+1}. {metadata[idx]}  (distance={distances[0][rank]:.4f})")


Top Results:
1. AI can automate repetitive tasks.  (distance=0.8343)
2. Machine learning models are improving healthcare outcomes.  (distance=0.8885)
3. AI automation tools help productivity.  (distance=0.9372)
