In [None]:
!pip install sentence-transformers faiss-cpu --queit 

In [None]:
import json
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# File locations used by the cells below.
input_path = "cleaned_AI_summarized_tagged.json"  # article records read as input
index_path = "articles_faiss.index"  # where the FAISS index is written
metadata_path = "article_metadata.json"  # where the article records are re-saved

In [None]:
# Load the article records from the JSON file at `input_path`.
# Later cells read the "title", "summary" and "category" fields of each record.
with open(input_path, mode="r", encoding="utf-8") as source_file:
    articles = json.load(source_file)

In [None]:
# Sentence-embedding model shared by the indexing and the search cells.
embedder = SentenceTransformer(
    model_name_or_path="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
# One text per article, formed as "<title>. <summary>".
texts = [". ".join((article["title"], article["summary"])) for article in articles]

# Embed every text in a single call, showing a progress bar while it runs.
embeddings = embedder.encode(texts, show_progress_bar=True)

In [None]:
# The index dimensionality is the length of a single embedding vector.
embedding_dim = len(embeddings[0])

# Flat L2 index; vectors are cast to float32 before being added.
# Row i of the index corresponds to articles[i], since both keep input order.
index = faiss.IndexFlatL2(embedding_dim)
index.add(np.array(embeddings).astype(np.float32))

print("Added", index.ntotal, "vectors to FAISS index")

In [None]:
def search_articles(query, k=5):
    """Print the articles whose embeddings are closest to the query text(s).

    Uses the notebook-level `embedder`, `index` and `articles` objects.

    Args:
        query: A single query string, or a list of query strings.
        k: Maximum number of articles to print per query (default 5).

    Returns:
        None. Title, summary and category of each hit are printed, followed
        by a separator line. When several queries are given, each group of
        results is preceded by a line naming the query.
    """
    queries = query if isinstance(query, list) else [query]
    if not queries:
        # An empty list has nothing to search for; avoid handing FAISS an
        # empty, wrongly-shaped array.
        return

    query_vecs = np.array(embedder.encode(queries)).astype("float32")

    # Honour the caller's k instead of a fixed value.
    _, neighbor_ids = index.search(query_vecs, k)

    show_query_header = len(queries) > 1
    for query_text, row in zip(queries, neighbor_ids):
        if show_query_header:
            print("Query:", query_text)
        for article_id in row:
            # FAISS pads with -1 when fewer than k neighbours exist; a
            # negative index would silently select the last article.
            if article_id < 0:
                continue
            article = articles[int(article_id)]
            print("📰", article["title"])
            print("✂️", article["summary"])
            print("🏷️", article["category"])
            print("-" * 60)

# Example: print the five closest articles for a sample healthcare query.
search_articles(query="AI applications in healthcare and medicine", k=5)

In [None]:
# Persist the FAISS index to `index_path`.
faiss.write_index(index, index_path)

# Save the article records to `metadata_path`; list position i matches
# index row i, so search results can be mapped back to articles.
with open(metadata_path, mode="w", encoding="utf-8") as metadata_file:
    json.dump(articles, metadata_file, indent=2)