In [1]:
import os
import json

# Load the new chunks from a JSON file and derive the `texts` / `metadata`
# lists from them.

# Path of the JSON file holding the new chunks. The original absolute path is
# kept as the default; set NEW_CHUNKS_PATH to point at another file without
# editing the notebook. (The name `folder` is kept for compatibility even
# though it refers to a file.)
folder = os.environ.get(
    "NEW_CHUNKS_PATH",
    r"/Users/mohamad/Documents/GitHub/Personalized-RAG-Chatbot/new_chunks.json",
)

# Fields every chunk must carry; all four are read below.
REQUIRED_FIELDS = ("id", "title", "source", "content")

chunks = []

# Only the parsing needs the file handle, so the file is closed before the
# type dispatch below.
with open(folder, "r", encoding="utf-8") as f:
    data = json.load(f)

# Accept either a JSON array of chunks or a single chunk object.
if isinstance(data, list):
    chunks.extend(data)
elif isinstance(data, dict):
    chunks.append(data)
else:
    print(f"Skipping {folder} as it is not a list or dictionary.")

# Fail early with a message that identifies the offending chunk, instead of a
# bare KeyError/TypeError raised from inside the comprehensions below.
for _pos, _chunk in enumerate(chunks):
    if not isinstance(_chunk, dict):
        raise TypeError(
            f"Chunk #{_pos} in {folder} is a {type(_chunk).__name__}, expected a JSON object."
        )
    _missing = [field for field in REQUIRED_FIELDS if field not in _chunk]
    if _missing:
        raise KeyError(
            f"Chunk #{_pos} in {folder} is missing required field(s): {', '.join(_missing)}"
        )

print(f"✅ Loaded {len(chunks)} chunks from {folder}")

# Raw text of every chunk, in file order.
texts = [c["content"] for c in chunks]

# One metadata record per chunk, in the same order as `texts`; the chunk's
# "content" is stored under the "text" key.
metadata = [
    {
        "id": c["id"],
        "title": c["title"],
        "source": c["source"],
        "text": c["content"]
    }
    for c in chunks
]

✅ Loaded 14 chunks from /Users/mohamad/Documents/GitHub/Personalized-RAG-Chatbot/new_chunks.json


In [None]:
import openai
import numpy as np
from tqdm import tqdm
import os
from dotenv import load_dotenv

# Embed every chunk's text with the OpenAI embeddings API and stack the
# L2-normalised vectors into one float32 matrix, `embeddings`, with one row
# per chunk in the same order as `chunks`.

# Read OPENAI_API_KEY from a local .env file (if any) into the environment.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

texts = [c["content"] for c in chunks]

# np.vstack([]) fails with an unhelpful "need at least one array" message;
# raise the same exception type with a clear explanation, before any API call.
if not texts:
    raise ValueError("No chunks to embed: `chunks` is empty.")

embeddings = []

for _pos, text in enumerate(tqdm(texts, desc="Embedding texts")):
    response = openai.embeddings.create(
        input=text,
        model="text-embedding-3-small"
    )
    emb = np.array(response.data[0].embedding, dtype="float32")

    # Scale to unit L2 norm.
    # NOTE(review): presumably so similarity search on the index behaves like
    # cosine similarity — confirm against the index type.
    norm = np.linalg.norm(emb)
    if not np.isfinite(norm) or norm == 0.0:
        # Dividing by such a norm would yield NaN/inf rows that would then be
        # stored in the index.
        raise ValueError(
            f"Embedding for chunk #{_pos} has an invalid norm ({norm}); cannot normalise."
        )
    emb /= norm
    embeddings.append(emb)

# Shape: (number of chunks, embedding dimension), dtype float32.
embeddings = np.vstack(embeddings)

Embedding texts: 100%|██████████| 14/14 [00:07<00:00,  1.92it/s]


In [3]:
import faiss
import os
import numpy as np

# Append the freshly computed `embeddings` to the FAISS index stored on disk
# and save the result.
#
# NOTE(review): this cell is not idempotent — every re-run appends the same
# vectors again and grows the saved index. Re-run only with new embeddings.
# NOTE(review): the `metadata` list built earlier is not persisted here;
# confirm the metadata store is updated elsewhere so it stays aligned with
# the vector positions in the index.

# Step 1: Load existing index
index_path = "storage/openai_index.faiss"
if not os.path.exists(index_path):
    raise FileNotFoundError(f"FAISS index not found at {index_path}")
index = faiss.read_index(index_path)
print(f"🔄 Loaded existing index with {index.ntotal} vectors.")

# Step 2: Validate the new vectors against the index
# FAISS expects a C-contiguous float32 matrix whose width equals index.d.
new_vectors = np.ascontiguousarray(embeddings, dtype="float32")
if new_vectors.ndim != 2 or new_vectors.shape[1] != index.d:
    raise ValueError(
        f"Embeddings of shape {new_vectors.shape} do not match the index dimension {index.d}."
    )

# Step 3: Add new embeddings to index
index.add(new_vectors)
print(f"➕ Added {len(new_vectors)} new vectors. Total is now {index.ntotal} vectors.")

# Step 4: Save updated index
# Write to a temporary file first and swap it in afterwards, so a failed or
# interrupted write cannot destroy the existing index file.
tmp_index_path = index_path + ".tmp"
faiss.write_index(index, tmp_index_path)
os.replace(tmp_index_path, index_path)
print(f"💾 Updated FAISS index saved with {index.ntotal} total vectors.")


🔄 Loaded existing index with 86 vectors.
➕ Added 14 new vectors. Total is now 100 vectors.
💾 Updated FAISS index saved with 100 total vectors.
