In [None]:
from ragdoll.embeddings import get_embedding_model  
from ragdoll.vector_stores import get_vector_store
import os

# Demo: build a small FAISS vector store from in-memory documents, save it to
# disk, load it back, and run a similarity search against the reloaded copy.
from langchain_core.documents import Document

# Embedding model handed to both the store-creation and the store-loading call.
embedding_model = get_embedding_model("text-embedding-ada-002")
print(f"Embedding model created: {embedding_model}")

# A few tiny test documents so the example needs no external data.
documents = [
    Document(page_content="This is a test document about AI"),
    Document(page_content="Vector databases store embeddings for semantic search"),
    Document(page_content="RAG systems combine retrieval with generation"),
]

# Create the persistence directory up front (no error if it already exists).
persist_dir = "./data/vector_stores/my_faiss"
os.makedirs(persist_dir, exist_ok=True)

print("Creating vector store...")
vector_store = get_vector_store(
    store_type="faiss",
    embedding_model=embedding_model,
    documents=documents,
    persist_directory=persist_dir,
)
print(f"Vector store created: {vector_store}")

# Explicit save. NOTE(review): get_vector_store may already have written the
# index when given persist_directory -- confirm; if so this simply rewrites it.
# The guard skips the save when no store came back or when the returned object
# has no save_local method.
if vector_store is not None and hasattr(vector_store, "save_local"):
    print(f"Saving vector store to {persist_dir}")
    vector_store.save_local(persist_dir)
    print("Vector store saved successfully")

# Later, load it back.
# SECURITY: allow_dangerous_deserialization=True opts in to pickle-based
# loading of the saved store (presumably forwarded to LangChain's
# FAISS.load_local -- confirm against get_vector_store). Unpickling can execute
# arbitrary code, so only enable this for index directories written by trusted
# code, such as the one this script has just saved. Never point it at files
# obtained from an untrusted source.
print(f"Attempting to load vector store from {persist_dir}")
loaded_store = get_vector_store(
    store_type="faiss",
    embedding_model=embedding_model,
    persist_directory=persist_dir,
    allow_dangerous_deserialization=True,
)
print(f"Loaded store: {loaded_store}")

# Query the reloaded store; a None result is treated as a failed load.
if loaded_store is not None:
    query = "How do RAG systems work?"
    print(f"Performing search with query: {query}")
    results = loaded_store.similarity_search(query, k=2)
    for doc in results:
        print(doc.page_content)
        print("-" * 50)
else:
    print("Failed to load vector store. Please check the error messages above.")

Embedding model created: client=<openai.resources.embeddings.Embeddings object at 0x0000022E7521D710> async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x0000022E752B9B90> model='text-embedding-ada-002' dimensions=None deployment='text-embedding-ada-002' openai_api_version=None openai_api_base=None openai_api_type=None openai_proxy=None embedding_ctx_length=8191 openai_api_key=SecretStr('**********') openai_organization=None allowed_special=None disallowed_special=None chunk_size=1000 max_retries=2 request_timeout=None headers=None tiktoken_enabled=True tiktoken_model_name=None show_progress_bar=False model_kwargs={} skip_empty=False default_headers=None default_query=None retry_min_seconds=4 retry_max_seconds=20 http_client=None http_async_client=None check_embedding_ctx_length=True
Creating vector store...
Vector store created: <langchain_community.vectorstores.faiss.FAISS object at 0x0000022E64FDD090>
Saving vector store to ./data/vector_stores/my_faiss
Vector stor