----

## Initial Vector DB Setup

In [1]:
import dotenv
import os
import sys

from datetime import datetime
from pathlib import Path

In [2]:
# Load and set environment

# Pull variables from a .env file (if one is found) into os.environ.
dotenv.load_dotenv()
os.environ['USER_AGENT'] = 'myagent'

# Project root: $PROJECT_HOME when set, otherwise the parent of the current
# working directory. resolve() turns it into an absolute, normalized path.
PROJECT_HOME = Path(os.environ.get('PROJECT_HOME', Path.cwd() / '..')).resolve()

# Put the project root on the import path. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry each time.
if str(PROJECT_HOME) not in sys.path:
    sys.path.append(str(PROJECT_HOME))

In [3]:
from app.databases.vector import VectorDB

# Client built with no arguments. Keyword arguments kept here for reference,
# currently disabled:
#   auto_id=True
#   drop_old=True   (drop existing values inside the collection)
vector_db = VectorDB()

----

## Getting a list of current documents

In [6]:
def search_vector_db(string_to_search, k=100, preview_chars=20):
    """Print a one-line summary of the documents most similar to a query.

    Runs ``vector_db.similarity_search`` and prints, for every hit, its
    ``source_id`` metadata value (``'N/A'`` when the key is absent) followed
    by the leading characters of its text.

    Args:
        string_to_search: Query text. An empty search isn't possible, so pass
            a placeholder such as "." to list whatever is stored.
        k: Number of docs to retrieve. Defaults to 100.
        preview_chars: How many leading characters of each document's text to
            show. Defaults to 20.

    Returns:
        None. The listing is written to stdout.
    """
    matches = vector_db.similarity_search(string_to_search, k=k)

    # Display the documents
    for doc in matches:
        source_id = doc.metadata.get('source_id', 'N/A')
        print(f"ID: {source_id} | Text: {doc.page_content[:preview_chars]}...")

In [8]:
# An empty search isn't possible, so "." is used as a placeholder query to list what is stored.
search_vector_db(".")

ID: alice-in-wonderland.pdf | Text: THE END...
ID: alice-in-wonderland.pdf | Text: ‘We indeed!’ cried t...
ID: alice-in-wonderland.pdf | Text: history, Alice had n...
ID: alice-in-wonderland.pdf | Text: ‘Back to land again ...
ID: alice-in-wonderland.pdf | Text: perhaps he can’t hel...
ID: alice-in-wonderland.pdf | Text: CHAPTER VII. A MAD T...
ID: alice-in-wonderland.pdf | Text: them out of their wi...
ID: alice-in-wonderland.pdf | Text: ﬂowers and the blade...
ID: alice-in-wonderland.pdf | Text: for it to speak with...
ID: alice-in-wonderland.pdf | Text: CHAPTER VIII. THE QU...
ID: alice-in-wonderland.pdf | Text: CHAPTER II. THE POOL...
ID: alice-in-wonderland.pdf | Text: CHAPTER VII. A MAD T...
ID: alice-in-wonderland.pdf | Text: CHAPTER XII. ALICE’S...
ID: alice-in-wonderland.pdf | Text: CHAPTER II. THE POOL...
ID: alice-in-wonderland.pdf | Text: CHAPTER I. DOWN THE ...
ID: alice-in-wonderland.pdf | Text: CHAPTER VI. PIG AND ...
ID: alice-in-wonderland.pdf | Text: So she sat on, wi

----

## Adding text to the Vector DB (Don't do this on its own!)

In [9]:
class StoreTextWrap:
    """Helper that attaches metadata to text before handing it to ``vector_db``.

    Holds a running counter so that texts stored without an explicit
    ``source_id`` are labelled "1", "2", ... in call order.
    """

    def __init__(self):
        # Next auto-assigned source id (converted to a string when used).
        self.cur_source_id = 1

    def store(self, text, source_name="chat", source_id=None, modified_at=None):
        """Split and store ``text`` together with its source metadata.

        Args:
            text: The text to store.
            source_name: Value recorded under the 'source_name' metadata key.
            source_id: Value recorded under 'source_id'. When falsy, the next
                counter value is used (as a string) and the counter advances.
            modified_at: datetime recorded (ISO formatted) under
                'modified_at'. When falsy, the current local time is used.

        Returns:
            Whatever ``vector_db.split_and_store_text`` returns; this notebook
            awaits that result.
        """
        if source_id:
            chosen_id = source_id
        else:
            chosen_id = str(self.cur_source_id)
            self.cur_source_id += 1

        timestamp = modified_at if modified_at else datetime.now()

        metadata = {
            'source_name': source_name,
            'source_id': chosen_id,
            'modified_at': timestamp.isoformat(),
        }
        return vector_db.split_and_store_text(text, metadata=metadata)


# Single shared wrapper instance, exposed through its bound method.
store_text = StoreTextWrap().store
# Usage: await store_text("some text")

In [11]:
# Store one sample sentence with the default metadata (source_name="chat", auto-assigned source_id).
# The value returned by store_text is awaited here, so this cell relies on notebook top-level await.
await store_text("The best headphones in the world are the Boise QC Edan version")

[453742060807679821]

----

## Deleting text from the Vector DB

In [None]:
def delete_by_id(aid):
    """Ask ``vector_db`` to delete the embeddings identified by ``aid``.

    Args:
        aid: Identifier passed straight through to
            ``vector_db.delete_embeddings``.

    Returns:
        Whatever ``vector_db.delete_embeddings`` returns; this notebook
        awaits that result.
    """
    pending_delete = vector_db.delete_embeddings(aid)
    return pending_delete

In [None]:
# NOTE(review): the store call earlier in this notebook returned an id like 453742060807679821;
# confirm that 1 is an id delete_embeddings can actually match rather than a placeholder.
await delete_by_id(1)

## Delete and remake the collection

In [None]:
# Exploratory placeholder: this only lists vector_db's attributes; it does not delete or recreate anything.
# TODO: replace with the actual drop-and-recreate steps for the collection.
dir(vector_db)