----

## Initial Vector DB Setup

In [59]:
import dotenv
import os
import pandas as pd
import sys

from pathlib import Path

In [60]:
# Load and set environment

# Load variables from a .env file (python-dotenv) into the process environment.
dotenv.load_dotenv()
os.environ['USER_AGENT'] = 'myagent'

# Project root: $PROJECT_HOME when set, otherwise the parent of the current
# working directory.
PROJECT_HOME = Path(os.environ.get('PROJECT_HOME', Path.cwd() / '..')).resolve()

# Make the project's packages importable. The membership check keeps this cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
if str(PROJECT_HOME) not in sys.path:
    sys.path.append(str(PROJECT_HOME))

In [61]:
# from app.databases.vector.milvus import Milvus
from app.databases.vector import VectorDB

# Instantiate the project's vector-store wrapper with its default arguments.
# Keyword arguments deliberately left disabled:
#   auto_id=True
#   drop_old=True   -> drop existing values inside the collection
vector_db = VectorDB()

----

## Getting a list of current documents

In [57]:
def display_docs(query="t", k=100, preview_chars=20):
    """Print a one-line summary of each document returned by a similarity search.

    An empty search is not possible, so a short placeholder query is used to
    pull back up to ``k`` documents.

    Args:
        query: Text passed to ``vector_db.similarity_search``. Defaults to
            ``"t"``, the placeholder this notebook has been using.
        k: Maximum number of documents to retrieve.
        preview_chars: Number of leading characters of each document's text
            to include in the printed line.

    Returns:
        None. One line per document is written to stdout.
    """
    docs = vector_db.similarity_search(query, k=k)

    for doc in docs:
        # Documents without a 'source_id' metadata entry are shown as N/A.
        source_id = doc.metadata.get('source_id', 'N/A')
        print(f"ID: {source_id} | Text: {doc.page_content[:preview_chars]}...")

In [65]:
# Print the ID and a short text preview of each document the search returns.
display_docs()

ID: aesops fables_small.txt | Text: ﻿INTRODUCTION...
ID: aesops fables_small.txt | Text: This is the immortal...
ID: aesops fables_small.txt | Text: The historical Æsop,...
ID: aesops fables_small.txt | Text: _Æsop embodies an ep...
ID: aesops fables_small.txt | Text: miller's third son d...
ID: aesops fables_small.txt | Text: But whatever be fair...
ID: aesops fables_small.txt | Text: Æsop, or Babrius (or...
ID: aesops fables_small.txt | Text: not Æsop's all the a...
ID: aesops fables_small.txt | Text: fighting Christianit...
ID: aesops fables_small.txt | Text: with a race too easi...


----

## Adding text to the vector db (Don't do this on its own!)

In [42]:
from datetime import datetime

class StoreTextWrap:
    """Store text in ``vector_db`` with source metadata, auto-numbering sources.

    Each instance keeps its own counter, so automatically assigned IDs are
    unique only within that instance.
    """

    def __init__(self):
        # Next source ID to hand out when the caller does not supply one.
        self.cur_source_id = 1

    def store(self, text, source_name="chat", source_id=None, modified_at=None):
        """Split ``text`` and store it together with source metadata.

        Args:
            text: The text to store.
            source_name: Value recorded under the ``source_name`` metadata key.
            source_id: Value recorded under ``source_id``. When ``None``, the
                next counter value is used (as a string) and the counter
                advances by one.
            modified_at: A ``datetime`` recorded in ISO 8601 form under
                ``modified_at``. Defaults to the current local time.

        Returns:
            Whatever ``vector_db.split_and_store_text`` returns; this notebook
            awaits that result.
        """
        # Only a missing ID triggers auto-numbering. An explicit but falsy ID
        # (0, "") is passed through instead of being silently replaced.
        if source_id is None:
            source_id = str(self.cur_source_id)
            self.cur_source_id += 1

        modified_at = modified_at or datetime.now()

        return vector_db.split_and_store_text(
            text,
            metadata={
                'source_name': source_name,
                'source_id': source_id,
                'modified_at': modified_at.isoformat(),
            },
        )


# Bind one wrapper's method so the notebook can simply call store_text(...).
store_text = StoreTextWrap().store

In [43]:
# NOTE(review): the output recorded for this cell is a Milvus ParamError
# ("Field source don't match"); presumably the collection expects a `source`
# metadata field that store_text does not send — confirm against the schema.
await store_text("I want to try my hand at RAG")

RPC error: [insert_rows], <ParamError: (code=1, message=Field source don't match in entities[0])>, <Time:{'RPC start': '2024-11-05 12:20:12.893006', 'RPC error': '2024-11-05 12:20:12.906103'}>
Failed to insert batch starting at entity: 0/1


ParamError: <ParamError: (code=1, message=Field source don't match in entities[0])>

----

## Deleting text from the db

In [28]:
def delete_by_id(aid):
    """Forward ``aid`` to ``vector_db.delete_embeddings``.

    Returns whatever that call returns; this notebook awaits the result.
    """
    deletion = vector_db.delete_embeddings(aid)
    return deletion


In [30]:

await delete_by_id(1)e

Unexpected error: [delete], pymilvus.client.prepare.Prepare.delete_request() got multiple values for keyword argument 'consistency_level', <Time: {'RPC start': '2024-10-31 16:14:48.304825', 'Exception': '2024-10-31 16:14:48.305617'}>


MilvusException: <MilvusException: (code=1, message=Unexpected error, message=<pymilvus.client.prepare.Prepare.delete_request() got multiple values for keyword argument 'consistency_level'>)>

## Delete and remake the collection

In [51]:
# Exploration aid: list the attribute names available on vector_db.
dir(vector_db)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_asimilarity_search_with_relevance_scores',
 '_collection_search',
 '_cosine_relevance_score_fn',
 '_create_collection',
 '_create_connection_alias',
 '_create_index',
 '_create_search_params',
 '_euclidean_relevance_score_fn',
 '_extract_fields',
 '_get_index',
 '_get_retriever_tags',
 '_init',
 '_load',
 '_max_inner_product_relevance_score_fn',
 '_metadata_field',
 '_parse_document',
 '_partition_key_field',
 '_primary_field',
 '_select_relevance_score_fn',
 '_similarity_search_with_relevance_scores',
 '_text_field',
 '_vector_field',
 'aadd_documents