Update README.md

kyegomez · Jun 17, 2024 · b144669 · b144669
1 parent 5a3fb81
commit b144669
Showing 1 changed file with 349 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -112,16 +112,182 @@ agent.run("Generate a 10,000 word blog on health and wellness.")
 
 ```python
 import os
-
 from dotenv import load_dotenv
-
-# Import the OpenAIChat model and the Agent struct
 from swarms import Agent, OpenAIChat
 from playground.memory.chromadb_example import ChromaDB
+import logging
+import os
+import uuid
+from typing import Optional
+import chromadb
+from swarms.utils.data_to_text import data_to_text
+from swarms.utils.markdown_message import display_markdown_message
+from swarms.memory.base_vectordb import BaseVectorDatabase
 
-# Load the environment variables
+# Load environment variables
 load_dotenv()
 
+
+# Results storage using local ChromaDB
+class ChromaDB(BaseVectorDatabase):
+    """
+
+    ChromaDB database
+
+    Args:
+        metric (str): The similarity metric to use.
+        output (str): The name of the collection to store the results in.
+        limit_tokens (int, optional): The maximum number of tokens to use for the query. Defaults to 1000.
+        n_results (int, optional): The number of results to retrieve. Defaults to 2.
+
+    Methods:
+        add: _description_
+        query: _description_
+
+    Examples:
+        >>> chromadb = ChromaDB(
+        >>>     metric="cosine",
+        >>>     output="results",
+        >>>     llm="gpt3",
+        >>>     openai_api_key=OPENAI_API_KEY,
+        >>> )
+        >>> chromadb.add(task, result, result_id)
+    """
+
+    def __init__(
+        self,
+        metric: str = "cosine",
+        output_dir: str = "swarms",
+        limit_tokens: Optional[int] = 1000,
+        n_results: int = 3,
+        docs_folder: str = None,
+        verbose: bool = False,
+        *args,
+        **kwargs,
+    ):
+        self.metric = metric
+        self.output_dir = output_dir
+        self.limit_tokens = limit_tokens
+        self.n_results = n_results
+        self.docs_folder = docs_folder
+        self.verbose = verbose
+
+        # Disable ChromaDB logging
+        if verbose:
+            logging.getLogger("chromadb").setLevel(logging.INFO)
+
+        # Create Chroma collection
+        chroma_persist_dir = "chroma"
+        chroma_client = chromadb.PersistentClient(
+            settings=chromadb.config.Settings(
+                persist_directory=chroma_persist_dir,
+            ),
+            *args,
+            **kwargs,
+        )
+
+        # Create ChromaDB client
+        self.client = chromadb.Client()
+
+        # Create Chroma collection
+        self.collection = chroma_client.get_or_create_collection(
+            name=output_dir,
+            metadata={"hnsw:space": metric},
+            *args,
+            **kwargs,
+        )
+        display_markdown_message(
+            "ChromaDB collection created:"
+            f" {self.collection.name} with metric: {self.metric} and"
+            f" output directory: {self.output_dir}"
+        )
+
+        # If docs
+        if docs_folder:
+            display_markdown_message(
+                f"Traversing directory: {docs_folder}"
+            )
+            self.traverse_directory()
+
+    def add(
+        self,
+        document: str,
+        *args,
+        **kwargs,
+    ):
+        """
+        Add a document to the ChromaDB collection.
+
+        Args:
+            document (str): The document to be added.
+            condition (bool, optional): The condition to check before adding the document. Defaults to True.
+
+        Returns:
+            str: The ID of the added document.
+        """
+        try:
+            doc_id = str(uuid.uuid4())
+            self.collection.add(
+                ids=[doc_id],
+                documents=[document],
+                *args,
+                **kwargs,
+            )
+            print("-----------------")
+            print("Document added successfully")
+            print("-----------------")
+            return doc_id
+        except Exception as e:
+            raise Exception(f"Failed to add document: {str(e)}")
+
+    def query(
+        self,
+        query_text: str,
+        *args,
+        **kwargs,
+    ):
+        """
+        Query documents from the ChromaDB collection.
+
+        Args:
+            query (str): The query string.
+            n_docs (int, optional): The number of documents to retrieve. Defaults to 1.
+
+        Returns:
+            dict: The retrieved documents.
+        """
+        try:
+            docs = self.collection.query(
+                query_texts=[query_text],
+                n_results=self.n_results,
+                *args,
+                **kwargs,
+            )["documents"]
+            return docs[0]
+        except Exception as e:
+            raise Exception(f"Failed to query documents: {str(e)}")
+
+    def traverse_directory(self):
+        """
+        Traverse through every file in the given directory and its subdirectories,
+        and return the paths of all files.
+        Parameters:
+        - directory_name (str): The name of the directory to traverse.
+        Returns:
+        - list: A list of paths to each file in the directory and its subdirectories.
+        """
+        added_to_db = False
+
+        for root, dirs, files in os.walk(self.docs_folder):
+            for file in files:
+                file_path = os.path.join(root, file)  # Change this line
+                _, ext = os.path.splitext(file_path)
+                data = data_to_text(file_path)
+                added_to_db = self.add(str(data))
+                print(f"{file_path} added to Database")
+
+        return added_to_db
+
 # Get the API key from the environment
 api_key = os.environ.get("OPENAI_API_KEY")
 
@@ -163,7 +329,185 @@ agent.run("Generate a 10,000 word blog on health and wellness.")
 An LLM equipped with long term memory and tools, a full stack agent capable of automating all and any digital tasks given a good prompt.
 
 ```python
-from swarms import Agent, ChromaDB, OpenAIChat
+import logging
+import os
+import uuid
+from typing import Optional
+
+import chromadb
+from dotenv import load_dotenv
+
+from swarms.utils.data_to_text import data_to_text
+from swarms.utils.markdown_message import display_markdown_message
+from swarms.memory.base_vectordb import BaseVectorDatabase
+from swarms import Agent, OpenAIChat
+
+
+# Load environment variables
+load_dotenv()
+
+
+
+# Results storage using local ChromaDB
+class ChromaDB(BaseVectorDatabase):
+    """
+
+    ChromaDB database
+
+    Args:
+        metric (str): The similarity metric to use.
+        output (str): The name of the collection to store the results in.
+        limit_tokens (int, optional): The maximum number of tokens to use for the query. Defaults to 1000.
+        n_results (int, optional): The number of results to retrieve. Defaults to 2.
+
+    Methods:
+        add: _description_
+        query: _description_
+
+    Examples:
+        >>> chromadb = ChromaDB(
+        >>>     metric="cosine",
+        >>>     output="results",
+        >>>     llm="gpt3",
+        >>>     openai_api_key=OPENAI_API_KEY,
+        >>> )
+        >>> chromadb.add(task, result, result_id)
+    """
+
+    def __init__(
+        self,
+        metric: str = "cosine",
+        output_dir: str = "swarms",
+        limit_tokens: Optional[int] = 1000,
+        n_results: int = 3,
+        docs_folder: str = None,
+        verbose: bool = False,
+        *args,
+        **kwargs,
+    ):
+        self.metric = metric
+        self.output_dir = output_dir
+        self.limit_tokens = limit_tokens
+        self.n_results = n_results
+        self.docs_folder = docs_folder
+        self.verbose = verbose
+
+        # Disable ChromaDB logging
+        if verbose:
+            logging.getLogger("chromadb").setLevel(logging.INFO)
+
+        # Create Chroma collection
+        chroma_persist_dir = "chroma"
+        chroma_client = chromadb.PersistentClient(
+            settings=chromadb.config.Settings(
+                persist_directory=chroma_persist_dir,
+            ),
+            *args,
+            **kwargs,
+        )
+
+        # Create ChromaDB client
+        self.client = chromadb.Client()
+
+        # Create Chroma collection
+        self.collection = chroma_client.get_or_create_collection(
+            name=output_dir,
+            metadata={"hnsw:space": metric},
+            *args,
+            **kwargs,
+        )
+        display_markdown_message(
+            "ChromaDB collection created:"
+            f" {self.collection.name} with metric: {self.metric} and"
+            f" output directory: {self.output_dir}"
+        )
+
+        # If docs
+        if docs_folder:
+            display_markdown_message(
+                f"Traversing directory: {docs_folder}"
+            )
+            self.traverse_directory()
+
+    def add(
+        self,
+        document: str,
+        *args,
+        **kwargs,
+    ):
+        """
+        Add a document to the ChromaDB collection.
+
+        Args:
+            document (str): The document to be added.
+            condition (bool, optional): The condition to check before adding the document. Defaults to True.
+
+        Returns:
+            str: The ID of the added document.
+        """
+        try:
+            doc_id = str(uuid.uuid4())
+            self.collection.add(
+                ids=[doc_id],
+                documents=[document],
+                *args,
+                **kwargs,
+            )
+            print("-----------------")
+            print("Document added successfully")
+            print("-----------------")
+            return doc_id
+        except Exception as e:
+            raise Exception(f"Failed to add document: {str(e)}")
+
+    def query(
+        self,
+        query_text: str,
+        *args,
+        **kwargs,
+    ):
+        """
+        Query documents from the ChromaDB collection.
+
+        Args:
+            query (str): The query string.
+            n_docs (int, optional): The number of documents to retrieve. Defaults to 1.
+
+        Returns:
+            dict: The retrieved documents.
+        """
+        try:
+            docs = self.collection.query(
+                query_texts=[query_text],
+                n_results=self.n_results,
+                *args,
+                **kwargs,
+            )["documents"]
+            return docs[0]
+        except Exception as e:
+            raise Exception(f"Failed to query documents: {str(e)}")
+
+    def traverse_directory(self):
+        """
+        Traverse through every file in the given directory and its subdirectories,
+        and return the paths of all files.
+        Parameters:
+        - directory_name (str): The name of the directory to traverse.
+        Returns:
+        - list: A list of paths to each file in the directory and its subdirectories.
+        """
+        added_to_db = False
+
+        for root, dirs, files in os.walk(self.docs_folder):
+            for file in files:
+                file_path = os.path.join(root, file)  # Change this line
+                _, ext = os.path.splitext(file_path)
+                data = data_to_text(file_path)
+                added_to_db = self.add(str(data))
+                print(f"{file_path} added to Database")
+
+        return added_to_db
+
 
 # Making an instance of the ChromaDB class
 memory = ChromaDB(