Skip to content

Commit

Permalink
Update README.md
Browse files Browse the repository at this point in the history
  • Loading branch information
kyegomez committed Jun 17, 2024
1 parent 5a3fb81 commit b144669
Showing 1 changed file with 349 additions and 5 deletions.
354 changes: 349 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,16 +112,182 @@ agent.run("Generate a 10,000 word blog on health and wellness.")

```python
import os

from dotenv import load_dotenv

# Import the OpenAIChat model and the Agent struct
from swarms import Agent, OpenAIChat
from playground.memory.chromadb_example import ChromaDB
import logging
import os
import uuid
from typing import Optional
import chromadb
from swarms.utils.data_to_text import data_to_text
from swarms.utils.markdown_message import display_markdown_message
from swarms.memory.base_vectordb import BaseVectorDatabase

# Load the environment variables
# Load environment variables
load_dotenv()


# Results storage using local ChromaDB
class ChromaDB(BaseVectorDatabase):
"""
ChromaDB database
Args:
metric (str): The similarity metric to use.
output (str): The name of the collection to store the results in.
limit_tokens (int, optional): The maximum number of tokens to use for the query. Defaults to 1000.
n_results (int, optional): The number of results to retrieve. Defaults to 2.
Methods:
add: _description_
query: _description_
Examples:
>>> chromadb = ChromaDB(
>>> metric="cosine",
>>> output="results",
>>> llm="gpt3",
>>> openai_api_key=OPENAI_API_KEY,
>>> )
>>> chromadb.add(task, result, result_id)
"""

def __init__(
self,
metric: str = "cosine",
output_dir: str = "swarms",
limit_tokens: Optional[int] = 1000,
n_results: int = 3,
docs_folder: str = None,
verbose: bool = False,
*args,
**kwargs,
):
self.metric = metric
self.output_dir = output_dir
self.limit_tokens = limit_tokens
self.n_results = n_results
self.docs_folder = docs_folder
self.verbose = verbose

# Disable ChromaDB logging
if verbose:
logging.getLogger("chromadb").setLevel(logging.INFO)

# Create Chroma collection
chroma_persist_dir = "chroma"
chroma_client = chromadb.PersistentClient(
settings=chromadb.config.Settings(
persist_directory=chroma_persist_dir,
),
*args,
**kwargs,
)

# Create ChromaDB client
self.client = chromadb.Client()

# Create Chroma collection
self.collection = chroma_client.get_or_create_collection(
name=output_dir,
metadata={"hnsw:space": metric},
*args,
**kwargs,
)
display_markdown_message(
"ChromaDB collection created:"
f" {self.collection.name} with metric: {self.metric} and"
f" output directory: {self.output_dir}"
)

# If docs
if docs_folder:
display_markdown_message(
f"Traversing directory: {docs_folder}"
)
self.traverse_directory()

def add(
self,
document: str,
*args,
**kwargs,
):
"""
Add a document to the ChromaDB collection.
Args:
document (str): The document to be added.
condition (bool, optional): The condition to check before adding the document. Defaults to True.
Returns:
str: The ID of the added document.
"""
try:
doc_id = str(uuid.uuid4())
self.collection.add(
ids=[doc_id],
documents=[document],
*args,
**kwargs,
)
print("-----------------")
print("Document added successfully")
print("-----------------")
return doc_id
except Exception as e:
raise Exception(f"Failed to add document: {str(e)}")

def query(
self,
query_text: str,
*args,
**kwargs,
):
"""
Query documents from the ChromaDB collection.
Args:
query (str): The query string.
n_docs (int, optional): The number of documents to retrieve. Defaults to 1.
Returns:
dict: The retrieved documents.
"""
try:
docs = self.collection.query(
query_texts=[query_text],
n_results=self.n_results,
*args,
**kwargs,
)["documents"]
return docs[0]
except Exception as e:
raise Exception(f"Failed to query documents: {str(e)}")

def traverse_directory(self):
"""
Traverse through every file in the given directory and its subdirectories,
and return the paths of all files.
Parameters:
- directory_name (str): The name of the directory to traverse.
Returns:
- list: A list of paths to each file in the directory and its subdirectories.
"""
added_to_db = False

for root, dirs, files in os.walk(self.docs_folder):
for file in files:
file_path = os.path.join(root, file) # Change this line
_, ext = os.path.splitext(file_path)
data = data_to_text(file_path)
added_to_db = self.add(str(data))
print(f"{file_path} added to Database")

return added_to_db

# Get the API key from the environment
api_key = os.environ.get("OPENAI_API_KEY")

Expand Down Expand Up @@ -163,7 +329,185 @@ agent.run("Generate a 10,000 word blog on health and wellness.")
An LLM equipped with long term memory and tools, a full stack agent capable of automating all and any digital tasks given a good prompt.

```python
from swarms import Agent, ChromaDB, OpenAIChat
import logging
import os
import uuid
from typing import Optional

import chromadb
from dotenv import load_dotenv

from swarms.utils.data_to_text import data_to_text
from swarms.utils.markdown_message import display_markdown_message
from swarms.memory.base_vectordb import BaseVectorDatabase
from swarms import Agent, OpenAIChat


# Load environment variables
load_dotenv()



# Results storage using local ChromaDB
class ChromaDB(BaseVectorDatabase):
"""
ChromaDB database
Args:
metric (str): The similarity metric to use.
output (str): The name of the collection to store the results in.
limit_tokens (int, optional): The maximum number of tokens to use for the query. Defaults to 1000.
n_results (int, optional): The number of results to retrieve. Defaults to 2.
Methods:
add: _description_
query: _description_
Examples:
>>> chromadb = ChromaDB(
>>> metric="cosine",
>>> output="results",
>>> llm="gpt3",
>>> openai_api_key=OPENAI_API_KEY,
>>> )
>>> chromadb.add(task, result, result_id)
"""

def __init__(
self,
metric: str = "cosine",
output_dir: str = "swarms",
limit_tokens: Optional[int] = 1000,
n_results: int = 3,
docs_folder: str = None,
verbose: bool = False,
*args,
**kwargs,
):
self.metric = metric
self.output_dir = output_dir
self.limit_tokens = limit_tokens
self.n_results = n_results
self.docs_folder = docs_folder
self.verbose = verbose

# Disable ChromaDB logging
if verbose:
logging.getLogger("chromadb").setLevel(logging.INFO)

# Create Chroma collection
chroma_persist_dir = "chroma"
chroma_client = chromadb.PersistentClient(
settings=chromadb.config.Settings(
persist_directory=chroma_persist_dir,
),
*args,
**kwargs,
)

# Create ChromaDB client
self.client = chromadb.Client()

# Create Chroma collection
self.collection = chroma_client.get_or_create_collection(
name=output_dir,
metadata={"hnsw:space": metric},
*args,
**kwargs,
)
display_markdown_message(
"ChromaDB collection created:"
f" {self.collection.name} with metric: {self.metric} and"
f" output directory: {self.output_dir}"
)

# If docs
if docs_folder:
display_markdown_message(
f"Traversing directory: {docs_folder}"
)
self.traverse_directory()

def add(
self,
document: str,
*args,
**kwargs,
):
"""
Add a document to the ChromaDB collection.
Args:
document (str): The document to be added.
condition (bool, optional): The condition to check before adding the document. Defaults to True.
Returns:
str: The ID of the added document.
"""
try:
doc_id = str(uuid.uuid4())
self.collection.add(
ids=[doc_id],
documents=[document],
*args,
**kwargs,
)
print("-----------------")
print("Document added successfully")
print("-----------------")
return doc_id
except Exception as e:
raise Exception(f"Failed to add document: {str(e)}")

def query(
self,
query_text: str,
*args,
**kwargs,
):
"""
Query documents from the ChromaDB collection.
Args:
query (str): The query string.
n_docs (int, optional): The number of documents to retrieve. Defaults to 1.
Returns:
dict: The retrieved documents.
"""
try:
docs = self.collection.query(
query_texts=[query_text],
n_results=self.n_results,
*args,
**kwargs,
)["documents"]
return docs[0]
except Exception as e:
raise Exception(f"Failed to query documents: {str(e)}")

def traverse_directory(self):
"""
Traverse through every file in the given directory and its subdirectories,
and return the paths of all files.
Parameters:
- directory_name (str): The name of the directory to traverse.
Returns:
- list: A list of paths to each file in the directory and its subdirectories.
"""
added_to_db = False

for root, dirs, files in os.walk(self.docs_folder):
for file in files:
file_path = os.path.join(root, file) # Change this line
_, ext = os.path.splitext(file_path)
data = data_to_text(file_path)
added_to_db = self.add(str(data))
print(f"{file_path} added to Database")

return added_to_db


# Making an instance of the ChromaDB class
memory = ChromaDB(
Expand Down

0 comments on commit b144669

Please sign in to comment.