In [None]:
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from dotenv import load_dotenv, find_dotenv
import os


In [None]:
# Search for a .env file and load the variables it defines into the process
# environment, so that later os.getenv(...) lookups can see them.
load_dotenv(find_dotenv())

In [None]:
# Read the OpenAI key from the environment; this is None when the variable is unset.
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [None]:
# All paths are resolved relative to the directory the notebook is run from.
base_path = os.getcwd()
vectordb_path = os.path.join(
    base_path,
    "..",
    "skills-library",
    "vectordb",
)

# Open the Chroma database stored on disk at vectordb_path.
client = chromadb.PersistentClient(path=vectordb_path)

In [None]:
# Sanity check that the client is still connected: heartbeat() returns a
# nanosecond timestamp.
client.heartbeat()


In [None]:
# Embedding function that calls OpenAI with the key read from the environment.
embedding_model_name = "text-embedding-3-large"
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    model_name=embedding_model_name,
    api_key=openai_api_key,
)

In [None]:
# List the collections that already exist in this database and display them.
collections = client.list_collections()
collections

In [None]:
# Open the "default" collection, creating it first if it is not there yet.
# The "hnsw:space" entry asks for cosine distance (see Chroma's collection
# configuration docs); documents are embedded with openai_ef.
collection_metadata = {"hnsw:space": "cosine"}
collection = client.get_or_create_collection(
    name="default",
    embedding_function=openai_ef,
    metadata=collection_metadata,
)

In [None]:
# Quick look at a few of the records currently stored in the collection.
collection.peek()

In [None]:
# Turn every "*.txt" file in the description folder into one record:
#   id       -> file name without its extension
#   document -> full text of the file (read as UTF-8)
#   metadata -> {"command": <id>}

# Folder that holds the description text files
folder_path = os.path.join(base_path, "..", "skills-library", "description")

# Parallel lists: position i in each list refers to the same file
ids_lst, documents_lst, metadatas_lst = [], [], []

# Anything that is not a .txt file is ignored
txt_names = [name for name in os.listdir(folder_path) if name.endswith('.txt')]

for txt_name in txt_names:
    # The id doubles as the "command" metadata value
    command_id, _extension = os.path.splitext(txt_name)
    ids_lst.append(command_id)
    metadatas_lst.append({"command": command_id})

    # The document is the complete file content
    txt_path = os.path.join(folder_path, txt_name)
    with open(txt_path, 'r', encoding='utf-8') as handle:
        documents_lst.append(handle.read())

# ids_lst, documents_lst and metadatas_lst are now ready to be passed to the collection

In [None]:
# Write the records collected above into the collection, keyed by id.
# NOTE(review): presumably this embeds every document through openai_ef, i.e.
# it calls the OpenAI API — confirm before re-running on a large folder.
collection.upsert(ids=ids_lst, documents=documents_lst, metadatas=metadatas_lst)

In [None]:
# Display the metadata of the stored records; only "metadatas" is requested.
collection.get(include=["metadatas"])['metadatas']

In [None]:
# Fetch two specific records by their ids.
collection.get(ids=['ADDWPT', 'CR2E'])

In [None]:
# Three example plan steps used as search queries; each one is matched
# against the stored command descriptions independently.
plan_step_queries = [
    "Enable Conflict Detection and Resolution System\n  Useful commands: ASAS ON, RESO ON\n  - ASAS ON: Enable the conflict detection system.\n  - RESO ON: Enable the conflict resolution system.\n\n-",
    "Step 2: Set the conflict detection method\n  Useful commands: [CDMETHOD MODULE-NAME]\n  Usage details: `CDMETHOD MVP` - This command sets the conflict detection method to the'Minimum Vectoring Profile' which is an example module name.\n\n-",
    "Set the conflict resolution method\n  Useful commands: [RESO MODULE-NAME]\n  Usage details: `RESO EBY` - This command sets the conflict resolution method to 'Enhanced Bounded Yaw,' which is an example module name.\n\n",
]

# Ask for the 5 closest records per query text.
# Optional metadata filter, currently disabled: where={"command": "ADDWPT"}
collection.query(
    query_texts=plan_step_queries,
    n_results=5,
)

In [None]:
# Comma-separated command names to look up; duplicates are allowed.
# (Named commands_str rather than `input` so the builtin input() stays usable.)
commands_str = "HDG, ALT, AT, ATALT, ATDIST, ATSPD, BANK, DELWPT, DEST, DIRECT, DIST, HDG, SPD, VS"

# De-duplicate while keeping first-seen order, so the printed list is the same
# on every kernel run (set() order varies with string hash randomisation).
# Empty tokens, e.g. from a trailing comma, are skipped.
commands_lst = list(dict.fromkeys(
    token.strip() for token in commands_str.split(",") if token.strip()
))
print(commands_lst)

# Keep the retrieved texts under their own name: rebinding documents_lst here
# would leave it out of sync with ids_lst if the upsert cell is run again.
retrieved_docs_lst = collection.get(ids=commands_lst)["documents"]

# Concatenate the documents, each followed by a visual separator.
doc_separator = "\n\n ############################## \n\n"
documents_str = "".join(doc + doc_separator for doc in retrieved_docs_lst)

documents_str