In [1]:
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from dotenv import load_dotenv, find_dotenv
import os


In [4]:
# Find the .env file and load it so its variables are visible to os.getenv;
# the cell displays load_dotenv's return value.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

In [5]:
# Read the OpenAI key from the environment; the value is None when the variable is unset.
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [6]:
# Directory handed to Chroma's persistent client (relative to the notebook's
# working directory).
path = '../skills-library/vectordb'

# Client object used by every later cell to reach the vector store.
client = chromadb.PersistentClient(
    path=path,
)

In [7]:
# Quick sanity check that the client responds: heartbeat() returns an integer
# timestamp (in nanoseconds), which is displayed as the cell's output.
client.heartbeat()


1711360711866479900

In [8]:
# Embedding function backed by OpenAI's "text-embedding-3-large" model; it is
# attached to the collection so documents and queries are embedded the same way.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    model_name="text-embedding-3-large", api_key=openai_api_key
)

In [9]:
# Open the "test1" collection, creating it if it does not exist yet.
# The metadata entry requests cosine distance for the HNSW index (per Chroma's
# documentation), and the OpenAI embedding function is attached to the collection.
collection = client.get_or_create_collection(
    name="test1",
    metadata={"hnsw:space": "cosine"},
    embedding_function=openai_ef,
)

In [10]:
# Preview the collection's contents before this notebook adds anything to it.
collection.peek()

{'ids': [],
 'embeddings': [],
 'metadatas': [],
 'documents': [],
 'uris': None,
 'data': None}

In [11]:
# Collect the text files that will be stored in the vector collection.
#   1. list the '.txt' files in `folder_path`
#   2. ids_lst       <- each file name without its extension
#   3. documents_lst <- the text content of the same file
# Adding the documents to the collection happens in a later cell.

# Directory containing the text files
folder_path = '../skills-library/description'

# os.listdir() makes no guarantee about ordering, so the names are sorted to get
# the same ids/documents order on every run and every machine. Entries that are
# not regular files (e.g. a directory whose name ends in '.txt') are skipped
# because open() would fail on them.
txt_filenames = sorted(
    filename
    for filename in os.listdir(folder_path)
    if filename.endswith('.txt')
    and os.path.isfile(os.path.join(folder_path, filename))
)

# File name without extension, e.g. 'HDG.txt' -> 'HDG'
ids_lst = [os.path.splitext(filename)[0] for filename in txt_filenames]

# Text content, read in the same order as ids_lst
documents_lst = []
for filename in txt_filenames:
    with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
        documents_lst.append(file.read())

# ids_lst[i] and documents_lst[i] now describe the same file.

In [14]:
# Store every document under its id. No embeddings are passed explicitly; the
# collection was created with an embedding function.
collection.upsert(
    documents=documents_lst,
    ids=ids_lst,
)

In [15]:
# Preview the stored items after the upsert.
# NOTE(review): the displayed result includes the full embedding vectors, which
# makes this cell's output very long — consider clearing it before sharing.
collection.peek()

{'ids': ['ADDNODES',
  'ADDWPT',
  'ADDWPTMODE',
  'AFTER',
  'ALT',
  'AREA',
  'ASAS',
  'AT',
  'ATALT',
  'ATDIST'],
 'embeddings': [[-0.00954065565019846,
   0.02709392085671425,
   -0.013572641648352146,
   0.012937026098370552,
   0.020275497809052467,
   0.016911229118704796,
   -0.02190626971423626,
   0.06101910397410393,
   -0.05804005637764931,
   0.02560439705848694,
   0.0009229267598129809,
   0.0054284147918224335,
   -0.037648990750312805,
   0.010413824580609798,
   0.02066071890294552,
   0.008019030094146729,
   -0.034746989607810974,
   -0.01091461256146431,
   -0.017142361029982567,
   -0.012031755410134792,
   -0.009232478216290474,
   0.06343316286802292,
   -0.0020705661736428738,
   0.03877897560596466,
   0.008134596049785614,
   0.026503246277570724,
   0.0038843180518597364,
   0.02505224570631981,
   -0.02763323113322258,
   0.004535984713584185,
   -0.03092045523226261,
   -0.017129521816968918,
   0.03785444423556328,
   -0.006092921830713749,
   0.03166

In [24]:
# Retrieve the 10 stored documents closest to a natural-language question; the
# result (ids, distances, documents, ...) is displayed as the cell output.
collection.query(
    n_results=10,
    query_texts=["how to land an aircraft?"],
)

{'ids': [['HDG',
   'CRE',
   'set_heading',
   'create_aircraft',
   'DEST',
   'MCRE',
   'LNAV',
   'move_aircraft',
   'RUNWAYS',
   'VNAV']],
 'distances': [[0.7256861059373836,
   0.7320739750235935,
   0.7372124559131688,
   0.7412965548504962,
   0.7431516014723765,
   0.7433793171311778,
   0.7448992956075222,
   0.7489444710771764,
   0.7567629674495429,
   0.7572419925781988]],
 'metadatas': [[None, None, None, None, None, None, None, None, None, None]],
 'embeddings': None,
 'documents': [['HDG: Hdg\nHeading command (autopilot). This is a basic autopilot heading mode. So this command disengages the LNAV mode. If you want the aircraft to follow the route again, use the "acid LNAV ON" command.\nUsage:\nHDG acid,hdg (deg,True)\n\nArguments:\n\nName | Type     | Required | Description               \n-----+----------+----------+---------------------------\nacid | txt      | Yes      | Aircraft id               \nhdg  | float    | Yes      | Aircraft heading [deg] [1]\ntype | de