In [15]:
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
from dotenv import load_dotenv, find_dotenv
import os

In [16]:
# Find the .env file and load its variables into the process environment
# (the OpenAI key is read from the environment in a later cell).
env_file = find_dotenv()
load_dotenv(env_file)

True

In [17]:
# API key used for the OpenAI embedding calls; None when OPENAI_API_KEY is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [18]:
# On-disk location of the Chroma database. The author's local checkout is the
# default; set SKILLS_VECTORDB_PATH (e.g. in the .env file) to run the notebook
# on another machine without editing this cell.
path = os.getenv(
    'SKILLS_VECTORDB_PATH',
    'C:/Users/justa/OneDrive/Desktop/Developer/LLM-Enhanced-ATM/llm/skills-library/vectordb',
)
# Persistent client: collections are stored under `path` on disk.
client = chromadb.PersistentClient(path=path)

In [19]:
# Returns a heartbeat as an integer nanosecond timestamp.
# Useful as a quick check that the client is still connected.
client.heartbeat()


1709636264858177600

In [20]:
# Embedding function backed by OpenAI's "text-embedding-3-large" model,
# authenticated with the key read from OPENAI_API_KEY.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=openai_api_key,
    model_name="text-embedding-3-large"
)

In [21]:
# Fetch the "test1" collection, creating it on first use.
# Text is embedded with `openai_ef`; "hnsw:space": "cosine" asks for cosine
# distance in the collection's index.
collection = client.get_or_create_collection(
    name="test1",
    embedding_function=openai_ef,
    metadata={"hnsw:space": "cosine"},
)

In [22]:
# Preview the first records in the collection (before the upsert below).
# NOTE(review): the output includes the full embedding vectors, so it is very long.
collection.peek()

{'ids': ['ADDNODES',
  'ADDWPT',
  'ADDWPTMODE',
  'AFTER',
  'ALT',
  'AREA',
  'ASAS',
  'AT',
  'ATALT',
  'ATDIST'],
 'embeddings': [[-0.00954272411763668,
   0.02707410790026188,
   -0.013614115305244923,
   0.012920565903186798,
   0.020279893651604652,
   0.016863521188497543,
   -0.0219110194593668,
   0.061083707958459854,
   -0.05800126865506172,
   0.025622792541980743,
   0.0009528275113552809,
   0.005452067591249943,
   -0.037657156586647034,
   0.010390395298600197,
   0.020626667886972427,
   0.008059299550950527,
   -0.03472883626818657,
   -0.010884870775043964,
   -0.01706901751458645,
   -0.012040785513818264,
   -0.00917668454349041,
   0.06339553743600845,
   -0.0020678043365478516,
   0.038761697709560394,
   0.008149203844368458,
   0.026457620784640312,
   0.0038723167963325977,
   0.025006303563714027,
   -0.027613535523414612,
   0.004594763740897179,
   -0.030978534370660782,
   -0.01715892180800438,
   0.03788834065198898,
   -0.006049290765076876,
   0.031

In [23]:
# Build the id/document lists for the collection from a folder of .txt files:
#   - each file's name without its extension becomes the record id
#   - each file's UTF-8 text content becomes the record document

# Directory containing the description text files. The author's local checkout
# is the default; set SKILLS_DESCRIPTION_DIR (e.g. in the .env file) to point
# the notebook at another location without editing this cell.
folder_path = os.getenv(
    'SKILLS_DESCRIPTION_DIR',
    'C:/Users/justa/OneDrive/Desktop/Developer/LLM-Enhanced-ATM/llm/skills-library/description',
)

# Collect (id, text) pairs first so the two lists built below can never get out
# of step if reading one of the files raises part-way through the loop.
records = []

# os.listdir() returns names in arbitrary order; sort so every run produces
# the same ordering.
for filename in sorted(os.listdir(folder_path)):
    file_path = os.path.join(folder_path, filename)

    # Only regular .txt files: a sub-directory whose name ends in ".txt"
    # would otherwise make open() fail.
    if not (filename.endswith('.txt') and os.path.isfile(file_path)):
        continue

    with open(file_path, 'r', encoding='utf-8') as file:
        records.append((os.path.splitext(filename)[0], file.read()))

# ids_lst[i] is the id (filename without extension) of documents_lst[i]
ids_lst = [doc_id for doc_id, _ in records]
documents_lst = [text for _, text in records]

In [24]:
# Write the loaded documents into the collection, keyed by their ids
# (upsert: existing ids are updated, new ids are inserted).
collection.upsert(ids=ids_lst, documents=documents_lst)

In [25]:
# Preview the first records again now that the upsert has run.
# NOTE(review): the output includes the full embedding vectors, so it is very long.
collection.peek()

{'ids': ['ADDNODES',
  'ADDWPT',
  'ADDWPTMODE',
  'AFTER',
  'ALT',
  'AREA',
  'ASAS',
  'AT',
  'ATALT',
  'ATDIST'],
 'embeddings': [[-0.00954272411763668,
   0.02707410790026188,
   -0.013614115305244923,
   0.012920565903186798,
   0.020279893651604652,
   0.016863521188497543,
   -0.0219110194593668,
   0.061083707958459854,
   -0.05800126865506172,
   0.025622792541980743,
   0.0009528275113552809,
   0.005452067591249943,
   -0.037657156586647034,
   0.010390395298600197,
   0.020626667886972427,
   0.008059299550950527,
   -0.03472883626818657,
   -0.010884870775043964,
   -0.01706901751458645,
   -0.012040785513818264,
   -0.00917668454349041,
   0.06339553743600845,
   -0.0020678043365478516,
   0.038761697709560394,
   0.008149203844368458,
   0.026457620784640312,
   0.0038723167963325977,
   0.025006303563714027,
   -0.027613535523414612,
   0.004594763740897179,
   -0.030978534370660782,
   -0.01715892180800438,
   0.03788834065198898,
   -0.006049290765076876,
   0.031

In [26]:
# Semantic search: retrieve the 4 stored documents closest to a
# natural-language request.
query_text = "please move my aircraft AAA333 to this location: 55, 68"
collection.query(query_texts=[query_text], n_results=4)

{'ids': [['move_aircraft',
   'create_aircraft',
   'set_heading',
   'delete_objects']],
 'distances': [[0.5559056787783122,
   0.6519506961285177,
   0.684922008739572,
   0.7326471232332508]],
 'metadatas': [[None, None, None, None]],
 'embeddings': None,
 'documents': [['MOVE: Move\nInstantaneously move an aircraft to a new position. If no values for the altitude, heading, speed and climb rate are provided, the aircraft will keep the old values.\n\nUsage:\n\nMOVE acid,lat,lon,[alt,hdg,spd,vspd]\nArguments:\n\nName\tType\tRequired\tDescription\nacid\ttxt\tyes\tAircraft ID\nlat\tfloat\tyes\tLatitude\nlon\tfloat\tyes\tLatitude\nalt\tfloat\tno\tAltitude\nhdg\tfloat\tno\tHeading\nspd\tfloat\tno\tSpeed\nvspd\tfloat\tno\tClimb Rate [fpm]',
   'CRE: Cre\nCreate an aircraft at specified coordinates.\n\nFor creating multiple randomly located aircraft, see MCRE.\n\nUsage:\n\nCRE acid,type,lat,lon,hdg,alt,spd\nArguments:\n\nName\tType\tRequired\tDescription\nacid\ttxt\tyes\tUnique aircraft cal