In [None]:
! pip install pymongo
! pip install python-dotenv

In [1]:
import pymongo  
import json
from dotenv import dotenv_values

In [2]:
# Settings (connection string, database name, collection names) are read from a dotenv file
env_name = "myconfig.env"
config = dotenv_values(env_name)

# Open a client against the connection string taken from the settings
cosmos_conn = config['cosmos_connection_string']
cosmos_client = pymongo.MongoClient(cosmos_conn)

# Handle to the configured database
database = cosmos_client[config['cosmos_database']]

# Handles to the three configured collections, looked up in a fixed order:
# actual nodes, predicted nodes, element catalog
actual_nodes, predicted_nodes, element_catalog = (
    database[config[setting_key]]
    for setting_key in (
        'cosmos_actual_nodes',
        'cosmos_predicted_nodes',
        'cosmos_element_collection',
    )
)

# To recreate everything from scratch, uncomment the next line to drop the database first
# cosmos_client.drop_database(config['cosmos_database'])



Using existing collection: 'ElementCollection'.
Using existing collection: 'ActualNode'.
Using existing collection: 'PredictedNode'.


In [5]:
# Create the collections and indexes if they do not exist.
# actual_nodes, predicted_nodes and element_catalog are pymongo Collection handles
# (obtained via database[...]), while list_collection_names(), create_collection()
# and the createIndexes command all work with collection *name strings* --
# so the names are taken from each handle's .name attribute.
collection_names = [
    collection.name
    for collection in (actual_nodes, predicted_nodes, element_catalog)
]

# Ask the server for the existing collection names once, not once per loop iteration
existing_collection_names = set(database.list_collection_names())

# Iterate through the collection names and create them if they do not exist
for collection_name in collection_names:
    if collection_name not in existing_collection_names:
        # Creates a collection
        database.create_collection(collection_name)
        print(f"Created collection '{collection_name}'.")
    else:
        print(f"Using existing collection: '{collection_name}'.")


# Create a compound ascending index on Type and Id in the element catalog collection
database.command({
  'createIndexes': element_catalog.name,
  'indexes': [
    {
      'key': {
        'Type': 1,
        'Id': 1
      }
    }
  ]
})


# Create the vector index on the Embedding field of the element catalog collection.
# The options below request an HNSW index with cosine similarity over 1536-dimensional
# vectors; 'dimensions' is presumably the size of the embedding model's output --
# keep it in sync with whatever produces the Embedding values.
database.command({
  'createIndexes': element_catalog.name,
  'indexes': [
    {
      'name': 'vectorSearchIndex',
      'key': {
        "Embedding": "cosmosSearch"
      },
      'cosmosSearchOptions': {
        'kind': 'vector-hnsw',
        'm': 16,
        'efConstruction': 40,
        'similarity': 'COS',
        'dimensions': 1536
      }
    }
  ]
});

In [5]:
# Load the element catalog data.
# The context manager closes the file even if JSON parsing fails, and the explicit
# encoding avoids depending on the platform's default text encoding.
with open(file="./data/catalog/catalog.json", mode="r", encoding="utf-8") as data_file:
    data = json.load(data_file)

# Insert the data into the ElementCollection.
# element_catalog is already a Collection handle, so it is used directly; indexing the
# database with a Collection object instead of a name string raises TypeError.
# NOTE(review): re-running this cell inserts the same file again -- confirm whether the
# collection should be emptied (or the database dropped) before a reload.
elements = element_catalog
result = elements.insert_many(data)

print(f"Number of data points added: {len(result.inserted_ids)} in {elements.name}")

Number of data points added: 101 in ElementCollection


In [4]:
# Load the node data.
# The context manager closes the file even if JSON parsing fails, and the explicit
# encoding avoids depending on the platform's default text encoding.
with open(file="./data/nodes/AugmentedNode.json", mode="r", encoding="utf-8") as data_file:
    data = json.load(data_file)

# Use the collection handle built from the configuration (config['cosmos_actual_nodes']).
# The previous lookup went through `db`, a name this notebook never defines, and
# hard-coded the collection name instead of honouring the configured one.
actual_node = actual_nodes

# NOTE(review): re-running this cell inserts the same file again -- confirm whether the
# collection should be emptied (or the database dropped) before a reload.
result = actual_node.insert_many(data)

print(f"Number of data points added: {len(result.inserted_ids)} in {actual_node.name}")

Number of data points added: 200000 in ActualNode
