In [43]:
from typing import Dict, Union
from loguru import logger
import json
import boto3
from botocore.exceptions import ClientError
from botocore.config import Config as BotoConfig
import pymongo
from urllib.parse import quote_plus
from typing import Dict


def get_secret(secret_name: str, region_name: str = 'us-east-1') -> Union[Dict[str, str], str]:
    """Fetch a secret from AWS Secrets Manager.

    Args:
        secret_name: Name (or ARN) of the secret, passed as ``SecretId``.
        region_name: AWS region of the Secrets Manager endpoint. Defaults to
            ``'us-east-1'``, the region this notebook originally hard-coded.

    Returns:
        The decoded JSON object when the secret string is a JSON object,
        otherwise the raw secret string unchanged.

    Raises:
        botocore.exceptions.ClientError: Propagated unchanged from
            ``get_secret_value`` (e.g. missing secret, access denied).
        KeyError: If the response carries no ``SecretString`` (for example a
            binary-only secret), which this helper does not support.
    """
    logger.info(f"Retrieving secret {secret_name}")
    boto_config = BotoConfig(
        connect_timeout=10,
        retries={
            "max_attempts": 3,
            "mode": "standard",
        },
    )
    client = boto3.session.Session().client(
        service_name='secretsmanager',
        config=boto_config,
        region_name=region_name,
    )
    # ClientError is deliberately not caught here: catching it only to
    # re-raise adds nothing, so it propagates to the caller as-is.
    response = client.get_secret_value(SecretId=secret_name)

    secret_string = response.get("SecretString")
    if secret_string is None:
        raise KeyError(
            f"Secret {secret_name} has no SecretString; binary secrets are not supported"
        )

    try:
        parsed = json.loads(secret_string)
    except json.JSONDecodeError:
        return secret_string
    # A plain-text secret can still be valid JSON (e.g. "12345" or "true").
    # Only JSON objects are returned decoded; anything else stays the
    # original string so that, say, a numeric password is not turned into an int.
    if isinstance(parsed, dict):
        return parsed
    return secret_string

# --- Connect to Amazon DocumentDB with the read-only service credentials ---
credentials = get_secret("dev/tai_service/document_DB/read_ONLY_user_password")
if not isinstance(credentials, dict):
    # get_secret returns the raw string when the secret is not a JSON object;
    # fail with a clear message instead of a cryptic "string indices" error.
    raise TypeError(
        "Expected the DocumentDB secret to be a JSON object with 'username' and 'password' keys"
    )
# Percent-encode the credentials so reserved characters (e.g. '@', ':', '/')
# cannot corrupt the connection URI.
user_name = quote_plus(credentials["username"])
password = quote_plus(credentials["password"])
db_uri = f"mongodb://{user_name}:{password}@tai-service-645860363137.us-east-1.docdb-elastic.amazonaws.com:27017/?tls=true&retryWrites=false"

# Create a MongoDB client for the DocumentDB endpoint. The URI enables TLS and
# disables retryable writes; no read preference is configured here.
client = pymongo.MongoClient(db_uri)
# Print all database names visible to this user.
print(client.list_database_names())

# Select the database to inspect.
db = client.class_resources
collection_list = db.list_collection_names()
print(collection_list)

# Per-collection statistics, keyed by collection name.
document_counts = {}
indexes = {}
index_sizes = {}
sum_of_indexes = {}
size_of_objects = {}

for collection_name in collection_list:
    col = db[collection_name]
    document_counts[collection_name] = col.estimated_document_count()
    indexes[collection_name] = col.index_information()
    # Run collStats once per collection and reuse the result for every metric
    # instead of issuing the same server command twice.
    coll_stats = db.command('collStats', collection_name)
    index_size = coll_stats['indexSizes']
    # Convert each index size from bytes to GB.
    index_sizes[collection_name] = {
        k: str(v / 1024 / 1024 / 1024) + " GB" for k, v in index_size.items()
    }
    # Total size of all indexes on the collection, in GB.
    sum_of_indexes[collection_name] = str(sum(index_size.values()) / 1024 / 1024 / 1024) + " GB"
    # Average object size as reported by collStats.
    size_of_objects[collection_name] = coll_stats['avgObjSize']

print(f"Indexes: {indexes}")
print(f"Estimated document counts: {document_counts}")
print(f"Index sizes: {index_sizes}")
print(f"Sum of indexes: {sum_of_indexes}")
print(f"Size of objects: {size_of_objects}")


[32m2023-07-14 22:37:37.676[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_secret[0m:[36m13[0m - [1mRetrieving secret dev/tai_service/document_DB/read_ONLY_user_password[0m
[32m2023-07-14 22:37:37.744[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_secret[0m:[36m28[0m - [1mdev/tai_service/document_DB/read_ONLY_user_password[0m


['class_resources']
['class_resource_chunk', 'class_resource']
Indexes: {'class_resource_chunk': {'class_id_1': {'v': 4, 'key': [('class_id', 1)], 'ns': 'class_resources.class_resource_chunk'}, 'resource_id_1': {'v': 4, 'key': [('resource_id', 1)], 'ns': 'class_resources.class_resource_chunk'}, 'chunk_id_1': {'v': 4, 'key': [('chunk_id', 1)], 'ns': 'class_resources.class_resource_chunk'}, '_id_': {'v': 4, 'key': [('_id', 1)], 'ns': 'class_resources.class_resource_chunk'}}, 'class_resource': {'class_id_1': {'v': 4, 'key': [('class_id', 1)], 'ns': 'class_resources.class_resource'}, 'resource_id_1': {'v': 4, 'key': [('resource_id', 1)], 'ns': 'class_resources.class_resource'}, '_id_': {'v': 4, 'key': [('_id', 1)], 'ns': 'class_resources.class_resource'}}}
Estimated document counts: {'class_resource_chunk': 0, 'class_resource': 0}
Index sizes: {'class_resource_chunk': {'resource_id_1': '0.0009765625 GB', 'chunk_id_1': '0.0009765625 GB', '_id_': '0.0009765625 GB', 'class_id_1': '0.000976562

In [46]:

# Print every document of both collections, each listing preceded by its heading.
for collection_name, heading in (
    ("class_resource", "class_resources"),
    ("class_resource_chunk", "class_resource_chunks"),
):
    col = db[collection_name]
    print(heading)
    for document in col.find():
        print(document)
    # col.delete_many({})

class_resources
{'_id': 'eb6d1c4a-851b-441c-829a-dd9cef2f2a2d', 'child_resource_ids': None, 'class_id': 'eb6d1c4a-851b-441c-829a-dd9cef2f2a2d', 'class_resource_chunk_ids': ['d185ded7-2858-4185-a404-14bf0733e25f', 'ec7f3b17-c632-4b8b-a5a8-9feeb3a27906', 'dff493f0-1744-47ec-8345-351a1e0afb72', '5ae13e56-a763-4c8c-87ac-a4f5ea089256', '963badc2-9fa1-4473-8c3d-98e48d2ad6cd', 'a1359d03-62de-482b-a64a-912e1cb65184', '32649c04-60ea-4962-b014-2959139b4d53', '78b00660-9589-44de-8d72-a505453c730d', 'd0f50352-606d-4434-b145-12d87d6c8428', '61dd53c4-d96c-4182-a5c7-b73eceff6bf8', '86abd55d-7af0-4e6c-b682-a366ebc95924', 'fe114406-b74e-4ef3-8d09-3ffd3bc70c06', '5156aea7-52d1-4a02-bb31-bc1b22b848d9', '849859fc-c814-4172-90fd-e87762e5f49e', 'c26e9bfb-d813-4ee0-8d82-f6b88e625d1e', 'f5aa6ff7-655c-4869-aa4f-5fdfe906244c', '94b5c536-7d55-4a4f-9fae-6c0585ecc0be', '83eb3bec-32c5-4e2d-ac76-2e77b13b3146', '19ea718d-4c4c-44d3-a9a4-35ab29c21600'], 'create_timestamp': datetime.datetime(2023, 7, 14, 22, 37, 44, 272

In [47]:
import pinecone

# Fetch the Pinecone API key from Secrets Manager and initialise the client.
api_key = get_secret("dev/tai_service/pinecone_db/api_key")

pinecone.init(api_key=api_key, environment="us-east-1-aws")
# NOTE(review): this rebinds `indexes`, which the DocumentDB cell above uses
# for its index-information dict; the name is kept so later cells still work.
indexes = pinecone.list_indexes()
print(indexes)
for index_name in indexes:
    print(pinecone.describe_index(index_name))
    # Keep the index name and the Index handle in separate variables rather
    # than rebinding the loop variable to a different kind of object.
    index: pinecone.Index = pinecone.Index(index_name)
    # Request the stats once and reuse them for both the printout and the
    # namespace listing, instead of making the same call twice.
    index_stats = index.describe_index_stats()
    print(index_stats)
    namespaces = index_stats["namespaces"]
    # for namespace in namespaces:
    #     index.delete(delete_all=True, namespace=namespace)

[32m2023-07-14 22:38:17.176[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_secret[0m:[36m13[0m - [1mRetrieving secret dev/tai_service/pinecone_db/api_key[0m
[32m2023-07-14 22:38:17.225[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_secret[0m:[36m28[0m - [1mdev/tai_service/pinecone_db/api_key[0m


['tai-index']
IndexDescription(name='tai-index', metric='dotproduct', replicas=1, dimension=1536.0, shards=1, pods=1, pod_type='s1.x1', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')
{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'eb6d1c4a-851b-441c-829a-dd9cef2f2a2d': {'vector_count': 19}},
 'total_vector_count': 19}
