In [1]:
# Display the value of every top-level expression in a cell, not only the
# last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'  # default is 'last_expr'

# Automatically reload imported modules before running each cell, so edits to
# local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

from azure.cosmos.cosmos_client import CosmosClient

# Update items

## Connect to the Cosmos DB instance

`COSMOS_ENDPOINT` and `COSMOS_WRITE_KEY` must be set as environment variables before running the cells below.

In [4]:
# Initialize Cosmos DB client
# The endpoint and write key are read from the environment so that no
# credentials are stored in the notebook; a missing variable raises KeyError.
url = os.environ['COSMOS_ENDPOINT']
key = os.environ['COSMOS_WRITE_KEY']
client = CosmosClient(url, credential=key)

# Handles for the 'camera-trap' database and its 'datasets' and 'sequences'
# containers; later cells issue their reads and writes through these.
database = client.get_database_client('camera-trap')
container_datasets = database.get_container_client('datasets')
container_sequences = database.get_container_client('sequences')

## Upsert an item
This section upserts an item into the `datasets` container.

Note: the Data Explorer view in the Azure Portal seems to remain out of date for a while after an upsert.

When you're *updating* an existing item instead of *inserting* a new item, you need to find its `id` and include it in the `item_to_update`.

In [9]:
# The document to insert or update, expressed as a plain dict.
# The "id" entry identifies the existing document to overwrite.
# NOTE(review): "container_sas_key" embeds a SAS token directly in the
# notebook source (and it is echoed in the cell output) - consider whether
# it should be loaded from the environment instead.
item_to_update = {
    "access": ["public"],
    "comment": "Images from Bellevue, WA. All from the same location so far.",
    "location": "dansyard",
    "container": "bellevue-camera-traps",
    "container_sas_key": "?se=2021-01-01T07%3A59%3A00Z&sp=rl&sv=2018-03-28&sr=c&sig=h%2BzEOq%2BLId9cFvKM%2BjDImK%2BulJbNy2W5VVE7tJyFHNY%3D",
    "dataset_name": "bellevue_190602",
    "path_prefix": "bellevue_camera_traps.19.06.02.1320",
    "storage_account": "wildlifeblobssc",
    "id": "883725a0-2a73-38db-7a5d-6a4ac3875bef",
}

In [10]:
%%time

# Write the item to the `datasets` container: inserts it, or updates the
# existing item when `item_to_update` carries that item's `id`. The returned
# document is displayed as the cell output.
container_datasets.upsert_item(item_to_update)

CPU times: user 19.7 ms, sys: 3.94 ms, total: 23.6 ms
Wall time: 314 ms


{'access': ['public'],
 'comment': 'Images from Bellevue, WA. All from the same location so far.',
 'location': 'dansyard',
 'container': 'bellevue-camera-traps',
 'container_sas_key': '?se=2021-01-01T07%3A59%3A00Z&sp=rl&sv=2018-03-28&sr=c&sig=h%2BzEOq%2BLId9cFvKM%2BjDImK%2BulJbNy2W5VVE7tJyFHNY%3D',
 'dataset_name': 'bellevue_190602',
 'path_prefix': 'bellevue_camera_traps.19.06.02.1320',
 'storage_account': 'wildlifeblobssc',
 'id': '883725a0-2a73-38db-7a5d-6a4ac3875bef',
 '_rid': 'WjB+AMAUh0USAAAAAAAAAA==',
 '_self': 'dbs/WjB+AA==/colls/WjB+AMAUh0U=/docs/WjB+AMAUh0USAAAAAAAAAA==/',
 '_etag': '"06000c2e-0000-0500-0000-5e0548b00000"',
 '_attachments': 'attachments/',
 '_ts': 1577404592}

In [11]:
%%time

# Fetch every document in the `datasets` container, across all partitions.
query = 'SELECT * FROM datasets d'
result_iterable = container_datasets.query_items(
    query=query,
    enable_cross_partition_query=True,
)

# Index the documents by dataset name, keeping only the fields whose names do
# not start with an underscore (this drops fields such as `_rid`, `_etag`
# and `_ts`).
datasets = {
    doc['dataset_name']: {
        field: value
        for field, value in doc.items()
        if not field.startswith('_')
    }
    for doc in result_iterable
}

print('Length of results:', len(datasets))

Length of results: 19
CPU times: user 5.32 ms, sys: 1.82 ms, total: 7.14 ms
Wall time: 113 ms


In [22]:
# Number of entries in `datasets`, i.e. distinct `dataset_name` values retrieved.
len(datasets)

19