## Demonstrate RU usage with and without index optimization

### Prepare key references for notebook
Expected configuration file `../NotebookConfig.cfg` (the path read by the setup cell below), with format:<br/>
[CosmosDB]<br/>
COSMOSDB_ACCOUNT_URI: https://CosmosDBAccount.documents.azure.com:443/<br/>
COSMOSDB_ACCOUNT_KEY: CosmosDBKey<br/>

In [2]:
from azure.cosmos import CosmosClient, PartitionKey
from configparser import ConfigParser
from faker import Faker

import os
import json
import uuid

# Load the Cosmos DB endpoint and key from the notebook configuration file.
configPath = '../NotebookConfig.cfg'
parser = ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail early with a clear message instead of the
# less obvious NoSectionError that parser.get() would raise below.
if not parser.read(configPath):
    raise FileNotFoundError('Configuration file not found: ' + configPath)

cosmosAccountURI = parser.get('CosmosDB', 'COSMOSDB_ACCOUNT_URI')
cosmosAccountKey = parser.get('CosmosDB', 'COSMOSDB_ACCOUNT_KEY')

# Target database/container and the partition key path used by the cells below.
databaseName = 'Learn'
containerName = 'Index'
partitionKeypath = '/PartitionKey'

# Fixed seed so the generated sample data is repeatable between runs.
Faker.seed(0)
fake = Faker(['en-US'])

# Only the account URI is echoed; the key is deliberately never printed.
print(cosmosAccountURI)

https://cosmicgbbcdb-sql.documents.azure.com:443/


In [3]:
# Connect to the account and make sure the database exists.
# Throughput (4000 RU/s) is provisioned on the database and shared.
client = CosmosClient(url=cosmosAccountURI, credential=cosmosAccountKey)
db = client.create_database_if_not_exists(id=databaseName, offer_throughput=4000)

# Create the container if it is missing, partitioned on partitionKeypath.
pkPath = PartitionKey(path=partitionKeypath)
ctr = db.create_container_if_not_exists(id=containerName, partition_key=pkPath)

### Repeatable References
Contains documents that should be used as a reference data set across cells/operations. <br/>
For example: patients, IoT devices, taxpayer information, ...

In [5]:
# OrderedDict is not used in this cell; the document-writing cells below rely on it.
from collections import OrderedDict

# Number of synthetic IoT sources to generate as the reference data set.
maxRange = 10000
IOTSources = []

os.makedirs(os.path.dirname('./OutputFiles/'), exist_ok=True)
with open('./OutputFiles/' + containerName + '_referenceData.json', 'w') as jsonFile:
    for i in range(maxRange):
        entity = {
            'Name': fake.bothify('????_############'),
            'Type': fake.random_element(elements=('Type1', 'Type2', 'Type3', 'Type4', 'Type5', 'Type6')),
        }
        IOTSources.append(entity)

        # Save IoT sources for reference
        json.dump(entity, jsonFile)
        # Separate entries with a comma, but not after the last one. The
        # previous check (i < maxRange) was always true inside the loop and
        # left a trailing comma at the end of the file.
        if i < maxRange - 1:
            jsonFile.write(',\n')

In [6]:
## Write a single document
# Builds one document per (source, day) holding 1440 per-minute readings,
# writes it to the container and records the request charge (RU) of the write.
from datetime import datetime, timedelta

RUCharges = []
daysRange = 1
iotRange = 1

dtBase = datetime(year=2023, month=1, day=1)

for iot in range(iotRange):
    # Pick a random reference source. The upper bound follows the actual size
    # of IOTSources instead of a hard-coded 9999 that has to track maxRange.
    IOTSrc = IOTSources[fake.random_int(min=0, max=len(IOTSources) - 1)]

    for day in range(daysRange):
        dayStart = dtBase + timedelta(days=day)
        # Source name, type and day; used as partition key and in the file name.
        partitionKey = IOTSrc['Name'] + '_' + IOTSrc['Type'] + '_' + dayStart.strftime('%Y_%m_%d')

        # *** Produce 1440 readings - 1 for each minute
        readings = [
            {
                'Dimension': fake.random_element(elements=('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9')),
                'Metric': fake.random_number(digits=5),
                'Timestamp': (dayStart + timedelta(minutes=m)).isoformat(),
            }
            for m in range(1440)
        ]

        doc = {
            'id': str(uuid.uuid4()),
            'PartitionKey': partitionKey,
            'Name': IOTSrc['Name'],
            'Type': IOTSrc['Type'],
            'JobId': 100,
            'Entity': IOTSrc,
            'DateTime': dayStart.isoformat(),
            # NOTE(review): dtBase is a naive datetime, so timestamp() is
            # interpreted in the local timezone of the machine running the notebook.
            'Timestamp': dayStart.timestamp(),
            'Readings': readings,
            'class': fake.random_element(elements=OrderedDict([("A", 0.40), ("B", 0.35), ("C", 0.15), ("D", 0.05), ("E", 0.05)])),
        }

        ctr.create_item(doc)
        # Read the request charge of the write once and reuse it.
        charge = ctr.client_connection.last_response_headers['x-ms-request-charge']
        RUCharges.append(float(charge))
        print('RU charge: ' + charge)

        # Keep the written document and export it to a JSON file for reference.
        docs = [doc]

        with open('./OutputFiles/' + containerName + '_' + partitionKey + '_docs.json', 'w') as jf:
            for d in docs:
                json.dump(d, jf)

RU charge: 876.57


### Adjust index policy
The readings stored in the container do not need to be indexed, because most operations fetch documents by device and then compute values from the readings. The application therefore does not benefit from the default indexing policy. <br/>
Execute the cell below to adjust the index policy, then re-execute the cell that loads data (adjusting the number of readings).

In [26]:
# Index only the properties used for lookups and exclude everything else
# (in particular the large Readings array) to reduce the RU cost of writes.
#
# The last included path is written as "/class/?" to match the document
# property 'class' exactly as the write cells spell it. The previous
# "/Class/?" differed in case from the property name.
indexPolicy = {
    "indexingMode": "consistent",
    "includedPaths": [
        {"path": "/PartitionKey/?"},
        {"path": "/Entity/*"},
        {"path": "/class/?"},
    ],
    "excludedPaths": [{"path": "/*"}],
}

db.replace_container(containerName, pkPath, indexing_policy=indexPolicy)

<ContainerProxy [dbs/Learn/colls/Index]>

In [8]:
## Write a single document
# Same single-document write as before, run again so the request charge (RU)
# can be compared after the index policy has been changed.
from datetime import datetime, timedelta

RUCharges = []
daysRange = 1
iotRange = 1

dtBase = datetime(year=2023, month=1, day=1)

for iot in range(iotRange):
    # Pick a random reference source. The upper bound follows the actual size
    # of IOTSources instead of a hard-coded 9999 that has to track maxRange.
    IOTSrc = IOTSources[fake.random_int(min=0, max=len(IOTSources) - 1)]

    for day in range(daysRange):
        dayStart = dtBase + timedelta(days=day)
        # Source name, type and day; used as partition key and in the file name.
        partitionKey = IOTSrc['Name'] + '_' + IOTSrc['Type'] + '_' + dayStart.strftime('%Y_%m_%d')

        # *** Produce 1440 readings - 1 for each minute
        readings = [
            {
                'Dimension': fake.random_element(elements=('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9')),
                'Metric': fake.random_number(digits=5),
                'Timestamp': (dayStart + timedelta(minutes=m)).isoformat(),
            }
            for m in range(1440)
        ]

        doc = {
            'id': str(uuid.uuid4()),
            'PartitionKey': partitionKey,
            'Name': IOTSrc['Name'],
            'Type': IOTSrc['Type'],
            'JobId': 100,
            'Entity': IOTSrc,
            'DateTime': dayStart.isoformat(),
            # NOTE(review): dtBase is a naive datetime, so timestamp() is
            # interpreted in the local timezone of the machine running the notebook.
            'Timestamp': dayStart.timestamp(),
            'Readings': readings,
            'class': fake.random_element(elements=OrderedDict([("A", 0.40), ("B", 0.35), ("C", 0.15), ("D", 0.05), ("E", 0.05)])),
        }

        ctr.create_item(doc)
        # Read the request charge of the write once and reuse it.
        charge = ctr.client_connection.last_response_headers['x-ms-request-charge']
        RUCharges.append(float(charge))
        print('RU charge: ' + charge)

        # Keep the written document and export it to a JSON file for reference.
        docs = [doc]

        with open('./OutputFiles/' + containerName + '_' + partitionKey + '_docs.json', 'w') as jf:
            for d in docs:
                json.dump(d, jf)

RU charge: 50.48


### Write comparison: <br/>
With default indexing: 876.57 RU<br/>
With optimized index: 50.48 RU<br/>

In [9]:
# Bulk load: 100 randomly chosen sources x 30 days, one document per
# (source, day) with 1440 per-minute readings. The request charge of every
# write is collected in RUCharges.
from datetime import datetime, timedelta

RUCharges = []
daysRange = 30
iotRange = 100

dtBase = datetime(year=2023, month=1, day=1)

for iot in range(iotRange):
    # Pick a random reference source. The upper bound follows the actual size
    # of IOTSources instead of a hard-coded 9999 that has to track maxRange.
    IOTSrc = IOTSources[fake.random_int(min=0, max=len(IOTSources) - 1)]

    for day in range(daysRange):
        dayStart = dtBase + timedelta(days=day)

        # *** Produce 1440 readings - 1 for each minute
        readings = [
            {
                'Dimension': fake.random_element(elements=('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9')),
                'Metric': fake.random_number(digits=5),
                'Timestamp': (dayStart + timedelta(minutes=m)).isoformat(),
            }
            for m in range(1440)
        ]

        doc = {
            'id': str(uuid.uuid4()),
            'PartitionKey': IOTSrc['Name'] + '_' + IOTSrc['Type'] + '_' + dayStart.strftime('%Y_%m_%d'),
            'Name': IOTSrc['Name'],
            'Type': IOTSrc['Type'],
            'JobId': 100,
            'Entity': IOTSrc,
            'DateTime': dayStart.isoformat(),
            # NOTE(review): dtBase is a naive datetime, so timestamp() is
            # interpreted in the local timezone of the machine running the notebook.
            'Timestamp': dayStart.timestamp(),
            'Readings': readings,
            'class': fake.random_element(elements=OrderedDict([("A", 0.40), ("B", 0.35), ("C", 0.15), ("D", 0.05), ("E", 0.05)])),
        }

        ctr.create_item(doc)
        RUCharges.append(float(ctr.client_connection.last_response_headers['x-ms-request-charge']))

        # Per-write printing and per-document JSON export are intentionally
        # skipped in the bulk run; only the most recent document is kept.
        docs = [doc]

In [25]:
# Restore an index-everything policy: every path is included again and only
# the system "_etag" property stays excluded.
indexPolicy = {
    "indexingMode": "consistent",
    "includedPaths": [{"path": "/*"}],
    "excludedPaths": [{"path": "/\"_etag\"/?"}],
}

db.replace_container(
    container=containerName,
    partition_key=pkPath,
    indexing_policy=indexPolicy,
)

<ContainerProxy [dbs/Learn/colls/Index]>