## IOT Readings - Embed all readings or not?
Test dataset to evaluate the RU cost of collocating IOT readings in a single document versus spreading them across multiple documents with N readings each. <br/>
Maximum number of readings: 10000

### Prepare key references for notebook
Environment variables COSMOS_ACCOUNT_URI and COSMOS_ACCOUNT_KEY should exist

In [1]:
from azure.cosmos import CosmosClient, PartitionKey
from faker import Faker

import os
import json
import uuid

# Cosmos DB account coordinates; a missing variable raises KeyError right here.
cosmosAccountURI = os.environ['COSMOS_ACCOUNT_URI']
cosmosAccountKey = os.environ['COSMOS_ACCOUNT_KEY']

# Names of the database/container used by every cell below.
databaseName, containerName = 'Models', 'IOTEnergyTelemetry'
partitionKeypath = '/PartitionKey'

# Seed Faker before creating the generator so the fake data is repeatable.
Faker.seed(0)
fake = Faker(locale=['en-US'])

# Show which account the notebook is pointed at (the key is never printed).
print(cosmosAccountURI)

https://cosmicgbb-sql.documents.azure.com:443/


### Create DB and collection for sample data
If the resources already exist, just get references to the database and container.

In [5]:
# Connect to the account and get (or create) the database used by the notebook.
client = CosmosClient(url=cosmosAccountURI, credential=cosmosAccountKey)
db = client.create_database_if_not_exists(id=databaseName)

# Get (or create) the container, partitioned on partitionKeypath and
# requested with 1000 RU/s of throughput.
pkPath = PartitionKey(path=partitionKeypath)
ctr = db.create_container_if_not_exists(
    id=containerName,
    partition_key=pkPath,
    offer_throughput=1000,
)

### Repeatable Reference lists
Contains documents that should stay consistent across cells/operations. <br/>
For example: Patient, IOT devices, tax payer information, ...

In [6]:
from collections import OrderedDict
# Build the reference list of IOT sources that the data-loading cells draw from,
# and save a copy of it under ./OutputFiles for later reference.
maxRange = 10000   # number of reference IOT sources to generate
IOTSources = []

os.makedirs('./OutputFiles', exist_ok=True)
with open('./OutputFiles/' + containerName + '_referenceData.json', 'w') as jsonFile:
    for i in range(maxRange):
        entity = {
            'Name': fake.bothify('????_############'),  # SiteId_ResourceId
            'Type': fake.random_element(elements=('Type1', 'Type2', 'Type3', 'Type4', 'Type5', 'Type6')),
        }
        IOTSources.append(entity)

        # Save IOT sources for reference
        json.dump(entity, jsonFile)
        # Separator goes between entries only. The original test
        # `i < maxRange` was always true inside range(maxRange), so the
        # last entry was followed by a dangling comma as well.
        if i < maxRange - 1:
            jsonFile.write(',\n')

### Load sample documents in the container
Generated documents are also saved in the './OutputFiles' directory.

In [12]:
# Write cost of a document that embeds a single reading.
RUCharges = []   # request charge reported for each create_item call
docs = []        # local copy of every document sent to the container
maxRange = 1     # number of documents to create

for docIndex in range(maxRange):
    # Pick one of the reference IOT sources at random.
    IOTSrc = IOTSources[fake.random_int(min=0, max=9999)]

    # *** Produce N readings
    # Execute this repeatedly by changing range from 1, 100, 1000 and 10000
    readings = [
        {
            'Dimension': fake.random_element(elements=('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9')),
            'Value': fake.random_number(digits=5),
            'Timestamp': fake.date_time_this_year().isoformat(),
        }
        for readingIndex in range(1)
    ]

    doc = {
        'id': str(uuid.uuid4()),
        'PartitionKey': fake.bothify('##_') + IOTSrc['Name'] + '_' + IOTSrc['Type'],
        'Entity': IOTSrc,
        'Readings': readings,
        'class': fake.random_element(elements=OrderedDict([("A", 0.40), ("B", 0.35), ("C", 0.15), ("D", 0.05), ("E", 0.05)])),
    }

    # Create the item, record its request charge and keep the doc locally.
    ctr.create_item(doc)
    requestCharge = ctr.client_connection.last_response_headers['x-ms-request-charge']
    RUCharges.append(float(requestCharge))
    docs.append(doc)

# Dump the generated documents, one per line, each followed by a comma.
outputPath = './OutputFiles/' + containerName + '_1docs.json'
with open(outputPath, 'w') as outFile:
    for savedDoc in docs:
        outFile.write(json.dumps(savedDoc) + ',\n')

In [None]:
# Mean request charge over the create_item calls recorded in RUCharges.
print('Average RU charge: %s' % (sum(RUCharges) / len(RUCharges)))

In [None]:
# Write cost of a document that embeds 100 readings.
RUCharges = []   # request charge reported for each create_item call
docs = []        # local copy of every document sent to the container
maxRange = 1     # number of documents to create

for docIndex in range(maxRange):
    # Pick one of the reference IOT sources at random.
    IOTSrc = IOTSources[fake.random_int(min=0, max=9999)]

    # *** Produce N readings
    # Execute this repeatedly by changing range from 1, 100, 1000 and 10000
    readings = [
        {
            'Dimension': fake.random_element(elements=('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9')),
            'Value': fake.random_number(digits=5),
            'Timestamp': fake.date_time_this_year().isoformat(),
        }
        for readingIndex in range(100)
    ]

    doc = {
        'id': str(uuid.uuid4()),
        'PartitionKey': fake.bothify('##_') + IOTSrc['Name'] + '_' + IOTSrc['Type'],
        'Entity': IOTSrc,
        'Readings': readings,
        'class': fake.random_element(elements=OrderedDict([("A", 0.40), ("B", 0.35), ("C", 0.15), ("D", 0.05), ("E", 0.05)])),
    }

    # Create the item, record its request charge and keep the doc locally.
    ctr.create_item(doc)
    requestCharge = ctr.client_connection.last_response_headers['x-ms-request-charge']
    RUCharges.append(float(requestCharge))
    docs.append(doc)

# Dump the generated documents, one per line, each followed by a comma.
outputPath = './OutputFiles/' + containerName + '_100docs.json'
with open(outputPath, 'w') as outFile:
    for savedDoc in docs:
        outFile.write(json.dumps(savedDoc) + ',\n')

In [None]:
# Mean request charge over the create_item calls recorded in RUCharges.
print('Average RU charge: %s' % (sum(RUCharges) / len(RUCharges)))

In [None]:
# Write cost of a document that embeds 1000 readings.
RUCharges = []   # request charge reported for each create_item call
docs = []        # local copy of every document sent to the container
maxRange = 1     # number of documents to create

for docIndex in range(maxRange):
    # Pick one of the reference IOT sources at random.
    IOTSrc = IOTSources[fake.random_int(min=0, max=9999)]

    # *** Produce N readings
    # Execute this repeatedly by changing range from 1, 100, 1000 and 10000
    readings = [
        {
            'Dimension': fake.random_element(elements=('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9')),
            'Value': fake.random_number(digits=5),
            'Timestamp': fake.date_time_this_year().isoformat(),
        }
        for readingIndex in range(1000)
    ]

    doc = {
        'id': str(uuid.uuid4()),
        'PartitionKey': fake.bothify('##_') + IOTSrc['Name'] + '_' + IOTSrc['Type'],
        'Entity': IOTSrc,
        'Readings': readings,
        'class': fake.random_element(elements=OrderedDict([("A", 0.40), ("B", 0.35), ("C", 0.15), ("D", 0.05), ("E", 0.05)])),
    }

    # Create the item, record its request charge and keep the doc locally.
    ctr.create_item(doc)
    requestCharge = ctr.client_connection.last_response_headers['x-ms-request-charge']
    RUCharges.append(float(requestCharge))
    docs.append(doc)

# Dump the generated documents, one per line, each followed by a comma.
outputPath = './OutputFiles/' + containerName + '_1000docs.json'
with open(outputPath, 'w') as outFile:
    for savedDoc in docs:
        outFile.write(json.dumps(savedDoc) + ',\n')

In [None]:
# Mean request charge over the create_item calls recorded in RUCharges.
print('Average RU charge: %s' % (sum(RUCharges) / len(RUCharges)))

In [None]:
# Write cost of a document that embeds 10000 readings.
RUCharges = []   # request charge reported for each create_item call
docs = []        # local copy of every document sent to the container
maxRange = 1     # number of documents to create

for j in range(maxRange):
    # Pick one of the reference IOT sources at random.
    IOTSrc = IOTSources[fake.random_int(min=0, max=9999)]

    # *** Produce N readings
    # Execute this repeatedly by changing range from 1, 100, 1000 and 10000
    readings = [
        {
            'Dimension': fake.random_element(elements=('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9')),
            'Value': fake.random_number(digits=5),
            'Timestamp': fake.date_time_this_year().isoformat(),
        }
        for r in range(10000)
    ]

    doc = {
        'id': str(uuid.uuid4()),
        'PartitionKey': fake.bothify('##_') + IOTSrc['Name'] + '_' + IOTSrc['Type'],
        'Entity': IOTSrc,
        'Readings': readings,
        'class': fake.random_element(elements=OrderedDict([("A", 0.40), ("B", 0.35), ("C", 0.15), ("D", 0.05), ("E", 0.05)])),
    }

    # Create the item, record its request charge and keep the doc locally.
    ctr.create_item(doc)
    RUCharges.append(float(ctr.client_connection.last_response_headers['x-ms-request-charge']))
    docs.append(doc)

# The file name carries the readings-per-document count. The original wrote
# to '_1000docs.json', overwriting the output of the 1000-reading run.
with open('./OutputFiles/' + containerName + '_10000docs.json', 'w') as jf:
    for d in docs:
        json.dump(d, jf)
        jf.write(',\n')

In [13]:
# Mean request charge over the create_item calls recorded in RUCharges.
print('Average RU charge: %s' % (sum(RUCharges) / len(RUCharges)))

Average RU charge: 6249.76


In [None]:
# Count documents per 23-character PartitionKey prefix across all partitions.
groupByQuery = 'SELECT SUBSTRING(c.PartitionKey, 0, 23) AS PK, COUNT(c.id) AS Total FROM c GROUP BY SUBSTRING(c.PartitionKey, 0, 23) '
items = list(
    ctr.query_items(
        query=groupByQuery,
        enable_cross_partition_query=True,
        max_item_count=100,
    )
)

# Request charge taken from the most recent response headers, then the
# number of groups returned.
print(float(ctr.client_connection.last_response_headers['x-ms-request-charge']))
print(len(items))

### Adjust index policy
The readings being stored in the container should not require indexing, as most of the operations should fetch documents based on Device and then compute the values. Therefore the application will not benefit from the default indexing. <br/>
Execute cell below to adjust indexPolicy and then re-execute cell to load data (adjusting number of readings)

In [19]:
# Index only the properties used for filtering; everything else (notably the
# embedded Readings array) is excluded through the catch-all "/*" path.
indexPolicy = {
    "indexingMode": "consistent",
    "includedPaths": [
        {"path": "/PartitionKey/?"},
        {"path": "/Entity/*"},
        # Index paths are case-sensitive and the documents store this
        # property as 'class'; the original '/Class/?' matched nothing.
        {"path": "/class/?"},
        {"path": "/_ts/?"},
    ],
    "excludedPaths": [{"path": "/*"}],
}

# Apply the policy to the existing container (same partition key).
db.replace_container(containerName, pkPath, indexing_policy=indexPolicy)


<ContainerProxy [dbs/Models/colls/IOTEnergyTelemetry]>

### Cross partition query workload assessment
- Target 1 million readings
  - 10000 docs with 100 readings
  - 1000 docs with 1000 readings
  - 100 docs with 10000 readings

In [22]:
# Workload option: 10000 documents with 100 readings each, all written under
# the partition key of a single randomly chosen IOT source.
RUCharges = []     # request charge reported for each create_item call
docs = []          # local copy of every document sent to the container
maxRange = 10000   # number of documents to create
IOTSrc = IOTSources[fake.random_int(min=0, max=9999)]

for docIndex in range(maxRange):
    # *** Produce N readings
    readings = [
        {
            'Dimension': fake.random_element(elements=('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9')),
            'Metric': fake.random_number(digits=5),
            'Timestamp': fake.date_time_this_year().isoformat(),
        }
        for readingIndex in range(100)
    ]

    doc = {
        'id': str(uuid.uuid4()),
        # No random prefix here: every document shares one partition key.
        'PartitionKey': IOTSrc['Name'] + '_' + IOTSrc['Type'],
        'Entity': IOTSrc,
        'Readings': readings,
        'class': fake.random_element(elements=OrderedDict([("A", 0.40), ("B", 0.35), ("C", 0.15), ("D", 0.05), ("E", 0.05)])),
    }

    # Create the item, record its request charge and keep the doc locally.
    ctr.create_item(doc)
    requestCharge = ctr.client_connection.last_response_headers['x-ms-request-charge']
    RUCharges.append(float(requestCharge))
    docs.append(doc)

# Dump the generated documents, one per line, each followed by a comma.
outputPath = './OutputFiles/' + containerName + '_Opt1_docs.json'
with open(outputPath, 'w') as outFile:
    for savedDoc in docs:
        outFile.write(json.dumps(savedDoc) + ',\n')

In [None]:
# Workload option: 1000 documents with 1000 readings each, all written under
# the partition key of a single randomly chosen IOT source.
RUCharges = []    # request charge reported for each create_item call
docs = []         # local copy of every document sent to the container
maxRange = 1000   # number of documents to create
IOTSrc = IOTSources[fake.random_int(min=0, max=9999)]

for j in range(maxRange):
    # *** Produce N readings
    readings = [
        {
            'Dimension': fake.random_element(elements=('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9')),
            'Metric': fake.random_number(digits=5),
            'Timestamp': fake.date_time_this_year().isoformat(),
        }
        for r in range(1000)
    ]

    doc = {
        'id': str(uuid.uuid4()),
        # No random prefix here: every document shares one partition key.
        'PartitionKey': IOTSrc['Name'] + '_' + IOTSrc['Type'],
        'Entity': IOTSrc,
        'Readings': readings,
        'class': fake.random_element(elements=OrderedDict([("A", 0.40), ("B", 0.35), ("C", 0.15), ("D", 0.05), ("E", 0.05)])),
    }

    # Create the item, record its request charge and keep the doc locally.
    ctr.create_item(doc)
    RUCharges.append(float(ctr.client_connection.last_response_headers['x-ms-request-charge']))
    docs.append(doc)

# This option gets its own output file. The original reused
# '_Opt1_docs.json', overwriting the documents saved by the
# 10000 x 100 workload.
with open('./OutputFiles/' + containerName + '_Opt2_docs.json', 'w') as jf:
    for d in docs:
        json.dump(d, jf)
        jf.write(',\n')

In [None]:
# Workload option: 100 documents with 10000 readings each, all written under
# the partition key of a single randomly chosen IOT source.
RUCharges = []   # request charge reported for each create_item call
docs = []        # local copy of every document sent to the container
maxRange = 100   # number of documents to create
IOTSrc = IOTSources[fake.random_int(min=0, max=9999)]

for j in range(maxRange):
    # *** Produce N readings
    readings = [
        {
            'Dimension': fake.random_element(elements=('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9')),
            'Metric': fake.random_number(digits=5),
            'Timestamp': fake.date_time_this_year().isoformat(),
        }
        for r in range(10000)
    ]

    doc = {
        'id': str(uuid.uuid4()),
        # No random prefix here: every document shares one partition key.
        'PartitionKey': IOTSrc['Name'] + '_' + IOTSrc['Type'],
        'Entity': IOTSrc,
        'Readings': readings,
        'class': fake.random_element(elements=OrderedDict([("A", 0.40), ("B", 0.35), ("C", 0.15), ("D", 0.05), ("E", 0.05)])),
    }

    # Create the item, record its request charge and keep the doc locally.
    ctr.create_item(doc)
    RUCharges.append(float(ctr.client_connection.last_response_headers['x-ms-request-charge']))
    docs.append(doc)

# This option gets its own output file. The original reused
# '_Opt1_docs.json', overwriting the documents saved by the
# 10000 x 100 workload.
with open('./OutputFiles/' + containerName + '_Opt3_docs.json', 'w') as jf:
    for d in docs:
        json.dump(d, jf)
        jf.write(',\n')

### Clean up code

In [None]:
# Put back an index-everything policy: all paths are included and only the
# "_etag" path is excluded.
indexPolicy = {
    "indexingMode": "consistent",
    "includedPaths": [{"path": "/*"}],
    "excludedPaths": [{"path": "/\"_etag\"/?"}],
}

# Apply the policy to the existing container (same partition key).
db.replace_container(containerName, pkPath, indexing_policy=indexPolicy)

In [15]:
# Requires `db` and `containerName` from the earlier cells.
# Removes the sample container together with every document loaded into it.
db.delete_container(container=containerName)