### Geospatial data with Cosmos DB

In [66]:
from azure.cosmos import CosmosClient, PartitionKey
from configparser import ConfigParser
from faker import Faker

import os
import json
import uuid

parser = ConfigParser()
parser.read('../NotebookConfig.cfg')

cosmosAccountURI = parser.get('CosmosDB', 'COSMOSDB_ACCOUNT_URI')
cosmosAccountKey = parser.get('CosmosDB', 'COSMOSDB_ACCOUNT_KEY')

databaseName = 'Learn'
containerName = 'GeoJSON2'
partitionKeypath = '/PartitionKey'
osPath = './OutputFiles/'

Faker.seed(0)
fake = Faker(['en-US'])

if not os.path.exists(osPath):
    os.mkdir(osPath)

In [67]:
client = CosmosClient(cosmosAccountURI, cosmosAccountKey)
db = client.create_database_if_not_exists(databaseName)

pkPath = PartitionKey(path=partitionKeypath)
ctr = db.create_container_if_not_exists(id=containerName, partition_key=pkPath, offer_throughput=400)

In [35]:
from collections import OrderedDict
maxRange = 10000
IOTSources = []

os.makedirs(os.path.dirname('./OutputFiles/'), exist_ok=True)
with open('./OutputFiles/' + containerName + '_referenceData.json', 'w') as jsonFile:
    for i in range(maxRange):
        entity = {            
            'Name': fake.bothify('????_############')
            , 'Type': fake.random_element(elements=('Type1', 'Type2', 'Type3', 'Type4', 'Type5', 'Type6'))
        }
        IOTSources.append(entity)

        # Save patients for reference
        json.dump(entity, jsonFile)
        if (i < maxRange):
            jsonFile.write(',\n')

In [None]:
from datetime import datetime, timedelta

RUCharges = []
daysRange = 10
iotRange = 10000

dtBase = datetime(year=2022, month=12, day=1)

for iot in range(iotRange):
    IOTSrc = IOTSources[iot]

    for day in range(daysRange):
        docs = []        
        readings = []

        # *** Produce 10 readings - 1 for each minute
        for m in range(1):
            readings.append(
                {
                    'Dimension': fake.random_element(elements=('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9'))
                    , 'Metric': fake.random_number(digits=5)
                    , 'Timestamp': (dtBase + timedelta(days=day,minutes=m)).isoformat()  # fake.date_time_this_year().isoformat()
                })

        doc = {
            'id': str(uuid.uuid4())
            , 'PartitionKey': IOTSrc['Name'] + '_' + IOTSrc['Type'] + '_' + (dtBase + timedelta(days=day)).strftime('%Y_%m_%d')
            , 'Name': IOTSrc['Name']
            , 'Type': IOTSrc['Type']            
            , 'Entity': IOTSrc
            , 'Location': {
               "type":"Point",
                "coordinates":[ float(fake.numerify(text='40.#####')), float(fake.numerify(text='-74.#####')) ]     
            }
            , 'DateTime': (dtBase + timedelta(days=day)).isoformat()
            , 'Timestamp': (dtBase + timedelta(days=day)).timestamp()
            , 'Readings': readings
            , 'class': fake.random_element(elements=OrderedDict([("A", 0.40), ("B", 0.35), ("C", 0.15), ("D", 0.05), ("E", 0.05)]))
        }

        ctr.create_item(doc)
        RUCharges.append(float(ctr.client_connection.last_response_headers['x-ms-request-charge']))
        print(ctr.client_connection.last_response_headers['x-ms-request-charge'])

In [None]:
print('Average RU cost: ' + str(sum(RUCharges) / len(RUCharges)))

In [52]:
IOTSrc = IOTSources[fake.random_int(min=0, max=9999)]
dtBase = datetime(year=2022, month=12, day=15)
docs = []        
readings = []

doc = {
    'id': str(uuid.uuid4())
    , 'PartitionKey': IOTSrc['Name'] + '_' + IOTSrc['Type'] + '_' + (dtBase + timedelta(days=day)).strftime('%Y_%m_%d')
    , 'Name': IOTSrc['Name']
    , 'Type': IOTSrc['Type']            
    , 'Entity': IOTSrc
    , 'Location': {
        "type":"Point",
        "coordinates":[ float(fake.numerify(text='40.80###')), float(fake.numerify(text='-73.96500')) ]     
    }
    , 'DateTime': (dtBase + timedelta(days=day)).isoformat()
    , 'Timestamp': (dtBase + timedelta(days=day)).timestamp()
    , 'Readings': {
            'Dimension': fake.random_element(elements=('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9'))
            , 'Metric': fake.random_number(digits=5)
            , 'Timestamp': fake.date_time_this_year().isoformat()
    }
    , 'class': fake.random_element(elements=OrderedDict([("A", 0.40), ("B", 0.35), ("C", 0.15), ("D", 0.05), ("E", 0.05)]))
}

print(doc)
ctr.create_item(doc)
print(ctr.client_connection.last_response_headers['x-ms-request-charge'])


{'id': '2580d3d3-0b43-4493-95e0-19e777b5158d', 'PartitionKey': 'rlFs_200059313956_Type2_2022_12_24', 'Name': 'rlFs_200059313956', 'Type': 'Type2', 'Entity': {'Name': 'rlFs_200059313956', 'Type': 'Type2'}, 'Location': {'type': 'Point', 'coordinates': [40.80278, -73.965]}, 'DateTime': '2022-12-24T00:00:00', 'Timestamp': 1671861600.0, 'Readings': {'Dimension': 'D3', 'Metric': 61026, 'Timestamp': '2022-09-13T13:26:29'}, 'class': 'B'}
12.38


In [None]:
# SELECT ST_ISVALIDDETAILED ({
#     'type':'Polygon',
#     'coordinates': [[[40.808023, -73.966375], [40.806159, -73.961795], [40.799321, -73.966796], [40.801040, -73.970726], [40.808023, -73.966375]]]
# })

# SELECT COUNT(c.id) FROM c

In [60]:
for item in ctr.query_items(query="SELECT c.id, c.location FROM c WHERE ST_WITHIN(c.Location, { 'type':'Polygon', 'coordinates': [[[40.808023, -73.966375], [40.806159, -73.961795], [40.799321, -73.966796], [40.801040, -73.970726], [40.808023, -73.966375]]]})" \
    , enable_cross_partition_query=True):
    print(item)

print ('RUs: ' + ctr.client_connection.last_response_headers['x-ms-request-charge'])
print ('ActivityId: ' + ctr.client_connection.last_response_headers['x-ms-activity-id'])

{'id': 'a9c24048-8541-4289-872a-f69c4fe7d52b'}
{'id': '2580d3d3-0b43-4493-95e0-19e777b5158d'}
RUs: 858.31
ActivityId: 2fb066b7-6573-402f-a510-c9e41593dd93


In [64]:
# GeoSpatial indexing

indexPolicy = {
    "indexingMode":"consistent",
    "includedPaths":[
        {"path":"/*"}
        ]
    , "spatialIndexes": [
        {
            "path": "/Location/*",
            "types": [
                "Point"
            ]
        }
    ]
    , "excludedPaths":[{"path": "/\"_etag\"/?"}]
}

db.replace_container(containerName, pkPath, indexing_policy=indexPolicy)

<ContainerProxy [dbs/Learn/colls/GeoJSON2]>

In [65]:
for item in ctr.query_items(query="SELECT c.id, c.location FROM c WHERE ST_WITHIN(c.Location, { 'type':'Polygon', 'coordinates': [[[40.808023, -73.966375], [40.806159, -73.961795], [40.799321, -73.966796], [40.801040, -73.970726], [40.808023, -73.966375]]]})" \
    , enable_cross_partition_query=True):
    print(item)

print ('RUs: ' + ctr.client_connection.last_response_headers['x-ms-request-charge'])
print ('ActivityId: ' + ctr.client_connection.last_response_headers['x-ms-activity-id'])

{'id': 'a9c24048-8541-4289-872a-f69c4fe7d52b'}
{'id': '2580d3d3-0b43-4493-95e0-19e777b5158d'}
RUs: 7.51
ActivityId: 2c19496b-a992-4c1e-9379-99788b0fec5a


In [62]:
# Restore default indexing policy without GeoSpatial indexing

indexPolicy = {
    "indexingMode":"consistent",
    "includedPaths":[
        {"path":"/*"}
        ]
    , "excludedPaths":[{"path": "/\"_etag\"/?"}]
}

db.replace_container(containerName, pkPath, indexing_policy=indexPolicy)

<ContainerProxy [dbs/Learn/colls/GeoJSON2]>