In [None]:
import csv
import os
import random
import time
from datetime import datetime
from azure.cosmos import CosmosClient        # pip install azure-cosmos

# 1. Connect to Azure Cosmos DB

In [None]:
# Connection settings are read from environment variables so that real
# credentials do not have to be stored in the notebook itself. When a variable
# is not set, the original placeholder string is used as a fallback, so
# replacing the placeholders in place keeps working as before.
cosmos_endpoint       = os.environ.get('AZURE_COSMOS_DB_ENDPOINT', 'AZURE_COSMOS_DB_ENDPOINT')
cosmos_account_key    = os.environ.get('AZURE_COSMOS_DB_ACCOUNT_KEY', 'AZURE_COSMOS_DB_ACCOUNT_KEY')
cosmos_database_name  = os.environ.get('AZURE_COSMOS_DB_DATABASE_NAME', 'AZURE_COSMOS_DB_DATABASE_NAME')
cosmos_container_name = os.environ.get('AZURE_COSMOS_DB_CONTAINER_NAME', 'AZURE_COSMOS_DB_CONTAINER_NAME')

# Build the client chain: account client -> database client -> container client.
# `container` is the handle the query cells of this notebook work with.
client    = CosmosClient(cosmos_endpoint, cosmos_account_key)
database  = client.get_database_client(cosmos_database_name)
container = database.get_container_client(cosmos_container_name)

# 2. Retrieve the number of datasets affected by the query

In [None]:
# Counting query: wraps a DISTINCT selection of serial numbers, filtered by
# machine name and by the presence of at least one of the five setpoints,
# in an outer COUNT.
sql_query = """
SELECT VALUE COUNT(1) FROM (
    SELECT DISTINCT c.SerialNumber FROM c
    WHERE ARRAY_LENGTH(c.InspectionsAndResults) > 0
    AND c.MachineName = "InspectionMachine1"
    AND EXISTS(
        SELECT VALUE i FROM i IN c.InspectionsAndResults 
        WHERE i.InspectionSetpoints["MachineSetPointGroupForMountainbike.AnyMachineSetpoint1"] != null
        OR i.InspectionSetpoints["MachineSetPointGroupForMountainbike.AnyMachineSetpoint2"] != null
        OR i.InspectionSetpoints["MachineSetPointGroupForMountainbike.AnyMachineSetpoint3"] != null
        OR i.InspectionSetpoints["MachineSetPointGroupForMountainbike.AnyMachineSetpoint4"] != null
        OR i.InspectionSetpoints["MachineSetPointGroupForMountainbike.AnyMachineSetpoint5"] != null
    )
) AS NumberOfDistinctSerialNumbers
"""

# Run the counting query across partitions and materialize everything it yields
count_query_results = list(
    container.query_items(
        query=sql_query,
        enable_cross_partition_query=True,
    )
)

# The first returned item is kept as the number of datasets affected by the use case
number_of_processed_datasets = count_query_results[0]

# 3. Run the operation

In [None]:
# Collects one measured duration per timed run of the query
query_durations = list()

# Number of datasets currently stored in the database (e.g. 10,000);
# written to the result file alongside the measured mean duration
database_record_count = 10_000

In [None]:
# The query text is identical for every run, so it is defined once up front
# instead of being rebuilt on each pass of the loop.
sql_query = """
    SELECT * FROM c
    WHERE ARRAY_LENGTH(c.InspectionsAndResults) > 0
    AND c.MachineName = "InspectionMachine1"
    AND EXISTS(
        SELECT VALUE i FROM i IN c.InspectionsAndResults 
        WHERE i.InspectionSetpoints["MachineSetPointGroupForMountainbike.AnyMachineSetpoint1"] != null
        OR i.InspectionSetpoints["MachineSetPointGroupForMountainbike.AnyMachineSetpoint2"] != null
        OR i.InspectionSetpoints["MachineSetPointGroupForMountainbike.AnyMachineSetpoint3"] != null
        OR i.InspectionSetpoints["MachineSetPointGroupForMountainbike.AnyMachineSetpoint4"] != null
        OR i.InspectionSetpoints["MachineSetPointGroupForMountainbike.AnyMachineSetpoint5"] != null
    )
    """

# Start from an empty list so that re-running this cell does not mix timings
# from an earlier execution into the mean of the current iteration.
query_durations = []

# Run this use case 10 times per iteration
for _ in range(10):

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() can jump when the system clock is adjusted.
    query_start_time = time.perf_counter()

    # Run the query; list() consumes the whole result so that fetching all
    # returned documents is part of the measured time.
    documents = list(
        container.query_items(
            query = sql_query,
            enable_cross_partition_query = True))

    query_end_time = time.perf_counter()

    # Duration of this run in seconds
    query_duration = query_end_time - query_start_time
    query_durations.append(query_duration)

# 4. Save the recorded operation times to the CSV result file

In [None]:
# Without any recorded durations the mean is undefined; fail with a clear
# message instead of a bare ZeroDivisionError.
if not query_durations:
    raise ValueError("No query durations recorded - run the operation cell before saving results.")

# Calculate the average of all durations recorded in this iteration
mean_duration = sum(query_durations) / len(query_durations)

# One result row for the CSV file
dataset_to_store = [[
    mean_duration,                # Calculated mean duration of this iteration
    number_of_processed_datasets, # Number of processed datasets
    database_record_count         # Current number of datasets in the database
    ]]

# Location of the CSV result file
filepath = os.path.join("Experiment_Results", "select_to_serialnumber.csv")

# open(..., 'a') creates a missing file but not a missing directory, so make
# sure the result folder exists first.
os.makedirs(os.path.dirname(filepath), exist_ok=True)

# The header row is needed when the file is new, and also when it exists but
# is still empty (checked before opening, because opening creates the file).
file_exists = os.path.isfile(filepath)
needs_header = not file_exists or os.path.getsize(filepath) == 0

# newline='' lets the csv module control line endings itself
with open(filepath, 'a', newline='') as csvfile:
    writer = csv.writer(csvfile)

    if needs_header:
        writer.writerow(['DurationTime', 'NumberOfProcessedDatasets', 'NumberOfDatasetsInDatabase'])
    writer.writerows(dataset_to_store)