In [None]:
import csv
import os
import random
import time
from datetime import datetime
from azure.cosmos import CosmosClient        # pip install azure-cosmos

# 1. Connect to Azure Cosmos DB

In [None]:
# Connection settings are read from environment variables so that the endpoint
# and account key do not have to be stored in the notebook itself. When a
# variable is not set, the original placeholder string is used unchanged.
cosmos_endpoint       = os.environ.get('AZURE_COSMOS_DB_ENDPOINT', 'AZURE_COSMOS_DB_ENDPOINT')
cosmos_account_key    = os.environ.get('AZURE_COSMOS_DB_ACCOUNT_KEY', 'AZURE_COSMOS_DB_ACCOUNT_KEY')
cosmos_database_name  = os.environ.get('AZURE_COSMOS_DB_DATABASE_NAME', 'AZURE_COSMOS_DB_DATABASE_NAME')
cosmos_container_name = os.environ.get('AZURE_COSMOS_DB_CONTAINER_NAME', 'AZURE_COSMOS_DB_CONTAINER_NAME')

# Build the account client, then obtain client handles for the database and the container
client    = CosmosClient(cosmos_endpoint, cosmos_account_key)
database  = client.get_database_client(cosmos_database_name)
container = database.get_container_client(cosmos_container_name)

# 2. Retrieve all serial numbers currently available in Azure Cosmos DB

In [None]:
# Query every distinct SerialNumber value, spanning all partitions of the container
query = 'SELECT DISTINCT c.SerialNumber FROM c'
datasets = container.query_items(query=query, enable_cross_partition_query=True)

# Collect the SerialNumber field of each returned item into a list
serialnumbers = [item["SerialNumber"] for item in datasets]

# 3. Run the operation

In [None]:
# Collects the measured duration of every operation that gets executed
query_durations = list()

# A single dataset is affected per operation in this use case, so this value is fixed at 1
number_of_processed_datasets = 1

# Number of datasets currently available in the database (e.g. 10,000)
database_record_count = 10_000

In [None]:
# Parameterized query text: the serial number is supplied separately through
# `parameters`, so its value is never spliced into the SQL string. This keeps
# the value's original type and is safe for values containing quote characters.
sql_query = 'SELECT * FROM c WHERE c.SerialNumber = @serialnumber'

# Run this use case 10 times per iteration
for _ in range(10):

    # Pick a random known serial number to look up (chosen before timing starts)
    query_parameters = [{"name": "@serialnumber", "value": random.choice(serialnumbers)}]

    # Start the measurement; perf_counter() is intended for measuring short
    # durations and is not affected by system clock adjustments
    query_start_time = time.perf_counter()

    # Run CRUD-Operation; list() drains the result iterator so that fetching
    # all matching documents is included in the measured time
    documents = list(
        container.query_items(
            query = sql_query,
            parameters = query_parameters,
            enable_cross_partition_query = True))

    # Stop the measurement
    query_end_time = time.perf_counter()

    # Calculate the duration (in seconds) for this operation
    query_duration = query_end_time - query_start_time
    query_durations.append(query_duration)

# 4. Saving the recorded operation times in the CSV result file

In [None]:
# Calculate the average duration of all operations in this iteration
mean_duration = sum(query_durations) / len(query_durations)

# Define the dataset to store (a single CSV row)
dataset_to_store = [[
    mean_duration,                # Average duration of the operations in this iteration
    number_of_processed_datasets, # Number of datasets processed per operation
    database_record_count         # Number of datasets in the database during the measurement
]]

# Store values in the CSV result file
results_directory = "Experiment_Results"
filepath = os.path.join(results_directory, "select_to_serialnumber.csv")

# Create the result directory if it is missing; otherwise opening the file
# in append mode below would fail with FileNotFoundError
os.makedirs(results_directory, exist_ok=True)

# Checked before opening, because opening in append mode creates the file
file_exists = os.path.isfile(filepath)

with open(filepath, 'a', newline='') as csvfile:
    writer = csv.writer(csvfile)

    # Write header if the file does not exist
    if not file_exists:
        writer.writerow(['DurationTime', 'NumberOfProcessedDatasets', 'NumberOfDatasetsInDatabase'])

    # Append the dataset to the CSV file
    writer.writerows(dataset_to_store)