In [None]:
import csv
import os
import random
import time
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
from azure.cosmos import CosmosClient        # pip install azure-cosmos

# 1. Connect to Azure Cosmos DB

In [None]:
# Read the connection settings from environment variables instead of string
# literals, so the account key is never stored in (or committed with) the
# notebook. A missing variable raises a KeyError that names it, rather than
# failing later with an opaque connection error.
cosmos_endpoint       = os.environ['AZURE_COSMOS_DB_ENDPOINT']
cosmos_account_key    = os.environ['AZURE_COSMOS_DB_ACCOUNT_KEY']
cosmos_database_name  = os.environ['AZURE_COSMOS_DB_DATABASE_NAME']
cosmos_container_name = os.environ['AZURE_COSMOS_DB_CONTAINER_NAME']

# Client -> database -> container; `container` is the handle used by all later cells
client    = CosmosClient(cosmos_endpoint, cosmos_account_key)
database  = client.get_database_client(cosmos_database_name)
container = database.get_container_client(cosmos_container_name)

# 2. Retrieve the number of datasets affected by the current CRUD operation

In [None]:
# Count the distinct serial numbers recorded for 'InspectionMachine1':
# the inner SELECT DISTINCT removes duplicate serial numbers and the outer
# SELECT VALUE COUNT(1) counts the remaining rows.
sql_query = """
SELECT VALUE COUNT(1)
FROM (
    SELECT DISTINCT c.SerialNumber
    FROM c
    WHERE c.MachineName = 'InspectionMachine1'
) AS NumberOfDistinctSerialNumbers
"""

# Run the count as a cross-partition query and keep the first element of the
# materialised result list as the number of datasets this use case processes.
number_of_processed_datasets = list(
    container.query_items(
        query=sql_query,
        enable_cross_partition_query=True,
    )
)[0]

# 3. Run the operation

In [None]:
# Number of datasets currently stored in the database (e.g. 10,000);
# written to the result file alongside the measured duration
database_record_count = 10_000

# Collects one measured duration per run of the operation
query_durations = list()

In [None]:
# SQL query that retrieves the ID and partition key (ArticleName) of the datasets
# to be updated. It is identical for every run, so it is defined once up front.
sql_query = "SELECT c.id, c.ArticleName FROM c WHERE c.MachineName = 'InspectionMachine1'"


def update_document(item):
    """Set the 'UpdateDateTime' attribute of a single dataset to the current local time.

    Args:
        item: One query result containing the keys 'id' and 'ArticleName';
            'ArticleName' is passed on as the partition key of the dataset.

    Raises:
        Any exception raised by ``container.patch_item`` is propagated.
    """
    container.patch_item(
        item=item['id'],
        partition_key=item['ArticleName'],
        patch_operations=[
            {
                'op': 'replace',
                'path': '/UpdateDateTime',
                'value': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }
        ]
    )


# Run this use case 10 times per iteration
for _ in range(10):

    # perf_counter() is a monotonic, high-resolution clock intended for measuring
    # intervals; time.time() can jump when the system clock is adjusted
    query_start_time = time.perf_counter()

    # Retrieve the datasets matching the query
    document_records = list(container.query_items(
        query=sql_query,
        enable_cross_partition_query=True
    ))

    # Use ThreadPoolExecutor to perform parallel updates, which is more efficient than sequential updates
    with ThreadPoolExecutor(max_workers=10) as executor:
        # executor.map() only re-raises a worker's exception when its result is
        # retrieved. Consuming the iterator makes a failed update abort the run
        # instead of being silently recorded as a valid measurement.
        list(executor.map(update_document, document_records))

    # Record the timestamp after all updates have completed
    query_end_time = time.perf_counter()

    # Calculate the duration of the operation (in seconds) and record it
    query_duration = query_end_time - query_start_time
    query_durations.append(query_duration)

# 4. Save the recorded operation times to the CSV result file

In [None]:
# Calculate the average duration of all operations in this iteration.
# Fail with an explicit message instead of a bare ZeroDivisionError when
# no run has been recorded yet.
if not query_durations:
    raise ValueError("query_durations is empty - run the operation before saving the results")
mean_duration = sum(query_durations) / len(query_durations)

# Define the dataset to store
dataset_to_store = [[
    mean_duration,                # Average duration of operations in this iteration
    number_of_processed_datasets, # Number of processed datasets
    database_record_count         # Current number of datasets in the database
]]

# Store values in the CSV result file
# NOTE(review): the file name says "select" although this notebook measures an
# update operation - confirm that this is the intended result file.
filepath = os.path.join("Experiment_Results", "select_to_serialnumber.csv")

# open(..., 'a') creates a missing file but not a missing parent directory
os.makedirs(os.path.dirname(filepath), exist_ok=True)

file_exists = os.path.isfile(filepath)
# A file that exists but is still empty needs the header row as well
needs_header = not file_exists or os.path.getsize(filepath) == 0

with open(filepath, 'a', newline='') as csvfile:
    writer = csv.writer(csvfile)

    # Write the header once, before the first data row
    if needs_header:
        writer.writerow(['DurationTime', 'NumberOfProcessedDatasets', 'NumberOfDatasetsInDatabase'])

    # Append the dataset to the CSV file
    writer.writerows(dataset_to_store)