In [None]:
import csv
import json
import os
import time
import random
import pyodbc    # pip install pyodbc

# 1. Connect to Azure SQL Database

In [None]:
# Define database connection details.
# Each value is read from the environment variable of the same name when that variable
# is set, so real credentials do not have to be stored inside the notebook. When a
# variable is not set, the placeholder string is used and must be replaced manually.
server   = os.environ.get('AZURE_SQL_DATABASE_SERVER_NAME', 'AZURE_SQL_DATABASE_SERVER_NAME')
database = os.environ.get('AZURE_SQL_DATABASE_DATABASE_NAME', 'AZURE_SQL_DATABASE_DATABASE_NAME')
username = os.environ.get('AZURE_SQL_DATABASE_USERNAME', 'AZURE_SQL_DATABASE_USERNAME')
password = os.environ.get('AZURE_SQL_DATABASE_PASSWORD', 'AZURE_SQL_DATABASE_PASSWORD')
driver   = '{ODBC Driver 17 for SQL Server}'

# Connect to Azure SQL Database
connection = pyodbc.connect(
    f'DRIVER={driver};SERVER={server};PORT=1433;DATABASE={database};UID={username};PWD={password};'
)

# Create a cursor (used by the later cells to run the insert operations)
cursor = connection.cursor()

# 2. Read the CSV sequence file that defines the order in which data is inserted

In [None]:
# These variables specify the range of dataset lines to read from the CSV sequence file.
# Indices are zero-based and counted after the header row; the end index is exclusive.
# For this use case, the first 10,000 lines are read.
sequence_range_start = 0
sequence_range_end   = 10000

# Lists that store the column values from the sequence file within the specified range.
order_numbers, serial_numbers, article_names, machine_names = [], [], [], []

# Read the values from the CSV sequence file.
# newline='' is the mode recommended by the csv module so that it handles line endings itself.
with open('sequence_of_inserting_data.csv', mode='r', newline='') as file:
    reader = csv.reader(file)
    next(reader, None)  # Skip the header row (no error if the file is empty)

    for row_index, row in enumerate(reader):
        # Skip lines before the start of the desired range
        if row_index < sequence_range_start:
            continue
        # Stop once the end of the desired range is reached
        if row_index >= sequence_range_end:
            break

        # If the file has fewer lines than requested, the loop simply ends early
        # and only the available lines are used.
        order_numbers.append(row[0])
        serial_numbers.append(row[1])
        article_names.append(row[2])
        machine_names.append(row[3])

# Combine the individual lists into a single sequence of
# (order_number, serial_number, article_name, machine_name) tuples.
# A list (rather than a bare zip iterator) is used so that the sequence is not
# exhausted after one pass and a later cell can be re-run safely.
sequence_of_inserting_data = list(zip(order_numbers, serial_numbers, article_names, machine_names))

# 3. Prepare the data set to be inserted and insert it into Azure SQL Database

In [None]:
# Collects one duration value per inserted dataset (filled by the insert loop)
query_durations = list()

# Every insert operation in this use case touches exactly one dataset
number_of_processed_datasets = 1

# Size of the database in datasets (e.g., 10,000 after the first iteration)
database_record_count = 10_000

In [None]:
# Cache of the raw reference-dataset JSON text, keyed by file path, so that each
# reference file is read from disk only once even when many rows share an article.
reference_dataset_texts = {}

for order_number, serial_number, article_name, machine_name in sequence_of_inserting_data:

    # Prepare the dataset

    # Locate the reference dataset for the article
    reference_dataset_file_path = os.path.join(
        'Reference_Datasets',
        f"reference_dataset_{article_name}.json"
    )

    # Read the reference file on first use only
    if reference_dataset_file_path not in reference_dataset_texts:
        with open(reference_dataset_file_path, 'r') as file:
            reference_dataset_texts[reference_dataset_file_path] = file.read()

    # Parse a fresh object for every row because the dataset is modified below
    json_data = json.loads(reference_dataset_texts[reference_dataset_file_path])

    # Update JSON data with values from the sequence file
    json_data.update({
        'OrderNumber': order_number,     # Set the order number from the sequence file
        'SerialNumber': serial_number,   # Set the serial number from the sequence file
        'MachineName': machine_name,     # Set the machine name from the sequence file
    })

    # Generate a measured value for each inspection step
    for inspection in json_data['InspectionsAndResults']:
        lower_border_value = float(inspection['InspectionLowerBorderValue'])
        upper_border_value = float(inspection['InspectionUpperBorderValue'])

        # Generate a random measured value within the specified range,
        # rounded to two decimals and stored as a string
        measured_value = str(round(random.uniform(lower_border_value, upper_border_value), 2))
        inspection['InspectionResultMeasuredValue'] = measured_value

    # Run the insert operation

    # Convert data to a JSON string
    json_data_as_string = json.dumps(json_data)

    # Record the start time of the operation.
    # perf_counter() is a monotonic, high-resolution clock intended for measuring
    # intervals; time.time() can jump when the system clock is adjusted.
    query_start_time = time.perf_counter()

    # Execute the stored procedure to insert the dataset into Azure SQL Database.
    # The JSON is passed as a bound parameter instead of being concatenated into the
    # SQL text, so quotes inside the JSON cannot break (or inject into) the statement.
    cursor.execute("{CALL SP_InsertInspectionOperation(?)}", json_data_as_string)

    # Record the end time of the operation
    query_end_time = time.perf_counter()

    # Calculate the duration of the operation
    query_duration = query_end_time - query_start_time
    query_durations.append(query_duration)

    # pyodbc connections are created with autocommit off, so the insert must be
    # committed explicitly or it is rolled back when the connection closes.
    # The commit is deliberately outside the timed window.
    cursor.commit()

# 4. Save the recorded operation times in the CSV result file

In [None]:
# Calculate the average duration of all operations in this iteration.
# Fail with a clear message instead of a ZeroDivisionError when nothing was measured
# (e.g. when the insert cell has not been run or processed no rows).
if not query_durations:
    raise ValueError(
        "No operation durations were recorded; run the insert cell before saving results."
    )
mean_duration = sum(query_durations) / len(query_durations)

# Define the dataset to store
dataset_to_store = [[
    mean_duration,                # Average duration of operations in this iteration
    number_of_processed_datasets, # Number of processed datasets (1 in this case, since 10,000 datasets are inserted sequentially)
    database_record_count         # Number of datasets in the database after inserting 10,000 datasets
]]

# Store values in the CSV result file
# NOTE(review): the file name refers to "select" although this notebook measures
# insert operations - confirm that this is the intended result file.
filepath = os.path.join("Experiment_Results", "select_to_serialnumber.csv")

# Make sure the result directory exists so that opening the file cannot fail on a fresh checkout
os.makedirs(os.path.dirname(filepath), exist_ok=True)

file_exists = os.path.isfile(filepath)

# The header is needed for a new file and also for an existing but empty file
write_header = (not file_exists) or os.path.getsize(filepath) == 0

with open(filepath, 'a', newline='') as csvfile:
    writer = csv.writer(csvfile)

    # Write the header row if the file does not have one yet
    if write_header:
        writer.writerow(['DurationTime', 'NumberOfProcessedDatasets', 'NumberOfDatasetsInDatabase'])

    # Append the dataset to the CSV file
    writer.writerows(dataset_to_store)