In [1]:
import random
from collections import defaultdict

In [2]:
##### SETTINGS FOR THE GENERATOR #####

# Files to write to.
data_file = "../data/PrometheusDataFirst.csv"
results_file = "../data/PrometheusAnswersFirst.csv"

# Number of records (lines in csv file) to generate
num_records = 1000

# Range of values to generate. Both limits are inclusive because the values are drawn with random.randint.
value_lower_limit = -50
value_upper_limit = 50

# Optional seed for the random number generator. Leave it as None to get a different data set on every run, or set
# it to an integer to make the generated data and answer files reproducible.
random_seed = None
if random_seed is not None:
    random.seed(random_seed)

# List of all available instruments that the Prometheus Remote Write Exporter for Cortex supports.
# instruments = ["ictr", "iudctr", "ivrec", "isobs", "iudobs", "ivobs", "fctr", "fudctr", "fvrec", "fsobs", "fudobs", "fvobs"]
instruments = ["isobs", "iudobs", "ivobs", "fsobs", "fudobs", "fvobs"]

# List of instruments that use a histogram aggregation.
histogram_instruments = ["ivrec", "ivobs", "fvrec", "fvobs"]

# List of instruments that use a sum aggregation.
sum_instruments = ["ictr", "fctr", "iudctr", "fudctr", "isobs", "fsobs", "iudobs", "fudobs"]

# Fail fast on a misconfiguration: an instrument that belongs to neither aggregation list would be written to the
# data file with an empty name and would never get an expected answer recorded for it.
unsupported_instruments = [
    instrument_name
    for instrument_name in instruments
    if instrument_name not in histogram_instruments and instrument_name not in sum_instruments
]
if unsupported_instruments:
    raise ValueError(f"Instruments without a known aggregation: {unsupported_instruments}")

# Histogram boundaries for buckets. Buckets in the Go SDK include "lower" buckets -- For example, if values are from 0 to 1
# and there is a boundary at 0.5, the buckets would be (-inf, 0.5) and (-inf, +inf) instead of (-inf, 0.5), [0.5, +inf]
histogram_boundaries = [-25, 0, 25]

# A 2D dictionary of answers. Rows represent instrument types and columns hold instrument strings (name, label).
# Each individual dictionary element is a list of 6 elements, which represent sum, count, (-inf, -25) bucket, (-inf, 0) bucket,
# (-inf, 25) bucket, and the (-inf, +inf) bucket. The inner lambda builds a fresh list for every new key, so no two
# entries ever share the same list object.
answers = defaultdict(lambda: defaultdict(lambda: [0, 0, 0, 0, 0, 0]))

In [3]:
##### GENERATING DATA FILE #####
# Start from an empty answer table so that re-running this cell never mixes expected results from a previous run
# into the answers for the freshly generated data set. clear() keeps the defaultdict's default factory intact.
answers.clear()

# The context manager closes the file even if an error interrupts the loop.
with open(data_file, "w") as f:
    for i in range(num_records):
        # Randomly select an instrument.
        instr = random.choice(instruments)

        # Counters and SumObservers are additive monotonic, so they must add / observe non-negative values.
        val = random.randint(value_lower_limit, value_upper_limit)
        if instr in ["ictr", "fctr", "isobs", "fsobs"]:
            val = abs(val)

        # Create a unique name string that contains the instrument name, an index number, and the aggregation type.
        # ValueRecorders and ValueObservers use the histogram aggregation. Counters, UpDownCounters, SumObservers,
        # and UpDownSumObservers use a sum aggregation. The name stays empty for an instrument in neither list.
        name = ""
        if instr in histogram_instruments:
            name = f"p1name{i}_hist"
        elif instr in sum_instruments:
            name = f"p1name{i}_sum"

        # Create a unique description string.
        description = f"description{i}"

        # Create a unique label string.
        label = f"key{i},value{i}"

        # Create a unique record for an instrument and write it to the opened file along with the instrument name
        # and value. The properties are wrapped in double quotes so their embedded commas stay in one csv field.
        instrument_properties = f"\"{name},{description},{label}\""
        instrument_record = f"{instr},{val},{instrument_properties}"
        f.write(instrument_record + "\n")

        ############# RECORDING EXPECTED RESULTS #############
        # Formatted instrument label and properties, used as the key in the `answers` defaultdict.
        formatted_label = f"{{key{i}:value{i}}}"
        formatted_properties = f"{name},{formatted_label}"

        # Histogram aggregations.
        if instr in histogram_instruments:
            # Look the answer list up once; it is laid out as [sum, count, bucket, bucket, bucket, bucket].
            expected = answers[instr][formatted_properties]

            # Sum.
            expected[0] += val

            # Count.
            expected[1] += 1

            # (-inf, -25) bucket.
            expected[2] += 1 if val < histogram_boundaries[0] else 0

            # (-inf, 0) bucket
            expected[3] += 1 if val < histogram_boundaries[1] else 0

            # (-inf, 25) bucket
            expected[4] += 1 if val < histogram_boundaries[2] else 0

            # (-inf, +inf) bucket
            expected[5] += 1

        # Sum aggregations.
        elif instr in sum_instruments:
            # Sum. Sum aggregated instruments only track the cumulative sum.
            answers[instr][formatted_properties][0] += val


In [4]:
##### GENERATING ANSWER FILE #####
# Build every answer record in memory first. The records are grouped by instrument, so they are not in index order
# yet; `data` keeps the lines (each ending in a newline) so they can be sorted later.
data = []
for instrument, answers_by_properties in answers.items():
    for instrument_properties, expected_values in answers_by_properties.items():
        # Retrieve the stored answer values. The running sum is named `total` so that the builtin `sum` is not
        # overwritten for every cell that runs after this one.
        total, count, bucket_0, bucket_1, bucket_2, bucket_3 = expected_values

        # Prepare a record (row in csv file) that will be written to the csv file.
        record = ""

        # Format a histogram result: properties, aggregation type, sum, count and the bucket counts.
        if instrument in histogram_instruments:
            # String that contains counts of each bucket.
            bucket_str = f"{{{bucket_0},{bucket_1},{bucket_2},{bucket_3}}}"
            record = f"{instrument_properties}|hist|{total}|{count}|{bucket_str}"

        # Format a sum result: properties, aggregation type and sum.
        elif instrument in sum_instruments:
            record = f"{instrument_properties}|sum|{total}"

        data.append(record + "\n")

# Write the (still unsorted) records. The context manager closes the file even if writing fails.
with open(results_file, 'w') as f:
    f.writelines(data)

# Custom key function so that records are sorted by the numeric index embedded in their name.
def record_index(record):
    """Return the integer index embedded in an answer record's name.

    A record is expected to start with "p1name<index>_", for example "p1name12_hist,...". The index is the text
    between that fixed prefix and the first underscore.

    Raises:
        ValueError: if the record contains no underscore, or the extracted text is not an integer.
    """
    prefix_length = len("p1name")
    # str.index raises ValueError when the record has no underscore at all.
    name_end = record.index('_')
    return int(record[prefix_length:name_end])

# Sort the records by their numeric index and overwrite the answer file with the sorted result.
data = sorted(data, key=record_index)

# The context manager closes the file even if a write fails part-way through.
with open(results_file, 'w') as f:
    f.writelines(data)