In [355]:
import random
from collections import defaultdict

In [356]:
##### SETTINGS FOR THE GENERATOR #####
# Number of records (lines in csv file) to generate
num_records = 2000

# Number of characters the properties string (declared below) uses. This will be used for formatting the csv file.
properties_len = 37 + len(str(num_records)) * 3

# Range of values to generate.
value_lower_limit = -50
value_upper_limit = 50

# List of all available instruments that the Prometheus Remote Write Exporter for Cortex supports.
instruments = ["ictr", "iudctr", "ivrec", "isobs", "iudobs", "ivobs", "fctr", "fudctr", "fvrec", "fsobs", "fudobs", "fvobs"]

# List of instruments that use a histogram aggregation.
histogram_instruments = ["ivrec", "ivobs", "fvrec", "fvobs"]

# List of instruments that use a sum aggregation.
sum_instruments = ["ictr", "fctr", "iudctr", "fudctr", "isobs", "fsobs", "iudobs", "fudobs"]

# Histogram boundaries for buckets. Buckets in the Go SDK include "lower" buckets -- For example, if values are from 0 to 1
# and there is a boundary at 0.5, the buckets would be (-int, 0.5) and (-inf, +inf) instead of (-inf, 0.5), [0.5, +inf]
histogram_boundaries = [-25, 0, 25]

# A 2D dictionary of answers. Rows represent instrument types and columns hold instrument strings (name, description, label).
# Each individual dictionary element is a list of 6 elements, which represent sum, count, (-inf, -25) bucket, (-inf, 0) bucket,
# (-inf, 25) bucket, and the (-inf, +inf) bucket.
answers = defaultdict(lambda: defaultdict(lambda: [0, 0, 0, 0, 0, 0]))

In [357]:
##### GENERATING DATA FILE #####
f = open("PrometheusDataFirst.csv", "w")

for i in range(num_records):
    # Randomly select an instrument.
    instr = random.choice(instruments)
    
    # Counters and SumObservers are additive monotonic, so they must add / observe positive values.
    val = random.randint(value_lower_limit, value_upper_limit)
    if instr in ["ictr", "fctr", "isobs", "fsobs"]:
        val = abs(val)
    
    # Create a unique name string that contains the instrument name, an index number, and the aggregation type.
    # ValueRecorders and ValueObservers use the histogram aggregation. Counters, UpDownCounters, SumObservers,
    # and UpDownSumObservers use a sum aggregation.
    name = ""
    if instr in histogram_instruments:
        name = f"name{i}_hist"
    elif instr in sum_instruments:
        name = f"name{i}_sum"

    # Create a unique description string.
    description = f"description{i}"
    # "description" + str(i)

    # Create a unique label string.
    label = f"key{i}, value{i}"
    # label = "key" + str(i) + ", value" + str(i)

    # Create a unique record for an instrument and write it to the opened file along with the instrument name and value.
    # The output is formatted to make it easier to read.
    instrument_properties = f"\"{name},{description},{label}\""
    instrument_record = f"{instr:<6},{val:>4}, {instrument_properties}"
    f.write(instrument_record + "\n")
    

    ############# RECORDING EXPECTED RESULTS #############
    # For usage in the `answers` defaultdict.
    # Formatted instrument label and properties for use in the answers dictionary.
    formatted_label = f"{{key{i}:value{i}}}"
    formatted_properties = f"{name},{description},{formatted_label}"

    # Histogram aggregations.
    if instr in histogram_instruments:
        # Sum.
        answers[instr][formatted_properties][0] += val

        # Count.
        answers[instr][formatted_properties][1] += 1

        # (-inf, -25) bucket.
        answers[instr][formatted_properties][2] += 1 if val < histogram_boundaries[0] else 0

        # (-inf, 0) bucket
        answers[instr][formatted_properties][3] += 1 if val < histogram_boundaries[1] else 0

        # (-inf, 25) bucket
        answers[instr][formatted_properties][4] += 1 if val < histogram_boundaries[2] else 0

        # (-inf, +inf) bucket
        answers[instr][formatted_properties][5] += 1

    # Sum aggregations.
    elif instr in sum_instruments:
        # Sum. Sum aggregated instruments only track the cumulative sum.
        answers[instr][formatted_properties][0] += val

# Close the file when finished.
f.close()


In [358]:
##### GENERATING ANSWER FILE #####
f = open("PrometheusAnswersFirst.csv", 'w+')

# Iterate through every record in the answer dictionary. Note that order is not constant in a dictionary so the csv
# file may not be in order index wise.
for instrument in answers:
    for instrument_properties in answers[instrument]:
        # Retrieve the stored answer values.
        sum = answers[instrument][instrument_properties][0]
        count = answers[instrument][instrument_properties][1]
        bucket_0 = answers[instrument][instrument_properties][2]
        bucket_1 = answers[instrument][instrument_properties][3]
        bucket_2 = answers[instrument][instrument_properties][4]
        bucket_3 = answers[instrument][instrument_properties][5]

        # Prepare a record (row in csv file) that will be written to the csv file.
        record = ""

        # Write a histogram result to the answers csv file. The output is formatted.
        if instrument in histogram_instruments:
            # String that contains counts of each bucket.
            bucket_str = f"{{{bucket_0},{bucket_1},{bucket_2},{bucket_3}}}"            
            # record = f"{'histogram':<10}| {instrument_properties:<{properties_len + 1}}|{sum:>4} |{count:^3}| {bucket_str}"
            record = f"{instrument_properties:<{properties_len + 1}}|{'histogram':<10}|{sum:>4} |{count:^3}| {bucket_str}"
        
        # Write a sum result to the answers csv file. The output is formatted.
        elif instrument in sum_instruments:
            # record = f"{'sum':<10}| {instrument_properties:<{properties_len + 1}}|{sum:>4}"
            record = f"{instrument_properties:<{properties_len + 1}}|{'sum':<10}|{sum:>4}"

        # Write the full record to the csv file. 
        f.write(record + "\n")

# Save the records so it can be sorted later.
f.seek(0)
data = f.readlines()
    
# Close the file after writing.
f.close()

# Define custom key function so records are sorted based on its index.
def record_index(record):
    underscore_index = record.index('_')
    record_name = record[:underscore_index]
    print(int(record_name[4:]))
    return int(record_name[4:])

# Sort the data and then write it back to the file.
data = sorted(data, key=record_index)
f = open("PrometheusAnswersFirst.csv", 'w')
for record in data:
    f.write(record)
f.close()






0
5
7
32
35
41
44
48
50
61
70
75
93
98
121
129
147
149
150
153
167
196
220
230
242
254
292
297
307
309
314
339
355
357
362
367
383
384
397
407
459
488
494
510
535
554
567
575
577
614
626
632
639
646
659
663
671
674
684
705
707
711
723
727
777
798
818
874
890
926
949
950
961
967
988
997
1015
1029
1052
1066
1091
1092
1097
1117
1121
1125
1140
1143
1196
1206
1208
1214
1237
1240
1263
1275
1311
1312
1322
1327
1331
1337
1347
1373
1374
1409
1410
1412
1413
1415
1419
1441
1461
1509
1522
1540
1554
1569
1595
1646
1652
1659
1660
1668
1675
1683
1700
1702
1718
1727
1729
1734
1761
1765
1771
1810
1811
1821
1829
1832
1854
1861
1870
1876
1891
1904
1907
1914
1916
1923
1930
1933
1944
1948
1960
1964
1970
1982
1994
1
6
8
15
24
42
76
126
141
159
161
164
168
171
172
176
178
198
201
203
208
212
213
216
252
260
264
271
278
286
306
330
331
332
343
344
349
354
412
429
434
455
477
505
547
578
591
596
608
612
620
624
650
653
665
666
680
683
686
737
741
746
748
760
774
780
783
792
802
821
824
853
854
860
862
865
866
