In [1]:
import json
from collections import defaultdict
from functools import reduce

with open("../docker/sources.json", 'r') as input_file:
    json_data = json.load(input_file)

total_bytes = defaultdict(int)

data_size = {
    None: 4,
    'int8': 1,
    'uint8': 2,
    'int16': 2,
    'uint16': 2,
    'int32': 4,
    'uint32': 4,
    'int64': 8,
    'uint64': 8,
    'float32': 4,
    'float64': 8,
    'string': 1,
    'bool': 1
}

for device_name, channel_data in json_data.items():
    for channel in channel_data:
        total_bytes[device_name] = reduce(lambda x1, x2: x1*x2, channel["shape"], 1) * data_size[channel["type"]]

total_bytes = [(x, y) for x, y in total_bytes.items()]
total_bytes.sort(key=lambda x: (x[1], x[0]))

In [2]:
def get_pulse_stats(data):
    total = sum([x[1] for x in data])
    n_items = len(data)
    avg = total/n_items
    
    return total, n_items, avg

total_small, n_small, avg_small = get_pulse_stats([x for x in total_bytes if x[1]<4096])
total_large, n_large, avg_large = get_pulse_stats([x for x in total_bytes if 4096<=x[1]<=16384 and x[1]])
total_giga, n_giga, avg_giga = get_pulse_stats([x for x in total_bytes if x[1]>16384])

total_data = total_small + total_large + total_giga

In [3]:
def get_time_stats(avg_size):
    # avg_size is in bytes
    mb_s = avg_size * 100 / 1024 / 1024
    # Expecting 100MB partitions
    s_per_partition = 100 / mb_s
    gb_day = mb_s/1024 * 3600 * 24
    
    return mb_s, s_per_partition, gb_day

mb_s_small, s_per_partition_small, gb_day_small = get_time_stats(avg_small)
mb_s_large, s_per_partition_large, gb_day_large = get_time_stats(avg_large)
mb_s_giga, s_per_partition_giga, gb_day_giga = get_time_stats(avg_giga)

In [5]:
print("Average data volume / pulse / source (b) other:\t", avg_small)
print("Number of sources other:\t", n_small)
print("Daily data volume (Gb) other:\t", mb_s_small/1024 * 3600 * 24)
print("Seconds per partition other:\t", 100/mb_s_small)
print("% of DAQ other:\t", total_small/total_data * 100)
print()

print("Average data volume / pulse / source (b) LLRF:\t", avg_large)
print("Number of sources LLRF:\t", n_large)
print("Daily data volume (Gb) LLRF:\t", mb_s_large/1024 * 3600 * 24)
print("Seconds per partition LLRF:\t", 100/mb_s_large)
print("% of DAQ other:\t", total_large/total_data * 100)
print()

print("Average data volume / pulse / source (b) CAMERAS:\t", avg_giga)
print("Number of sources CAMERAS:\t", n_giga)
print("Seconds per partition CAMERAS:\t", 100/mb_s_giga)
print("Daily data volume (Gb) CAMERAS:\t", mb_s_giga/1024 * 3600 * 24)
print("% of DAQ other:\t", total_giga/total_data * 100)

Average data volume / pulse / source (b) other:	 7.9212376933895925
Number of sources other:	 711
Daily data volume (Gb) other:	 0.06373924545094937
Seconds per partition other:	 132375.2727272727
% of DAQ other:	 0.013160005099501975

Average data volume / pulse / source (b) LLRF:	 4936.205128205128
Number of sources LLRF:	 78
Daily data volume (Gb) LLRF:	 39.71980168269231
Seconds per partition LLRF:	 212.4255319148936
% of DAQ other:	 0.8996658031659532

Average data volume / pulse / source (b) CAMERAS:	 2494451.7647058824
Number of sources CAMERAS:	 17
Seconds per partition CAMERAS:	 0.42036330982075987
Daily data volume (Gb) CAMERAS:	 20071.923031526454
% of DAQ other:	 99.08717419173453
