In [29]:
import numpy as np
import re
import pprint
from collections import defaultdict
import os

In [30]:
# Benchmark dimensions; their product is the number of messages that should
# show up in the subscriber logs.
NUM_RUNS = 5           # repetitions of each experiment
NUM_PUB = 20           # presumably publishers per experiment -- NUM_MSG * NUM_PUB messages are sent per experiment
NUM_QOS_CONFIG = 3     # QoS settings tested (0, 1 and 2)
NUM_TOPIC_CONFIG = 2   # topic configurations tested ("config 1" and "config 2")
NUM_MSG = 1000         # presumably messages sent by each publisher
NUM_BROKER = 2         # brokers compared (EMQX and HiveMQ)

In [31]:
# Regular expression for one log line of the form "<timestamp>[<LEVEL>],<message>".
# The three pieces are concatenated without separators before compiling.
log_pattern = re.compile(
    "".join(
        (
            # Date and time followed by a "+hhmm" offset, e.g. 2024-08-29T12:26:36+0000
            r"(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{4})",
            # Upper-case level in square brackets, followed by a comma
            r"\[(?P<level>[A-Z]+)\],",
            # Everything else up to the end of the line
            r"(?P<message>.*)",
        )
    )
)

In [32]:
# Directories that contain the subscriber log files, one per
# topic configuration / broker combination
log_directories = [
    "subscriber/logs/config1/emqx",
    "subscriber/logs/config2/emqx",
    "subscriber/logs/config1/hivemq",
    "subscriber/logs/config2/hivemq",
]

In [33]:
# Paths of all log files that were read
log_files = list()

# Parsed log entries whose line contains ',topic/structure/'
filtered_messages = []

# Per-directory counter: number of files seen so far for each experiment name.
# The running count is stored with every entry as its run number ('exec').
n_execs = defaultdict(int)

# Process each directory / config
for directory in log_directories:

    # Run numbers restart at 1 in every directory
    n_execs.clear()

    # os.listdir() returns names in arbitrary order; sorting makes the run
    # number given to each file the same on every execution of this cell.
    for f in sorted(os.listdir(directory)):
        log_file = os.path.join(directory, f)
        if not os.path.isfile(log_file):
            continue

        log_files.append(log_file)

        # The broker is the name of the directory that contains the file
        broker = os.path.basename(os.path.dirname(log_file))

        # Split the file name, not the whole path, so that a '-' inside a
        # directory name cannot shift the positions of the fields.
        file_name = os.path.basename(log_file)
        parts = file_name.split('-')
        config = ' '.join(parts[3:5])
        qos = ' '.join(parts[6:7]).removesuffix('.log')

        # Everything after the first '-'-separated field names the experiment;
        # files that share this name are counted as successive runs.
        exec_name = '-'.join(parts[1:])
        n_execs[exec_name] += 1

        with open(log_file, 'r') as file:
            for line in file:
                _line = line.strip()
                match = log_pattern.match(_line)
                # Filter log messages that contain ',topic/structure/'
                if match and (',topic/structure/' in _line):
                    log_entry = match.groupdict()
                    log_entry['config'] = config
                    log_entry['qos'] = qos
                    log_entry['broker'] = broker
                    log_entry['exec'] = n_execs[exec_name]
                    filtered_messages.append(log_entry)

print(f"Number of logs: {len(log_files)}")

Number of logs: 60


In [6]:
# Number of log entries that matched the log pattern and contain ',topic/structure/'
print(f"number of log entries: {len(filtered_messages)}")

number of log entries: 993388


In [7]:
# Messages sent in a single experiment
sent_messages_per_experiment = NUM_PUB * NUM_MSG

# One experiment per QoS setting, topic configuration, run and broker
experiment_count = NUM_QOS_CONFIG * NUM_TOPIC_CONFIG * NUM_RUNS * NUM_BROKER
total_expected_messages = sent_messages_per_experiment * experiment_count

print(f"Total messages sent per experiment: {sent_messages_per_experiment}")
print(f"Total messages sent in the whole experiment: {total_expected_messages}")

Total messages sent per experiment: 20000
Total messages sent in the whole experiment: 1200000


In [8]:
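# The ',topic/structure/' filter is already applied while the log files are parsed,
# so this separate filtering pass is disabled and kept only for reference:
# filtered_messages = [log for log in parsed_logs if ',topic/structure/' in log['message']]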

In [9]:
# Show one parsed entry as a sanity check.
# NOTE(review): the index is hard-coded; this raises IndexError when fewer
# entries were parsed.
print(filtered_messages[852476])

{'timestamp': '2024-08-29T12:26:36+0000', 'level': 'INFO', 'message': 'e17c2273510949317a6a783fefd129544ea8190f22fb84ffa9c215e3449d0dc6,1724934396966.6882,1724934396978.5635,topic/structure/node5,500', 'config': 'config 2', 'qos': '1', 'broker': 'hivemq', 'exec': 3}


In [10]:
# Total number of data entries kept after the ',topic/structure/' filter
total_entries = len(filtered_messages)
print(f"Total number of data entries filtered: {total_entries}")

Total number of data entries filtered: 993388


In [11]:
# Messages that were expected but are missing from the filtered log entries
lost_msgs = total_expected_messages - len(filtered_messages)
lost_pct = lost_msgs / total_expected_messages * 100
print(f"Lost messages / Percentage: {lost_msgs} / {lost_pct}%")

Lost messages / Percentage: 206612 / 17.217666666666666%


In [12]:
# Build a NumPy structured array with one row per log entry, combining the
# comma-separated fields of the message with the config / QoS / broker / run
# information attached while parsing.
entries_list = []
for log in filtered_messages:
    fields = log['message'].split(',')

    # Skip messages that do not carry at least the five expected fields
    if len(fields) < 5:
        continue

    msg_id, sent_at, received_at, topic, size = fields[:5]
    entries_list.append((
        msg_id,                                 # 'uuid'
        float(received_at) - float(sent_at),    # 'latency': received time - send time
        topic,                                  # 'topic'
        int(size),                              # 'size' in bytes
        log['config'],                          # 'config': topic structure configuration
        log['qos'],                             # 'qos': subscription QoS (still a string here)
        log['broker'],                          # 'broker': MQTT broker
        log['exec'],                            # 'exec': run number
    ))

# Field names and types of the structured array, in the order of the tuples above
dtype = [('uuid', 'U64'), ('latency', 'f8'), ('topic', 'U64'), ('size', 'i4'), ('config', 'U64'), ('qos', 'i4'),('broker', 'U64'), ('exec','i4')]

# Convert to NumPy structured array
entries_np_array = np.array(entries_list, dtype=dtype)

In [13]:
# Show one row of the structured array as a sanity check.
# NOTE(review): the index is hard-coded; this raises IndexError when the array
# has fewer rows.
print(entries_np_array[600000])

('d456319c373b77195a173575053e6adcc0a266b1423d64de53c8bb268638cc75', 14.186279296875, 'topic/structure/node17', 500, 'config 1', 1, 'hivemq', 2)


In [14]:
# Look for UUIDs that occur more than once among the filtered entries
unique_ids, counts = np.unique(entries_np_array['uuid'], return_counts=True)
repeated = counts > 1
duplicates = {msg_id: n for msg_id, n in zip(unique_ids[repeated], counts[repeated])}
print(f"Duplicated entries based on the first column in the filtered messages: {duplicates}")

Duplicated entries based on the first column in the filtered messages: {}


In [15]:
# Split the entries into one subset per broker
is_emqx = entries_np_array['broker'] == "emqx"
is_hivemq = entries_np_array['broker'] == "hivemq"
emqx_set = entries_np_array[is_emqx]
hivemq_set = entries_np_array[is_hivemq]

# The two printed numbers are equal when every entry belongs to one of the two brokers
total_sets = len(emqx_set) + len(hivemq_set)
print(total_sets)
print(len(entries_np_array))

993388
993388


In [21]:
# Creating subsets by topic configuration and QoS for EMQX, and reporting for
# each one the received total, the average per run and the resulting loss.


def _mean_received_per_run(subset):
    """Return the rounded mean number of entries per run in ``subset``.

    Entries are grouped by their 'exec' field and the group sizes are
    averaged. Only runs with at least one entry contribute to the mean.

    An empty subset yields 0: ``np.mean`` of an empty array is NaN, and
    ``round(NaN)`` raises ``ValueError``.
    """
    if len(subset) == 0:
        return 0
    _, per_run = np.unique(subset['exec'], return_counts=True)
    return round(np.mean(per_run))


emqx_config1_qos0_set = emqx_set[(emqx_set['config'] == "config 1") & (emqx_set['qos'] == 0)]
emqx_config1_qos1_set = emqx_set[(emqx_set['config'] == "config 1") & (emqx_set['qos'] == 1)]
emqx_config1_qos2_set = emqx_set[(emqx_set['config'] == "config 1") & (emqx_set['qos'] == 2)]

# Lost messages = messages sent per experiment (NUM_MSG * NUM_PUB) minus the
# average number received per run.
print(f"Number of received messages for EMQX, config 1, QoS 0:\t{len(emqx_config1_qos0_set)}")
avg_msg_emqx_config1_qos0_set = _mean_received_per_run(emqx_config1_qos0_set)
print(f"Average of received messages for EMQX, config 1, QoS 0:\t{avg_msg_emqx_config1_qos0_set}")
print(f"Number of lost messages for EMQX, config 1, QoS 0:\t{(NUM_MSG*NUM_PUB) - avg_msg_emqx_config1_qos0_set}\n")

print(f"Number of received messages for EMQX, config 1, QoS 1:\t{len(emqx_config1_qos1_set)}")
avg_msg_emqx_config1_qos1_set = _mean_received_per_run(emqx_config1_qos1_set)
print(f"Average of received messages for EMQX, config 1, QoS 1:\t{avg_msg_emqx_config1_qos1_set}")
print(f"Number of lost messages for EMQX, config 1, QoS 1: {(NUM_MSG*NUM_PUB) - avg_msg_emqx_config1_qos1_set}\n")

print(f"Number of received messages for EMQX, config 1, QoS 2:\t{len(emqx_config1_qos2_set)}")
avg_msg_emqx_config1_qos2_set = _mean_received_per_run(emqx_config1_qos2_set)
print(f"Average received messages for EMQX, config 1, QoS 2:\t{avg_msg_emqx_config1_qos2_set}")
print(f"Number of lost messages for EMQX, config 1, QoS 2: {(NUM_MSG*NUM_PUB) - avg_msg_emqx_config1_qos2_set}\n")

emqx_config2_qos0_set = emqx_set[(emqx_set['config'] == "config 2") & (emqx_set['qos'] == 0)]
emqx_config2_qos1_set = emqx_set[(emqx_set['config'] == "config 2") & (emqx_set['qos'] == 1)]
emqx_config2_qos2_set = emqx_set[(emqx_set['config'] == "config 2") & (emqx_set['qos'] == 2)]

print(f"Number of received messages for EMQX, config 2, QoS 0: {len(emqx_config2_qos0_set)}")
avg_msg_emqx_config2_qos0_set = _mean_received_per_run(emqx_config2_qos0_set)
print(f"Average received messages for EMQX, config 2, QoS 0:\t{avg_msg_emqx_config2_qos0_set}")
print(f"Number of lost messages for EMQX, config 2, QoS 0: {(NUM_MSG*NUM_PUB) - avg_msg_emqx_config2_qos0_set}\n")

print(f"Number of received messages for EMQX, config 2, QoS 1: {len(emqx_config2_qos1_set)}")
avg_msg_emqx_config2_qos1_set = _mean_received_per_run(emqx_config2_qos1_set)
print(f"Average received messages for EMQX, config 2, QoS 1:\t{avg_msg_emqx_config2_qos1_set}")
print(f"Number of lost messages for EMQX, config 2, QoS 1: {(NUM_MSG*NUM_PUB) - avg_msg_emqx_config2_qos1_set}\n")

print(f"Number of received messages for EMQX, config 2, QoS 2: {len(emqx_config2_qos2_set)}")
avg_msg_emqx_config2_qos2_set = _mean_received_per_run(emqx_config2_qos2_set)
print(f"Average received messages for EMQX, config 2, QoS 2:\t{avg_msg_emqx_config2_qos2_set}")
print(f"Number of lost messages for EMQX, config 2, QoS 2: {(NUM_MSG*NUM_PUB) - avg_msg_emqx_config2_qos2_set}\n")

Number of received messages for EMQX, config 1, QoS 0:	73066
Average of received messages for EMQX, config 1, QoS 0:	14613
Number of lost messages for EMQX, config 1, QoS 0:	5387

Number of received messages for EMQX, config 1, QoS 1:	100000
Average of received messages for EMQX, config 1, QoS 1:	20000
Number of lost messages for EMQX, config 1, QoS 1: 0

Number of received messages for EMQX, config 1, QoS 2:	100000
Average received messages for EMQX, config 1, QoS 2:	20000
Number of lost messages for EMQX, config 1, QoS 2: 0

Number of received messages for EMQX, config 2, QoS 0: 73075
Average received messages for EMQX, config 2, QoS 0:	14615
Number of lost messages for EMQX, config 2, QoS 0: 5385

Number of received messages for EMQX, config 2, QoS 1: 100000
Average received messages for EMQX, config 2, QoS 1:	20000
Number of lost messages for EMQX, config 2, QoS 1: 0

Number of received messages for EMQX, config 2, QoS 2: 100000
Average received messages for EMQX, config 2, QoS 2:	

In [24]:
# Mean of the 'latency' field (received time - send time) for each EMQX subset
latency_avg_emqx_config1_qo0 = emqx_config1_qos0_set['latency'].mean()
latency_avg_emqx_config1_qo1 = emqx_config1_qos1_set['latency'].mean()
latency_avg_emqx_config1_qo2 = emqx_config1_qos2_set['latency'].mean()

latency_avg_emqx_config2_qo0 = emqx_config2_qos0_set['latency'].mean()
latency_avg_emqx_config2_qo1 = emqx_config2_qos1_set['latency'].mean()
latency_avg_emqx_config2_qo2 = emqx_config2_qos2_set['latency'].mean()

# Report every average rounded to two decimals
for subset_label, latency_avg in (
    ("config 1, QoS 0", latency_avg_emqx_config1_qo0),
    ("config 1, QoS 1", latency_avg_emqx_config1_qo1),
    ("config 1, QoS 2", latency_avg_emqx_config1_qo2),
    ("config 2, QoS 0", latency_avg_emqx_config2_qo0),
    ("config 2, QoS 1", latency_avg_emqx_config2_qo1),
    ("config 2, QoS 2", latency_avg_emqx_config2_qo2),
):
    print(f"Average latency for EMQX for subset with {subset_label}: {round(latency_avg,2)} ms")

Average latency for EMQX for subset with config 1, QoS 0: 4.93 ms
Average latency for EMQX for subset with config 1, QoS 1: 5.64 ms
Average latency for EMQX for subset with config 1, QoS 2: 9.4 ms
Average latency for EMQX for subset with config 2, QoS 0: 4.82 ms
Average latency for EMQX for subset with config 2, QoS 1: 6.18 ms
Average latency for EMQX for subset with config 2, QoS 2: 9.31 ms


In [27]:
# Creating subsets by topic configuration and QoS for HiveMQ, and reporting for
# each one the received total, the average per run and the resulting loss.


def _report_hivemq_delivery(config_number, qos_level, subset):
    """Print the delivery figures of one HiveMQ subset and return the average.

    Prints the number of received messages, the rounded mean number of
    messages per run (entries grouped by their 'exec' field; only runs with
    at least one entry contribute) and the number of lost messages, i.e.
    ``NUM_MSG * NUM_PUB`` minus that average.

    An empty subset is reported with an average of 0: ``np.mean`` of an empty
    array is NaN, and ``round(NaN)`` raises ``ValueError``.

    Returns the rounded average that was printed.
    """
    label = f"HiveMQ, config {config_number}, QoS {qos_level}"
    print(f"Number of received messages for {label}: {len(subset)}")
    if len(subset) == 0:
        avg_received = 0
    else:
        _, per_run = np.unique(subset['exec'], return_counts=True)
        avg_received = round(np.mean(per_run))
    print(f"Average of received messages for {label}:\t{avg_received}")
    print(f"Number of lost messages for {label}:\t{(NUM_MSG*NUM_PUB) - avg_received}\n")
    return avg_received


hivemq_config1_qos0_set = hivemq_set[(hivemq_set['config'] == "config 1") & (hivemq_set['qos'] == 0)]
hivemq_config1_qos1_set = hivemq_set[(hivemq_set['config'] == "config 1") & (hivemq_set['qos'] == 1)]
hivemq_config1_qos2_set = hivemq_set[(hivemq_set['config'] == "config 1") & (hivemq_set['qos'] == 2)]

avg_msg_hivemq_config1_qos0_set = _report_hivemq_delivery(1, 0, hivemq_config1_qos0_set)
avg_msg_hivemq_config1_qos1_set = _report_hivemq_delivery(1, 1, hivemq_config1_qos1_set)
avg_msg_hivemq_config1_qos2_set = _report_hivemq_delivery(1, 2, hivemq_config1_qos2_set)

hivemq_config2_qos0_set = hivemq_set[(hivemq_set['config'] == "config 2") & (hivemq_set['qos'] == 0)]
hivemq_config2_qos1_set = hivemq_set[(hivemq_set['config'] == "config 2") & (hivemq_set['qos'] == 1)]
hivemq_config2_qos2_set = hivemq_set[(hivemq_set['config'] == "config 2") & (hivemq_set['qos'] == 2)]

avg_msg_hivemq_config2_qos0_set = _report_hivemq_delivery(2, 0, hivemq_config2_qos0_set)
avg_msg_hivemq_config2_qos1_set = _report_hivemq_delivery(2, 1, hivemq_config2_qos1_set)
avg_msg_hivemq_config2_qos2_set = _report_hivemq_delivery(2, 2, hivemq_config2_qos2_set)

Number of received messages for HiveMQ, config 1, QoS 0: 22366
Average of received messages for HiveMQ, config 1, QoS 0:	4473
Number of lost messages for HiveMQ, config 1, QoS 0:	15527

Number of received messages for HiveMQ, config 1, QoS 1: 100000
Average of received messages for HiveMQ, config 1, QoS 1:	20000
Number of lost messages for HiveMQ, config 1, QoS 1:	0

Number of received messages for HiveMQ, config 1, QoS 2: 100000
Average of received messages for HiveMQ, config 1, QoS 2:	20000
Number of lost messages for HiveMQ, config 1, QoS 2:	0

Number of received messages for HiveMQ, config 2, QoS 0: 24881
Average of received messages for HiveMQ, config 2, QoS 0:	4976
Number of lost messages for HiveMQ, config 2, QoS 0:	15024

Number of received messages for HiveMQ, config 2, QoS 1: 100000
Average of received messages for HiveMQ, config 2, QoS 1:	20000
Number of lost messages for HiveMQ, config 2, QoS 1:	0

Number of received messages for HiveMQ, config 2, QoS 2: 100000
Average of r

In [28]:
# Mean of the 'latency' field (received time - send time) for each HiveMQ subset
latency_avg_hivemq_config1_qos0 = hivemq_config1_qos0_set['latency'].mean()
latency_avg_hivemq_config1_qos1 = hivemq_config1_qos1_set['latency'].mean()
latency_avg_hivemq_config1_qos2 = hivemq_config1_qos2_set['latency'].mean()

latency_avg_hivemq_config2_qos0 = hivemq_config2_qos0_set['latency'].mean()
latency_avg_hivemq_config2_qos1 = hivemq_config2_qos1_set['latency'].mean()
latency_avg_hivemq_config2_qos2 = hivemq_config2_qos2_set['latency'].mean()

# Report every average rounded to two decimals
for subset_label, latency_avg in (
    ("config 1, QoS 0", latency_avg_hivemq_config1_qos0),
    ("config 1, QoS 1", latency_avg_hivemq_config1_qos1),
    ("config 1, QoS 2", latency_avg_hivemq_config1_qos2),
    ("config 2, QoS 0", latency_avg_hivemq_config2_qos0),
    ("config 2, QoS 1", latency_avg_hivemq_config2_qos1),
    ("config 2, QoS 2", latency_avg_hivemq_config2_qos2),
):
    print(f"Average latency for HiveMQ for subset with {subset_label}: {round(latency_avg,2)} ms")

Average latency for HiveMQ for subset with config 1, QoS 0: 5.94 ms
Average latency for HiveMQ for subset with config 1, QoS 1: 14.17 ms
Average latency for HiveMQ for subset with config 1, QoS 2: 22.74 ms
Average latency for HiveMQ for subset with config 2, QoS 0: 6.06 ms
Average latency for HiveMQ for subset with config 2, QoS 1: 14.16 ms
Average latency for HiveMQ for subset with config 2, QoS 2: 24.97 ms
