In [5]:
### Inputs: traces_dict, node_details_dict and trace_details_dict
# Node details dict= nid: [nis, type]
### Config file: DB split and SLtype split
### Outputs: updated_node_details

In [1]:
import pickle
import yaml
import random
import json

import networkx as nx
import numpy as np
from collections import Counter

In [2]:
def pkl_to_dict(file_path):
    """Load and return the object stored in the pickle file at ``file_path``.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    produced by this pipeline.
    """
    with open(file_path, 'rb') as handle:
        return pickle.load(handle)

def save_dict_as_pkl(traces_dict, file_name):
    """Pickle ``traces_dict`` to ``<file_name>.pkl`` (the extension is appended here)."""
    target_path = file_name + '.pkl'
    with open(target_path, 'wb') as out_stream:
        pickle.dump(traces_dict, out_stream)

def save_dict_as_json(traces_dict, file_name):
    """Serialise ``traces_dict`` as JSON into ``<file_name>.json``.

    The ``.json`` extension is appended here, so ``file_name`` should be
    passed without it.
    """
    target_path = file_name + '.json'
    with open(target_path, 'w') as out_stream:
        json.dump(traces_dict, out_stream)

def read_yaml(file):
    """Parse the YAML file at path ``file`` with ``yaml.safe_load`` and return the result."""
    with open(file, 'r') as yaml_stream:
        return yaml.safe_load(yaml_stream)

def build_digraph_from_tracesdict(traces_dict):
    """Build one directed graph from the edges of every trace.

    ``traces_dict`` maps a trace id to that trace's edge list; the edge lists
    of all traces are concatenated and added to a single ``nx.DiGraph``.
    """
    all_edges = [edge for trace_edges in traces_dict.values() for edge in trace_edges]

    aggregate_graph = nx.DiGraph()
    aggregate_graph.add_edges_from(all_edges)
    return aggregate_graph

def prune_node_details(traces_dict, node_dets):
    """Return the entries of ``node_dets`` whose node id appears in some trace edge.

    Args:
        traces_dict: mapping of trace id -> list of edges; ``e[0]`` and
            ``e[1]`` of each edge are the two endpoint node ids.
        node_dets: mapping of node id -> node details.

    Returns:
        A new dict, in ``node_dets`` order, restricted to node ids that occur
        as an endpoint of at least one edge.
    """
    # A set gives O(1) membership checks; the previous list-based version was
    # quadratic in the number of distinct nodes.
    nodes_from_traces = set()
    for e_list in traces_dict.values():
        for e in e_list:
            nodes_from_traces.add(e[0])
            nodes_from_traces.add(e[1])

    return {node: details for node, details in node_dets.items()
            if node in nodes_from_traces}

def calc_graph_depth(G, initial_node):
    """Return the depth (in nodes) of the deepest path found from ``initial_node``.

    A node without successors has depth 1. Successors are obtained via
    ``G.successors``. An edge leading back to a node on the current DFS path
    contributes 0, so cycles do not recurse forever.
    """
    depth_memo = {}   # node -> depth already computed for it
    on_path = set()   # nodes on the current DFS path (cycle guard)

    def _depth_from(current):
        if current in on_path:
            # Back-edge: stop here so the cycle is not followed.
            return 0
        if current in depth_memo:
            return depth_memo[current]

        on_path.add(current)
        deepest_child = 0
        for child in G.successors(current):
            deepest_child = max(deepest_child, _depth_from(child))
        on_path.remove(current)

        depth_memo[current] = deepest_child + 1
        return depth_memo[current]

    return _depth_from(initial_node)
def find_inode_and_graph_depth(G):
    """Return ``(node, depth)`` for the node of ``G`` with the largest depth.

    Each node's depth comes from ``calc_graph_depth``. On ties the first node
    in ``G.nodes()`` order wins; an empty graph yields ``('', -1)``.
    """
    best_node, best_depth = '', -1

    for candidate in G.nodes():
        candidate_depth = calc_graph_depth(G, candidate)
        if candidate_depth <= best_depth:
            continue
        best_node, best_depth = candidate, candidate_depth

    return best_node, best_depth

In [5]:
# Read the enrichment configuration: the database mix and the workload name
# (the workload name is used below to build the output directory path).
config = read_yaml('enrichment_config.yaml')
databases = config['Databases']
workload_name = config['ExpWorkloadName']

'''
NODE ENRICHMENT
---------------
Input: Traces_dict, Node_details_dict
Output: Node split output
        node_split_output = {'sf_split': {DB1: {'count': 30, 'nodes_list': [nid1, nid2, ...]}, ...},..}
                             'sl_split': ,,}
'''
# Node details dict= nid: [nis, SF, DB_name] (or) [nis, SL, SL_type]
traces_dict = pkl_to_dict('traces/exp_500nodes_100ktraces.pkl')
# selected_keys = ['0b66f86d15919401842041000d5482'] # Uncomment only for testing
# traces_dict = {key: traces_dict[key] for key in selected_keys if key in traces_dict}

node_dets = pkl_to_dict('node_and_trace_details/new_node_details_data.pkl')
# Drop details of nodes that never appear as an endpoint of a trace edge.
node_dets = prune_node_details(traces_dict, node_dets)
print("Num nodes in node_dets: ", len(node_dets))

# Partition node ids by the type tag stored at index 1 of the node details:
# "db" -> SF list, anything else -> SL list.
sf_arr = [nid for nid, n_info in node_dets.items() if n_info[1] == "db"]
sl_arr = [nid for nid, n_info in node_dets.items() if n_info[1] != "db"]

sf_count = len(sf_arr)
print("Number of SF nodes in trace graph:", sf_count)
sl_count = len(sl_arr)
print("Number of SL nodes in trace graph:", sl_count)
total_nodes = sf_count + sl_count  # not read again in the cells visible here

# Each entry is [db_name, percentage] as given in the config, e.g. [[DB1, 30], ...]
db_split_arr = [[db_name, info['percentage']] for db_name, info in databases.items()]# [[DB1, 30%],...]
# Every SL node gets the single type 'Python'.
sl_type_split = [['Python', sl_count]]
print("Database split Input:", db_split_arr)

def percent_to_count(arr, count):
    """Convert a percentage split into integer counts that sum to ``count``.

    Args:
        arr: list of ``[name, percentage]`` entries (percentages on a 0-100 scale).
        count: total number of items to distribute.

    Returns:
        A new list of ``[name, n]`` entries in the same order as ``arr``.
        ``arr`` itself is left untouched (the previous version overwrote the
        caller's list in place).

    Raises:
        ValueError: if ``arr`` is empty while ``count`` is non-zero.

    Each share is first rounded with Python's ``round`` (half-to-even); the
    remaining surplus or deficit is then handed out one unit at a time,
    round-robin from the first entry, until the counts add up to ``count``.
    """
    if not arr:
        if count:
            raise ValueError("Cannot distribute a non-zero count over an empty split")
        return []

    raw_counts = [round(count * (entry[1] / 100)) for entry in arr]
    diff = count - sum(raw_counts)

    # Distribute the rounding difference round-robin, starting at index 0.
    step = 1 if diff > 0 else -1
    for k in range(abs(diff)):
        raw_counts[k % len(raw_counts)] += step

    return [[entry[0], n] for entry, n in zip(arr, raw_counts)]

# Turn the configured DB percentages into absolute node counts over the SF nodes.
db_split_arr = percent_to_count(db_split_arr, sf_count) # nid: [nis, SF, DB_name]
print("Database split output:", db_split_arr)


# Per-type bookkeeping: target count plus the (initially empty) list of node
# ids that will be assigned to that type.
sf_split_info = {ntype: {"count": value, "nodes_list": []} for ntype, value in db_split_arr}
sl_split_info = {'Python': {"count": sl_count, "nodes_list": []}}

def assign_nodes_to_types(split_arr, sfsl_arr, split_info):
    """Randomly assign node ids from ``sfsl_arr`` to the types in ``split_arr``.

    For each ``[type_name, quota]`` entry, ``quota`` node ids are drawn at
    random without replacement from a copy of ``sfsl_arr``. Each drawn id has
    ``type_name`` appended to its entry in the module-level ``node_dets`` and
    is added to ``split_info[type_name]["nodes_list"]``.

    Returns:
        ``(node_dets, split_info)``; both are mutated in place.
    """
    remaining = sfsl_arr.copy()  # leave the caller's list intact
    for entry in split_arr:
        type_name = entry[0]  # type name: eg: Mongo, Redis, Relay
        quota = entry[1]
        for _ in range(quota):
            pick = random.randint(0, len(remaining) - 1)
            nid = remaining.pop(pick)
            node_dets[nid].append(type_name)  # record the type in the node details
            split_info[type_name]["nodes_list"].append(nid)  # record the node under its type
    return node_dets, split_info

# NOTE(review): no random seed is set in the cells shown before this point, so
# the node -> type assignment presumably differs between runs — confirm whether
# that is intended.
# NOTE(review): assign_nodes_to_types appends to node_dets entries, so running
# this cell twice without reloading node_dets appends a second type to each node.
node_dets, sf_split_info = assign_nodes_to_types(db_split_arr, sf_arr, sf_split_info)
node_dets, sl_split_info = assign_nodes_to_types(sl_type_split, sl_arr, sl_split_info)

# Saving node split output
node_split_output = {'sf_split': sf_split_info, 'sl_split': sl_split_info}
print("Nodes Split Output:", node_split_output)
# Written to enrichment_runs/<workload_name>/node_split_output.json
# (save_dict_as_json appends the extension).
save_dict_as_json(node_split_output, f'enrichment_runs/{workload_name}/node_split_output')
# save_dict_as_json(node_split_output, f'enrichment_runs/as_dmix1_2/node_split_output') ################# AS EXP

Num nodes in node_dets:  446
Number of SF nodes in trace graph: 135
Number of SL nodes in trace graph: 311
Database split Input: [['MongoDB', 10], ['Redis', 80], ['Postgres', 10]]
Database split output: [['MongoDB', 13], ['Redis', 108], ['Postgres', 14]]
Nodes Split Output: {'sf_split': {'MongoDB': {'count': 13, 'nodes_list': ['n355', 'n6761', 'n5358', 'n6456', 'n5288', 'n3125', 'n1624', 'n5124', 'n350', 'n7006', 'n4282', 'n7709', 'n7614']}, 'Redis': {'count': 108, 'nodes_list': ['n624', 'n7264', 'n2382', 'n2800', 'n2154', 'n3442', 'n7601', 'n6438', 'n7361', 'n33', 'n6578', 'n1063', 'n2695', 'n4895', 'n29', 'n1791', 'n2915', 'n1122', 'n4037', 'n2054', 'n2127', 'n3843', 'n4393', 'n6717', 'n7028', 'n25', 'n3882', 'n7049', 'n2006', 'n6818', 'n3428', 'n7957', 'n6941', 'n5913', 'n7529', 'n988', 'n2996', 'n7905', 'n7921', 'n3340', 'n5577', 'n870', 'n3814', 'n5661', 'n847', 'n8006', 'n1069', 'n7074', 'n377', 'n662', 'n6554', 'n4707', 'n5507', 'n6260', 'n607', 'n1702', 'n922', 'n4672', 'n7904'

In [9]:
# 0b5218f615919497680352000ed6c1 1 5: Mongo testing

## Sl to Sl: 0b51062f15919594922205000eaed6 (2 nodes)
## Sl to Db: 0b66f86d15919401842041000d5482 (2 nodes)

# ticker = 0
# for tid in traces_dict:
#     e_list = traces_dict[tid]
#     sf_needed = 0
#     t_nodes = []
#     for e in e_list:
#         if e[1] not in t_nodes:
#             t_nodes.append(e[1])
#         if e[0] not in t_nodes:
#             t_nodes.append(e[0])
#         if e[1] in sf_arr:
#             sf_needed += 1
#     # if sf_needed == 2 and len(t_nodes) < 35:
#     #     ticker += 1
#     #     print(tid, sf_needed, len(t_nodes))
#     if sf_needed == 1 and len(t_nodes) < 35:
#         ticker += 1
#         print(tid, sf_needed, len(t_nodes))
#         if ticker == 5:
#             break


In [6]:
'''
Object ID enrichment
Output: trace packets, one per trace id.
        Each trace packet is a dict that contains (among other fields):
        node_calls_dict = Key: calling node, Value: list of [dm node, op_id, async_flag]
        data_ops_dict   = Key: data op id, Value: data op packet
'''

class Wl_config:
    """Workload configuration plus the random data derived from it.

    Constructor arguments, in order: record_count, record_size_dist,
    data_access_pattern, rw_ratio, async_sync_ratio, seed.

    Constructing an instance seeds the global ``numpy.random`` and ``random``
    generators with ``seed``, then generates the object sizes and the
    per-object access probabilities.
    """

    def __init__(self, record_count, record_size_dist,
                 data_access_pattern, rw_ratio, async_sync_ratio, seed):
        self.record_count = record_count
        self.record_size_dist = record_size_dist
        self.data_access_pattern = data_access_pattern
        self.rw_ratio = rw_ratio
        self.async_sync_ratio = async_sync_ratio
        self.seed = seed

        # Seed both global generators before anything random is drawn.
        np.random.seed(self.seed)
        random.seed(self.seed)

        # Object ids are 1..record_count; sizes and probabilities align with them.
        self.obj_ids_list = np.arange(1, self.record_count + 1)
        self.object_sizes_dict = self.generate_object_sizes()
        self.probabilities = self.generate_data_access_pattern()

    def generate_object_sizes(self):
        """Return a dict of object id -> size drawn from ``record_size_dist``.

        Raises ValueError for a distribution other than 'lognormal' or 'uniform'.
        """
        dist_name = self.record_size_dist
        n_records = self.record_count

        if dist_name not in ('lognormal', 'uniform'):
            raise ValueError('Invalid record size distribution, only lognormal & uniform are allowed for now')

        if dist_name == 'lognormal':
            sizes = np.random.lognormal(mean=np.log(n_records),
                                        sigma=np.log(n_records),
                                        size=n_records)
        else:
            sizes = np.random.uniform(low=1, high=n_records, size=n_records)

        return dict(zip(self.obj_ids_list, sizes))

    def generate_data_access_pattern(self):
        """Return one access probability per object id.

        'zipfian' normalises Zipf(alpha=1.2) samples so they sum to 1;
        'uniform' gives every object the same probability. Any other pattern
        raises ValueError.
        """
        pattern = self.data_access_pattern

        if pattern not in ('zipfian', 'uniform'):
            raise ValueError('Invalid data access pattern, only zipfian & uniform are allowed for now.')

        n_objects = len(self.obj_ids_list)
        if pattern == 'zipfian':
            alpha = 1.2
            zipf_draws = np.random.zipf(alpha, n_objects)
            return zipf_draws / zipf_draws.sum()

        return np.ones(n_objects) / n_objects


def gen_sfnode_dataops(sf_node, wl_config, traces_dict, node_dets):
    '''
    Generate one data op for every call made to ``sf_node`` across all traces.

    Args:
        sf_node: id of the node to generate ops for.
        wl_config: object exposing ``rw_ratio`` and ``record_count``.
        traces_dict: trace id -> list of edges; ``e[1]`` is the called node.
        node_dets: node id -> details; index 2 holds the node's DB name.

    Return: ops_dict= Key: op_id, Value: op_packet
    op_packet = {'op_id': op_id, 'op_type': op_type, 'op_obj_id': op_obj_id,\
                 'db': sf_node_db}
    Op ids run from 1 to the number of calls to ``sf_node``.
    '''
    w_prob = wl_config.rw_ratio / (1 + wl_config.rw_ratio)
    sf_node_db = node_dets[sf_node][2]

    # Number of ops = number of edges that call this node. The comparison uses
    # the ``sf_node`` parameter; the previous code compared against a global
    # named ``node`` and only worked when the caller's loop variable had that name.
    total_ops = sum(1 for e_list in traces_dict.values()
                    for e in e_list if e[1] == sf_node)

    # generate ops for sf node
    ops_dict = {}   # key: op_id, value: op_packet
    for op_id in range(1, total_ops + 1):
        op_type = 'write' if random.random() < w_prob else 'read'
        # Object ids are drawn uniformly from 1..record_count; the access
        # probabilities stored on wl_config are not consulted here.
        op_obj_id = random.randrange(1, wl_config.record_count + 1)
        ops_dict[op_id] = {'op_id': op_id, 'op_type': op_type,
                           'op_obj_id': f"key_{op_obj_id}", 'db': sf_node_db}

    return ops_dict


# convert edges_list to node_calls_dict format 
def gen_node_calls_dict(edges_list, async_sync_ratio):
    '''
    Group a trace's edges by calling node.

    Return: node_calls_dict = Key: calling node, Value: list of [dm node, op_id, async_flag]
            op_id is always -1 here; async_flag is 1 with probability
            async_sync_ratio / (1 + async_sync_ratio), otherwise 0.
    '''
    calls_by_caller = {}
    for edge in edges_list:
        outgoing_calls = calls_by_caller.setdefault(edge[0], [])
        p_async = async_sync_ratio / (1 + async_sync_ratio)
        is_async = int(random.random() < p_async)
        # [dm node, op_id, async/sync] (-1 for SL) (1/0 for async/sync)
        outgoing_calls.append([edge[1], -1, is_async])
    return calls_by_caller


# Reading enrichment config file
enrichment_config = read_yaml('enrichment_config.yaml')
record_count = enrichment_config['WorkloadConfig']['record_count']
record_size_dist = enrichment_config['WorkloadConfig']['record_size_dist']
data_access_pattern = enrichment_config['WorkloadConfig']['data_access_pattern']
rw_ratio = enrichment_config['WorkloadConfig']['rw_ratio']
async_sync_ratio = enrichment_config['WorkloadConfig']['async_sync_ratio']
# Format: record_count, record_size_dist, data_access_pattern, rw_ratio, async_sync_ratio, seed
# Constructing Wl_config seeds the global numpy and `random` generators with `seed`.
wl1 = Wl_config(record_count, record_size_dist, data_access_pattern, rw_ratio, async_sync_ratio, seed=50) # to be read from config file

# Generate data op packets for each sf node
G_agg = build_digraph_from_tracesdict(traces_dict)  # aggregate graph over all traces
overall_data_ops = {}   # key: sf_node, value: ops_dict
check = 0  # not read again in the cells visible here
for node in node_dets:
    # Only nodes tagged "db" that actually occur in the aggregate graph get data ops.
    if node in G_agg.nodes() and node_dets[node][1] == 'db':
        overall_data_ops[node] = \
            gen_sfnode_dataops(node, wl1, traces_dict, node_dets)

def get_pop_first_dict_item(d):
    """Remove and return the first ``(key, value)`` pair of ``d`` in insertion order.

    Raises:
        IndexError: if ``d`` is empty (same exception type as before).
    """
    # next(iter(d)) fetches the first key in O(1); building list(d.keys()) on
    # every call made draining a large dict quadratic.
    try:
        first_key = next(iter(d))
    except StopIteration:
        raise IndexError("cannot pop an item from an empty dict") from None
    first_item = d.pop(first_key)
    return first_key, first_item

def get_node_type(node_id, data):
    '''
    Look up the type name under which ``node_id`` is listed.

    data: node_split_output.json content, i.e.
          {split_type: {type_name: {'nodes_list': [...], ...}, ...}, ...}
    Returns the first matching type name, or None when the node is not listed.
    '''
    for services in data.values():
        for service_name, service_info in services.items():
            if node_id in service_info['nodes_list']:
                return service_name
    return None

def remove_self_node_calls(node_call_dict):
    """Drop every call whose target is the calling node itself.

    The dict is updated in place (each value is replaced by a filtered list)
    and the same dict object is returned.
    """
    for caller in list(node_call_dict):
        kept_calls = []
        for call in node_call_dict[caller]:
            if call[0] != caller:
                kept_calls.append(call)
        node_call_dict[caller] = kept_calls
    return node_call_dict

def get_leaf_nodes(node_call_dict):
    '''Returns: set of nodes that are called but never appear as a caller
    (a key) in the request call graph.'''
    callers = set(node_call_dict.keys())
    callees = {call[0] for calls in node_call_dict.values() for call in calls}
    return callees - callers

def get_logger_nodes_for_request_call_graph(node_call_dict):
    '''Returns: list of nodes that log for the request call graph.

    For every call that targets a leaf node:
      - op_id != -1 (leaf SF node): the calling node is a logger;
      - op_id == -1 (leaf SL node): the leaf itself is a logger.
    The list is built from a set, so it has no duplicates.
    '''
    loggers = set()
    for leaf in get_leaf_nodes(node_call_dict):
        for caller, calls in node_call_dict.items():
            for call in calls:
                if call[0] != leaf:
                    continue
                # The caller logs on behalf of a data-op leaf; otherwise the leaf logs.
                loggers.add(caller if call[1] != -1 else leaf)
    return list(loggers)

def has_cycle(graph):
    """Return True if the call graph contains a directed cycle.

    ``graph`` maps a node to a list of call entries whose first element is the
    called node. Nodes that never appear as a key are treated as having no
    outgoing calls.
    """
    explored = set()      # nodes whose DFS has been started
    active_path = set()   # nodes on the current recursion path

    def _reaches_cycle(current):
        if current in explored:
            return False
        explored.add(current)
        active_path.add(current)
        for call in graph.get(current, []):
            target = call[0]
            # An edge back into the current path closes a cycle.
            if target in active_path:
                return True
            if target not in explored and _reaches_cycle(target):
                return True
        active_path.remove(current)
        return False

    # Start a DFS from every node that an earlier DFS has not reached.
    for start in graph.keys():
        if start in explored:
            continue
        if _reaches_cycle(start):
            return True
    return False

'''
Making the trace packet:
trace_packet = dict with keys "tid", "node_calls_dict", "data_ops_dict",
               "initial_node", "initial_node_type", "logger_nodes"
node_calls_dict = Key: calling node, Value: list of [dm node, op_id, async_flag]
data_ops_dict = Key: data op id, Value: data op packet
'''
def add_if_not_present(array, value):
    """Append ``value`` to ``array`` unless it is already there; return ``array``."""
    if value in array:
        return array
    array.append(value)
    return array
trace_details_data = pkl_to_dict('node_and_trace_details/new_trace_details_data.pkl')
# Context manager so the JSON file handle is closed as soon as it has been read.
with open(f'./enrichment_runs/{workload_name}/node_split_output.json') as nso_file:
    node_split_output = json.load(nso_file)
# node_split_output = json.load(open(f'./enrichment_runs/as_dmix1_2/node_split_output.json')) ################# AS EXP
all_trace_packets = {}
cycle_ctr = 0  # number of traces skipped because their call graph has a cycle
# NOTE(review): nothing in this cell adds to unique_nodes_check, so the count
# printed below is always 0 — confirm whether it was meant to collect the nodes seen.
unique_nodes_check = []
for tid in traces_dict:
    t_node_calls_dict = gen_node_calls_dict(traces_dict[tid], async_sync_ratio)
    # NOTE(review): op ids are only unique per sf node, so two different sf
    # nodes called in the same trace can yield the same op_id and overwrite
    # each other's entry in this dict — confirm with the consumer of the packets.
    t_data_ops_dict = {} # key: data op id, value: data op packet
    for t_node in t_node_calls_dict:
        for idx in range(len(t_node_calls_dict[t_node])):
            dm_node = t_node_calls_dict[t_node][idx][0]
            if node_dets[dm_node][1] == 'db': # ie if dm node is a sf node
                if len(overall_data_ops[dm_node]) == 0:
                    print("Error: No data ops for sf node", dm_node)
                    # Only leaves the inner loop: remaining calls of this
                    # caller keep op_id -1.
                    break
                # Take the next unused data op of this sf node and attach it to the call
                op_id, op_packet = get_pop_first_dict_item(overall_data_ops[dm_node])
                t_node_calls_dict[t_node][idx][1] = op_id
                t_data_ops_dict[op_id] = op_packet
    G = nx.DiGraph()
    G.add_edges_from(traces_dict[tid])
    t_ini_node, trace_depth = find_inode_and_graph_depth(G) # getting initial node
    t_ini_node_type = get_node_type(t_ini_node, node_split_output)
    t_node_calls_dict = remove_self_node_calls(t_node_calls_dict) # Remove self calls in node_calls_dict
    # Traces whose call graph is still cyclic after removing self calls are
    # dropped. Checking this first avoids computing logger nodes for traces
    # that are discarded anyway.
    if has_cycle(t_node_calls_dict):
        cycle_ctr += 1
        continue
    # Get log nodes for this request call graph
    t_logger_nodes = get_logger_nodes_for_request_call_graph(t_node_calls_dict)

    trace_packet = {"tid": tid, "node_calls_dict": t_node_calls_dict, "data_ops_dict": t_data_ops_dict,
                    "initial_node": t_ini_node, "initial_node_type": t_ini_node_type, "logger_nodes": t_logger_nodes}
    all_trace_packets[tid] = trace_packet
print("Cycle Ctr: ", cycle_ctr)
print("Num Unique nodes: ", len(unique_nodes_check))
save_dict_as_json(all_trace_packets, f'enrichment_runs/{workload_name}/all_trace_packets')
# save_dict_as_json(all_trace_packets, f'enrichment_runs/as_dmix1_2/all_trace_packets') ################# AS EXP

Cycle Ctr:  13489
Num Unique nodes:  0


Testing

In [None]:
# For every trace, record [initial node, trace depth] and save the mapping as JSON.
tid_inodes_cd = {}
# NOTE(review): this rebinds the notebook-level traces_dict to a different
# trace file than the one loaded by the enrichment cell.
traces_dict = pkl_to_dict('traces/exp_544nodes_100ktraces.pkl')
for tid, e_list in traces_dict.items():
    G = nx.DiGraph()
    G.add_edges_from(e_list)
    initial_node, trace_depth = find_inode_and_graph_depth(G)
    # tid values are dict keys and therefore unique, so this guard always passes.
    if tid not in tid_inodes_cd:
        tid_inodes_cd[tid] = [initial_node, trace_depth]
# save_dict_as_json appends ".json" to the given name.
save_dict_as_json(tid_inodes_cd, f'node_and_trace_details/544_100k_inode_cd_data')

In [15]:
traces_dict = pkl_to_dict('traces/exp_500nodes_100ktraces.pkl')
# Collect every distinct edge endpoint in first-seen order. The companion set
# gives O(1) membership tests instead of scanning the list for every endpoint.
unique_nodes = []
seen_nodes = set()
for tid, e_list in traces_dict.items():
    for e in e_list:
        for endpoint in (e[0], e[1]):
            if endpoint not in seen_nodes:
                seen_nodes.add(endpoint)
                unique_nodes.append(endpoint)
print(len(unique_nodes))
# save_dict_as_json appends ".json" itself; passing a name that already ended
# in ".json" produced a file called "...unique_nodes.json.json".
save_dict_as_json(unique_nodes, 'node_and_trace_details/500_100k_unique_nodes')

446


In [6]:
def read_json_file(file_path):
    """Load and return the JSON content of ``file_path``.

    If the file is missing or does not contain valid JSON, a message is
    printed and None is returned instead of raising.
    """
    try:
        with open(file_path, 'r') as handle:
            return json.load(handle)
    except FileNotFoundError:
        print(f"The file '{file_path}' was not found.")
    except json.JSONDecodeError:
        print(f"Error decoding JSON from the file '{file_path}'.")
    return None

# nso = read_json_file("enrichment_runs/dmix1_pg_heavy/node_split_output.json")
# atp = read_json_file("enrichment_runs/dmix1_pg_heavy/all_trace_packets.json")
# tdd = pkl_to_dict("deployment_files/mewbie_client/new_trace_details_data.pkl")


In [21]:
# third_elements = [value[2] for value in tdd.values() if len(value) > 2]
# next(iter(atp.items()))
def get_all_logger_nodes(data):
    """Return the logger nodes of all trace packets in ``data`` as one flat list.

    ``data`` maps a trace id to a packet with a 'logger_nodes' list. The
    result keeps duplicates and follows the iteration order of ``data``.
    """
    collected = []
    for packet in data.values():
        collected.extend(packet['logger_nodes'])
    return collected
# NOTE(review): `atp` is not assigned by any earlier cell in notebook order (its
# load is commented out above and it is only read from disk in a later cell),
# so this cell depends on kernel state left by a previous run.
all_logger_nodes = get_all_logger_nodes(atp)
# Printed as a set to show each logger node once.
print(set(all_logger_nodes))

{'n7006', 'n812', 'n5971', 'n6076', 'n785', 'n2769', 'n8290', 'n1357', 'n6650', 'n3291', 'n2587', 'n945', 'n6141', 'n7475', 'n7547', 'n6212', 'n7924', 'n5500', 'n1410', 'n6342', 'n1765', 'n3820', 'n7356', 'n5871', 'n2663', 'n1200', 'n1436', 'n6046', 'n4912', 'n2868', 'n4578', 'n4850', 'n728', 'n8132', 'n668', 'n2838', 'n165', 'n2409', 'n7016', 'n8030', 'n895', 'n4800', 'n6255', 'n1914', 'n7642', 'n3392', 'n263', 'n423', 'n341', 'n7200', 'n7361', 'n8307', 'n3826', 'n1220', 'n4558', 'n2453', 'n158', 'n9', 'n1581', 'n7796', 'n4376', 'n3441', 'n7994', 'n7206', 'n5775', 'n7567', 'n5155', 'n6134', 'n4748', 'n1513', 'n1011', 'n2756', 'n3035', 'n4690', 'n2996', 'n7421', 'n2158', 'n6096', 'n6286', 'n2291', 'n7510', 'n995', 'n7549', 'n5510', 'n1439', 'n776', 'n7370', 'n572', 'n3888', 'n1860', 'n7595', 'n1562', 'n5015', 'n2685', 'n5447', 'n8221', 'n5259', 'n4610', 'n4592', 'n2134', 'n6693', 'n4923', 'n6109', 'n7900', 'n942', 'n3827', 'n8167', 'n1964', 'n2826', 'n366', 'n7045', 'n4715', 'n5856', '

In [None]:
from collections import Counter
# NOTE(review): rebinds the notebook-level traces_dict to yet another trace file.
traces_dict = pkl_to_dict('traces/exp_497nodes_100ktraces.pkl')
nso_nlist = []  # reassigned below with the unique nodes from the node split output
atp_nlist = []  # only referenced by the commented-out checks further down
def extract_unique_nodes(data):
    """Return the distinct node ids listed under 'sf_split' and 'sl_split'.

    Every entry of those two sections may carry a 'nodes_list'; missing
    sections or lists are skipped. The result is a list in arbitrary order.
    """
    distinct_nodes = set()

    for split_key in ("sf_split", "sl_split"):
        if split_key not in data:
            continue
        for service in data[split_key].values():
            distinct_nodes.update(service.get("nodes_list", []))

    return list(distinct_nodes)
nso_nlist = extract_unique_nodes(nso)
# Flat list of every edge endpoint over all traces (duplicates kept for counting).
l_list = []
for tid in traces_dict:
    edges_list = traces_dict[tid]
    for edge in edges_list:
        l_list.append(edge[0])
        l_list.append(edge[1])
        # if edge[0] not in atp_nlist:
        #     atp_nlist.append(edge[0])
        # if edge[1] not in atp_nlist:
        #     atp_nlist.append(edge[1])

# print(len(set(nso_nlist)))
# print(len(set(atp_nlist)))
# print(set(nso_nlist) == set(atp_nlist))
# print(set(nso_nlist) == set(unique_nodes_check))

# Count the endpoints once and reuse the result; the Counter was previously
# built twice over the full list.
node_occurrences = Counter(l_list)
top_five = node_occurrences.most_common(10)  # holds the ten most common (node, count) pairs
top_10 = [item for item, count in top_five]
print(top_five)
print(top_10)

[('n1765', 354503), ('n2134', 210271), ('n4376', 166338), ('n2977', 138532), ('n942', 65478), ('n4202', 56730), ('n5015', 39773), ('n2436', 36547), ('n6952', 36086), ('n6286', 35163)]
['n1765', 'n2134', 'n4376', 'n2977', 'n942', 'n4202', 'n5015', 'n2436', 'n6952', 'n6286']


In [14]:
atp = read_json_file("enrichment_runs/dmix1_pg_heavy/all_trace_packets.json")
# read_json_file returns None when the file is missing or invalid; stop with a
# clear message instead of an AttributeError on `atp.values()` below.
if atp is None:
    raise RuntimeError("all_trace_packets.json could not be loaded")
node_counter = Counter()
TOP_N = 20  # number of most frequently called data-op targets to report

# Iterate over each trace packet
sync_call_ctr = 0
async_call_ctr = 0
for item in atp.values():
    node_calls_dict = item.get("node_calls_dict", {})
    # Tally sync vs. async calls (index 2 of a call entry is the async flag)
    for calls in node_calls_dict.values():
        for call in calls:
            if call[2] == 1:
                async_call_ctr += 1
            else:
                sync_call_ctr += 1
    # Collect the called node (first element) when the op id (second element) is not -1
    first_elements = [
        call[0]
        for calls in node_calls_dict.values()
        for call in calls
        if call[1] != -1
    ]
    # Update the counter with the collected elements
    node_counter.update(first_elements)

# Variable name kept for compatibility; it holds the TOP_N most common entries.
top_5 = node_counter.most_common(TOP_N)

# Print the results
print(f"Sync Calls: {sync_call_ctr}")
print(f"Async Calls: {async_call_ctr}")
# The label previously said "Top 5" although 20 entries were printed.
print(f"Top {TOP_N} most frequent first elements when the second element is not -1:")
for element, count in top_5:
    print(f"Element {element} appears {count} times")

Sync Calls: 264365
Async Calls: 263939
Top 5 most frequent first elements when the second element is not -1:
Element n4037 appears 49168 times
Element n7049 appears 33921 times
Element n3882 appears 22988 times
Element n2127 appears 22224 times
Element n6572 appears 17279 times
Element n2405 appears 14588 times
Element n7709 appears 14000 times
Element n988 appears 8651 times
Element n5507 appears 7702 times
Element n2915 appears 7317 times
Element n3428 appears 6231 times
Element n5223 appears 5221 times
Element n2025 appears 3413 times
Element n530 appears 3221 times
Element n8006 appears 3168 times
Element n870 appears 2597 times
Element n2382 appears 2089 times
Element n6578 appears 1701 times
Element n2154 appears 1539 times
Element n7028 appears 879 times
