In [21]:
import json
import os
import math
import random
import numpy as np
from tqdm import tqdm

# METIS partition file: the last dot-separated suffix of its name is parsed as
# the number of partitions. The default is machine-specific; set the
# PARTITION_FILE environment variable to point elsewhere without editing the cell.
path_to_partition = os.environ.get(
    "PARTITION_FILE", "/home/lenovo/Downloads/graph-for-metis.txt.part.8"
)
num_partitions = int(path_to_partition.split('.')[-1])

configs_dir = "final_topology_0"
topology_json = "reindexed"

B = 125_000_000 # BYTES

# Seed the `random` module so that a Restart & Run All reproduces the same trace.
SEED = 0
random.seed(SEED)

# Topology selector:
#   ZTE         - Generates a workload for the ZTE topology
#   FAT_TREE    - Generates a workload for a fat tree topology
topology = 'ZTE'

out_name = f'traces/trace_{configs_dir}'                          # name for saved trace
config_path = os.getcwd() + f'/topologies/{configs_dir}/'         # path to switch config files for topology
data_path = f'data/{topology_json}.json'                          # path to file containing topology

In [48]:
configs = os.listdir(config_path)

with open(data_path,'r') as f:
    data = json.load(f)

nodes = {}
for node in data['nodeList']:
    nodes[node['id']] = node

with open(path_to_partition, 'r') as f:
    for idx, line in enumerate(f):
        node_id, partition = idx, line.strip()
        nodes[node_id]['partition'] = int(partition)

sources = {i:list() for i in range(num_partitions)} # access
destinations = {i:list() for i in range(num_partitions)} # kernel or mixed 

match(topology):
    case "ZTE":
        source_name = 'Access'
    case "FAT_TREE":
        source_name = 'access'

for config in configs:
    id = int(config.split('.')[0])

    if nodes[id]['deviceLevel'] == source_name:
        sources[nodes[id]['partition']].append(id)
    else:
        destinations[nodes[id]['partition']].append(id)

for (p_id, (p_src, p_dest)) in enumerate(zip(list(sources.values()), list(destinations.values()))):
    print('partition:',p_id, '-> src:',len(p_src), 'dst:',len(p_dest))
    assert(p_dest != 0)

partition: 0 -> src: 621 dst: 12
partition: 1 -> src: 657 dst: 9
partition: 2 -> src: 635 dst: 17
partition: 3 -> src: 657 dst: 12
partition: 4 -> src: 641 dst: 16
partition: 5 -> src: 649 dst: 5
partition: 6 -> src: 647 dst: 4
partition: 7 -> src: 642 dst: 13


In [96]:
# --- Workload parameters -----------------------------------------------------
FLOW_THROUGHPUT = B                     # bytes per second
SIMULATION_TIME = 1000000               # ns
PAIRS_PER_SRC = {'mu': 4, 'sigma': 0}   # normal distribution (kwargs of random.normalvariate)
MSG_SIZE = 50_000                       # bytes
PACKET_SIZE = 1400                      # bytes
BANDWIDTH = 10_000_000                  # BYTES
PRIO_LEVELS = 3                         # QoS priorities

# Share of a source's destinations inside / outside its own partition.
PERCENT_INTRA_PARTITION = .66
PRCENT_INTER_PARTITION = 1 - PERCENT_INTRA_PARTITION


def s_to_ns(x):
    """Convert seconds to nanoseconds, truncated to an int."""
    return int(x * math.pow(10,9))


def ns_to_s(x):
    """Convert nanoseconds to seconds."""
    return x * math.pow(10,-9)

In [97]:
# Tag describing this run: "NAME:value" pairs joined by "__". It is appended to
# the trace file name so that a trace can be matched to its settings.
_param_fields = [
    ("FLOW_THROUGHPUT", FLOW_THROUGHPUT),
    ("SIMULATION_TIME", SIMULATION_TIME),
    ("PAIRS_PER_SRC", tuple(PAIRS_PER_SRC.values())),
    ("MSG_SIZE", MSG_SIZE),
    ("PACKET_SIZE", PACKET_SIZE),
    ("BANDWIDTH", BANDWIDTH),
    ("PRIO_LEVELS", PRIO_LEVELS),
    ("INTRA", PERCENT_INTRA_PARTITION),
    ("INTER", PRCENT_INTER_PARTITION),
]
parameters = "__".join(f"{label}:{value}" for label, value in _param_fields)

print(parameters)

FLOW_THROUGHPUT:125000000__SIMULATION_TIME:1000000__PAIRS_PER_SRC:(4, 0)__MSG_SIZE:50000__PACKET_SIZE:1400__BANDWIDTH:10000000__PRIO_LEVELS:3__INTRA:0.66__INTER:0.33999999999999997


In [139]:
# generate flows
def generate_flows(srcs_by_partition=None, dsts_by_partition=None, pairs_per_src=None, percent_intra=None):
    """Choose the destination nodes of every source node.

    For each source, the number of destinations is drawn with
    ``random.normalvariate(**pairs_per_src)``, truncated to an int and redrawn
    until it is positive. ``round(num_pairs * percent_intra)`` of them are
    sampled without replacement from the source's own partition; the remainder
    is sampled from all other partitions. Taking the inter count as the
    remainder guarantees that the two counts always add up to ``num_pairs``
    (rounding both shares independently can over- or under-shoot).

    Args:
        srcs_by_partition: dict partition id -> list of source node ids.
            Defaults to the module-level ``sources``.
        dsts_by_partition: dict partition id -> list of destination node ids.
            Defaults to the module-level ``destinations``.
        pairs_per_src: keyword arguments for ``random.normalvariate``.
            Defaults to the module-level ``PAIRS_PER_SRC``.
        percent_intra: fraction of destinations taken from the source's own
            partition. Defaults to the module-level ``PERCENT_INTRA_PARTITION``.

    Returns:
        dict source id -> list of ``('intra' | 'inter', destination id)``
        tuples, intra-partition destinations first.

    Raises:
        KeyError: a partition of ``srcs_by_partition`` is missing from
            ``dsts_by_partition``.
        ValueError: a partition does not offer enough destinations to sample
            the requested number without replacement.
    """
    if srcs_by_partition is None:
        srcs_by_partition = sources
    if dsts_by_partition is None:
        dsts_by_partition = destinations
    if pairs_per_src is None:
        pairs_per_src = PAIRS_PER_SRC
    if percent_intra is None:
        percent_intra = PERCENT_INTRA_PARTITION

    flows = {}
    for p_id, srcs in srcs_by_partition.items():
        intra_dsts = dsts_by_partition[p_id]
        # inter destinations, i.e. every destination that is not in this partition
        inter_dsts = [
            dst
            for other_id, dsts in dsts_by_partition.items()
            if other_id != p_id
            for dst in dsts
        ]

        for src in srcs:
            # redraw until the source gets at least one destination
            num_pairs = int(random.normalvariate(**pairs_per_src))
            while num_pairs <= 0:
                num_pairs = int(random.normalvariate(**pairs_per_src))

            num_intra = round(num_pairs * percent_intra)
            num_inter = num_pairs - num_intra

            if num_intra > len(intra_dsts) or num_inter > len(inter_dsts):
                raise ValueError(
                    f"partition {p_id}: need {num_intra} intra / {num_inter} inter destinations "
                    f"but only {len(intra_dsts)} / {len(inter_dsts)} are available"
                )

            flows[src] = [('intra', x) for x in random.sample(intra_dsts, k=num_intra)] + \
                         [('inter', x) for x in random.sample(inter_dsts, k=num_inter)]

    return flows

flows = generate_flows()

# Sanity check: per source, count how many of its destinations were tagged
# 'intra' and 'inter', to compare against the configured split.
# (Bound to `flow_mix`, not `eval`, so the builtin eval() stays usable.)
flow_mix = {
    src: {
        "num_intra": sum(1 for kind, _ in pairs if kind == 'intra'),
        "num_inter": sum(1 for kind, _ in pairs if kind == 'inter'),
    }
    for src, pairs in flows.items()
}

print('num_pairs:', PAIRS_PER_SRC, 'prc_intra:', PERCENT_INTRA_PARTITION, 'prc inter:', PRCENT_INTER_PARTITION)
print("flows for validation:", flow_mix)

# Drop the 'intra'/'inter' tags, which were only needed for the check above;
# from here on `flows` maps each source to a plain list of destination ids.
flows = {src: [dst for _, dst in pairs] for src, pairs in flows.items()}

num_pairs: {'mu': 4, 'sigma': 0} prc_intra: 0.66 prc inter: 0.33999999999999997
flows for validation: {255: {'num_intra': 3, 'num_inter': 1}, 1539: {'num_intra': 3, 'num_inter': 1}, 4596: {'num_intra': 3, 'num_inter': 1}, 3140: {'num_intra': 3, 'num_inter': 1}, 1792: {'num_intra': 3, 'num_inter': 1}, 4815: {'num_intra': 3, 'num_inter': 1}, 3375: {'num_intra': 3, 'num_inter': 1}, 500: {'num_intra': 3, 'num_inter': 1}, 302: {'num_intra': 3, 'num_inter': 1}, 2984: {'num_intra': 3, 'num_inter': 1}, 957: {'num_intra': 3, 'num_inter': 1}, 1153: {'num_intra': 3, 'num_inter': 1}, 1277: {'num_intra': 3, 'num_inter': 1}, 2635: {'num_intra': 3, 'num_inter': 1}, 315: {'num_intra': 3, 'num_inter': 1}, 5153: {'num_intra': 3, 'num_inter': 1}, 3080: {'num_intra': 3, 'num_inter': 1}, 984: {'num_intra': 3, 'num_inter': 1}, 4245: {'num_intra': 3, 'num_inter': 1}, 2575: {'num_intra': 3, 'num_inter': 1}, 938: {'num_intra': 3, 'num_inter': 1}, 220: {'num_intra': 3, 'num_inter': 1}, 1022: {'num_intra': 3, 'n

In [135]:
def generate_messages_for_flow(duration_ns):
    """Generate the messages one flow sends during ``duration_ns`` nanoseconds.

    Message sizes are drawn from an exponential distribution with mean
    ``MSG_SIZE`` (truncated to int) until their total reaches the number of
    bytes the flow sends in that time at ``FLOW_THROUGHPUT``. Send times are
    evenly spaced from 0 to ``duration_ns``, both ends included.

    Returns:
        ``(times, sizes)``: two equally long lists of ints.
    """
    byte_budget = ns_to_s(duration_ns) * FLOW_THROUGHPUT

    # keep drawing message sizes until the byte budget is covered
    sizes = []
    drawn_bytes = 0
    while drawn_bytes < byte_budget:
        size = int(random.expovariate(1/MSG_SIZE))
        sizes.append(size)
        drawn_bytes += size

    # one evenly spaced send time per message, truncated to whole ns
    times = list(map(int, np.linspace(0, duration_ns, len(sizes))))

    return times, sizes

In [136]:
'''
    Generation Logic:
        - For each (source, destination) flow:
            - generate the flow's message send times and sizes
              (generate_messages_for_flow over the whole simulation time)
            - give every message its own id and a uniformly random priority (tos)
        - Finally order all messages by send time
    Messages are split into packets later, when the trace is written.
'''

message_id = 0
messages = []
for src, pairs in tqdm(flows.items()):
    for dst in pairs:
        times, sizes = generate_messages_for_flow(SIMULATION_TIME)

        for timestamp, size in zip(times, sizes):
            tos = random.randint(0, PRIO_LEVELS-1)

            # Key order matters: the trace writer unpacks the values positionally.
            messages.append({
                'message_id': message_id,
                'src': src,
                'dst': dst,
                'size': size,
                'timestamp': timestamp,
                'tos': tos
            })

            # Advance per message, not per flow, so that no two messages
            # share an id.
            message_id += 1

messages = sorted(messages, key=lambda x:x['timestamp'])

  0%|          | 0/5149 [00:00<?, ?it/s]

100%|██████████| 5149/5149 [00:01<00:00, 5018.51it/s]


In [137]:
unique_packet_id = 0

# Write the trace, one packet per line:
#   <packet id> <message id> <src> <dst> <packet size> <time> <tos>
trace_path = out_name+'_'+parameters

# Make sure the output directory exists before opening the file.
trace_dir = os.path.dirname(trace_path)
if trace_dir:
    os.makedirs(trace_dir, exist_ok=True)

# `with` closes the file even if writing fails part-way through.
with open(trace_path, 'w') as f:
    for msg in tqdm(messages):
        # Read fields by key instead of relying on the dict's value order.
        message_id = msg['message_id']
        src = msg['src']
        dst = msg['dst']
        tos = msg['tos']

        num_packets = math.ceil(msg['size'] / PACKET_SIZE) # ceil -> padding last packet to always be PACKET_SIZE

        packet_time = msg['timestamp']
        for _ in range(num_packets):
            f.write(
                f"{unique_packet_id} {message_id} {src} {dst} "
                f"{PACKET_SIZE} {packet_time} {tos}\n"
            )

            # NOTE(review): message timestamps come from SIMULATION_TIME (ns),
            # while PACKET_SIZE / BANDWIDTH is far below 1 for the current
            # settings, so consecutive packets are spaced by a tiny fraction
            # of one time unit. Confirm BANDWIDTH's unit and whether a
            # seconds -> ns conversion (s_to_ns) is intended here.
            packet_time += PACKET_SIZE / BANDWIDTH

            unique_packet_id += 1
    

  0%|          | 0/72516 [00:00<?, ?it/s]

100%|██████████| 72516/72516 [00:06<00:00, 10514.39it/s]
