## AR 1. Description of HW
    - No of GPUS
    - topology of GPU/Switches (automation)
        - list of destination per fw,inp,wt
    - clock per gpu or clock per switch
   
## AR 2. Workload
    - how many instance of workload
    - range of gpus to occupy
    - for each task, add workload-ins attribute
    - write data in bytes

In [1]:
import os
import sys

sys.path.append(os.path.abspath("C:\\Users\\vr\\OneDrive - Intel Corporation\\Desktop\\Repo\\dl-modeling\\speedsim\\"))

In [2]:
from asap.workload import Workload, Task, TYPES
from asap.ips import IP, ExecutingUnit, Driver, Port
from asap.hw import Clock
from asap.buses import Bus
from asap.memories import Memory
from asap.system_platform import Platform
from asap.schedulers import SystemScheduler
from asap.mapping import MappingEntity
from speedsim import SpeedSim
from post_processing.utils import AnalysisData

In [3]:
import csv
def read_config(fname):
    if fname is None:
        return None
    with open(fname) as fin:
        reader = csv.reader(fin)
        return {row[0]: row[1] for row in reader if len(row) > 0}

In [4]:
# KeyWords
SUCCESSORS = 'successors'
PREDECESSORS = 'predecessors'
FW = 'FW'
INP = 'INP'
WT = 'WT'
COMM = 'COMM'
COMM_FW = COMM + '_FW'
COMM_INP = COMM + '_INP'
COMM_WT = COMM + '_WT'
COMM_SO = COMM + '_SO'
COMM_SO_FW = COMM + '_SO_FW'
COMM_SO_INP = COMM + '_SO_INP'
COMM_SO_WT = COMM + '_SO_WT'
GPUs = 1
EX_UNITS_PER_GPU = 1

In [5]:
import json

def _get_source(link, workload_graph):
    source = int(link['source'])
    return workload_graph['nodes'][source]['label']


def _get_target(link, workload_graph):
    target = int(link['target'])
    return workload_graph['nodes'][target]['label']

def speedsim_analysis_train(workload_json, config_dict, scaleout_dict, scaleout=False, include_timeline=False,
                            trace_gen=True, network_name=None):

    wl_json_fd = open(workload_json, 'r')
    wl_json = json.load(wl_json_fd)

    workload_cycles = 0
    successor_dict = {}
    predecessor_dict = {}

    last_fwd_source = ""

    for link in (wl_json["links"]):
        source = _get_source(link, wl_json)
        dst = _get_target(link, wl_json)
        if "bwd_" not in source:
            last_fwd_source = source
        if source not in successor_dict.keys():
            successor_dict[source] = []
        successor_dict[source].append(dst)
        if dst not in predecessor_dict.keys():
            predecessor_dict[dst] = []
        predecessor_dict[dst].append(source)

    # add second iteration of fwd tasks,
    # this will act as bwd-fwd tasks
    if int(config_dict["data_parallel"]) == 0:
        first_fwd_source = list(successor_dict.keys() - predecessor_dict.keys())
        temp_keys = list(successor_dict.keys())
        for key in temp_keys:
            if "bwd_" not in key:
                if key != last_fwd_source:
                    # there mite be layers  which dont have successors
                    if key in successor_dict.keys():
                        successor_dict["2_" + key] = ["2_" + x for x in successor_dict[key]]
                    # there mite be layers  which dont have predecessors
                    if key in predecessor_dict.keys():
                        predecessor_dict["2_" + key] = ["2_" + x for x in predecessor_dict[key]]
                else:
                    if len(successor_dict[key]) == 1:
                        successor_dict["2_" + key] = successor_dict[key]
                        predecessor_dict["2_" + key] = ["2_" + x for x in predecessor_dict[key]]

                        predecessor_dict[successor_dict[key][0]] = ["2_" + key]
                        successor_dict[key] = ["2_" + x for x in first_fwd_source]
                        for x in first_fwd_source:
                            predecessor_dict["2_" + x] = [key]
                    else:
                        raise Exception("unable to construct taskgraph")

    # Going throgh all the layers - create task of layer name
    # Through each out/in tensor in each layer, save connections
    TASK_NAME_SEPARATOR = '__'
    ref_freq = float(config_dict["frequency_in_Ghz"]) * 1000  # 1.5GH = 1500Mhz = 0.000666us
    ref_period = 1 / ref_freq  # 1.5GH = 1500Mhz = 0.000666us

    msg_size_keys = {'comms_time_fwd_cycles': 'fwd_pass_msg_size',
                     'comms_scaleout_time_fwd_cycles': 'fwd_pass_msg_size',
                     'comms_time_inp_grad_cycles': 'inp_grad_msg_size',
                     'comms_scaleout_time_inp_cycles': 'inp_grad_msg_size',
                     'comms_time_wtgrad_cycles': 'wt_grad_msg_size',
                     'comms_scaleout_time_wt_cycles': 'wt_grad_msg_size'}

    comms_type_keys = {'comms_time_fwd_cycles': 'fwd_collective_comms_type',
                       'comms_scaleout_time_fwd_cycles': 'fwd_collective_comms_type',
                       'comms_time_inp_grad_cycles': 'inp_collective_comms_type',
                       'comms_scaleout_time_inp_cycles': 'inp_collective_comms_type',
                       'comms_time_wtgrad_cycles': 'wt_collective_comms_type',
                       'comms_scaleout_time_wt_cycles': 'wt_collective_comms_type'}

    fwd_layer_sub_tasks = {'fwd_pass_comp_cycles': FW,
                           'comms_time_fwd_cycles': COMM_FW,
                           'comms_scaleout_time_fwd_cycles': COMM_SO_FW,
                           'comms_scaleout_time_wt_cycles': COMM_SO_WT}
    bwd_layer_sub_tasks = {'inp_grad_comp_cycles': INP,
                           'wt_grad_comp_cycles': WT,
                           'comms_time_inp_grad_cycles': COMM_INP,
                           'comms_time_wtgrad_cycles': COMM_WT,
                           'comms_scaleout_time_inp_cycles': COMM_SO_INP,
                           'comms_scaleout_time_wt_cycles': COMM_SO_WT}

    # Workload parsing
    workload = Workload('Resnet50')
    connections = dict()  # {layer -> {predeseccors: [layers], successors: [layers]}
    start = Task('Start', TYPES.START)
    end = Task('End', TYPES.END)
    workload.add_tasks([start, end])

    # Parsing layers, creating tasks and internal connections, saving external connections
    con = 0
    layers_num = len(wl_json['nodes'])
    for layer in wl_json['nodes']:
        layer_name = layer['data']['Layer']['Layer Name']
        layer_pass = layer['data']['Layer']['l_pass']
        sub_tasks = dict()
        sub_task2 = dict()
        if layer_pass == "bwd":
            for alias, sub_task in bwd_layer_sub_tasks.items():
                processing_cycles = float(layer['data']['Layer'][alias])
                workload_cycles += processing_cycles
                if processing_cycles > 0 or not sub_task.startswith(COMM):
                    if sub_task.startswith(COMM):
                        proc = Task(layer_name + TASK_NAME_SEPARATOR + sub_task, TYPES.PROC,
                                    processing_cycles=processing_cycles,
                                    msg_size=float(layer['data']['Layer'][msg_size_keys[alias]]),
                                    comms_type=layer['data']['Layer'][comms_type_keys[alias]],
                                    layer_pass=layer_pass)
                    else:
                        proc = Task(layer_name + TASK_NAME_SEPARATOR + sub_task, TYPES.PROC,
                                    processing_cycles=processing_cycles,
                                    layer_pass=layer_pass)
                    if sub_task.startswith(COMM):
                        proc.attach_attribute('TASK_TYPE', COMM)
                    sub_tasks[sub_task] = proc

                    workload.add_task(proc)

            if COMM_INP in sub_tasks.keys():
                workload.connect_tasks(str(con), sub_tasks[INP], sub_tasks[COMM_INP])
                con += 1
                if COMM_SO_INP in sub_tasks.keys():
                    workload.connect_tasks(str(con), sub_tasks[COMM_INP], sub_tasks[COMM_SO_INP])
                    con += 1
            elif COMM_SO_INP in sub_tasks.keys():
                workload.connect_tasks(str(con), sub_tasks[INP], sub_tasks[COMM_SO_INP])
                con += 1

            new_end = Task('End_of_' + layer_name + '_WT', TYPES.END)
            workload.add_task(new_end)

            if COMM_WT in sub_tasks.keys():
                workload.connect_tasks(str(con), sub_tasks[WT], sub_tasks[COMM_WT])
                con += 1
                if COMM_SO_WT in sub_tasks.keys():
                    workload.connect_tasks(str(con), sub_tasks[COMM_WT], sub_tasks[COMM_SO_WT])
                    con += 1
                    workload.connect_tasks(str(con), sub_tasks[COMM_SO_WT], new_end)
                    con += 1
                else:
                    workload.connect_tasks(str(con), sub_tasks[COMM_WT], new_end)
                    con += 1
            elif COMM_SO_WT in sub_tasks.keys():
                workload.connect_tasks(str(con), sub_tasks[WT], sub_tasks[COMM_SO_WT])
                con += 1
                workload.connect_tasks(str(con), sub_tasks[COMM_SO_WT], new_end)
                con += 1
            elif WT in sub_tasks.keys():
                workload.connect_tasks(str(con), sub_tasks[WT], new_end)
                con += 1
            else:
                print("Could not connect {}".format(new_end.name))

            if WT in sub_tasks.keys():
                workload.connect_tasks(str(con), sub_tasks[INP], sub_tasks[WT])
                con += 1

            # new_end = Task('End_of_' + layer_name + 'COMM_SO', TYPES.END)
            # workload.add_task(new_end)
            # workload.connect_tasks(str(con), sub_tasks[COMM_SO], new_end)
            # con += 1

        else:
            for alias, sub_task in fwd_layer_sub_tasks.items():
                processing_cycles = float(layer['data']['Layer'][alias])
                workload_cycles += processing_cycles
                # comms which have 0 processing cycles to be ignored,
                # INP and WT even though they have 0 processing cycle, create task related to that
                if processing_cycles > 0 or not sub_task.startswith(COMM):
                    if sub_task.startswith(COMM):
                        proc = Task(layer_name + TASK_NAME_SEPARATOR + sub_task, TYPES.PROC,
                                    processing_cycles=processing_cycles,
                                    msg_size=float(layer['data']['Layer'][msg_size_keys[alias]]),
                                    comms_type=layer['data']['Layer'][comms_type_keys[alias]],
                                    layer_pass=layer_pass)
                    else:
                        proc = Task(layer_name + TASK_NAME_SEPARATOR + sub_task, TYPES.PROC,
                                    processing_cycles=processing_cycles,
                                    layer_pass=layer_pass)
                    if sub_task.startswith(COMM):
                        proc.attach_attribute('TASK_TYPE', COMM)
                    sub_tasks[sub_task] = proc
                    workload.add_task(proc)

                    if int(config_dict["data_parallel"]) == 0:
                        if sub_task.startswith(COMM):
                            proc2 = Task("2_" + layer_name + TASK_NAME_SEPARATOR + sub_task, TYPES.PROC,
                                         processing_cycles=processing_cycles,
                                         msg_size=float(layer['data']['Layer'][msg_size_keys[alias]]),
                                         comms_type=layer['data']['Layer'][comms_type_keys[alias]],
                                         layer_pass=layer_pass)
                        else:
                            proc2 = Task("2_" + layer_name + TASK_NAME_SEPARATOR + sub_task, TYPES.PROC,
                                         processing_cycles=processing_cycles,
                                         layer_pass=layer_pass)
                        if sub_task.startswith(COMM):
                            proc2.attach_attribute('TASK_TYPE', COMM)
                        sub_task2[sub_task] = proc2
                        workload.add_task(proc2)

            if COMM_FW in sub_tasks.keys():
                workload.connect_tasks(str(con), sub_tasks[FW], sub_tasks[COMM_FW])
                con += 1
                if COMM_SO_FW in sub_tasks.keys():
                    workload.connect_tasks(str(con), sub_tasks[COMM_FW], sub_tasks[COMM_SO_FW])
                    con += 1
                    if COMM_SO_WT in sub_tasks.keys():
                        workload.connect_tasks(str(con), sub_tasks[COMM_SO_FW], sub_tasks[COMM_SO_WT])
                        con += 1
                else:
                    if COMM_SO_WT in sub_tasks.keys():
                        workload.connect_tasks(str(con), sub_tasks[COMM_FW], sub_tasks[COMM_SO_WT])
                        con += 1
            elif COMM_SO_FW in sub_tasks.keys():
                workload.connect_tasks(str(con), sub_tasks[FW], sub_tasks[COMM_SO_FW])
                con += 1
                if COMM_SO_WT in sub_tasks.keys():
                    workload.connect_tasks(str(con), sub_tasks[COMM_SO_FW], sub_tasks[COMM_SO_WT])
                    con += 1
            else:
                if COMM_SO_WT in sub_tasks.keys():
                    workload.connect_tasks(str(con), sub_tasks[FW], sub_tasks[COMM_SO_WT])
                    con += 1

            if COMM_FW in sub_task2.keys():
                workload.connect_tasks("2_" + str(con), sub_task2[FW], sub_task2[COMM_FW])
                con += 1
                if COMM_SO_FW in sub_task2.keys():
                    workload.connect_tasks("2_" + str(con), sub_task2[COMM_FW], sub_task2[COMM_SO_FW])
                    con += 1
                    if COMM_SO_WT in sub_task2.keys():
                        workload.connect_tasks("2_" + str(con), sub_task2[COMM_SO_FW], sub_task2[COMM_SO_WT])
                        con += 1
                else:
                    if COMM_SO_WT in sub_task2.keys():
                        workload.connect_tasks("2_" + str(con), sub_task2[COMM_FW], sub_task2[COMM_SO_WT])
                        con += 1
            elif COMM_SO_FW in sub_task2.keys():
                workload.connect_tasks("2_" + str(con), sub_task2[FW], sub_task2[COMM_SO_FW])
                con += 1
                if COMM_SO_WT in sub_task2.keys():
                    workload.connect_tasks("2_" + str(con), sub_task2[COMM_SO_FW], sub_task2[COMM_SO_WT])
                    con += 1
            else:
                if COMM_SO_WT in sub_task2.keys():
                    workload.connect_tasks("2_" + str(con), sub_task2[FW], sub_task2[COMM_SO_WT])
                    con += 1

        # Retreiving successors connections
        layer_connections = connections.get(layer_name, dict())
        successors = layer_connections.get(SUCCESSORS, [])
        if layer_name in successor_dict.keys():
            successors.extend(successor_dict[layer_name])
        layer_connections[SUCCESSORS] = successors

        # Retreiving predecessors connections
        predecessors = layer_connections.get(PREDECESSORS, [])
        if layer_name in predecessor_dict.keys():
            predecessors.extend(predecessor_dict[layer_name])
        layer_connections[PREDECESSORS] = predecessors
        connections[layer_name] = layer_connections

        if layer_pass != "bwd" and int(config_dict["data_parallel"]) == 0:
            layer_connections2 = connections.get("2_" + layer_name, dict())
            successors = layer_connections2.get(SUCCESSORS, [])
            if "2_" + layer_name in successor_dict.keys():
                successors.extend(successor_dict["2_" + layer_name])
            layer_connections2[SUCCESSORS] = successors

            predecessors = layer_connections2.get(PREDECESSORS, [])
            if "2_" + layer_name in predecessor_dict.keys():
                predecessors.extend(predecessor_dict["2_" + layer_name])
            layer_connections2[PREDECESSORS] = predecessors
            connections["2_" + layer_name] = layer_connections2

    # External connections
    for layer, layer_connections in connections.items():
        # Successors connections
        if "bwd_" not in layer:
            layer_fw_so_comm_t = workload.get_task(layer + TASK_NAME_SEPARATOR + COMM_SO_FW)
            if layer_fw_so_comm_t is None:
                layer_fw_so_comm_t = workload.get_task(layer + TASK_NAME_SEPARATOR + COMM_FW)
            if layer_fw_so_comm_t is None:
                layer_fw_so_comm_t = workload.get_task(layer + TASK_NAME_SEPARATOR + FW)
            successors = layer_connections.get(SUCCESSORS, [])
            for successor in successors:
                if "bwd_" not in successor:
                    successor_fw_t = workload.get_task(successor + TASK_NAME_SEPARATOR + FW)
                    workload.connect_tasks(str(con), layer_fw_so_comm_t, successor_fw_t);
                    con += 1
                else:
                    successor_inp_t = workload.get_task(successor + TASK_NAME_SEPARATOR + INP)
                    workload.connect_tasks(str(con), layer_fw_so_comm_t, successor_inp_t)
                    con += 1
        else:
            layer_comm_so_inp_t = workload.get_task(layer + TASK_NAME_SEPARATOR + COMM_SO_INP)
            if layer_comm_so_inp_t is None:
                layer_comm_so_inp_t = workload.get_task(layer + TASK_NAME_SEPARATOR + COMM_INP)
            if layer_comm_so_inp_t is None:
                layer_comm_so_inp_t = workload.get_task(layer + TASK_NAME_SEPARATOR + INP)
            successors = layer_connections.get(SUCCESSORS, [])
            for successor in successors:
                if "bwd_" in successor:  # redudant check bwd layer successors are always
                    successor_inp_t = workload.get_task(successor + TASK_NAME_SEPARATOR + INP)
                    workload.connect_tasks(str(con), layer_comm_so_inp_t, successor_inp_t)
                    con += 1
                else:
                    # this is last optimizer layer, which should get triggered after
                    # all sub tasks have to be executed before optimizer can kick in
                    # as COMM_SO_WT is the last subtask in this TE graph
                    layer_comm_so_wt_t = workload.get_task(layer + TASK_NAME_SEPARATOR + COMM_SO_WT)
                    if layer_comm_so_wt_t is None:
                        layer_comm_so_wt_t = workload.get_task(layer + TASK_NAME_SEPARATOR + COMM_WT)
                    if layer_comm_so_wt_t is None:
                        layer_comm_so_wt_t = workload.get_task(layer + TASK_NAME_SEPARATOR + WT)
                    successor_fw_t = workload.get_task(successor + TASK_NAME_SEPARATOR + FW)
                    workload.connect_tasks(str(con), layer_comm_so_wt_t, successor_fw_t)
                    con += 1

    # layers that dont have predecessors dont have any triggering point
    # so they have to be triggered from start
    start_layers = list(successor_dict.keys() - predecessor_dict.keys())
    for start_layer in start_layers:
        start_layer_fw = workload.get_task(start_layer + TASK_NAME_SEPARATOR + FW)
        workload.connect_tasks(str(con), start, start_layer_fw)
        con += 1

    end_layers = list(predecessor_dict.keys() - successor_dict.keys())
    for end_layer in end_layers:
        end_layer_comm_fw = workload.get_task(end_layer + TASK_NAME_SEPARATOR + COMM_FW)
        if end_layer_comm_fw is None:
            end_layer_comm_fw = workload.get_task(end_layer + TASK_NAME_SEPARATOR + FW)
        workload.connect_tasks(str(con), end_layer_comm_fw, end)
        con += 1

    workload.draw(os.path.basename(workload_json), view=False, format_='pdf', keep_gv=False)
    return workload 
   

In [7]:
from switch_worload.nw_traffic_gen import nw_traffic_gen
config_dict = read_config("C:\\Users\\vr\\OneDrive - Intel Corporation\\Desktop\\Repo\\dl-modeling\\speedsim\\switch_worload\\config.csv")
config_scaleout = read_config("C:\\Users\\vr\\OneDrive - Intel Corporation\\Desktop\\Repo\\dl-modeling\\speedsim\\switch_worload\\config_scaleout.csv")
workload_json = "C:\\Users\\vr\\OneDrive - Intel Corporation\\Desktop\\Repo\\dl-modeling\\speedsim\\switch_worload\\ResNet-50.prototxt_graph_out.json"

# number of gpus
# ngpu = float(config_scaleout["num_pvc"])
ngpu = 32

# model parallel
# mp = float(config_dict["model_split"])
mp = 8

#dp = ngpu / mp
dp = 8

(wt_scaleup, wt_scaleout, fwd_inp_scaleup, fwd_inp_scaleout) = nw_traffic_gen(int(ngpu / dp), int(dp), int(mp))

workload = speedsim_analysis_train(workload_json=workload_json,
                       config_dict=config_dict,
                       scaleout_dict=config_scaleout)

In [8]:
# extend workload to multiple instance

tasks = []
connections = []

workload_instance = int(ngpu / mp)

real_workload = Workload("Real_Workload")

for task in workload.tasks:
    for instance in range(0, workload_instance):
        gpu_range = (instance * mp, (instance+1)*mp-1)
        attributes = task.attributes
        attributes.update({"workload_instace": instance,
                           "gpu_range": gpu_range})
        t = Task(str(instance)+"_"+task.name,
                 type=task.type, read_bytes=task.read_bytes, write_bytes=task.write_bytes,
                 processing_cycles=task.processing_cycles, attributes=task.attributes)
        real_workload.add_task(t)

con = 0
for connection in workload.connections:
    for instance in range(0, workload_instance):
        source = real_workload.get_task(str(instance)+"_"+connection.source.name)
        target = real_workload.get_task(str(instance)+"_"+connection.target.name)
        real_workload.connect_tasks(str(con), source, target)   
        con+=1
        

In [9]:
real_workload.draw(os.path.basename("real_"+workload_json), view=False, format_='pdf', keep_gv=False)

'ResNet-50.prototxt_graph_out.json.pdf'

In [13]:
len(real_workload.tasks)

5000

In [11]:
len(real_workload.connections)

5184