In [410]:
from enum import Enum
import uuid


class DeviceType(Enum):
    """Kind of compute device a `Device` represents."""
    # Host processor.
    CPU = 0
    # Graphics processor; the default type used by `Device`.
    GPU = 1
    
class GPUSpec:
    """Description of one GPU model.

    Attributes:
        id: random UUID generated for this spec at construction time.
        name: display name of the GPU.
        csv_name: short tag for this GPU; elsewhere in the notebook it is
            interpolated into latency CSV file names.
    """

    def __init__(self,  name, csv_name):
        self.name, self.csv_name = name, csv_name
        self.id = uuid.uuid4()
    
class Device:
    """A compute device that executes one task layer at a time.

    Attributes:
        id: identifier of the device; the caller-supplied `device_id`, or a
            fresh UUID when none is given.
        name: display name used in log messages.
        GPUSpec: the `GPUSpec` describing this device (None if not a GPU).
        type: a `DeviceType` member.
        task: the task currently running on the device, or None when idle.
        remain_time: time left on the layer currently being executed, in the
            same units as the model's latency table.
    """

    def __init__(self, name, device_type=DeviceType.GPU, GPUSpec=None, task=None, device_id=None):
        # A `uuid.uuid4()` default would be evaluated only once, when the
        # function is defined, so every device would share the same id.
        # Generate it per instance instead.
        self.id = device_id if device_id is not None else uuid.uuid4()
        self.name = name
        self.GPUSpec = GPUSpec
        self.type = device_type
        self.task = task
        self.remain_time = 0

    def assign(self, task, batch_size=1):
        """Start executing the current layer of `task` on this device.

        Args:
            task: the task to run; its `current_layer` selects the layer.
            batch_size: first-level key into `task.model.layer_latency`
                (defaults to 1, the value that was previously hard-coded).

        Side effects:
            Prints a log line, sets `remain_time` to the layer's latency on
            this device's GPU spec and records `task` as the running task.
        """
        print(f"[Log] Assign layer {task.current_layer} of task {task.model.name} to device {self.name}")
        model = task.model
        # layer_latency is indexed as [batch key][GPU spec id][layer index].
        layer_latency = model.layer_latency[batch_size][self.GPUSpec.id][task.current_layer]
        self.remain_time = layer_latency
        self.task = task

class Model:
    """A neural network described by per-layer input shapes and latencies.

    Attributes:
        id: random UUID generated at construction time.
        name: display name of the model.
        layer_input_shape: iterable of input shapes, one per layer.
        layer_latency: latency table supplied by the caller.
        layer_movement_time: list produced by `get_movement_time`, one entry
            per shape plus a trailing 0.
    """

    def __init__(self, name, layer_input_shape, layer_latency):
        self.id = uuid.uuid4()
        self.name = name
        self.layer_input_shape = layer_input_shape
        self.layer_latency = layer_latency
        self.layer_movement_time = self.get_movement_time(self.layer_input_shape)

    @staticmethod
    def _profile_transfer(shape):
        """Profile moving one random tensor of `shape` to device index 1."""
        sleep(0.1)
        tensor = torch.randn(shape)
        # NOTE(review): the result of `.to(0)` is discarded, so `tensor` is
        # still the object created by torch.randn when it is profiled below.
        # Confirm whether the transfer was meant to start from device 0.
        tensor.to(0)
        with torch.autograd.profiler.profile(use_cuda=True) as profiler:
            tensor.to(1)
        return round(profiler.self_cpu_time_total * 1000)

    @staticmethod
    def get_movement_time(layer_input_shape):
        """Return the profiled transfer time for every shape, followed by 0.

        Each entry is `round(prof.self_cpu_time_total * 1000)` for a tensor of
        the corresponding shape; a final 0 is always appended.
        """
        measured = [Model._profile_transfer(shape) for shape in layer_input_shape]
        return measured + [0]

class Task:
    """One inference request: a model plus the progress made through it.

    Attributes:
        id: random UUID generated at construction time.
        model: the model this task runs.
        current_layer: index of the layer to execute next.
        input_data_position: position value supplied by the caller
            (defaults to 0).
    """

    def __init__(self, model, current_layer=0, input_data_position=0):
        self.id = uuid.uuid4()
        self.model = model
        self.current_layer = current_layer
        self.input_data_position = input_data_position

    # The attribute was originally misspelled `input_date_position`; keep that
    # name working as an alias so existing readers/writers are not broken.
    @property
    def input_date_position(self):
        """Backward-compatible alias for `input_data_position`."""
        return self.input_data_position

    @input_date_position.setter
    def input_date_position(self, value):
        self.input_data_position = value

In [411]:
# GPU models that have latency CSVs; the second argument is the tag used in
# the CSV file names.
GPU_2060 = GPUSpec("RTX 2060", "2060")
# Display name corrected from the typo "GTX 1008 Ti".
GPU_1080 = GPUSpec("GTX 1080 Ti", "1080")
gpu_list = (GPU_2060, GPU_1080)

In [412]:
import torch
import pandas as pd
from time import sleep
from torchvision.models import resnet18, vgg16, alexnet, mobilenet_v3_large

def get_children(model: torch.nn.Module):
    """Flatten a module tree into its leaf modules, depth first.

    Returns the module itself (not a list) when it has no children; otherwise
    returns a list built from the results for each direct child.
    """
    direct_children = list(model.children())
    if not direct_children:
        # No children: the module is itself a leaf.
        return model

    leaves = []
    for sub_module in direct_children:
        try:
            # A child with children yields a list that can be spliced in.
            leaves.extend(get_children(sub_module))
        except TypeError:
            # The result was not iterable (a bare leaf module): add it as-is.
            leaves.append(get_children(sub_module))
    return leaves

def get_all_shape(model):
    """Record the input shape seen by each leaf layer during one forward pass.

    A random tensor of shape (1, 3, 224, 224) is pushed through `model`, and
    the shape of the first positional input of every leaf layer returned by
    `get_children` is collected in call order.

    The shapes are captured with forward pre-hooks that are removed before
    returning, so `model` is left without any instrumentation (the previous
    implementation replaced each layer's `forward` permanently).

    Args:
        model: a torch.nn.Module accepting a (1, 3, 224, 224) float tensor.

    Returns:
        list of torch.Size, one per leaf-layer invocation.
    """
    layers = get_children(model)
    if isinstance(layers, torch.nn.Module):
        # get_children returns the bare module when it has no children.
        layers = [layers]

    all_input_shape = list()

    def record_input_shape(module, args):
        # Pre-hooks receive the positional inputs as a tuple; returning None
        # leaves the inputs unchanged.
        all_input_shape.append(args[0].shape)

    handles = [layer.register_forward_pre_hook(record_input_shape) for layer in layers]
    try:
        # Only shapes are needed, so skip building the autograd graph.
        with torch.no_grad():
            model(torch.randn(1, 3, 224, 224))
    finally:
        for handle in handles:
            handle.remove()
    return all_input_shape



# Inclusive upper bound of the index `i` (1..max_batch) used when loading the
# per-GPU latency CSV files for each model.
max_batch = 3

def prune_df(df, fused_ops=('__add__', '__iadd__', '__mul__')):
    """Fold the duration of fused-op rows into the preceding kept row.

    Rows whose 'Op' value is in `fused_ops` are removed, and their 'duration'
    is added to the closest earlier row that is kept. A run of several
    consecutive fused rows is accumulated into the same kept row. Fused rows
    that appear before any kept row have nothing to be folded into and are
    simply dropped.

    The input frame is not modified.

    Args:
        df: DataFrame with at least the columns 'Op' and 'duration'.
        fused_ops: 'Op' values to fold away.

    Returns:
        A new DataFrame without the fused rows. As before, the index is reset
        with `reset_index()`, so the previous index is kept as an 'index'
        column.
    """
    pruned = df.copy()
    is_fused = pruned['Op'].isin(list(fused_ops)).to_numpy()
    durations = pruned['duration'].to_numpy(copy=True)

    last_kept = None  # position of the most recent row that is not fused
    for pos in range(len(pruned)):
        if not is_fused[pos]:
            last_kept = pos
        elif last_kept is not None:
            durations[last_kept] += durations[pos]

    pruned['duration'] = durations
    return pruned[~is_fused].reset_index()

In [413]:
# ResNet latency table: resnet_layer[i][gpu spec id] is the pruned 'duration'
# column read from Resnet/ResNet_<gpu tag>_<i>.csv.
resnet_layer = dict()
for batch in range(1, max_batch + 1):
    for spec in gpu_list:
        csv_path = f"Resnet/ResNet_{spec.csv_name}_{batch}.csv"
        resnet_layer.setdefault(batch, dict())[spec.id] = prune_df(pd.read_csv(csv_path))['duration']

ResNet = Model("ResNet", get_all_shape(resnet18()), resnet_layer)

In [414]:
# AlexNet latency table: alexnet_layer[i][gpu spec id] is the pruned
# 'duration' column of the matching CSV.
# NOTE(review): the files live in AlexNet/ but carry a "ResNet_" prefix;
# confirm this matches the names on disk and is not a copy-paste slip.
alexnet_layer = dict()
for batch in range(1, max_batch + 1):
    for spec in gpu_list:
        csv_path = f"AlexNet/ResNet_{spec.csv_name}_{batch}.csv"
        alexnet_layer.setdefault(batch, dict())[spec.id] = prune_df(pd.read_csv(csv_path))['duration']

AlexNet = Model("AlexNet", get_all_shape(alexnet()), alexnet_layer)

In [415]:
# RNN latency table: rnn_layer[i][gpu spec id] is the pruned 'duration'
# column read from rnn_result/rnn_<gpu tag>_<i>.csv.
rnn_layer = dict()
for batch in range(1, max_batch + 1):
    for spec in gpu_list:
        csv_path = f"rnn_result/rnn_{spec.csv_name}_{batch}.csv"
        rnn_layer.setdefault(batch, dict())[spec.id] = prune_df(pd.read_csv(csv_path))['duration']

# Layer input shapes are written out by hand for the RNN instead of being
# traced with get_all_shape.
rnn_shape = [torch.Size(dims) for dims in ([1, 28, 28], [1, 28, 28], [1, 128])]
RNN = Model("RNN", rnn_shape, rnn_layer)

In [416]:
# Global simulation state: known models, the queue of pending tasks and the
# list of devices.
Models = [ResNet, AlexNet, RNN]
Tasks = list()
Devices = list()
# This entry is typed as a CPU, so label it "CPU" (it was mislabelled "GPU").
Devices.append(Device("CPU", device_type=DeviceType.CPU, device_id=0))

GPU_0 = Device("GPU (2060)", device_type=DeviceType.GPU, GPUSpec=GPU_2060)
GPU_1 = Device("GPU (1080)", device_type=DeviceType.GPU, GPUSpec=GPU_1080)
Devices.append(GPU_0)
Devices.append(GPU_1)

In [417]:
# Sum of all per-layer latencies of each model on the RTX 2060 spec, using
# key 1 of the latency table.
for model in Models:
    latencies_2060 = model.layer_latency[1][GPU_2060.id]
    print(model.name, sum(latencies_2060))

ResNet 2705521
AlexNet 1463499
RNN 459672


In [422]:
# Create four tasks (ResNet, AlexNet, ResNet, AlexNet) and queue them in
# that order.
Task_1, Task_2, Task_3, Task_4 = (Task(net) for net in (ResNet, AlexNet, ResNet, AlexNet))
Tasks.extend((Task_1, Task_2, Task_3, Task_4))

In [428]:
# Replace the pending queue with four fresh tasks: two ResNet, then two AlexNet.
Tasks = [Task(net) for net in (ResNet, ResNet, AlexNet, AlexNet)]

In [419]:
# Start the first layer of Task_1 on GPU_0 and of Task_2 on GPU_1.
for gpu_device, initial_task in ((GPU_0, Task_1), (GPU_1, Task_2)):
    gpu_device.assign(initial_task)

[Log] Assign layer 0 of task ResNet to device GPU (2060)
[Log] Assign layer 0 of task AlexNet to device GPU (1080)


In [429]:
# Event-driven simulation. Each iteration advances time by the smallest
# remaining layer time among busy GPUs, moves finished layers forward, and
# accumulates the elapsed time in `tail_latency`.
tail_latency = 0
while True:
    gpu_devices = [dev for dev in Devices if dev.type == DeviceType.GPU]
    running = [dev for dev in gpu_devices if dev.task]

    # Time until the next layer completes; infinity when every GPU is idle.
    min_delay = min((dev.remain_time for dev in running), default=float("inf"))

    for dev in running:
        dev.remain_time -= min_delay
        if dev.remain_time != 0:
            continue
        # The current layer has finished: start the next one, or release the
        # device when the task has no layers left.
        next_layer = dev.task.current_layer + 1
        if next_layer < len(dev.task.model.layer_input_shape):
            dev.task.current_layer = next_layer
            dev.assign(dev.task)
        else:
            dev.task = None

    if min_delay != float("inf"):
        tail_latency += min_delay
        continue

    # Every GPU was idle at the start of this iteration: stop when the queue
    # is empty, otherwise hand queued tasks to the idle GPUs. Note that new
    # tasks are only dispatched here, i.e. once all GPUs have become idle.
    if len(Tasks) == 0:
        break
    for dev in gpu_devices:
        if not dev.task and len(Tasks):
            dev.assign(Tasks.pop(0))
print(f"[Result] Tail latency: {tail_latency}")

[Log] Assign layer 0 of task ResNet to device GPU (2060)
[Log] Assign layer 0 of task ResNet to device GPU (1080)
[Log] Assign layer 1 of task ResNet to device GPU (1080)
[Log] Assign layer 1 of task ResNet to device GPU (2060)
[Log] Assign layer 2 of task ResNet to device GPU (1080)
[Log] Assign layer 3 of task ResNet to device GPU (1080)
[Log] Assign layer 4 of task ResNet to device GPU (1080)
[Log] Assign layer 5 of task ResNet to device GPU (1080)
[Log] Assign layer 6 of task ResNet to device GPU (1080)
[Log] Assign layer 7 of task ResNet to device GPU (1080)
[Log] Assign layer 2 of task ResNet to device GPU (2060)
[Log] Assign layer 8 of task ResNet to device GPU (1080)
[Log] Assign layer 3 of task ResNet to device GPU (2060)
[Log] Assign layer 9 of task ResNet to device GPU (1080)
[Log] Assign layer 10 of task ResNet to device GPU (1080)
[Log] Assign layer 4 of task ResNet to device GPU (2060)
[Log] Assign layer 11 of task ResNet to device GPU (1080)
[Log] Assign layer 12 of task

In [421]:
# Print the summed per-layer latency for every GPU spec (latency-table key 1),
# first for ResNet and then for AlexNet.
for net in (ResNet, AlexNet):
    for gpu_latencies in net.layer_latency[1].values():
        print(sum(gpu_latencies))

2705521
2100057
1463499
1302990
