## Step 1: Loopnest Scheduling

Run Timeloop-Topk using either the baseline model or the effective bandwidth model (considering the cryptographic engine)

In [1]:
%load_ext autoreload
%autoreload 2

import os
import shutil
import subprocess
from pathlib import Path

import yaml

from utils import generate_arch_files, xml2mapping

### Define the architecture

First, define an architecture design. The code below generates/detects a new architecture configuration based on the template design at `designs/{design_name}/template`.

In [2]:
# All architecture knobs in one literal; the next step compares this dict
# against each saved designs/<template>/<version>/config.yaml.
configuration_dict = {
    # template design (with constraints and memory hierarchy representing "dataflow")
    'TEMPLATE_DESIGN': 'eyeriss_like',

    # number of bits used for I/O/W; we assume integer
    'WORDBITS': 16,

    # DRAM bandwidth setting: words / cycle (not bits / cycle)
    'DRAM_READ_BANDWIDTH': 32,
    'DRAM_WRITE_BANDWIDTH': 32,

    # SRAM setting
    # - do we have a single shared glb or multiple glbs for each datatype?
    # - for each glb (if shared, just one), define depth/width/#banks and bandwidths
    'SRAM_SHARED': True,
    'SRAM_DEPTH': [2 ** 13],
    'SRAM_WIDTH': [2 ** 7],
    # SRAM width and SRAM banks define the maximum possible bandwidth
    'SRAM_BANKS': [32],
    'SRAM_READ_BANDWIDTH': [32],
    'SRAM_WRITE_BANDWIDTH': [32],

    # PE array setting
    # - shape of PE array X x Y
    # - whether a PE has a shared scratchpad or separate scratchpads for each datatype
    'PE_X': 14,
    'PE_Y': 12,
    'PE_SPAD_SHARED': False,
    # one entry per datatype, in the order Weight, IFmap, OFmap
    'PE_SPAD_DEPTH': [192, 12, 16],
    'PE_SPAD_WIDTH': [16, 16, 16],

    # Cryptographic engine setting
    # - type of cryptographic engine + dram (LPDDR4 + AES-GCM)
    # - cycle for AES-GCM
    # - whether the cryptographic engines are shared among all datatypes or assigned to each datatype
    'CRYPT_ENGINE_TYPE': 'effective_lpddr4_aesgcm',
    # avg. cycle/128bit
    'CRYPT_ENGINE_CYCLE_PER_BLOCK': 1,

    'CRYPT_ENGINE_SHARED': False,
    'CRYPT_ENGINE_COUNT': [1, 1, 1],

    'EFFECTIVE_CONSERVATIVE': True,
}

# Create a directory for this configuration if it doesn't exist already.
# Iterate through the design folders to check for a pre-existing folder whose
# saved config.yaml is equal to the configuration defined above.
design_dir = 'designs/{}'.format(configuration_dict['TEMPLATE_DESIGN'])
arch_dir = None
total_vers = 0  # number of folders whose config.yaml was loaded before the search stopped
for path in sorted(os.listdir(design_dir)):  # sorted: deterministic pick if several folders match
    candidate_dir = os.path.join(design_dir, path)
    if path == 'template' or not os.path.isdir(candidate_dir):
        continue
    config_path = os.path.join(candidate_dir, 'config.yaml')
    if not os.path.isfile(config_path):
        print("No config.yaml file in the directory {}".format(str(candidate_dir)))
        continue
    try:
        with open(config_path, 'r') as f:
            config_file = yaml.safe_load(f)
    except (OSError, yaml.YAMLError) as err:
        # An unreadable or malformed config is reported as such rather than
        # being mislabelled as "missing"; the folder is simply skipped.
        print("Could not read {}: {}".format(config_path, err))
        continue
    total_vers += 1
    if config_file == configuration_dict:
        arch_dir = path
        print("Pre-existing folder found. Setting the arch_dir to {}".format(arch_dir))
        break

if arch_dir is None:
    # Start from the historical name 'ver{total_vers}' but skip any name that is
    # already taken (non-contiguous versions, folders without a readable config),
    # because shutil.copytree refuses to copy onto an existing directory.
    new_ver = total_vers
    while os.path.exists(os.path.join(design_dir, 'ver{}'.format(new_ver))):
        new_ver += 1
    arch_dir = 'ver{}'.format(new_ver)
    shutil.copytree(os.path.join(design_dir, 'template'), os.path.join(design_dir, arch_dir))
    with open(os.path.join(design_dir, arch_dir, 'config.yaml'), 'w') as f:
        yaml.dump(configuration_dict, f)

    # create baseline and effective files
    generate_arch_files(os.path.join(design_dir, arch_dir, 'arch'), configuration_dict)

    # create scheduling / evaluation folders, plus their baseline counterparts;
    # exist_ok tolerates a template that already ships any of them
    for out_folder in ('scheduling', 'evaluation', 'baseline_scheduling', 'baseline_evaluation'):
        os.makedirs(os.path.join(design_dir, arch_dir, out_folder), exist_ok=True)

No config.yaml file in the directory designs/eyeriss_like/.ipynb_checkpoints
Pre-existing folder found. Setting the arch_dir to ver0


Alternatively, if you already know which folder you want to use, specify it here instead of running the cell above.

In [None]:
# Manual selection of an already generated architecture version.
design_dir = 'designs/{}'.format('eyeriss_like') # define your design name here

arch_ver = 0
arch_dir = 'ver{}'.format(arch_ver)              # sub directory under designs/{name}/{arch_dir}
selected_arch_path = os.path.join(design_dir, arch_dir)

# Reload the configuration that was saved alongside that version.
with open(os.path.join(selected_arch_path, 'config.yaml'), 'r') as config_stream:
    configuration_dict = yaml.safe_load(config_stream)

print("Setting the architecture directory to: {}".format(selected_arch_path))
print("Printing configuration:")
for option_name in configuration_dict:
    print("{}: {}".format(option_name, configuration_dict[option_name]))

### Define the DNN workload

Define a DNN workload in PyTorch, and convert it into a Timeloop workload.

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.models as model_zoo

import pytorch2timeloop as pytorch2timeloop

# Note: this version only supports nn.Conv2d (both normal convs and depthwise/pointwise convs) and nn.Linear

# Input / Batch info
input_size = (3, 224, 224)
batch_size = 1

# Model selection: keep exactly one (model_name, net) pair uncommented.
# AlexNet
model_name = 'alexnet'
net = model_zoo.alexnet(pretrained=False)

# ResNet18
# model_name = 'resnet18'
# net = model_zoo.resnet18(pretrained=False)

# MobilenetV2
# model_name = 'mobilenet_v2'
# net = model_zoo.mobilenet_v2(pretrained=False)

print(net)

# Convert to timeloop workloads; stored in workloads/{model_name}_batch{batch_size}
top_dir = 'workloads'
sub_dir = '{}_batch{}'.format(model_name, batch_size)
exception_module_names = []

# The conversion is skipped when the workload folder already exists,
# unless overwrite is switched on.
overwrite = False
workload_dir_missing = not os.path.exists(os.path.join(top_dir, sub_dir))
if workload_dir_missing or overwrite:
    # NOTE(review): the meaning of the positional `True` is defined by
    # pytorch2timeloop.convert_model and is not visible here - confirm.
    pytorch2timeloop.convert_model(
        net, input_size, batch_size,
        sub_dir, top_dir,
        True,
        exception_module_names,
    )



AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [4]:
# Check duplicate layers (layer information is identical)
# For a per-layer loopnest scheduling, only unique layers have to be searched

base_dir = Path(os.getcwd())
timeloop_dir = 'designs/{}/{}'.format(configuration_dict['TEMPLATE_DESIGN'], arch_dir)

n_layers = 0                 # running 1-based index over Conv2d / Linear modules
layer_dict = {}              # layer index -> parsed workload yaml
layer_duplicate_info = {}    # duplicate layer index -> earliest identical layer index
unique_layers = []           # layer indices that have no earlier identical layer
for module in net.modules():
    if not isinstance(module, (nn.Conv2d, nn.Linear)):
        continue
    n_layers += 1
    if n_layers not in layer_dict:
        workload_path = os.path.join(base_dir, top_dir, sub_dir, '{}_layer{}.yaml'.format(sub_dir, n_layers))
        with open(workload_path, 'r') as f:
            workload_info = yaml.safe_load(f)
        layer_dict[n_layers] = workload_info

    # identify the earliest duplicate layer (None when this layer is new)
    earliest_match = next(
        (prev_id for prev_id in range(1, n_layers) if layer_dict[prev_id] == layer_dict[n_layers]),
        None,
    )
    if earliest_match is None:
        unique_layers.append(n_layers)
    else:
        layer_duplicate_info[n_layers] = earliest_match

print(layer_duplicate_info)
print(unique_layers)

{}
[1, 2, 3, 4, 5, 6, 7, 8]


### Define the top-k parameter for Timeloop

Prepare the mapper.yaml for top-k search

In [5]:
# Number of best mappings the mapper should keep for each layer.
topk = 6
mapper_file_path = os.path.join(base_dir, timeloop_dir, 'mapper/mapper.yaml')

# Load the current mapper settings, patch the top-k entry, and write them back.
with open(mapper_file_path, 'r') as mapper_stream:
    mapper_config = yaml.safe_load(mapper_stream)
mapper_config['mapper'].update({'topk': topk})
with open(mapper_file_path, 'w') as mapper_stream:
    mapper_stream.write(yaml.dump(mapper_config))

### Run Timeloop for the *baseline* model (w/o considering cryptographic engines)

Run timeloop for each unique layer, and convert the output to a mapping file in yaml format. Then, evaluate using the top-1 loopnest schedule for each unique layer.

Running this cell can take some time, depending on your model and Timeloop settings.

In [None]:
def get_cmd(workload_info, layer_id, base_dir, timeloop_dir, sub_dir, top_dir):
    """Build the working directory and shell command for a baseline top-k search.

    Args:
        workload_info: parsed workload yaml; a layer whose
            ``['problem']['instance']`` has no ``'M'`` entry is treated as
            depthwise and uses the ``constraints_dw`` folder.
        layer_id: layer index used in the ``layer{id}`` folder and file names.
        base_dir: path object supporting the ``/`` operator (e.g. pathlib.Path).
        timeloop_dir: architecture folder, relative to ``base_dir``.
        sub_dir: workload folder name, also the prefix of the workload files.
        top_dir: folder under ``base_dir`` that holds the workload folders.

    Returns:
        ``[cwd, timeloopcmd]``: the directory to run in and the
        ``timeloop-mapper-topk`` command line (with a trailing space).
    """
    arch_root = base_dir / timeloop_dir
    run_dir = arch_root / 'baseline_scheduling' / sub_dir / f'layer{layer_id}'

    is_depthwise = 'M' not in workload_info['problem']['instance']
    constraint_glob = arch_root / ('constraints_dw/*.yaml' if is_depthwise else 'constraints/*.yaml')

    workload_yaml = f"{base_dir / top_dir / sub_dir / sub_dir}_layer{layer_id}.yaml"
    cli_parts = [
        "timeloop-mapper-topk",
        str(arch_root / 'arch/baseline.yaml'),
        str(arch_root / 'arch/components/*.yaml'),
        str(arch_root / 'mapper/mapper.yaml'),
        str(constraint_glob),
        workload_yaml,
    ]
    # the command has always ended with a single space; keep it that way
    return [str(run_dir), " ".join(cli_parts) + " "]

# Build one (working directory, command) pair per unique layer.
cwd_list = []
cmd_list = []

for layer_id in unique_layers:
    workload_path = os.path.join(base_dir, top_dir, sub_dir, '{}_layer{}.yaml'.format(sub_dir, layer_id))
    with open(workload_path, 'r') as f:
        workload_info = yaml.safe_load(f)
    [cwd, cmd] = get_cmd(workload_info, layer_id, base_dir, timeloop_dir, sub_dir, top_dir)
    cwd_list.append(cwd)
    cmd_list.append(cmd)

# Make sure the per-workload output folder exists (parents included).
os.makedirs(os.path.join(base_dir, timeloop_dir, 'baseline_scheduling', sub_dir), exist_ok=True)

# Run every command inside its own layer folder. Passing cwd= to the child
# process leaves the kernel's working directory untouched, so an error in the
# middle of the loop can no longer strand the notebook in a layer folder.
for cwd, cmd in zip(cwd_list, cmd_list):
    print("Executing cmd: {}".format(cmd))
    os.makedirs(cwd, exist_ok=True)
    # shell=True is required: the command relies on the shell to expand '*.yaml'
    completed = subprocess.run(cmd, shell=True, cwd=cwd)
    if completed.returncode != 0:
        # best effort, as before: report the failure and continue with the next layer
        print("WARNING: command exited with status {} in {}".format(completed.returncode, cwd))

Convert to mapping (.yaml) files

In [None]:
def convert_to_mapping(base_dir, timeloop_dir, top_dir, sub_dir, layer_idx, topk_idx,
                       scheduling_dir='baseline_scheduling'):
    """Convert one top-k mapper result (xml) of a layer into a mapping yaml file.

    Reads ``timeloop-mapper-topk{topk_idx}.map+stats.xml`` from the layer's
    scheduling folder, converts it with ``xml2mapping`` and writes the result
    as ``mapping{topk_idx}.yaml`` into the same folder.

    Args:
        base_dir: root directory of the project.
        timeloop_dir: architecture folder, relative to ``base_dir``.
        top_dir: folder under ``base_dir`` that holds the workload folders.
        sub_dir: workload folder name, also the prefix of the workload files.
        layer_idx: index of the layer whose result is converted.
        topk_idx: rank of the result to convert (the notebook uses 1..topk).
        scheduling_dir: folder under ``timeloop_dir`` holding the mapper
            output. Defaults to ``'baseline_scheduling'``; pass another folder
            name (e.g. ``'scheduling'``) to convert results stored there.

    Note:
        The architecture constraint file name is selected from the global
        ``configuration_dict['TEMPLATE_DESIGN']``.
    """
    layer_dir = os.path.join(base_dir, timeloop_dir, scheduling_dir, sub_dir, "layer{}".format(layer_idx))
    xml_file = os.path.join(layer_dir, "timeloop-mapper-topk{}.map+stats.xml".format(topk_idx))
    workload_file = os.path.join(base_dir, top_dir, sub_dir, "{}_layer{}.yaml".format(sub_dir, layer_idx))
    with open(workload_file, 'r') as f:
        workload_info = yaml.safe_load(f)

    # a workload without an 'M' entry is treated as depthwise
    dw = 'M' not in workload_info['problem']['instance']

    template_design = configuration_dict['TEMPLATE_DESIGN']
    if template_design in ('eyeriss_like', 'eyeriss_like_hbm2'):
        constraint_name = 'eyeriss_like_arch_constraints.yaml'
    elif template_design == 'output_stationary':
        constraint_name = 'simple_output_stationary_arch_constraints.yaml'
    else:
        # every other template falls back to the weight-stationary constraints
        constraint_name = 'simple_weight_stationary_arch_constraints.yaml'
    arch_constraint_file = os.path.join(base_dir, timeloop_dir,
                                        'constraints_dw' if dw else 'constraints',
                                        constraint_name)

    mapping = xml2mapping(xml_file, workload_file, arch_constraint_file, dw)
    with open(os.path.join(layer_dir, "mapping{}.yaml".format(topk_idx)), 'w') as f:
        yaml.dump({'mapping': mapping}, f)
        
# Convert every stored result (ranks 1..topk) of every unique layer.
topk_ranks = range(1, topk + 1)
for layer_idx in unique_layers:
    for topk_rank in topk_ranks:
        convert_to_mapping(base_dir, timeloop_dir, top_dir, sub_dir, layer_idx, topk_rank)

Evaluate the top-1 loopnest schedule

In [None]:
def get_cmd_model(workload_info, layer_id, base_dir, timeloop_dir, sub_dir, top_dir):
    """Build the working directory and shell command for evaluating the top-1 baseline mapping.

    Args:
        workload_info: parsed workload yaml. Kept for interface compatibility;
            it is not needed to build the command, because no constraint files
            are passed to ``timeloop-model``.
        layer_id: layer index used in the ``layer{id}`` folder and file names.
        base_dir: path object supporting the ``/`` operator (e.g. pathlib.Path).
        timeloop_dir: architecture folder, relative to ``base_dir``.
        sub_dir: workload folder name, also the prefix of the workload files.
        top_dir: folder under ``base_dir`` that holds the workload folders.

    Returns:
        ``[cwd, timeloopcmd]``: the ``baseline_evaluation`` layer folder to run
        in and the ``timeloop-model`` command line (with a trailing space).
    """
    design_root = base_dir / timeloop_dir
    layer_name = f'layer{layer_id}'
    cwd = f"{design_root / 'baseline_evaluation' / sub_dir / layer_name}"

    # mapping1.yaml is the top-1 schedule stored in the baseline_scheduling folder
    mapping_file = design_root / 'baseline_scheduling' / sub_dir / layer_name / 'mapping1.yaml'

    timeloopcmd = f"timeloop-model " \
                  f"{design_root / 'arch/baseline.yaml'} " \
                  f"{design_root / 'arch/components/*.yaml'} " \
                  f"{mapping_file} " \
                  f"{base_dir / top_dir / sub_dir / sub_dir}_layer{layer_id}.yaml "
    return [cwd, timeloopcmd]

# Build one (working directory, command) pair per unique layer.
cwd_list = []
cmd_list = []
for layer_id in unique_layers:
    workload_path = os.path.join(base_dir, top_dir, sub_dir, '{}_layer{}.yaml'.format(sub_dir, layer_id))
    with open(workload_path, 'r') as f:
        workload_info = yaml.safe_load(f)
    [cwd, cmd] = get_cmd_model(workload_info, layer_id, base_dir, timeloop_dir, sub_dir, top_dir)
    cwd_list.append(cwd)
    cmd_list.append(cmd)

# Make sure the per-workload output folder exists (parents included).
os.makedirs(os.path.join(base_dir, timeloop_dir, 'baseline_evaluation', sub_dir), exist_ok=True)

# Run every command inside its own layer folder. Passing cwd= to the child
# process leaves the kernel's working directory untouched, so an error in the
# middle of the loop can no longer strand the notebook in a layer folder.
for cwd, cmd in zip(cwd_list, cmd_list):
    print("Executing cmd: {}".format(cmd))
    os.makedirs(cwd, exist_ok=True)
    # shell=True is required: the command relies on the shell to expand '*.yaml'
    completed = subprocess.run(cmd, shell=True, cwd=cwd)
    if completed.returncode != 0:
        # best effort, as before: report the failure and continue with the next layer
        print("WARNING: command exited with status {} in {}".format(completed.returncode, cwd))

### Run Timeloop for the *effective* model (considering cryptographic engines)

In [6]:
import time

def get_cmd(workload_info, layer_id, base_dir, timeloop_dir, sub_dir, top_dir):
    """Build the working directory and shell command for an effective-model top-k search.

    Same contract as the baseline variant defined earlier in the notebook
    (which this definition replaces once the cell runs), except that the
    architecture file is ``arch/effective.yaml`` and the run directory lives
    under ``scheduling``.

    Args:
        workload_info: parsed workload yaml; a layer whose
            ``['problem']['instance']`` has no ``'M'`` entry is treated as
            depthwise and uses the ``constraints_dw`` folder.
        layer_id: layer index used in the ``layer{id}`` folder and file names.
        base_dir: path object supporting the ``/`` operator (e.g. pathlib.Path).
        timeloop_dir: architecture folder, relative to ``base_dir``.
        sub_dir: workload folder name, also the prefix of the workload files.
        top_dir: folder under ``base_dir`` that holds the workload folders.

    Returns:
        ``[cwd, timeloopcmd]``: the directory to run in and the
        ``timeloop-mapper-topk`` command line (with a trailing space).
    """
    design_root = base_dir / timeloop_dir
    cwd = str(design_root / 'scheduling' / sub_dir / f'layer{layer_id}')

    regular_conv = 'M' in workload_info['problem']['instance']
    # depthwise layers use their own constraint set
    constraint_pth = design_root / ('constraints/*.yaml' if regular_conv else 'constraints_dw/*.yaml')

    pieces = (
        "timeloop-mapper-topk",
        design_root / 'arch/effective.yaml',
        design_root / 'arch/components/*.yaml',
        design_root / 'mapper/mapper.yaml',
        constraint_pth,
        f"{base_dir / top_dir / sub_dir / sub_dir}_layer{layer_id}.yaml",
    )
    # every piece, including the last one, is followed by exactly one space
    timeloopcmd = "".join(f"{piece} " for piece in pieces)
    return [cwd, timeloopcmd]

# Build one (working directory, command) pair per unique layer.
cwd_list = []
cmd_list = []

for layer_id in unique_layers:
    workload_path = os.path.join(base_dir, top_dir, sub_dir, '{}_layer{}.yaml'.format(sub_dir, layer_id))
    with open(workload_path, 'r') as f:
        workload_info = yaml.safe_load(f)
    [cwd, cmd] = get_cmd(workload_info, layer_id, base_dir, timeloop_dir, sub_dir, top_dir)
    cwd_list.append(cwd)
    cmd_list.append(cmd)

# Make sure the per-workload output folder exists (parents included).
os.makedirs(os.path.join(base_dir, timeloop_dir, 'scheduling', sub_dir), exist_ok=True)

# Run every command inside its own layer folder. Passing cwd= to the child
# process leaves the kernel's working directory untouched, so an error in the
# middle of the loop can no longer strand the notebook in a layer folder.
start_time = time.time()
for cwd, cmd in zip(cwd_list, cmd_list):
    print("Executing cmd: {}".format(cmd))
    os.makedirs(cwd, exist_ok=True)
    # shell=True is required: the command relies on the shell to expand '*.yaml'
    completed = subprocess.run(cmd, shell=True, cwd=cwd)
    if completed.returncode != 0:
        # best effort, as before: report the failure and continue with the next layer
        print("WARNING: command exited with status {} in {}".format(completed.returncode, cwd))

# Time this cell
print("Execution time: {}s".format(time.time() - start_time))

Executing cmd: timeloop-mapper-topk /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/effective.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/*.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/mapper/mapper.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/constraints/*.yaml /home/workspace/scheduling/workloads/alexnet_batch1/alexnet_batch1_layer1.yaml 
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/effective.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/smartbuffer_SRAM.yaml /home/wo

[  3] Utilization = 0.33 | pJ/Compute =    8.194 | L5[WIO] Q11 C3 - L4[IO] M16 P55 Q5X - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] M4 
[  7] Utilization = 0.26 | pJ/Compute =   14.134 | L5[WIO] Q55 M4 C3 - L4[IO] M4 P55 M4X - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] Q1 
[  2] Utilization = 0.24 | pJ/Compute =   43.014 | L5[WIO] Q11 - L4[IO] M16 P55 C3 Q5X - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] M4 
[  6] Utilization = 0.05 | pJ/Compute =  168.515 | L5[WIO] Q55 M4 - L4[IO] M4 P55 C3 M4X - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] Q1 
[  6] Utilization = 0.05 | pJ/Compute =  173.150 | L5[WIO] Q55 M32 - L4[IO] P55 C3 - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] M2 
[  5] STATEMENT: 200 invalid mappings (200 fanout, 0 capacity) found since the last valid mapping, terminating search.
[  7] Utilization = 0.07 | pJ/Compute =   44.657 | L5[WIO] Q55 M32 C3 - L4[IO] P55 - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] M2 
[  1] STATEMENT: 200 invalid mappings (200 fanout, 0 capacity) 

[  7] Utilization = 0.72 | pJ/Compute =    7.759 | L5[WIO] Q5 M4 C3 - L4[IO] P55 Q11X - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] M16 
[  6] Utilization = 0.51 | pJ/Compute =   22.451 | L5[WIO] Q5 M4 - L4[IO] P55 C3 Q11X - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] M16 
[  2] Utilization = 0.51 | pJ/Compute =   23.654 | L5[WIO] Q5 M8 - L4[IO] P55 C3 Q11X - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] M8 
[  7] Utilization = 0.72 | pJ/Compute =    8.246 | L5[WIO] Q5 M2 C3 - L4[IO] M8 P55 Q11X - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] M4 
[  3] Utilization = 0.72 | pJ/Compute =    9.943 | L5[WIO] Q5 M8 C3 - L4[IO] P55 Q11X - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] M8 
[  6] Utilization = 0.51 | pJ/Compute =   26.059 | L5[WIO] Q5 M2 - L4[IO] M8 P55 C3 Q11X - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] M4 
[  2] Utilization = 0.51 | pJ/Compute =   23.654 | L5[WIO] Q5 M4 - L4[IO] M2 P55 C3 Q11X - L3[] Q1 S11Y - L2[I] Q1 - L1[W] R11 - L0[O] M8 
[  3] Utilization = 0.72 | pJ/Compute



Summary stats for best mapping found by mapper:
  Utilization = 0.72 | pJ/Compute =    6.459

Summary stats for best mapping found by mapper:
  Utilization = 0.72 | pJ/Compute =    6.892

Summary stats for best mapping found by mapper:
  Utilization = 0.72 | pJ/Compute =    6.910

Summary stats for best mapping found by mapper:
  Utilization = 0.72 | pJ/Compute =    7.344

Summary stats for best mapping found by mapper:
  Utilization = 0.72 | pJ/Compute =    7.759

Summary stats for best mapping found by mapper:
  Utilization = 0.72 | pJ/Compute =    7.813
Executing cmd: timeloop-mapper-topk /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/effective.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/*.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/mapper/mapper.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/constraints/*.yaml /home/workspace/scheduling/workloads/alexnet_batch1/alexnet_batch1_layer2.yaml 
  _______                _

[  4] Utilization = 0.06 | pJ/Compute =   13.906 | L5[WIO] Q27 M8 C64 - L4[IO] P27 - L3[] Q1 M2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] M12 
[  3] Utilization = 0.18 | pJ/Compute =   11.858 | L5[WIO] Q27 C32 - L4[IO] M16 P27 M3X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] M4 
[  2] Utilization = 0.09 | pJ/Compute =   11.834 | L5[WIO] Q27 C32 - L4[IO] M16 P27 M3X - L3[] Q1 S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] M4 
[  4] Utilization = 0.18 | pJ/Compute =   13.243 | L5[WIO] Q27 M2 C64 - L4[IO] M16 P27 M6X - L3[] Q1 S5Y - L2[I] Q1 - L1[W] R5 - L0[O] Q1 
[  6] Utilization = 0.42 | pJ/Compute =   28.628 | L5[WIO] Q3 M4 C2 - L4[IO] M12 P27 C16 Q9X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] M4 
[  5] Utilization = 0.27 | pJ/Compute =   28.627 | L5[WIO] Q3 M4 C2 - L4[IO] M12 P27 C16 Q9X - L3[] Q1 S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] M4 
[  7] Utilization = 0.05 | pJ/Compute =  165.985 | L5[WIO] Q27 M2 C4 - L4[IO] M16 P27 C4 M6X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] Q1 
[  2] Uti

[  4] Utilization = 0.14 | pJ/Compute =   59.496 | L5[WIO] Q9 C16 - L4[IO] M8 P27 C2 M4X Q3X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] M6 
[  0] Utilization = 0.14 | pJ/Compute =   60.811 | L5[WIO] Q9 M2 C32 - L4[IO] M8 P27 C2 M4X Q3X - L3[] Q1 S5Y - L2[I] Q1 - L1[W] R5 - L0[O] M3 
[  2] Utilization = 0.14 | pJ/Compute =   59.323 | L5[WIO] Q9 M2 C4 - L4[IO] M4 P27 C8 M4X Q3X - L3[] Q1 M2Y S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] M3 
[  0] Utilization = 0.36 | pJ/Compute =   10.514 | L5[WIO] Q9 M12 C64 - L4[IO] M4 P27 M4X Q3X - L3[] Q1 S5Y - L2[I] Q1 - L1[W] R5 - L0[O] Q1 
[  1] Utilization = 0.09 | pJ/Compute =   59.755 | L5[WIO] Q9 M2 C8 - L4[IO] M6 P27 C4 Q3X - L3[] Q1 S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] M16 
[  5] Utilization = 0.14 | pJ/Compute =   59.484 | L5[WIO] Q9 C16 - L4[IO] M8 P27 C4 M4X Q3X - L3[] Q1 S5Y - L2[I] Q1 - L1[W] R5 - L0[O] M6 
[  6] Utilization = 0.43 | pJ/Compute =   24.812 | L5[WIO] Q3 M6 - L4[IO] M2 P27 C16 Q9X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 C2 - L

[  5] Utilization = 0.27 | pJ/Compute =   29.988 | L5[WIO] Q3 M24 C8 - L4[IO] M2 P27 C4 Q9X - L3[] Q1 S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] M4 
[  2] Utilization = 0.27 | pJ/Compute =    6.545 | L5[WIO] Q3 C32 - L4[IO] M16 P27 Q9X - L3[] Q1 S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] M12 
[  0] Utilization = 0.42 | pJ/Compute =   27.024 | L5[WIO] Q3 M4 C2 - L4[IO] M8 P27 C8 Q9X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] M6 
[  6] Utilization = 0.40 | pJ/Compute =   29.993 | L5[WIO] Q3 M24 C8 - L4[IO] M2 P27 C4 Q9X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] M4 
[  7] Utilization = 0.43 | pJ/Compute =   28.660 | L5[WIO] Q3 M2 C8 - L4[IO] M24 P27 C2 Q9X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] M4 
[  5] Utilization = 0.38 | pJ/Compute =   34.091 | L5[WIO] Q3 M12 C4 - L4[IO] M8 P27 C16 Q9X - L3[] Q1 M2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] Q1 
[  4] Utilization = 0.71 | pJ/Compute =    8.885 | L5[WIO] Q9 M4 C64 - L4[IO] M3 P27 M4X Q3X - L3[] Q1 M2Y S5Y - L2[I] Q1 - L1[W] R5 - L

[  6] Utilization = 0.71 | pJ/Compute =    7.647 | L5[WIO] Q9 C32 - L4[IO] M6 P27 M4X Q3X - L3[] Q1 M2Y S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] M4 
[  3] Utilization = 0.43 | pJ/Compute =   24.826 | L5[WIO] Q3 M2 - L4[IO] M6 P27 C64 Q9X - L3[] Q1 M2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] M8 
[  5] Utilization = 0.71 | pJ/Compute =    7.743 | L5[WIO] Q9 M6 C16 - L4[IO] P27 M4X Q3X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] M8 
[  2] Utilization = 0.43 | pJ/Compute =   30.024 | L5[WIO] Q3 M64 - L4[IO] P27 C16 Q9X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] M3 
[  5] Utilization = 0.54 | pJ/Compute =   11.654 | L5[WIO] Q9 M32 C16 - L4[IO] M2 P27 M3X Q3X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] Q1 
[  4] Utilization = 0.54 | pJ/Compute =    8.999 | L5[WIO] Q3 M16 C64 - L4[IO] M3 P27 Q9X - L3[] Q1 M2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] M2 
[  7] Utilization = 0.71 | pJ/Compute =   13.158 | L5[WIO] Q27 M8 C32 - L4[IO] P27 M12X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] 

[  3] Utilization = 0.54 | pJ/Compute =    6.780 | L5[WIO] Q3 M6 C32 - L4[IO] M2 P27 Q9X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] M16 
[  1] Utilization = 0.71 | pJ/Compute =    7.400 | L5[WIO] Q9 M2 C16 - L4[IO] M4 P27 M4X Q3X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 C2 - L0[O] M6 
[  7] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 0.54 | pJ/Compute =    6.538 | L5[WIO] Q3 M2 C32 - L4[IO] M6 P27 Q9X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] M16 
[  3] Utilization = 0.71 | pJ/Compute =   13.266 | L5[WIO] Q9 M48 C32 - L4[IO] P27 M4X Q3X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] Q1 
[  3] Utilization = 0.71 | pJ/Compute =   13.367 | L5[WIO] Q27 M8 C32 - L4[IO] M2 P27 M12X - L3[] Q1 C2Y S5Y - L2[I] Q1 - L1[W] R5 - L0[O] Q1 
[  5] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.
[  3] Uti



Summary stats for best mapping found by mapper:
  Utilization = 0.71 | pJ/Compute =    7.254

Summary stats for best mapping found by mapper:
  Utilization = 0.71 | pJ/Compute =    7.302

Summary stats for best mapping found by mapper:
  Utilization = 0.71 | pJ/Compute =    7.304

Summary stats for best mapping found by mapper:
  Utilization = 0.71 | pJ/Compute =    7.397

Summary stats for best mapping found by mapper:
  Utilization = 0.71 | pJ/Compute =    7.400

Summary stats for best mapping found by mapper:
  Utilization = 0.71 | pJ/Compute =    7.500
Executing cmd: timeloop-mapper-topk /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/effective.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/*.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/mapper/mapper.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/constraints/*.yaml /home/workspace/scheduling/workloads/alexnet_batch1/alexnet_batch1_layer3.yaml 
  _______                _

[  2] Utilization = 0.05 | pJ/Compute =  181.389 | L5[WIO] Q13 - L4[IO] M384 P13 C48 - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  1] Utilization = 0.04 | pJ/Compute =  181.389 | L5[WIO] Q13 - L4[IO] M384 P13 C48 - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] Q1 
[  4] Utilization = 0.04 | pJ/Compute =  163.453 | L5[WIO] Q13 M8 - L4[IO] M3 P13 C96 - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  0] Utilization = 0.02 | pJ/Compute =  181.389 | L5[WIO] Q13 - L4[IO] M384 P13 C48 - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] Q1 
[  4] Utilization = 0.04 | pJ/Compute =   23.589 | L5[WIO] Q13 M24 C96 - L4[IO] P13 M2X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 
[  7] Utilization = 0.05 | pJ/Compute =  163.082 | L5[WIO] Q13 M4 - L4[IO] M4 P13 C24 M4X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M6 
[  5] Utilization = 0.05 | pJ/Compute =  175.540 | L5[WIO] Q13 M48 C3 - L4[IO] M2 P13 C8 M4X - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] Q1 
[  3] Utilizatio

[  0] Utilization = 0.05 | pJ/Compute =  164.593 | L5[WIO] Q13 M4 C6 - L4[IO] M8 P13 C32 M2X - L3[] Q1 M3Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M2 
[  5] Utilization = 0.05 | pJ/Compute =  172.542 | L5[WIO] Q13 M4 C2 - L4[IO] M48 P13 C12 M2X - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] Q1 
[  2] Utilization = 0.05 | pJ/Compute =  164.094 | L5[WIO] Q13 C2 - L4[IO] M32 P13 C24 M12X - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] Q1 
[  7] Utilization = 0.05 | pJ/Compute =  169.944 | L5[WIO] Q13 M16 C4 - L4[IO] M6 P13 C4 M4X - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] Q1 
[  4] Utilization = 0.61 | pJ/Compute =   19.151 | L5[WIO] M3 C6 - L4[IO] M4 P13 C16 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 
[  2] Utilization = 0.62 | pJ/Compute =   18.887 | L5[WIO] M12 - L4[IO] P13 C32 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M16 
[  3] Utilization = 0.05 | pJ/Compute =  163.407 | L5[WIO] Q13 M2 C8 - L4[IO] M8 P13 C12 M3X - L3[] Q1 S3Y - L2[I] Q1 - L

[  1] Utilization = 0.93 | pJ/Compute =   10.670 | L5[WIO] M32 C16 - L4[IO] M4 P13 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M3 
[  1] Utilization = 0.62 | pJ/Compute =   23.149 | L5[WIO] M12 - L4[IO] M8 P13 C48 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M2 
[  5] Utilization = 0.05 | pJ/Compute =  163.656 | L5[WIO] Q13 C8 - L4[IO] M24 P13 C6 M2X - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M4 
[  4] Utilization = 0.46 | pJ/Compute =   21.197 | L5[WIO] M24 C8 - L4[IO] P13 C12 Q13X - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  7] Utilization = 0.36 | pJ/Compute =   29.243 | L5[WIO] M3 C48 - L4[IO] M64 P13 C2 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  5] Utilization = 0.58 | pJ/Compute =   19.885 | L5[WIO] M12 C12 - L4[IO] P13 C2 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M16 
[  0] Utilization = 0.43 | pJ/Compute =   17.997 | L5[WIO] Q13 M2 C32 - L4[IO] M4 P13 M8X - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3

[  2] Utilization = 0.60 | pJ/Compute =   20.417 | L5[WIO] M8 C2 - L4[IO] M4 P13 C48 Q13X - L3[] Q1 M3Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M4 
[  5] Utilization = 0.93 | pJ/Compute =    8.083 | L5[WIO] M12 C48 - L4[IO] M2 P13 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  0] Utilization = 0.62 | pJ/Compute =   21.565 | L5[WIO] M64 - L4[IO] P13 C48 Q13X - L3[] Q1 M3Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M2 
[  1] Utilization = 0.93 | pJ/Compute =   16.131 | L5[WIO] M96 C12 - L4[IO] M2 P13 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M2 
[  2] Utilization = 0.61 | pJ/Compute =   20.910 | L5[WIO] M4 C2 - L4[IO] M12 P13 C16 Q13X - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 
[  6] Utilization = 0.57 | pJ/Compute =   23.101 | L5[WIO] M16 C24 - L4[IO] M4 P13 C8 Q13X - L3[] Q1 M3Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M2 
[  3] Utilization = 0.61 | pJ/Compute =   19.849 | L5[WIO] M4 C8 - L4[IO] M6 P13 C4 Q13X - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M1

[  3] Utilization = 0.62 | pJ/Compute =   20.773 | L5[WIO] M24 - L4[IO] M2 P13 C96 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M2 
[  7] Utilization = 0.93 | pJ/Compute =    7.293 | L5[WIO] M4 C32 - L4[IO] M8 P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M6 
[  4] Utilization = 0.62 | pJ/Compute =   18.902 | L5[WIO] M3 - L4[IO] M4 P13 C64 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M8 
[  7] Utilization = 0.93 | pJ/Compute =    8.354 | L5[WIO] M4 C24 - L4[IO] M24 P13 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M4 
[  4] Utilization = 0.93 | pJ/Compute =    9.559 | L5[WIO] M16 C96 - L4[IO] M2 P13 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M3 
[  6] Utilization = 0.93 | pJ/Compute =    6.952 | L5[WIO] M4 C24 - L4[IO] M6 P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M8 
[  5] Utilization = 0.62 | pJ/Compute =   19.481 | L5[WIO] M6 - L4[IO] M4 P13 C64 Q13X - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M1



Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    6.831

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    6.952

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    7.120

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    7.294

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    7.293

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    7.407
Executing cmd: timeloop-mapper-topk /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/effective.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/*.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/mapper/mapper.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/constraints/*.yaml /home/workspace/scheduling/workloads/alexnet_batch1/alexnet_batch1_layer4.yaml 
  _______                _

[  2] Utilization = 0.05 | pJ/Compute =  162.408 | L5[WIO] Q13 C3 - L4[IO] P13 C64 M8X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 
[  0] Utilization = 0.05 | pJ/Compute =  172.023 | L5[WIO] Q13 M2 - L4[IO] M64 P13 C192 - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  6] Utilization = 0.04 | pJ/Compute =  172.412 | L5[WIO] Q13 M2 C12 - L4[IO] M64 P13 C32 - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  4] Utilization = 0.04 | pJ/Compute =  168.949 | L5[WIO] Q13 M8 C2 - L4[IO] M8 P13 C32 - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M4 
[  1] Utilization = 0.05 | pJ/Compute =  162.946 | L5[WIO] Q13 C2 - L4[IO] M8 P13 C96 M4X - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M4 
[  5] Utilization = 0.05 | pJ/Compute =  168.949 | L5[WIO] Q13 M8 C2 - L4[IO] M8 P13 C32 - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M4 
[  7] Utilization = 0.04 | pJ/Compute =  172.412 | L5[WIO] Q13 M2 C12 - L4[IO] M64 P13 C16 - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0

[  0] Utilization = 0.19 | pJ/Compute =   40.723 | L5[WIO] M32 C8 - L4[IO] M8 P13 C24 Q13X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] Q1 
[  6] Utilization = 0.05 | pJ/Compute =  164.874 | L5[WIO] Q13 M8 C12 - L4[IO] P13 C32 - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M8 
[  1] Utilization = 0.19 | pJ/Compute =   39.106 | L5[WIO] M16 C8 - L4[IO] M16 P13 C3 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] Q1 
[  7] Utilization = 0.05 | pJ/Compute =  162.619 | L5[WIO] Q13 C16 - L4[IO] M2 P13 C12 M8X - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  2] Utilization = 0.38 | pJ/Compute =   34.554 | L5[WIO] M64 C16 - L4[IO] M2 P13 C6 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] Q1 
[  4] Utilization = 0.05 | pJ/Compute =  162.615 | L5[WIO] Q13 C16 - L4[IO] M2 P13 C8 M8X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M16 
[  1] Utilization = 0.23 | pJ/Compute =    8.821 | L5[WIO] C192 - L4[IO] M32 P13 Q13X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 


[  5] Utilization = 0.55 | pJ/Compute =   24.764 | L5[WIO] M16 C16 - L4[IO] M4 P13 C2 Q13X - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M4 
[  6] Utilization = 0.36 | pJ/Compute =   30.499 | L5[WIO] M16 C96 - L4[IO] M8 P13 C2 Q13X - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M2 
[  1] Utilization = 0.50 | pJ/Compute =   26.420 | L5[WIO] M32 C4 - L4[IO] M2 P13 C32 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] Q1 
[  1] Utilization = 0.93 | pJ/Compute =    9.814 | L5[WIO] C192 - L4[IO] M64 P13 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] Q1 
[  4] Utilization = 0.87 | pJ/Compute =   12.088 | L5[WIO] M16 C96 - L4[IO] M8 P13 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M2 
[  6] Utilization = 0.46 | pJ/Compute =   21.544 | L5[WIO] M8 C16 - L4[IO] M4 P13 C4 Q13X - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M8 
[  3] Utilization = 0.61 | pJ/Compute =   25.414 | L5[WIO] C192 - L4[IO] M64 P13 C2 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1

[  5] Utilization = 0.61 | pJ/Compute =   19.141 | L5[WIO] C24 - L4[IO] M8 P13 C8 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 
[  1] Utilization = 0.61 | pJ/Compute =   19.003 | L5[WIO] M2 C3 - L4[IO] M4 P13 C32 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M8 
[  6] Utilization = 0.70 | pJ/Compute =    8.103 | L5[WIO] M8 C64 - L4[IO] M4 P13 Q13X - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 
[  4] Utilization = 0.93 | pJ/Compute =    7.270 | L5[WIO] M8 C32 - L4[IO] M2 P13 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M16 
[  5] Utilization = 0.93 | pJ/Compute =    8.443 | L5[WIO] M16 C64 - L4[IO] P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M8 
[  3] Utilization = 0.70 | pJ/Compute =    7.263 | L5[WIO] M8 C32 - L4[IO] M2 P13 Q13X - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M16 
[  3] Utilization = 0.93 | pJ/Compute =    9.282 | L5[WIO] M16 C96 - L4[IO] M2 P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O

[  3] Utilization = 0.93 | pJ/Compute =    8.066 | L5[WIO] M4 C96 - L4[IO] M8 P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M4 
[  2] Utilization = 0.93 | pJ/Compute =    8.473 | L5[WIO] M8 C96 - L4[IO] M4 P13 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M2 
[  0] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.
[  2] Utilization = 0.93 | pJ/Compute =    7.349 | L5[WIO] M4 C96 - L4[IO] M2 P13 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M8 
[  5] Utilization = 0.93 | pJ/Compute =    7.392 | L5[WIO] C64 - L4[IO] M32 P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M4 
[  7] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.
[  2] Utilization = 0.93 | pJ/Compute =   12.951 | L5[WIO] M32 C192 - L4[IO] M2 P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M2 
[  2] Utilization = 0.93 | pJ/Compute =    8.506 | L5[WIO] M2 C192 - L4[IO] M8 P13 Q13X - L3[] Q1 M



Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    6.865

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    6.925

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    6.993

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    7.025

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    7.026

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    7.206
Executing cmd: timeloop-mapper-topk /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/effective.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/*.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/mapper/mapper.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/constraints/*.yaml /home/workspace/scheduling/workloads/alexnet_batch1/alexnet_batch1_layer5.yaml 
  _______                _

[  5] Utilization = 0.04 | pJ/Compute =  172.051 | L5[WIO] Q13 M2 - L4[IO] M64 P13 C256 - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  6] Utilization = 0.05 | pJ/Compute =  162.431 | L5[WIO] Q13 C2 - L4[IO] P13 C32 M8X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M8 
[  7] Utilization = 0.05 | pJ/Compute =  164.922 | L5[WIO] Q13 M8 - L4[IO] M4 P13 C128 M2X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] Q1 
[  1] Utilization = 0.07 | pJ/Compute =   21.531 | L5[WIO] Q13 M2 C32 - L4[IO] M64 P13 - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] Q1 
[  4] Utilization = 0.04 | pJ/Compute =  172.843 | L5[WIO] Q13 M2 C32 - L4[IO] M64 P13 C2 - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] Q1 
[  3] Utilization = 0.23 | pJ/Compute =   21.045 | L5[WIO] M4 C4 - L4[IO] M8 P13 C32 Q13X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 
[  0] Utilization = 0.05 | pJ/Compute =  163.280 | L5[WIO] Q13 M2 C32 - L4[IO] P13 C8 M4X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M8

[  4] Utilization = 0.46 | pJ/Compute =   23.375 | L5[WIO] M2 C8 - L4[IO] M32 P13 C32 Q13X - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M2 
[  3] Utilization = 0.38 | pJ/Compute =   34.664 | L5[WIO] M64 C16 - L4[IO] M2 P13 C8 Q13X - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] Q1 
[  6] Utilization = 0.19 | pJ/Compute =   40.793 | L5[WIO] M32 C8 - L4[IO] M8 P13 C8 Q13X - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] Q1 
[  2] Utilization = 0.61 | pJ/Compute =   23.378 | L5[WIO] M2 C8 - L4[IO] M32 P13 C8 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M2 
[  1] Utilization = 0.19 | pJ/Compute =   37.753 | L5[WIO] M2 C8 - L4[IO] M128 P13 C16 Q13X - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  5] Utilization = 0.58 | pJ/Compute =   20.496 | L5[WIO] M8 C16 - L4[IO] M2 P13 C8 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M4 
[  3] Utilization = 0.19 | pJ/Compute =   44.442 | L5[WIO] M64 C32 - L4[IO] M4 P13 C2 Q13X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C

[  0] Utilization = 0.46 | pJ/Compute =   20.808 | L5[WIO] M2 C2 - L4[IO] M16 P13 C32 Q13X - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 
[  2] Utilization = 0.23 | pJ/Compute =   27.907 | L5[WIO] M32 - L4[IO] M4 P13 C128 Q13X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M2 
[  4] Utilization = 0.58 | pJ/Compute =   19.724 | L5[WIO] M8 C8 - L4[IO] P13 C8 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M16 
[  6] Utilization = 0.46 | pJ/Compute =   19.721 | L5[WIO] M8 C8 - L4[IO] P13 C32 Q13X - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  3] Utilization = 0.46 | pJ/Compute =   19.721 | L5[WIO] M8 C8 - L4[IO] P13 C8 Q13X - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M16 
[  4] Utilization = 0.93 | pJ/Compute =    8.997 | L5[WIO] M2 C128 - L4[IO] M16 P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  2] Utilization = 0.61 | pJ/Compute =   23.273 | L5[WIO] M2 C2 - L4[IO] M32 P13 C16 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L

[  6] Utilization = 0.60 | pJ/Compute =   21.099 | L5[WIO] M4 C2 - L4[IO] M8 P13 C128 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M2 
[  3] Utilization = 0.61 | pJ/Compute =   23.096 | L5[WIO] M8 - L4[IO] M8 P13 C64 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  4] Utilization = 0.93 | pJ/Compute =    8.061 | L5[WIO] M8 C64 - L4[IO] M2 P13 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  3] Utilization = 0.58 | pJ/Compute =   24.510 | L5[WIO] M8 C32 - L4[IO] M8 P13 C4 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] Q1 
[  0] Utilization = 0.55 | pJ/Compute =   22.777 | L5[WIO] M16 C32 - L4[IO] M2 P13 C4 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  2] Utilization = 0.93 | pJ/Compute =    7.947 | L5[WIO] M8 C32 - L4[IO] M4 P13 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 
[  3] Utilization = 0.61 | pJ/Compute =   19.153 | L5[WIO] C16 - L4[IO] M8 P13 C8 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 


[  5] Utilization = 0.61 | pJ/Compute =   20.980 | L5[WIO] C16 - L4[IO] M32 P13 C2 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M4 
[  4] Utilization = 0.93 | pJ/Compute =    8.896 | L5[WIO] C128 - L4[IO] M32 P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  5] Utilization = 0.93 | pJ/Compute =    7.316 | L5[WIO] C64 - L4[IO] M16 P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 
[  0] Utilization = 0.61 | pJ/Compute =   19.554 | L5[WIO] C4 - L4[IO] M16 P13 C32 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M8 
[  0] Utilization = 0.61 | pJ/Compute =   19.541 | L5[WIO] C4 - L4[IO] M16 P13 C8 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M16 
[  3] Utilization = 0.93 | pJ/Compute =    7.886 | L5[WIO] M2 C64 - L4[IO] M16 P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M4 
[  3] Utilization = 0.93 | pJ/Compute =   10.397 | L5[WIO] M16 C128 - L4[IO] M2 P13 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 -



Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    6.588

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    7.051

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    7.079

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    7.168

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    7.229

Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    7.236
Executing cmd: timeloop-mapper-topk /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/effective.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/*.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/mapper/mapper.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/constraints/*.yaml /home/workspace/scheduling/workloads/alexnet_batch1/alexnet_batch1_layer6.yaml 
  _______                _

[  5] Utilization = 0.02 | pJ/Compute =  168.678 | L5[WIO] M128 C4 - L4[IO] M2 C64 - L3[] Q1 C3Y - L2[I] Q1 - L1[W] C12 - L0[O] M16 
[  6] Utilization = 0.02 | pJ/Compute =  168.678 | L5[WIO] M128 C4 - L4[IO] M2 C64 - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C9 - L0[O] M16 
[  7] Utilization = 0.04 | pJ/Compute =  168.678 | L5[WIO] M128 C4 - L4[IO] M2 C64 - L3[] Q1 C6Y - L2[I] Q1 - L1[W] C6 - L0[O] M16 
[  4] Utilization = 0.05 | pJ/Compute =  165.142 | L5[WIO] M8 C48 - L4[IO] M64 C16 M8X - L3[] Q1 C6Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  2] Utilization = 0.05 | pJ/Compute =  168.679 | L5[WIO] M128 C4 - L4[IO] M2 C192 - L3[] Q1 C12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  3] Utilization = 0.05 | pJ/Compute =  165.140 | L5[WIO] M8 C48 - L4[IO] M64 C16 M8X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C3 - L0[O] Q1 
[  3] Utilization = 0.05 | pJ/Compute =  173.389 | L5[WIO] M32 C2304 - L4[IO] M2 M4X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  5] Utilization = 0.05 | pJ/Compute =  162.801 | L5[WIO] M16

[  7] Utilization = 0.05 | pJ/Compute =  163.013 | L5[WIO] M4 C192 - L4[IO] M128 M4X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C6 - L0[O] M2 
[  5] Utilization = 0.05 | pJ/Compute =  166.837 | L5[WIO] M512 - L4[IO] M2 C96 - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C12 - L0[O] M4 
[  6] Utilization = 0.05 | pJ/Compute =  163.085 | L5[WIO] M2 C96 - L4[IO] M64 C24 - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  3] Utilization = 0.05 | pJ/Compute =  163.090 | L5[WIO] M2 C96 - L4[IO] M64 C16 - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  0] Utilization = 0.05 | pJ/Compute =  162.322 | L5[WIO] C32 - L4[IO] M32 C18 M8X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  4] Utilization = 0.05 | pJ/Compute =  162.520 | L5[WIO] M8 C8 - L4[IO] M4 C12 M8X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  5] Utilization = 0.05 | pJ/Compute =  162.663 | L5[WIO] M8 C32 - L4[IO] M4 C48 M4X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C6 - L0[O] M4 
[  5] Utilization = 0.05 | pJ/Compute =  163.762 | L5[WIO] M2 C3

[  0] Utilization = 0.05 | pJ/Compute =  162.759 | L5[WIO] C6 - L4[IO] M128 C1536 M8X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  1] Utilization = 0.05 | pJ/Compute =  162.504 | L5[WIO] C16 - L4[IO] M64 C144 M4X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C4 - L0[O] M2 
[  1] Utilization = 0.05 | pJ/Compute =  162.382 | L5[WIO] M16 - L4[IO] M4 C4608 M8X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  2] Utilization = 0.05 | pJ/Compute =  162.386 | L5[WIO] C2 - L4[IO] M64 C256 M8X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C9 - L0[O] M4 
[  4] Utilization = 0.05 | pJ/Compute =  162.386 | L5[WIO] C2 - L4[IO] M64 C256 M8X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] C3 - L0[O] M4 
[  3] Utilization = 0.05 | pJ/Compute =  162.386 | L5[WIO] C2 - L4[IO] M64 C256 M8X - L3[] Q1 M2Y C3Y - L2[I] Q1 - L1[W] C6 - L0[O] M4 
[  6] Utilization = 0.05 | pJ/Compute =  162.338 | L5[WIO] M8 - L4[IO] M8 C1536 - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  4] Utilization = 0.05 | pJ/Compute =  162.115 | L5[W

[  5] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.
[  0] Utilization = 0.05 | pJ/Compute =  162.189 | L5[WIO] M32 - L4[IO] C1024 M4X - L3[] Q1 M2Y C3Y - L2[I] Q1 - L1[W] C3 - L0[O] M16 
[  2] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 0.05 | pJ/Compute =  162.189 | L5[WIO] Q1 - L4[IO] M32 C1536 M8X - L3[] Q1 C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  1] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.
[  6] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.123

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.086

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.115

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.149

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.086

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.115
Executing cmd: timeloop-mapper-topk /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/effective.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/*.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/mapper/mapper.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/constraints/*.yaml /home/workspace/scheduling/workloads/alexnet_batch1/alexnet_batch1_layer7.yaml 
  _______                _

[  5] Utilization = 0.01 | pJ/Compute =  163.255 | L5[WIO] M128 - L4[IO] M2 C256 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  6] Utilization = 0.02 | pJ/Compute =  163.255 | L5[WIO] M128 - L4[IO] M2 C256 - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  7] Utilization = 0.05 | pJ/Compute =  163.255 | L5[WIO] M128 - L4[IO] M2 C256 - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  7] Utilization = 0.05 | pJ/Compute =  162.141 | L5[WIO] M4 - L4[IO] M4 C2048 M4X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  3] Utilization = 0.02 | pJ/Compute =  164.006 | L5[WIO] M4 C128 - L4[IO] M32 C32 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  2] Utilization = 0.05 | pJ/Compute =  162.525 | L5[WIO] M8 C2 - L4[IO] M4 C256 M4X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  4] Utilization = 0.05 | pJ/Compute =  167.216 | L5[WIO] M32 C256 - L4[IO] M8 C4 M4X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  6] Utilization = 0.05 | pJ/Compute =  162.141 | L5[WIO] M4 -

[  1] Utilization = 0.05 | pJ/Compute =  162.883 | L5[WIO] M32 - L4[IO] M4 C2048 M8X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  2] Utilization = 0.05 | pJ/Compute =  162.357 | L5[WIO] C16 - L4[IO] M32 C256 M8X - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  5] Utilization = 0.05 | pJ/Compute =  162.814 | L5[WIO] M8 C32 - L4[IO] M4 C8 M4X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  7] Utilization = 0.05 | pJ/Compute =  162.357 | L5[WIO] C16 - L4[IO] M32 C128 M8X - L3[] Q1 - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  6] Utilization = 0.05 | pJ/Compute =  162.603 | L5[WIO] M4 C8 - L4[IO] M16 C256 M8X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  0] Utilization = 0.05 | pJ/Compute =  163.489 | L5[WIO] M4 C64 - L4[IO] M32 C16 M2X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C4 - L0[O] M2 
[  4] Utilization = 0.05 | pJ/Compute =  162.891 | L5[WIO] M2 C16 - L4[IO] M64 C128 M4X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  6] Utilization = 0.05 | pJ/Compute =  163.003 | L5[W

[  7] Utilization = 0.05 | pJ/Compute =  162.681 | L5[WIO] M4 C16 - L4[IO] M16 C8 M8X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C8 - L0[O] M4 
[  0] Utilization = 0.05 | pJ/Compute =  163.471 | L5[WIO] M4 C128 - L4[IO] M4 C8 M2X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  7] Utilization = 0.05 | pJ/Compute =  162.705 | L5[WIO] M16 - L4[IO] M8 C2048 M8X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  5] Utilization = 0.05 | pJ/Compute =  162.216 | L5[WIO] M2 - L4[IO] M16 C1024 M8X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  1] Utilization = 0.05 | pJ/Compute =  162.764 | L5[WIO] M128 - L4[IO] C1024 M8X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  4] Utilization = 0.05 | pJ/Compute =  162.541 | L5[WIO] M4 C32 - L4[IO] M2 C16 M8X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  7] Utilization = 0.05 | pJ/Compute =  162.705 | L5[WIO] M128 - L4[IO] C2048 M4X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  0] Utilization = 0.05 | pJ/Compute =  162.404 | L5[WI



Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.141

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.141

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.141

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.357

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.240

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.186
Executing cmd: timeloop-mapper-topk /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/effective.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/*.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/mapper/mapper.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/constraints/*.yaml /home/workspace/scheduling/workloads/alexnet_batch1/alexnet_batch1_layer8.yaml 
  _______                _

[  2] Utilization = 0.04 | pJ/Compute =  186.540 | L5[WIO] M125 C8 - L4[IO] C256 M2X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  6] Utilization = 0.05 | pJ/Compute =  163.475 | L5[WIO] M5 C8 - L4[IO] M5 C128 M8X - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] M5 
[  4] Utilization = 0.04 | pJ/Compute =  186.540 | L5[WIO] M125 C8 - L4[IO] C32 M2X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  7] Utilization = 0.01 | pJ/Compute =  166.889 | L5[WIO] M10 C128 - L4[IO] M10 C16 - L3[] Q1 - L2[I] Q1 - L1[W] C2 - L0[O] M10 
[  7] Utilization = 0.05 | pJ/Compute =  164.204 | L5[WIO] M50 - L4[IO] M2 C256 - L3[] Q1 M5Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M2 
[  1] Utilization = 0.04 | pJ/Compute =  186.540 | L5[WIO] M125 C8 - L4[IO] C256 M2X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  3] Utilization = 0.05 | pJ/Compute =  163.476 | L5[WIO] M5 C8 - L4[IO] M5 C32 M8X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M5 
[  5] Utilization = 0.05 | pJ/Compute =  164.324 | L5[WIO] M4 C128 - 

[  2] Utilization = 0.05 | pJ/Compute =  168.366 | L5[WIO] M20 C256 - L4[IO] M5 M2X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M5 
[  3] Utilization = 0.05 | pJ/Compute =  168.428 | L5[WIO] M20 C256 - L4[IO] M5 M2X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C2 - L0[O] M5 
[  7] Utilization = 0.05 | pJ/Compute =  164.322 | L5[WIO] Q1 - L4[IO] M100 C4096 M5X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  2] Utilization = 0.05 | pJ/Compute =  163.970 | L5[WIO] M2 C64 - L4[IO] M25 C32 M5X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  3] Utilization = 0.03 | pJ/Compute =  163.966 | L5[WIO] M2 C64 - L4[IO] M25 C64 M5X - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  4] Utilization = 0.05 | pJ/Compute =  162.466 | L5[WIO] C8 - L4[IO] M10 C32 M5X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M10 
[  5] Utilization = 0.03 | pJ/Compute =  172.824 | L5[WIO] M50 C4 - L4[IO] M2 C512 M5X - L3[] Q1 - L2[I] Q1 - L1[W] C2 - L0[O] M2 
[  6] Utilization = 0.05 | pJ/Compute =  172.824 | L5[WIO] M50 C4 - L

[  3] Utilization = 0.05 | pJ/Compute =  165.513 | L5[WIO] M5 C64 - L4[IO] M20 C2 M5X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C8 - L0[O] M2 
[  5] Utilization = 0.05 | pJ/Compute =  164.052 | L5[WIO] M2 C64 - L4[IO] M25 C4 M10X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C2 - L0[O] M2 
[  4] Utilization = 0.05 | pJ/Compute =  164.036 | L5[WIO] M2 C64 - L4[IO] M25 C4 M10X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M2 
[  3] Utilization = 0.05 | pJ/Compute =  164.029 | L5[WIO] M2 C64 - L4[IO] M25 C4 M10X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M2 
[  7] Utilization = 0.05 | pJ/Compute =  164.052 | L5[WIO] M2 C64 - L4[IO] M25 C2 M10X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C4 - L0[O] M2 
[  3] Utilization = 0.05 | pJ/Compute =  164.665 | L5[WIO] C256 - L4[IO] M25 M8X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M5 
[  2] Utilization = 0.05 | pJ/Compute =  162.393 | L5[WIO] C8 - L4[IO] M5 C512 M8X - L3[] Q1 M5Y - L2[I] Q1 - L1[W] Q1 - L0[O] M5 
[  0] Utilization = 0.05 | pJ/Compute =  162.852 | L5[WIO] C16 - L

[  3] Utilization = 0.05 | pJ/Compute =  162.405 | L5[WIO] C8 - L4[IO] M5 C128 M10X - L3[] Q1 M5Y - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  7] Utilization = 0.05 | pJ/Compute =  162.406 | L5[WIO] C8 - L4[IO] M5 C64 M10X - L3[] Q1 M5Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  7] Utilization = 0.05 | pJ/Compute =  162.332 | L5[WIO] C4 - L4[IO] M5 C256 M10X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M10 
[  1] Utilization = 0.05 | pJ/Compute =  162.303 | L5[WIO] M5 - L4[IO] C128 M10X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C8 - L0[O] M10 
[  7] Utilization = 0.05 | pJ/Compute =  162.384 | L5[WIO] M2 - L4[IO] M4 C1024 M5X - L3[] Q1 M5Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M5 
[  4] Utilization = 0.05 | pJ/Compute =  162.313 | L5[WIO] C2 - L4[IO] M5 C256 M10X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C8 - L0[O] M10 
[  3] Utilization = 0.05 | pJ/Compute =  162.432 | L5[WIO] C2 - L4[IO] M10 C1024 M4X - L3[] Q1 M5Y - L2[I] Q1 - L1[W] C2 - L0[O] M5 
[  4] Utilization = 0.05 | pJ/Compute =  162.303 | L5[WIO] Q



Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.376

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.399

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.406

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.332

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.384

Summary stats for best mapping found by mapper:
  Utilization = 0.05 | pJ/Compute =  162.336
Execution time: 128.90579557418823s


In [7]:
def convert_to_mapping(base_dir, timeloop_dir, top_dir, sub_dir, layer_idx, topk_idx):
    """Turn one top-k mapper result (XML) into a ``mapping{topk_idx}.yaml`` file.

    The XML is read from
    ``<base_dir>/<timeloop_dir>/scheduling/<sub_dir>/layer{layer_idx}/`` and the
    resulting YAML (a single top-level ``mapping`` key) is written into the same
    directory.

    The layer's workload YAML (``<base_dir>/<top_dir>/<sub_dir>/<sub_dir>_layer{layer_idx}.yaml``)
    decides the ``dw`` flag: it is True when the problem instance has no ``'M'``
    entry. The flag selects the ``constraints_dw`` directory instead of
    ``constraints`` and is forwarded to ``xml2mapping``.

    The architecture-constraint file name is picked from the module-level
    ``configuration_dict['TEMPLATE_DESIGN']``.
    """
    # Both the mapper XML (input) and the mapping YAML (output) live here.
    layer_dir = os.path.join(base_dir, timeloop_dir, 'scheduling', sub_dir, "layer{}".format(layer_idx))
    xml_file = os.path.join(layer_dir, "timeloop-mapper-topk{}.map+stats.xml".format(topk_idx))
    workload_file = os.path.join(base_dir, top_dir, sub_dir, "{}_layer{}.yaml".format(sub_dir, layer_idx))

    with open(workload_file, 'r') as workload_fp:
        workload_info = yaml.safe_load(workload_fp)
    dw = 'M' not in workload_info['problem']['instance']

    # Anything that is neither eyeriss-like nor output-stationary falls back to
    # the weight-stationary constraint file.
    template = configuration_dict['TEMPLATE_DESIGN']
    if template in ('eyeriss_like', 'eyeriss_like_hbm2'):
        constraint_name = 'eyeriss_like_arch_constraints.yaml'
    elif template == 'output_stationary':
        constraint_name = 'simple_output_stationary_arch_constraints.yaml'
    else:
        constraint_name = 'simple_weight_stationary_arch_constraints.yaml'
    constraint_dir = 'constraints_dw' if dw else 'constraints'
    arch_constraint_file = os.path.join(base_dir, timeloop_dir, constraint_dir, constraint_name)

    mapping = xml2mapping(xml_file, workload_file, arch_constraint_file, dw)

    mapping_file = os.path.join(layer_dir, "mapping{}.yaml".format(topk_idx))
    with open(mapping_file, 'w') as mapping_fp:
        yaml.dump({'mapping': mapping}, mapping_fp)
        
# Convert every top-k mapper result of every unique layer into a mapping YAML.
# k is 1-based so it matches the "topk{k}" suffix of the mapper's XML file names.
for layer_idx in unique_layers:
    for k in range(1, topk + 1):
        convert_to_mapping(base_dir, timeloop_dir, top_dir, sub_dir, layer_idx, k)

In [8]:
def get_cmd_model(workload_info, layer_id, base_dir, timeloop_dir, sub_dir, top_dir):
    """Build the working directory and shell command for one ``timeloop-model`` run.

    The command evaluates the top-1 mapping (``mapping1.yaml``) of the given
    layer against the baseline architecture (``arch/baseline.yaml``).

    Args:
        workload_info: Parsed workload YAML of the layer. It does not influence
            the command (``timeloop-model`` is invoked without constraint
            files); the parameter is kept so existing callers keep working.
        layer_id: Layer index used in the ``layer{layer_id}`` directory and in
            the workload file name.
        base_dir: Root directory, as ``str`` or ``pathlib.Path``.
        timeloop_dir: Design directory, relative to ``base_dir``.
        sub_dir: Workload name; used as a directory name and as the prefix of
            the workload file name.
        top_dir: Directory under ``base_dir`` that holds the workloads.

    Returns:
        ``[cwd, timeloopcmd]``: the per-layer evaluation directory (``str``)
        and the command line to run from it (``str``, with a trailing space).
    """
    # Normalise to Path so that plain-string directories work with the `/`
    # joins below, matching what the os.path.join-based helpers accept.
    design_dir = Path(base_dir) / timeloop_dir
    workload_prefix = Path(base_dir) / top_dir / sub_dir / sub_dir
    layer_name = f"layer{layer_id}"

    cwd = str(design_dir / 'evaluation' / sub_dir / layer_name)

    # The '*.yaml' glob is intentionally left unquoted: the shell expands it.
    timeloopcmd = (
        f"timeloop-model "
        f"{design_dir / 'arch/baseline.yaml'} "
        f"{design_dir / 'arch/components/*.yaml'} "
        f"{design_dir / 'scheduling' / sub_dir / layer_name / 'mapping1.yaml'} "
        f"{workload_prefix}_layer{layer_id}.yaml "
    )
    return [cwd, timeloopcmd]

# Build one timeloop-model invocation (working directory + command) per unique layer.
cwd_list = []
cmd_list = []
for layer_id in unique_layers:
    workload_path = os.path.join(base_dir, top_dir, sub_dir, '{}_layer{}.yaml'.format(sub_dir, layer_id))
    with open(workload_path, 'r') as f:
        workload_info = yaml.safe_load(f)
    [cwd, cmd] = get_cmd_model(workload_info, layer_id, base_dir, timeloop_dir, sub_dir, top_dir)
    cwd_list.append(cwd)
    cmd_list.append(cmd)

# makedirs (unlike mkdir) also creates a missing 'evaluation' parent, and
# exist_ok keeps the cell idempotent when it is re-run.
os.makedirs(os.path.join(base_dir, timeloop_dir, 'evaluation', sub_dir), exist_ok=True)

# Each command runs from its own per-layer directory so its outputs land there.
# The finally clause returns to base_dir even if a run raises.
try:
    for cwd, cmd in zip(cwd_list, cmd_list):
        print("Executing cmd: {}".format(cmd))
        os.makedirs(cwd, exist_ok=True)
        os.chdir(cwd)
        exit_status = os.system(cmd)
        if exit_status != 0:
            # Report the failure but keep going: the remaining layers are independent.
            print("WARNING: cmd exited with status {}: {}".format(exit_status, cmd))
finally:
    os.chdir(base_dir)

Executing cmd: timeloop-model /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/*.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/scheduling/alexnet_batch1/layer1/mapping1.yaml /home/workspace/scheduling/workloads/alexnet_batch1/alexnet_batch1_layer1.yaml 
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver0/scheduling/alexnet_batch1/layer1/mapping1.yaml /home/workspace/scheduling/workloads/alexnet_batch1/alexnet_batch1_layer1.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.72 | pJ/Compute =    6.267
Executing cmd: timeloop-model /home/workspace/scheduling/designs/ey