## Step 3: Simulated Annealing for Interlayer Dependency

Run simulated annealing to perform a joint search over multiple layers.

In [None]:
%load_ext autoreload
%autoreload 2

import os
import shutil
import subprocess
from pathlib import Path

import yaml

First, we have to define the architecture and the workload, in the same way as in the previous steps.

In [None]:
# Full description of the accelerator design point, written as a single literal.
# Key order is the same as the step-by-step assignments it replaces.
configuration_dict = {
    # Template design (constraints + memory hierarchy, i.e. the "dataflow")
    'TEMPLATE_DESIGN': 'eyeriss_like',

    # Bit width used for inputs / outputs / weights; integer datatypes are assumed
    'WORDBITS': 16,

    # DRAM bandwidth, expressed in words / cycle (not bits / cycle)
    'DRAM_READ_BANDWIDTH': 32,
    'DRAM_WRITE_BANDWIDTH': 32,

    # Global buffer (SRAM)
    # - SRAM_SHARED: one glb shared by all datatypes, or one glb per datatype
    # - the lists below hold one entry per glb (a single entry when shared)
    # - SRAM width and SRAM banks define the maximum possible bandwidth
    'SRAM_SHARED': True,
    'SRAM_DEPTH': [2 ** 13],
    'SRAM_WIDTH': [2 ** 7],
    'SRAM_BANKS': [32],
    'SRAM_READ_BANDWIDTH': [32],
    'SRAM_WRITE_BANDWIDTH': [32],

    # PE array
    # - PE_X x PE_Y is the shape of the array
    # - PE_SPAD_SHARED: one scratchpad per PE, or separate scratchpads per datatype
    # - per-datatype lists are ordered Weight, IFmap, OFmap
    'PE_X': 14,
    'PE_Y': 12,
    'PE_SPAD_SHARED': False,
    'PE_SPAD_DEPTH': [192, 12, 16],
    'PE_SPAD_WIDTH': [16, 16, 16],

    # Cryptographic engine
    # - engine + DRAM type (LPDDR4 + AES-GCM)
    # - average AES-GCM cycles per 128-bit block
    # - CRYPT_ENGINE_SHARED: engines shared among all datatypes, or assigned per datatype
    'CRYPT_ENGINE_TYPE': 'effective_lpddr4_aesgcm',
    'CRYPT_ENGINE_CYCLE_PER_BLOCK': 11,
    'CRYPT_ENGINE_SHARED': False,
    'CRYPT_ENGINE_COUNT': [1, 1, 1],

    'EFFECTIVE_CONSERVATIVE': True,
}

# Find the pre-existing design folder whose stored config.yaml equals `configuration_dict`.
# Every sub-directory of designs/<template>/ other than 'template' is a candidate.
# Nothing is created here: if no folder matches, the cell fails with NameError.
design_dir = 'designs/{}'.format(configuration_dict['TEMPLATE_DESIGN'])
arch_dir = None
total_vers = 0  # number of candidate folders whose config.yaml could be loaded
for path in os.listdir(design_dir):
    candidate_dir = os.path.join(design_dir, path)
    if path == 'template' or not os.path.isdir(candidate_dir):
        continue

    try:
        with open(os.path.join(candidate_dir, 'config.yaml'), 'r') as f:
            config_file = yaml.safe_load(f)
    except FileNotFoundError:
        print("No config.yaml file in the directory {}".format(str(candidate_dir)))
        continue
    except (OSError, yaml.YAMLError) as err:
        # Unreadable or malformed config: report the actual reason and keep scanning
        # instead of mislabelling it as a missing file.
        print("Could not load config.yaml in the directory {}: {}".format(candidate_dir, err))
        continue

    total_vers += 1
    if config_file == configuration_dict:
        arch_dir = path
        print("Pre-existing folder found. Setting the arch_dir to {}".format(arch_dir))
        break

if arch_dir is None:
    raise NameError("Architecture is not found!")

Alternatively, if you already know which folder you want to use, specify it here instead of running the cell above.

In [None]:
# Manual selection: point straight at designs/<name>/ver<N> and load the configuration stored there.
design_dir = 'designs/{}'.format('eyeriss_like')  # define your design name here
arch_ver = 0
arch_dir = 'ver{}'.format(arch_ver)               # sub directory under designs/{name}/{arch_dir}

selected_arch_path = os.path.join(design_dir, arch_dir)
with open(os.path.join(selected_arch_path, 'config.yaml'), 'r') as f:
    configuration_dict = yaml.safe_load(f)

# Echo the choice and every loaded setting so the active design point is visible in the output.
print("Setting the architecture directory to: {}".format(selected_arch_path))
print("Printing configuration:")
for key, value in configuration_dict.items():
    print("{}: {}".format(key, value))

Define the workload here. The pytorch2timeloop conversion is skipped in this notebook; it should already have been done when generating the loopnests.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.models as model_zoo

import pytorch2timeloop as pytorch2timeloop

# Note: this version only supports nn.Conv2d (both normal convs and depthwise/pointwise convs) and nn.Linear

# Workload selection: `model_name` is the name of a torchvision model constructor.
# Other names that were offered here: 'alexnet' (AlexNet), 'resnet18' (ResNet18).
model_name = 'mobilenet_v2'  # MobilenetV2
net = getattr(model_zoo, model_name)(pretrained=False)

# Input / Batch info
input_size = (3, 224, 224)   # per-sample input dims; presumably (channels, height, width) - confirm
batch_size = 1

print(net)

# Workload files are looked up under <top_dir>/<sub_dir>/
top_dir = 'workloads'
sub_dir = '{}_batch{}'.format(model_name, batch_size)

In [None]:
base_dir = Path(os.getcwd())
timeloop_dir = 'designs/{}/{}'.format(configuration_dict['TEMPLATE_DESIGN'], arch_dir)

# Walk the network and number every Conv2d / Linear module starting from 1.
# For each one, load its workload YAML and record whether an earlier layer has an identical workload.
n_layers = 0
layer_dict = {}            # layer index -> parsed workload YAML
layer_duplicate_info = {}  # layer index -> earliest layer index with an identical workload
unique_layers = []         # layer indices that have no earlier identical workload
for module in net.modules():
    if not isinstance(module, (nn.Conv2d, nn.Linear)):
        continue

    n_layers += 1
    if n_layers not in layer_dict:
        workload_path = os.path.join(base_dir, top_dir, sub_dir, '{}_layer{}.yaml'.format(sub_dir, n_layers))
        with open(workload_path, 'r') as f:
            workload_info = yaml.safe_load(f)
        layer_dict[n_layers] = workload_info

    # earliest previous layer with the same workload, or None when this layer is new
    first_match = next(
        (earlier for earlier in range(1, n_layers) if layer_dict[earlier] == layer_dict[n_layers]),
        None,
    )
    if first_match is None:
        unique_layers.append(n_layers)
    else:
        layer_duplicate_info[n_layers] = first_match

print(layer_duplicate_info)
print(unique_layers)

In [None]:
from pytorch_layer_dependency_utils import BackpropGraph

# Interlayer dependencies are what this notebook searches over, so they are kept by default.
# Set to True to leave the 'dependent_*' lists empty. This only affects a rebuild:
# an existing layer_info_interlayer.yaml is loaded as-is and must be deleted to regenerate it.
ignore_interlayer = False

workload_path = os.path.join(base_dir, top_dir, sub_dir, 'layer_info_interlayer.yaml')

# 1) Try the cached result first. A missing, unreadable, malformed or incomplete
#    cache (e.g. an entry missing for some layer) falls through to a rebuild.
layer_info = None
try:
    with open(workload_path, 'r') as f:
        cached_layer_info = yaml.safe_load(f)
    for layer_idx in range(1, n_layers + 1):
        print(layer_idx, cached_layer_info[layer_idx])
    layer_info = cached_layer_info
except (OSError, yaml.YAMLError, KeyError, TypeError) as err:
    print("No usable cached layer info at {} ({!r}); rebuilding it".format(workload_path, err))

# 2) Rebuild from the network graph when no usable cache was found.
if layer_info is None:
    graph = BackpropGraph(net, [1, input_size[0], input_size[1], input_size[2]])
    consecutive_dict, dependent_dict = graph.get_dependency_info()

    # construct layer_info: one record per layer, with empty link lists to start with
    layer_info = {}
    for layer_idx in range(1, n_layers + 1):
        info = {}
        if layer_idx in unique_layers:
            info['layer_id_for_timeloop'] = layer_idx
        else:
            # duplicated layers point at the earliest layer with an identical workload
            info['layer_id_for_timeloop'] = layer_duplicate_info[layer_idx]
        info['prev_layer'] = []
        info['next_layer'] = []
        info['dependent_prev_layer'] = []
        info['dependent_next_layer'] = []
        layer_info[layer_idx] = info

    # fill in forward links and mirror each of them as a backward link on the target layer
    for layer_idx in range(1, n_layers + 1):
        consecutive = consecutive_dict[layer_idx]
        dependent = dependent_dict[layer_idx]
        layer_info[layer_idx]['next_layer'].extend(consecutive)
        for i in consecutive:
            layer_info[i]['prev_layer'].append(layer_idx)
        if len(dependent) > 0 and not ignore_interlayer:
            layer_info[layer_idx]['dependent_next_layer'].extend(dependent)
            for i in dependent:
                layer_info[i]['dependent_prev_layer'].append(layer_idx)

    for layer_idx in range(1, n_layers + 1):
        print(layer_idx, layer_info[layer_idx])

    # store the results - rebuilding can take long for deep models like MobileNet..
    with open(workload_path, 'w') as f:
        yaml.dump(layer_info, f)


### Prepare the folders

In [None]:
# Create <timeloop_dir>/joint_topk/<sub_dir>/ (and joint_topk itself) in one idempotent call;
# exist_ok=True makes re-running the cell a no-op instead of relying on a check-then-create pair.
os.makedirs(os.path.join(base_dir, timeloop_dir, 'joint_topk', sub_dir), exist_ok=True)

In [None]:
# Define top-k you used for timeloop-topk.
# The cells below evaluate mapping1.yaml .. mapping{topk}.yaml for every layer and the
# annealing loop samples a mapping index in [1, topk], so this must not exceed the
# number of mappings that were actually generated per layer.
topk = 6

In [None]:
# Copy necessary files to the joint_topk folder
from authblock_assignment import AuthBlockAssignment

# Baseline assignment (joint=False); its cost dictionaries are the reference for the search below.
base_cost_dict, base_rehash_cost_dict, base_block_info_dict = AuthBlockAssignment(
    n_layers, layer_info,
    base_dir, timeloop_dir, top_dir, sub_dir,
    configuration_dict,
    mode="search",
    joint=False, return_cost_dict=True,
)

baseline_energy = 0
baseline_latency = 0
baseline_add_mem_traffic = 0

# Sum the regular costs first, then the rehash costs; energy is scaled down by 10**6.
for cost_table in (base_cost_dict, base_rehash_cost_dict):
    for key in cost_table:
        entry = cost_table[key]
        baseline_energy += entry['total_energy'] / 10**6
        baseline_latency += entry['total_latency']
        baseline_add_mem_traffic += entry['add_memory_traffic']
    
# Evaluate every top-k mapping of every layer with timeloop-model.
# Results land in <timeloop_dir>/joint_topk/<sub_dir>/layer<idx>/eval<k>/.
for layer_idx in range(1, n_layers + 1):
    work_dir = os.path.join(base_dir, timeloop_dir, 'joint_topk', sub_dir, 'layer{}'.format(layer_idx))
    os.makedirs(work_dir, exist_ok=True)

    # Duplicated layers reuse the mappings stored under their 'layer_id_for_timeloop' layer.
    layer_id_for_timeloop = layer_info[layer_idx]['layer_id_for_timeloop']

    for k in range(1, topk + 1):
        cwd = f"{base_dir/timeloop_dir/'joint_topk'/sub_dir/f'layer{layer_idx}'/f'eval{k}'}"
        os.makedirs(cwd, exist_ok=True)

        timeloopcmd = f"timeloop-model " \
              f"{base_dir/timeloop_dir/'arch/baseline.yaml'} " \
              f"{base_dir/timeloop_dir/'arch/components/*.yaml'} " \
              f"{base_dir/timeloop_dir/'scheduling'/sub_dir/f'layer{layer_id_for_timeloop}'/f'mapping{k}.yaml'} " \
              f"{base_dir/top_dir/sub_dir/sub_dir}_layer{layer_idx}.yaml "

        # Run inside the eval folder through cwd= rather than os.chdir(), so the kernel's
        # working directory is never changed (and cannot be left wrong after an interrupt).
        # shell=True is kept because the command relies on the shell expanding '*.yaml'.
        # NOTE(review): paths are interpolated unquoted, so directories containing spaces will break.
        result = subprocess.run(timeloopcmd, shell=True, cwd=cwd)
        if result.returncode != 0:
            print("timeloop-model exited with status {} for layer {} / mapping {}".format(
                result.returncode, layer_idx, k))

    # copy mapping1's result into here
    shutil.copy(os.path.join(work_dir, 'eval1', 'timeloop-model.map+stats.xml'), work_dir)


Failed to run Accelergy. Did you install Accelergy or specify ACCELERGYPATH correctly? Or check accelergy.log to see what went wrong
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer8/mapping1.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer8.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.25 | pJ/Compute =   49.312
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SR

Utilization = 0.75 | pJ/Compute =   13.428
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer7/mapping3.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer10.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.86 | pJ/Compute =   13.389
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batc

Utilization = 0.33 | pJ/Compute =   16.091
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer12/mapping5.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer12.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.50 | pJ/Compute =   16.110
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_bat

Failed to run Accelergy. Did you install Accelergy or specify ACCELERGYPATH correctly? Or check accelergy.log to see what went wrong
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer15/mapping1.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer15.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.17 | pJ/Compute =   15.849
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_

Utilization = 0.50 | pJ/Compute =   50.402
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer14/mapping3.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer17.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.53 | pJ/Compute =   50.402
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_bat

Utilization = 0.67 | pJ/Compute =   12.096
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer13/mapping5.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer19.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.67 | pJ/Compute =   12.497
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_bat

Failed to run Accelergy. Did you install Accelergy or specify ACCELERGYPATH correctly? Or check accelergy.log to see what went wrong
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer22/mapping1.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer22.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.67 | pJ/Compute =    9.486
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_

Utilization = 0.50 | pJ/Compute =   10.781
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer24/mapping3.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer24.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.33 | pJ/Compute =   10.771
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_bat

Utilization = 0.50 | pJ/Compute =   52.581
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer23/mapping5.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer26.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.32 | pJ/Compute =   61.305
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_bat

Failed to run Accelergy. Did you install Accelergy or specify ACCELERGYPATH correctly? Or check accelergy.log to see what went wrong
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer23/mapping1.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer29.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.50 | pJ/Compute =   52.581
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_

Utilization = 0.67 | pJ/Compute =    9.545
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer22/mapping3.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer31.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.67 | pJ/Compute =    9.887
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_bat

Utilization = 0.71 | pJ/Compute =   12.830
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer33/mapping5.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer33.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.33 | pJ/Compute =   13.252
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_bat

Failed to run Accelergy. Did you install Accelergy or specify ACCELERGYPATH correctly? Or check accelergy.log to see what went wrong
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer36/mapping1.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer36.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.75 | pJ/Compute =   10.584
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_

Utilization = 0.50 | pJ/Compute =   52.581
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer35/mapping3.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer38.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.25 | pJ/Compute =   52.581
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_bat

Utilization = 1.00 | pJ/Compute =   12.088
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer34/mapping5.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer40.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.57 | pJ/Compute =   13.701
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_bat

Failed to run Accelergy. Did you install Accelergy or specify ACCELERGYPATH correctly? Or check accelergy.log to see what went wrong
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer43/mapping1.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer43.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.33 | pJ/Compute =   10.518
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_

Utilization = 1.00 | pJ/Compute =   10.249
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer45/mapping3.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer45.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.83 | pJ/Compute =   10.415
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_bat

Utilization = 0.38 | pJ/Compute =   61.256
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer44/mapping5.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer47.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.25 | pJ/Compute =   69.979
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_bat

Failed to run Accelergy. Did you install Accelergy or specify ACCELERGYPATH correctly? Or check accelergy.log to see what went wrong
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer44/mapping1.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer50.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.44 | pJ/Compute =   61.256
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_

Utilization = 0.33 | pJ/Compute =   10.814
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_batch1/layer52/mapping3.yaml /home/workspace/scheduling/workloads/mobilenet_v2_batch1/mobilenet_v2_batch1_layer52.yaml --oprefix timeloop-model. -o ./ > timeloop-model.accelergy.log 2>&1
Utilization = 0.33 | pJ/Compute =   10.954
execute:/usr/local/bin/accelergy /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/baseline.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_RF.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/arch/components/smartbuffer_SRAM.yaml /home/workspace/scheduling/designs/eyeriss_like/ver1/scheduling/mobilenet_v2_bat

### Run simulated annealing

Define the hyperparameters here:

In [None]:
# Temperature schedule end points and the number of annealing iterations
initial_temp, final_temp = 100, 0.1
n_iters = 1_000

# Shape of the temperature decay; the annealing loop handles 'linear', 'cosine' and 'quadratic'
cooling_scheduler = 'linear'

# Layer indices to leave out of the simulated annealing search
# (e.g., non-conv layers in AlexNet)
layers_exclude_from_search = list()

# TODO: this option should not be used for ResNet18 - bug with dependent layer partial update due to residuals
use_partial_update = True

Run simulated annealing

In [None]:
import random
import time
import csv
import copy
import math

from authblock_assignment import PartialUpdateAuthBlockAssignment

# Column names of the per-iteration CSV log; `logs` receives one row per iteration.
csv_header = [
    'Iter',
    'Temp',
    'Cost (J x cycles)',
    'Total Latency (cycles)',
    'Total Energy (uJ)',
    'Additional Off-chip Traffic (bits)',
]
logs = []

# Two independent copies of the baseline results: the "solution" set and the
# "current" set. Deep copies keep later updates of one set from touching the other.
solution_cost_dict = copy.deepcopy(base_cost_dict)
current_cost_dict = copy.deepcopy(base_cost_dict)

solution_rehash_cost_dict = copy.deepcopy(base_rehash_cost_dict)
current_rehash_cost_dict = copy.deepcopy(base_rehash_cost_dict)

solution_block_info_dict = copy.deepcopy(base_block_info_dict)
current_block_info_dict = copy.deepcopy(base_block_info_dict)

# Every layer starts with loopnest id 1 (entry k of a state list belongs to layer k + 1).
solution_state = [1] * n_layers
current_state = list(solution_state)
best_state = list(solution_state)

# Iteration counter and the best cost seen so far (seeded with the baseline cost).
i = 0
cost_best = baseline_energy * baseline_latency

# Collect the layers to search: those with a dependent previous or next layer
# that are not explicitly excluded.
layers_for_search = []
for idx in range(1, n_layers + 1):
    has_dependency = len(layer_info[idx]['dependent_next_layer']) > 0 \
        or len(layer_info[idx]['dependent_prev_layer']) > 0
    if not has_dependency:
        continue
    if idx in layers_exclude_from_search:
        continue
    layers_for_search.append(idx)

start_time = time.time()
def _parse_stat_number(text):
    """Convert a numeric token from a stats line to int, or to float if it is not integral.

    Used instead of eval() so that text read from the stats file is never
    executed as Python code.
    """
    try:
        return int(text)
    except ValueError:
        return float(text)


def _sum_costs(cost_dict, rehash_cost_dict):
    """Accumulate energy, latency and additional memory traffic over both dicts.

    Each entry's 'total_energy' is divided by 10**6 before being added.
    Entries of `cost_dict` are summed first, then those of `rehash_cost_dict`.

    Returns:
        Tuple (energy, latency, add_mem_traffic).
    """
    energy_sum, latency_sum, traffic_sum = 0, 0, 0
    for cost_table in (cost_dict, rehash_cost_dict):
        for key in cost_table:
            energy_sum += cost_table[key]['total_energy'] / 10**6
            latency_sum += cost_table[key]['total_latency']
            traffic_sum += cost_table[key]['add_memory_traffic']
    return energy_sum, latency_sum, traffic_sum


# Fail early with a clear message instead of a NameError / IndexError inside the loop.
if cooling_scheduler not in ('linear', 'cosine', 'quadratic'):
    raise ValueError("Unknown cooling_scheduler: {!r} (expected 'linear', 'cosine' or 'quadratic')"
                     .format(cooling_scheduler))
if not layers_for_search:
    raise ValueError("layers_for_search is empty: there is no layer to change during the search")

# Directory that holds one 'layer<k>' sub-directory per layer.
search_root = os.path.join(base_dir, timeloop_dir, 'joint_topk', sub_dir)

while i < n_iters + 1:
    # temperature
    if cooling_scheduler == 'linear':
        current_temp = final_temp + (initial_temp - final_temp) / float(n_iters) * float(n_iters - i)
    elif cooling_scheduler == 'cosine':
        current_temp = final_temp + 0.5 * (initial_temp - final_temp) * (1 + math.cos(float(i) * math.pi / float(n_iters)))
    elif cooling_scheduler == 'quadratic':
        current_temp = final_temp + (initial_temp - final_temp) * (float(n_iters - i) / float(n_iters))**2

    # Propose a neighbor: pick one searchable layer and one of its top-k loopnests.
    layer2change = random.choice(layers_for_search)
    neighbor_loopnest = random.choice(list(range(1, topk + 1)))
    current_state[layer2change - 1] = neighbor_loopnest

    layer_dir = os.path.join(search_root, "layer{}".format(layer2change))
    eval_dir = os.path.join(layer_dir, "eval{}".format(neighbor_loopnest))

    # Read energy and cycle count of the chosen loopnest from the last 200 lines
    # of its stats file. Both are reset every iteration so that a stats file
    # without these lines cannot silently reuse values from a previous iteration.
    stats_file = os.path.join(eval_dir, "timeloop-model.stats.txt")
    energy, cycle = None, None
    with open(stats_file, 'r') as f:
        for line in f.read().split('\n')[-200:]:
            if line.startswith('Energy'):
                energy = _parse_stat_number(line.split(': ')[1].split(' ')[0]) * float(10**6)  # micro to pico
            elif line.startswith('Cycles'):
                cycle = _parse_stat_number(line.split(': ')[1])
    if energy is None or cycle is None:
        raise ValueError("Could not find both 'Energy' and 'Cycles' in {}".format(stats_file))
    current_cost_dict[layer2change]['timeloop_energy'] = energy
    current_cost_dict[layer2change]['timeloop_cycle'] = cycle

    # Put the candidate's map+stats xml into the layer directory.
    xml_file = os.path.join(eval_dir, "timeloop-model.map+stats.xml")
    shutil.copy(xml_file, layer_dir)

    if use_partial_update:
        # Only the changed layer and its previous / next layers are passed as the subset.
        subset_layers = [layer2change]
        subset_layers.extend(layer_info[layer2change]['prev_layer'])
        subset_layers.extend(layer_info[layer2change]['next_layer'])
        prev_block_info, prev_rehash = current_block_info_dict, current_rehash_cost_dict
    else:
        subset_layers = []
        prev_block_info, prev_rehash = None, None

    current_cost_dict, current_rehash_cost_dict, current_block_info_dict = PartialUpdateAuthBlockAssignment(
        n_layers, layer_info,
        base_dir, timeloop_dir, top_dir, sub_dir,
        configuration_dict, mode="search",
        prev_block_info_dict=prev_block_info, subset_layers=subset_layers,
        prev_cost_dict=current_cost_dict, prev_rehash_cost_dict=prev_rehash)

    solution_energy, solution_latency, solution_add_mem_traffic = \
        _sum_costs(solution_cost_dict, solution_rehash_cost_dict)
    current_energy, current_latency, current_add_mem_traffic = \
        _sum_costs(current_cost_dict, current_rehash_cost_dict)

    # Cost is energy x latency; the difference is scaled by 10**6 * n_layers
    # before it is compared against the temperature.
    cost_solution = solution_energy * solution_latency
    cost_current = current_energy * current_latency
    cost_diff = (cost_solution - cost_current) / (10 ** 6 * n_layers)

    if cost_current < cost_best:
        best_state = copy.deepcopy(current_state)
        cost_best = cost_current
        print("Found best so far: ", best_state, " .. updating cost_best: {}".format(cost_best))

    # Always accept an improvement; accept a worse candidate with probability
    # exp(cost_diff / T). The temperature guard avoids a division by zero
    # when final_temp is set to 0.
    accept = cost_diff > 0 or \
        (current_temp > 0 and random.uniform(0, 1) < math.exp(cost_diff / current_temp))
    if accept:
        solution_state = copy.deepcopy(current_state)
        solution_cost_dict = copy.deepcopy(current_cost_dict)
        solution_rehash_cost_dict = copy.deepcopy(current_rehash_cost_dict)
        solution_block_info_dict = copy.deepcopy(current_block_info_dict)
    else:
        # roll-back to the solution state (including the xml in the layer directory)
        xml_file = os.path.join(layer_dir, "eval{}".format(solution_state[layer2change - 1]),
                                "timeloop-model.map+stats.xml")
        shutil.copy(xml_file, layer_dir)
        current_state = copy.deepcopy(solution_state)
        current_cost_dict = copy.deepcopy(solution_cost_dict)
        current_rehash_cost_dict = copy.deepcopy(solution_rehash_cost_dict)
        current_block_info_dict = copy.deepcopy(solution_block_info_dict)

    # Recompute the totals AND the cost of the solution after the accept/reject
    # decision so that every value in the log row describes the same state.
    solution_energy, solution_latency, solution_add_mem_traffic = \
        _sum_costs(solution_cost_dict, solution_rehash_cost_dict)
    cost_solution = solution_energy * solution_latency

    print("Current iteration: {} (temperature: {:.2f}) -- Latency: {} ({:.2f}% faster), Energy: {} uJ ({:.2f}% lower), Add Mem Traffic: {} bits ({:.2f}% smaller)"\
          .format(i+1, current_temp, solution_latency, (baseline_latency - solution_latency) / float(baseline_latency) * 100. , \
                  solution_energy, (baseline_energy - solution_energy) / baseline_energy * 100., \
                  solution_add_mem_traffic, (baseline_add_mem_traffic - solution_add_mem_traffic) / float(baseline_add_mem_traffic) * 100.))

    curr_log = [(i + 1), current_temp, cost_solution, solution_latency, solution_energy, solution_add_mem_traffic]
    logs.append(curr_log)
    i += 1

    if current_temp < final_temp:
        break
        
elapsed_seconds = time.time() - start_time
print("Execution time: {}s".format(elapsed_seconds))

# Directory that receives both result files.
output_dir = os.path.join(base_dir, timeloop_dir, 'joint_topk', sub_dir)

# dump to csv file: header row first, then one row per logged iteration
summary_path = os.path.join(output_dir, 'SA_{}_top{}_summary.csv'.format(cooling_scheduler, topk))
with open(summary_path, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows([csv_header] + logs)

# dump best state & solution state to yaml file
state = {'best': best_state, 'final': solution_state}
state_path = os.path.join(output_dir, 'SA_{}_state_annealing_only.yaml'.format(cooling_scheduler))
with open(state_path, 'w') as f:
    _ = yaml.dump(state, f)
        

Copy the best states

In [None]:
result_dir = os.path.join(base_dir, timeloop_dir, 'joint_topk', sub_dir)

# The annealing cell writes 'SA_<scheduler>_state_annealing_only.yaml' for the
# scheduler in use, so read that file first. If it does not exist, fall back to
# the file name this cell read originally.
state_file = os.path.join(result_dir, 'SA_{}_state_annealing_only.yaml'.format(cooling_scheduler))
if not os.path.exists(state_file):
    state_file = os.path.join(result_dir, 'SA_{}_state.yaml'.format('linear'))

with open(state_file, 'r') as f:
    states = yaml.safe_load(f)
    best_state = states['best']

# copy the files of the best loopnest of every layer into the layer directory
for layer_idx in range(1, n_layers + 1):
    loopnest_id = best_state[layer_idx - 1]
    dst = os.path.join(result_dir, 'layer{}'.format(layer_idx))
    src = os.path.join(dst, 'eval{}'.format(loopnest_id))
    for file in os.listdir(src):
        file_name = os.path.join(src, file)
        # sub-directories inside the eval directory are skipped
        if os.path.isfile(file_name):
            shutil.copy(file_name, dst)
            

Generate stats.csv

In [None]:
from authblock_assignment import AuthBlockAssignment

# Run the assignment in joint search mode with summary generation enabled;
# no cost dictionaries are requested from this call.
AuthBlockAssignment(
    n_layers,
    layer_info,
    base_dir,
    timeloop_dir,
    top_dir,
    sub_dir,
    configuration_dict,
    mode="search",
    joint=True,
    generate_summary=True,
    return_cost_dict=False,
)

In [None]:
# Same call without summary generation, this time requesting the cost
# dictionaries; the third returned value is discarded.
cost, rehash, _ = AuthBlockAssignment(
    n_layers,
    layer_info,
    base_dir,
    timeloop_dir,
    top_dir,
    sub_dir,
    configuration_dict,
    mode="search",
    joint=True,
    generate_summary=False,
    return_cost_dict=True,
)

In [None]:
# Print, per layer, the 'timeloop_energy' of the freshly computed cost dict
# next to the one left in current_cost_dict by the annealing loop.
for i in range(1, n_layers + 1):
    recomputed_energy = cost[i]['timeloop_energy']
    annealed_energy = current_cost_dict[i]['timeloop_energy']
    print(recomputed_energy, annealed_energy)

In [None]:
# Print the 'total_energy' of every entry in the rehash cost dict.
for key in rehash.keys():
    rehash_entry = rehash[key]
    print(rehash_entry['total_energy'])