In [1]:
import pandas as pd
import numpy as np
from loaders import *

## Eyeriss

In [2]:
show_config(ConfigRegistry.EYERISS_ARCH)
show_config(ConfigRegistry.EYERISS_COMPONENTS_DIR)
result = run_accelergy(ConfigRegistry.EYERISS_ARCH, ConfigRegistry.EYERISS_COMPONENTS_DIR)
print(result.ert)

architecture:
  # Architecture Description
  version: 0.3
  subtree:
    - name: system
      local:
        - name: DRAM
          class: DRAM
          attributes:
            type: LPDDR4
            width: 64
            block-size: 4
            word-bits: 16
      subtree:
        - name: eyeriss
          attributes:
            technology: 45nm
          local:
            - name: shared_glb
              class: smartbuffer_SRAM
              attributes:
                memory_depth: 16384
                memory_width: 64
                n_banks: 32
                block-size: 4
                word-bits: 16
                read_bandwidth: 16
                write_bandwidth: 16
            - name: DummyBuffer[0..13] # for better mapping
              class: regfile
              attributes:
                depth: 16
                width: 16
                word-bits: 16
                block-size: 1
                meshX: 14
          subtree:
          - name: PE[0..167]
    

In [21]:
def parse_stat(layer_stat):
    """
    Total number of compute in this layer
    Number of cycles in this layer
    Energy used in this layer in Joule
    Energy / Compute in picoJoule
    """
    lines = layer_stat.split("\n")
    print('lines=',len(lines))
    if len(lines) < 25:
        print(lines)
        return (None, None, None, None)
    compute = int(lines[-18].split("=")[-1].strip())
    cycle = int(lines[-23].split(":")[-1].strip())
    energy = float(lines[-22].split(":")[-1].strip()[:-3])
    return compute, cycle, energy * 10**(-6), energy * 10**(-6)  / compute * 10**(12)

### Example layer

In [4]:
example_layer_stats, example_layer_mapping = run_timeloop_mapper(
    ConfigRegistry.EYERISS_ARCH, ConfigRegistry.EYERISS_COMPONENTS_DIR,
    ConfigRegistry.EYERISS_ARCH_CONSTRAINTS,
    ConfigRegistry.EYERISS_MAP_CONSTRAINTS,
    ConfigRegistry.VGG02_layer5, ConfigRegistry.DEFAULT_MAPPER_SETTING
)

input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 120
  Factorization options along problem dimension M = 495
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization o

[  4] Utilization = 0.57 | pJ/Compute =   10.807 | L5[WIO] Q28 M4 C128 - L4[IO] M2 P56 M4X Q2X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M2 
[  0] Utilization = 0.07 | pJ/Compute =  139.321 | L5[WIO] Q56 M4 - L4[IO] M16 P56 C128 M2X - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  5] Utilization = 0.14 | pJ/Compute =   37.150 | L5[WIO] Q7 M64 C8 - L4[IO] M2 P56 C16 Q8X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M2 
[  2] Utilization = 0.19 | pJ/Compute =   37.164 | L5[WIO] Q7 M64 C8 - L4[IO] M2 P56 C4 Q8X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M2 
[  7] Utilization = 0.57 | pJ/Compute =   23.291 | L5[WIO] Q7 M4 C4 - L4[IO] M2 P56 C16 Q8X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 
[  6] Utilization = 0.19 | pJ/Compute =   49.055 | L5[WIO] Q14 M8 C16 - L4[IO] M16 P56 C4 M2X Q4X - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  3] Utilization = 0.57 | pJ/Compute =   23.291 | L5[WIO] Q7 M4 C4 - L4[IO] M2 P56 C32 Q8X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W

[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =    5.811


[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.


In [5]:
parse_stat(example_layer_stats)

(924844032, 5505024, 0.005374359999999999, 5.8110987518379735)

### Squeeze Nets

In [6]:
squeeze_layers = ConfigRegistry.SQUEEZE_NET_LAYERS
squeeze_stat = []
for i in range(len(squeeze_layers)):
    layer_stats, layer_mapping = run_timeloop_mapper(
        ConfigRegistry.EYERISS_ARCH, ConfigRegistry.EYERISS_COMPONENTS_DIR,
        ConfigRegistry.EYERISS_ARCH_CONSTRAINTS,
        ConfigRegistry.EYERISS_MAP_CONSTRAINTS,
        squeeze_layers[i+1], ConfigRegistry.DEFAULT_MAPPER_SETTING
    )
    squeeze_stat.append(layer_stats)

input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 4
  Factorization options along problem dimension M = 210
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization opt

[  0] Utilization = 0.21 | pJ/Compute =   11.901 | L5[WIO] Q37 - L4[IO] P111 Q3X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M16 
[  3] Utilization = 0.21 | pJ/Compute =   15.276 | L5[WIO] Q37 C3 - L4[IO] P111 Q3X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  2] Utilization = 0.21 | pJ/Compute =   56.243 | L5[WIO] Q37 - L4[IO] P111 C3 Q3X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  1] Utilization = 0.64 | pJ/Compute =   13.066 | L5[WIO] Q37 M4 - L4[IO] P111 M4X Q3X - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  5] Utilization = 0.21 | pJ/Compute =   16.623 | L5[WIO] Q111 M4 - L4[IO] M4 P111 M4X - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  6] Utilization = 0.07 | pJ/Compute =  146.649 | L5[WIO] Q111 M4 - L4[IO] M4 P111 C3 M4X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  4] Utilization = 0.07 | pJ/Compute =   16.623 | L5[WIO] Q111 M4 - L4[IO] M4 P111 M4X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] Q1 
[  7] Utilization = 0.07 | pJ/C



Summary stats for best mapping found by mapper:
  Utilization = 0.86 | pJ/Compute =   11.930
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 84
  Factorization options along problem dimension M = 70
  Factorization options along pr

[  2] Utilization = 0.10 | pJ/Compute =   92.989 | L5[WIO] Q55 M8 C2 - L4[IO] P55 M2X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  0] Utilization = 0.05 | pJ/Compute =   92.973 | L5[WIO] Q55 M8 C2 - L4[IO] P55 M2X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  4] Utilization = 0.02 | pJ/Compute =  220.194 | L5[WIO] Q55 M8 C2 - L4[IO] P55 C2 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  5] Utilization = 0.05 | pJ/Compute =  220.202 | L5[WIO] Q55 M8 C2 - L4[IO] P55 C2 M2X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  7] Utilization = 0.10 | pJ/Compute =  220.218 | L5[WIO] Q55 M8 C2 - L4[IO] P55 C2 M2X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  6] Utilization = 0.24 | pJ/Compute =   47.788 | L5[WIO] Q11 C4 - L4[IO] M2 P55 C4 M2X Q5X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  1] Utilization = 0.01 | pJ/Compute =  168.551 | L5[WIO] Q55 M2 C16 - L4[IO] M2 P55 C4 - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  3] Utilization = 0.48 | pJ/Compute =   47.79



Summary stats for best mapping found by mapper:
  Utilization = 0.52 | pJ/Compute =   18.874
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 35
  Factorization options along problem dimension M = 210
  Factorization options along p

[  5] Utilization = 0.12 | pJ/Compute =   47.007 | L5[WIO] Q11 C2 - L4[IO] P55 C8 Q5X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  0] Utilization = 0.02 | pJ/Compute =  151.624 | L5[WIO] Q55 M4 - L4[IO] M4 P55 C16 M4X - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  2] Utilization = 0.12 | pJ/Compute =   47.007 | L5[WIO] Q11 C2 - L4[IO] P55 C2 Q5X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  1] Utilization = 0.02 | pJ/Compute =  151.624 | L5[WIO] Q55 M4 - L4[IO] M4 P55 C2 M4X - L3[] Q1 - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  6] Utilization = 0.12 | pJ/Compute =   47.007 | L5[WIO] Q11 C2 - L4[IO] P55 C4 Q5X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  7] Utilization = 0.24 | pJ/Compute =   47.023 | L5[WIO] Q11 C2 - L4[IO] P55 C4 Q5X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  3] Utilization = 0.24 | pJ/Compute =   47.023 | L5[WIO] Q11 C2 - L4[IO] P55 C2 Q5X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  4] Utilization = 0.10 | pJ/Compute =  151.624 |

[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 0.52 | pJ/Compute =   18.003
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 35
  Factorization options along problem dimension M = 210
  Factorization options along p

[  0] Utilization = 0.07 | pJ/Compute =  140.922 | L5[WIO] Q55 M4 - L4[IO] M4 P55 C16 M4X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  6] Utilization = 0.36 | pJ/Compute =   33.798 | L5[WIO] Q11 C2 - L4[IO] P55 C4 Q5X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M16 
[  5] Utilization = 0.36 | pJ/Compute =   33.798 | L5[WIO] Q11 C2 - L4[IO] P55 C8 Q5X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  2] Utilization = 0.36 | pJ/Compute =   33.798 | L5[WIO] Q11 C2 - L4[IO] P55 C2 Q5X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M16 
[  1] Utilization = 0.07 | pJ/Compute =  141.979 | L5[WIO] Q55 M2 C2 - L4[IO] M8 P55 C8 M4X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  3] Utilization = 0.07 | pJ/Compute =  137.269 | L5[WIO] Q55 M2 - L4[IO] M2 P55 C16 M2X - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  4] Utilization = 0.29 | pJ/Compute =  140.922 | L5[WIO] Q55 M4 - L4[IO] M4 P55 C2 M4X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] Q1 
[  7] Utiliza

[  4] Utilization = 0.79 | pJ/Compute =    7.553 | L5[WIO] Q5 M2 C4 - L4[IO] P55 Q11X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M16 
[  1] Utilization = 0.79 | pJ/Compute =    7.785 | L5[WIO] Q5 M2 C4 - L4[IO] M2 P55 Q11X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, term



Summary stats for best mapping found by mapper:
  Utilization = 0.79 | pJ/Compute =    6.632
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 120
  Factorization options along problem dimension M = 70
  Factorization options along p

[  2] Utilization = 0.01 | pJ/Compute =  218.603 | L5[WIO] Q55 M8 C2 - L4[IO] P55 C64 M2X - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  4] Utilization = 0.02 | pJ/Compute =  218.605 | L5[WIO] Q55 M8 C2 - L4[IO] P55 C4 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  5] Utilization = 0.05 | pJ/Compute =  218.609 | L5[WIO] Q55 M8 C2 - L4[IO] P55 C4 M2X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  3] Utilization = 0.24 | pJ/Compute =   45.613 | L5[WIO] Q11 C4 - L4[IO] M2 P55 C32 M2X Q5X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  7] Utilization = 0.24 | pJ/Compute =   45.613 | L5[WIO] Q11 C4 - L4[IO] M2 P55 C16 M2X Q5X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  1] Utilization = 0.26 | pJ/Compute =   44.212 | L5[WIO] Q5 M2 C8 - L4[IO] M2 P55 C4 Q11X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M2 
[  0] Utilization = 0.10 | pJ/Compute =  218.617 | L5[WIO] Q55 M8 C2 - L4[IO] P55 C2 M2X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  6] Utilization = 0.10 | 



Summary stats for best mapping found by mapper:
  Utilization = 0.52 | pJ/Compute =   17.329
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 35
  Factorization options along problem dimension M = 210
  Factorization options along p

[  1] Utilization = 0.02 | pJ/Compute =  151.624 | L5[WIO] Q55 M4 - L4[IO] M4 P55 C2 M4X - L3[] Q1 - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  0] Utilization = 0.02 | pJ/Compute =  151.624 | L5[WIO] Q55 M4 - L4[IO] M4 P55 C16 M4X - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  6] Utilization = 0.12 | pJ/Compute =   47.007 | L5[WIO] Q11 C2 - L4[IO] P55 C4 Q5X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  7] Utilization = 0.24 | pJ/Compute =   47.023 | L5[WIO] Q11 C2 - L4[IO] P55 C4 Q5X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  5] Utilization = 0.12 | pJ/Compute =   47.007 | L5[WIO] Q11 C2 - L4[IO] P55 C8 Q5X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  3] Utilization = 0.24 | pJ/Compute =   47.023 | L5[WIO] Q11 C2 - L4[IO] P55 C2 Q5X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  2] Utilization = 0.12 | pJ/Compute =   47.007 | L5[WIO] Q11 C2 - L4[IO] P55 C2 Q5X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  4] Utilization = 0.10 | pJ/Compute =  151.624 |

[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 0.52 | pJ/Compute =   18.003
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 35
  Factorization options along problem dimension M = 210
  Factorization options along p

[  0] Utilization = 0.07 | pJ/Compute =  140.922 | L5[WIO] Q55 M4 - L4[IO] M4 P55 C16 M4X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  6] Utilization = 0.36 | pJ/Compute =   33.798 | L5[WIO] Q11 C2 - L4[IO] P55 C4 Q5X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M16 
[  5] Utilization = 0.36 | pJ/Compute =   33.798 | L5[WIO] Q11 C2 - L4[IO] P55 C8 Q5X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  2] Utilization = 0.36 | pJ/Compute =   33.798 | L5[WIO] Q11 C2 - L4[IO] P55 C2 Q5X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M16 
[  3] Utilization = 0.07 | pJ/Compute =  137.269 | L5[WIO] Q55 M2 - L4[IO] M2 P55 C16 M2X - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  7] Utilization = 0.07 | pJ/Compute =  140.922 | L5[WIO] Q55 M4 - L4[IO] M4 P55 C4 M4X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] Q1 
[  1] Utilization = 0.07 | pJ/Compute =  141.979 | L5[WIO] Q55 M2 C2 - L4[IO] M8 P55 C8 M4X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  4] Utilization

[  4] Utilization = 0.79 | pJ/Compute =    7.553 | L5[WIO] Q5 M2 C4 - L4[IO] P55 Q11X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M16 
[  1] Utilization = 0.79 | pJ/Compute =    7.785 | L5[WIO] Q5 M2 C4 - L4[IO] M2 P55 Q11X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, term



Summary stats for best mapping found by mapper:
  Utilization = 0.79 | pJ/Compute =    6.632
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 120
  Factorization options along problem dimension M = 126
  Factorization options along 

[  1] Utilization = 0.14 | pJ/Compute =   14.735 | L5[WIO] Q9 C8 - L4[IO] P27 Q3X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  0] Utilization = 0.07 | pJ/Compute =   14.708 | L5[WIO] Q9 C8 - L4[IO] P27 Q3X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  2] Utilization = 0.10 | pJ/Compute =  141.208 | L5[WIO] Q27 M2 - L4[IO] P27 C16 M4X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  5] Utilization = 0.07 | pJ/Compute =   57.321 | L5[WIO] Q9 C8 - L4[IO] P27 C2 Q3X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  6] Utilization = 0.14 | pJ/Compute =   57.348 | L5[WIO] Q9 C8 - L4[IO] P27 C2 Q3X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  4] Utilization = 0.04 | pJ/Compute =   57.307 | L5[WIO] Q9 C8 - L4[IO] P27 C2 Q3X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  3] Utilization = 0.19 | pJ/Compute =  141.208 | L5[WIO] Q27 M2 - L4[IO] P27 C16 M4X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  7] Utilization = 0.14 | pJ/Compute =   59.156 |

[  4] Utilization = 0.57 | pJ/Compute =   55.296 | L5[WIO] Q9 - L4[IO] P27 C4 M4X Q3X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C8 - L0[O] M4 
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] Utilization = 0.57 | pJ/Compute =   17.176 | L5[WIO] Q9 C16 - L4[IO] P27 M4X Q3X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M2 
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 0.57 | pJ/Compute =   55.266 | L5[WIO] Q9 - L4[IO] P27 C16 M4X Q3X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings f



Summary stats for best mapping found by mapper:
  Utilization = 0.57 | pJ/Compute =   13.569
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 56
  Factorization options along problem dimension M = 330
  Factorization options along p

[  2] Utilization = 0.01 | pJ/Compute =  141.605 | L5[WIO] Q27 M8 - L4[IO] P27 C8 - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  5] Utilization = 0.01 | pJ/Compute =   61.263 | L5[WIO] Q27 M8 C32 - L4[IO] P27 - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  0] Utilization = 0.02 | pJ/Compute =  141.605 | L5[WIO] Q27 M8 - L4[IO] P27 C4 - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  1] Utilization = 0.05 | pJ/Compute =  141.605 | L5[WIO] Q27 M8 - L4[IO] P27 C4 - L3[] Q1 C8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  3] Utilization = 0.01 | pJ/Compute =  141.605 | L5[WIO] Q27 M8 - L4[IO] P27 C8 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  7] Utilization = 0.01 | pJ/Compute =   26.030 | L5[WIO] Q27 M8 C2 - L4[IO] P27 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  4] Utilization = 0.02 | pJ/Compute =  141.605 | L5[WIO] Q27 M8 - L4[IO] P27 C8 - L3[] Q1 C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  6] Utilization = 0.01 | pJ/Compute =  141.620 | L5[WIO] Q27 M2 - L4[IO] M4 P27 C4 M2X -

[  1] Utilization = 0.57 | pJ/Compute =   15.149 | L5[WIO] Q9 M2 C2 - L4[IO] P27 M4X Q3X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  5] Utilization = 0.57 | pJ/Compute =   13.094 | L5[WIO] Q9 M2 - L4[IO] M2 P27 M4X Q3X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  3] Utilization = 0.57 | pJ/Compute =   16.478 | L5[WIO] Q9 C4 - L4[IO] M2 P27 M4X Q3X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  7] Utilization = 0.57 | pJ/Compute =   17.806 | L5[WIO] Q9 M4 C2 - L4[IO] P27 M4X Q3X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M2 
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] Utilization = 0.57 | pJ/Compute =   13.843 | L5[WIO] Q9 C2 - L4[IO] P27 M4X Q3X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  6] STATEMENT: 500 suboptimal



Summary stats for best mapping found by mapper:
  Utilization = 0.57 | pJ/Compute =   13.094
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 56
  Factorization options along problem dimension M = 330
  Factorization options along p

[  5] Utilization = 0.02 | pJ/Compute =   17.704 | L5[WIO] Q27 M8 C32 - L4[IO] P27 - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  2] Utilization = 0.02 | pJ/Compute =  136.268 | L5[WIO] Q27 M8 - L4[IO] P27 C8 - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M16 
[  0] Utilization = 0.07 | pJ/Compute =  136.268 | L5[WIO] Q27 M8 - L4[IO] P27 C4 - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M16 
[  3] Utilization = 0.04 | pJ/Compute =  136.268 | L5[WIO] Q27 M8 - L4[IO] P27 C8 - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M16 
[  4] Utilization = 0.07 | pJ/Compute =  136.268 | L5[WIO] Q27 M8 - L4[IO] P27 C8 - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  6] Utilization = 0.04 | pJ/Compute =  136.283 | L5[WIO] Q27 M2 - L4[IO] M4 P27 C32 M2X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M8 
[  5] Utilization = 0.07 | pJ/Compute =  136.283 | L5[WIO] Q27 M2 - L4[IO] M4 P27 C16 M2X - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M8 
[  6] Utilization = 0.14 | pJ/Compute =  13

[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] Utilization = 0.86 | pJ/Compute =   49.802 | L5[WIO] Q9 - L4[IO] M2 P27 C4 M4X Q3X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M16 
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] Utilization = 0.86 | pJ/Compute =    7.443 | L5[WIO] Q9 M2 C2 - L4[IO] P27 M4X Q3X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M16 
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] Utilization = 0.86 | pJ/Compute =    7.324 | L5[WIO] Q9 C2 - L4[IO] M4 P27 M4X Q3X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M8 
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 0.86 | pJ/Compute =    7.324
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 165
  Factorization options along problem dimension M = 126
  Factorization options along 

[  5] Utilization = 0.04 | pJ/Compute =   55.632 | L5[WIO] Q9 C8 - L4[IO] P27 C32 Q3X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  0] Utilization = 0.14 | pJ/Compute =   55.653 | L5[WIO] Q9 C8 - L4[IO] P27 C4 Q3X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  2] Utilization = 0.04 | pJ/Compute =   55.632 | L5[WIO] Q9 C8 - L4[IO] P27 C8 Q3X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  6] Utilization = 0.04 | pJ/Compute =   55.632 | L5[WIO] Q9 C8 - L4[IO] P27 C16 Q3X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  7] Utilization = 0.07 | pJ/Compute =   55.639 | L5[WIO] Q9 C8 - L4[IO] P27 C16 Q3X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  1] Utilization = 0.07 | pJ/Compute =   57.406 | L5[WIO] Q9 C8 - L4[IO] M4 P27 C2 Q3X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  3] Utilization = 0.07 | pJ/Compute =   55.639 | L5[WIO] Q9 C8 - L4[IO] P27 C8 Q3X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  4] Utilization = 0.14 | pJ/Compute =   55.



Summary stats for best mapping found by mapper:
  Utilization = 0.57 | pJ/Compute =   14.312
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 56
  Factorization options along problem dimension M = 330
  Factorization options along p

[  0] Utilization = 0.02 | pJ/Compute =  141.605 | L5[WIO] Q27 M8 - L4[IO] P27 C4 - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  2] Utilization = 0.01 | pJ/Compute =  141.605 | L5[WIO] Q27 M8 - L4[IO] P27 C8 - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  1] Utilization = 0.05 | pJ/Compute =  141.605 | L5[WIO] Q27 M8 - L4[IO] P27 C4 - L3[] Q1 C8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  5] Utilization = 0.01 | pJ/Compute =   61.263 | L5[WIO] Q27 M8 C32 - L4[IO] P27 - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  3] Utilization = 0.01 | pJ/Compute =  141.605 | L5[WIO] Q27 M8 - L4[IO] P27 C8 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  7] Utilization = 0.01 | pJ/Compute =   26.030 | L5[WIO] Q27 M8 C2 - L4[IO] P27 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  4] Utilization = 0.02 | pJ/Compute =  141.605 | L5[WIO] Q27 M8 - L4[IO] P27 C8 - L3[] Q1 C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  6] Utilization = 0.01 | pJ/Compute =  141.620 | L5[WIO] Q27 M2 - L4[IO] M4 P27 C4 M2X -

[  5] Utilization = 0.57 | pJ/Compute =   13.094 | L5[WIO] Q9 M2 - L4[IO] M2 P27 M4X Q3X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  1] Utilization = 0.57 | pJ/Compute =   15.149 | L5[WIO] Q9 M2 C2 - L4[IO] P27 M4X Q3X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  3] Utilization = 0.57 | pJ/Compute =   16.478 | L5[WIO] Q9 C4 - L4[IO] M2 P27 M4X Q3X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  7] Utilization = 0.57 | pJ/Compute =   17.806 | L5[WIO] Q9 M4 C2 - L4[IO] P27 M4X Q3X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M2 
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] Utilization = 0.57 | pJ/Compute =   13.843 | L5[WIO] Q9 C2 - L4[IO] P27 M4X Q3X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  6] STATEMENT: 500 suboptimal



Summary stats for best mapping found by mapper:
  Utilization = 0.57 | pJ/Compute =   13.094
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 56
  Factorization options along problem dimension M = 330
  Factorization options along p

[  2] Utilization = 0.02 | pJ/Compute =  136.268 | L5[WIO] Q27 M8 - L4[IO] P27 C8 - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M16 
[  3] Utilization = 0.04 | pJ/Compute =  136.268 | L5[WIO] Q27 M8 - L4[IO] P27 C8 - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M16 
[  4] Utilization = 0.07 | pJ/Compute =  136.268 | L5[WIO] Q27 M8 - L4[IO] P27 C8 - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  5] Utilization = 0.02 | pJ/Compute =   17.704 | L5[WIO] Q27 M8 C32 - L4[IO] P27 - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M16 
[  3] Utilization = 0.16 | pJ/Compute =   31.085 | L5[WIO] Q3 M32 C2 - L4[IO] P27 C8 Q9X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M4 
[  2] Utilization = 0.14 | pJ/Compute =  136.283 | L5[WIO] Q27 M2 - L4[IO] M4 P27 C2 M2X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M8 
[  6] Utilization = 0.04 | pJ/Compute =  136.283 | L5[WIO] Q27 M2 - L4[IO] M4 P27 C32 M2X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M8 
[  7] Utilization = 0.07 | pJ/Compute =

[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] Utilization = 0.86 | pJ/Compute =   49.802 | L5[WIO] Q9 - L4[IO] M2 P27 C4 M4X Q3X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M16 
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] Utilization = 0.86 | pJ/Compute =    7.443 | L5[WIO] Q9 M2 C2 - L4[IO] P27 M4X Q3X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M16 
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] Utilization = 0.86 | pJ/Compute =    7.324 | L5[WIO] Q9 C2 - L4[IO] M4 P27 M4X Q3X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M8 
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 0.86 | pJ/Compute =    7.324
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 165
  Factorization options along problem dimension M = 350
  Factorization options along 

[  0] Utilization = 0.10 | pJ/Compute =   25.126 | L5[WIO] Q13 M2 C16 - L4[IO] M2 P13 M2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M3 
[  1] Utilization = 0.14 | pJ/Compute =  140.477 | L5[WIO] Q13 C16 - L4[IO] P13 C2 M3X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  2] Utilization = 0.02 | pJ/Compute =   39.346 | L5[WIO] Q13 C128 - L4[IO] M6 P13 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  7] Utilization = 0.05 | pJ/Compute =   25.096 | L5[WIO] Q13 M2 C16 - L4[IO] M2 P13 M2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M3 
[  6] Utilization = 0.05 | pJ/Compute =  145.593 | L5[WIO] Q13 M3 C4 - L4[IO] P13 C32 M2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  2] Utilization = 0.93 | pJ/Compute =   30.764 | L5[WIO] M4 C2 - L4[IO] P13 C16 Q13X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  5] Utilization = 0.02 | pJ/Compute =  147.049 | L5[WIO] Q13 M2 C32 - L4[IO] M2 P13 C8 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M3 
[  4] Utilization = 0.



Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =   10.959
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 140
  Factorization options along problem dimension M = 1050
  Factorization options along

[  3] Utilization = 0.01 | pJ/Compute =  146.131 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C3 - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  6] Utilization = 0.01 | pJ/Compute =  146.131 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C4 - L3[] Q1 - L2[I] Q1 - L1[W] C3 - L0[O] M4 
[  1] Utilization = 0.02 | pJ/Compute =  146.163 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C2 - L3[] Q1 C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  2] Utilization = 0.04 | pJ/Compute =  146.210 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C2 - L3[] Q1 C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  0] Utilization = 0.01 | pJ/Compute =  146.147 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C2 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C3 - L0[O] M4 
[  5] Utilization = 0.02 | pJ/Compute =  146.178 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C3 - L3[] Q1 C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  4] Utilization = 0.01 | pJ/Compute =  146.147 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C3 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  3] Utilization = 0.35 | pJ/Compute =   27.536 



Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =   10.410
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 140
  Factorization options along problem dimension M = 1050
  Factorization options along

[  2] Utilization = 0.11 | pJ/Compute =  135.377 | L5[WIO] Q13 M2 C2 - L4[IO] P13 C8 M6X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M16 
[  7] Utilization = 0.05 | pJ/Compute =  140.049 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C4 - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  6] Utilization = 0.02 | pJ/Compute =  140.038 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C4 - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M4 
[  0] Utilization = 0.04 | pJ/Compute =  140.044 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C2 - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M4 
[  1] Utilization = 0.05 | pJ/Compute =  140.049 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C2 - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M4 
[  5] Utilization = 0.07 | pJ/Compute =  140.054 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C3 - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  3] Utilization = 0.02 | pJ/Compute =  140.038 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C3 - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M4 
[  4] Uti

[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 0.93 | pJ/Compute =    8.665 | L5[WIO] M16 C6 - L4[IO] P13 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M12 
[  2] Utilization = 0.93 | pJ/Compute =    7.683 | L5[WIO] M3 C16 - L4[IO] M2 P13 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M8 
[  2] Utilization = 0.93 | pJ/Compute =    7.446 | L5[WIO] M8 C6 - L4[IO] P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M12 
[  3] Utilization = 0.93 | pJ/Compute =    7.974 | L5[WIO] M8 C12 - L4[IO] P13 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M6 
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 0.93 | pJ/Compute =    7.182 | L5[WIO] C12 - L4[IO] M8 P13 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M6 
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 sub



Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    6.651
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 480
  Factorization options along problem dimension M = 350
  Factorization options along 

[  0] Utilization = 0.02 | pJ/Compute =  139.975 | L5[WIO] Q13 - L4[IO] M6 P13 C16 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C12 - L0[O] M4 
[  1] Utilization = 0.04 | pJ/Compute =  139.483 | L5[WIO] Q13 C16 - L4[IO] P13 C3 M3X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  2] Utilization = 0.07 | pJ/Compute =  139.493 | L5[WIO] Q13 C16 - L4[IO] P13 C3 M3X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  3] Utilization = 0.02 | pJ/Compute =  143.749 | L5[WIO] Q13 M2 C16 - L4[IO] M2 P13 C2 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C12 - L0[O] M3 
[  4] Utilization = 0.05 | pJ/Compute =  143.759 | L5[WIO] Q13 M2 C16 - L4[IO] M2 P13 C2 M2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C6 - L0[O] M3 
[  6] Utilization = 0.10 | pJ/Compute =  143.779 | L5[WIO] Q13 M2 C16 - L4[IO] M2 P13 C2 M2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C3 - L0[O] M3 
[  7] Utilization = 0.14 | pJ/Compute =  143.798 | L5[WIO] Q13 M2 C16 - L4[IO] M2 P13 C2 M2X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] C2 - L0[O] M3 
[  2] Utiliza

[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =   10.392
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 140
  Factorization options along problem dimension M = 1050
  Factorization options along

[  6] Utilization = 0.01 | pJ/Compute =  146.131 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C4 - L3[] Q1 - L2[I] Q1 - L1[W] C3 - L0[O] M4 
[  3] Utilization = 0.01 | pJ/Compute =  146.131 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C3 - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  4] Utilization = 0.01 | pJ/Compute =  146.147 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C3 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  0] Utilization = 0.01 | pJ/Compute =  146.147 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C2 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C3 - L0[O] M4 
[  5] Utilization = 0.02 | pJ/Compute =  146.178 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C3 - L3[] Q1 C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  7] Utilization = 0.02 | pJ/Compute =  146.163 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C4 - L3[] Q1 C3Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  2] Utilization = 0.04 | pJ/Compute =  146.210 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C2 - L3[] Q1 C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  1] Utilization = 0.02 | pJ/Compute =  146.163 



Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =   10.410
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 140
  Factorization options along problem dimension M = 1050
  Factorization options along

[  2] Utilization = 0.11 | pJ/Compute =  135.377 | L5[WIO] Q13 M2 C2 - L4[IO] P13 C8 M6X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M16 
[  3] Utilization = 0.02 | pJ/Compute =  140.038 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C3 - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M4 
[  1] Utilization = 0.05 | pJ/Compute =  140.049 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C2 - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M4 
[  7] Utilization = 0.05 | pJ/Compute =  140.049 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C4 - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  6] Utilization = 0.02 | pJ/Compute =  140.038 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C4 - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M4 
[  5] Utilization = 0.07 | pJ/Compute =  140.054 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C3 - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  4] Utilization = 0.04 | pJ/Compute =  140.044 | L5[WIO] Q13 M2 C4 - L4[IO] M24 P13 C3 - L3[] Q1 C2Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M4 
[  0] Uti

[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 0.93 | pJ/Compute =    8.665 | L5[WIO] M16 C6 - L4[IO] P13 Q13X - L3[] Q1 C4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M12 
[  2] Utilization = 0.93 | pJ/Compute =    7.683 | L5[WIO] M3 C16 - L4[IO] M2 P13 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] M8 
[  2] Utilization = 0.93 | pJ/Compute =    7.446 | L5[WIO] M8 C6 - L4[IO] P13 Q13X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M12 
[  3] Utilization = 0.93 | pJ/Compute =    7.974 | L5[WIO] M8 C12 - L4[IO] P13 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M6 
[  3] Utilization = 0.93 | pJ/Compute =    7.182 | L5[WIO] C12 - L4[IO] M8 P13 Q13X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M6 
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 sub



Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    6.651
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 480
  Factorization options along problem dimension M = 210
  Factorization options along 

[  0] Utilization = 0.02 | pJ/Compute =  139.211 | L5[WIO] Q13 - L4[IO] M8 P13 C96 - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  1] Utilization = 0.05 | pJ/Compute =  139.211 | L5[WIO] Q13 - L4[IO] M8 P13 C96 - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  2] Utilization = 0.05 | pJ/Compute =  139.310 | L5[WIO] Q13 C2 - L4[IO] M8 P13 C4 - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C12 - L0[O] M4 
[  6] Utilization = 0.05 | pJ/Compute =  176.408 | L5[WIO] Q13 M16 C4 - L4[IO] P13 C8 - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C6 - L0[O] Q1 
[  5] Utilization = 0.05 | pJ/Compute =  140.416 | L5[WIO] Q13 M2 C2 - L4[IO] M2 P13 C48 - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  4] Utilization = 0.02 | pJ/Compute =  140.415 | L5[WIO] Q13 M2 C2 - L4[IO] M2 P13 C48 - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  3] Utilization = 0.01 | pJ/Compute =  140.414 | L5[WIO] Q13 M2 C2 - L4[IO] M2 P13 C48 - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  2] Utilization = 0.07 | pJ/Compu

[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] Utilization = 0.93 | pJ/Compute =   10.834 | L5[WIO] C16 - L4[IO] M4 P13 Q13X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C8 - L0[O] M4 
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =   10.834
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 84
  Factorization options along problem dimension M = 495
  Factorization options along p

[  5] Utilization = 0.19 | pJ/Compute =  137.960 | L5[WIO] Q13 C2 - L4[IO] P13 C4 M8X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  4] Utilization = 0.19 | pJ/Compute =  137.960 | L5[WIO] Q13 C2 - L4[IO] P13 C32 M8X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  0] Utilization = 0.38 | pJ/Compute =  137.964 | L5[WIO] Q13 C2 - L4[IO] P13 C2 M8X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  2] Utilization = 0.10 | pJ/Compute =   38.279 | L5[WIO] Q13 M4 C32 - L4[IO] M2 P13 M2X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C2 - L0[O] M2 
[  7] Utilization = 0.01 | pJ/Compute =  146.775 | L5[WIO] Q13 M2 - L4[IO] M64 P13 C64 - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  6] Utilization = 0.38 | pJ/Compute =  137.964 | L5[WIO] Q13 C2 - L4[IO] P13 C4 M8X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  1] Utilization = 0.10 | pJ/Compute =  137.909 | L5[WIO] Q13 - L4[IO] M8 P13 C8 M4X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  7] Utilization = 0.10 | pJ/Compute =  142



Summary stats for best mapping found by mapper:
  Utilization = 0.62 | pJ/Compute =   10.911
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 84
  Factorization options along problem dimension M = 495
  Factorization options along p

[  0] Utilization = 0.02 | pJ/Compute =   22.515 | L5[WIO] Q13 M8 C64 - L4[IO] M8 P13 - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  1] Utilization = 0.29 | pJ/Compute =  135.120 | L5[WIO] Q13 - L4[IO] M8 P13 C8 M4X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M4 
[  4] Utilization = 0.57 | pJ/Compute =  134.786 | L5[WIO] Q13 C2 - L4[IO] P13 C32 M8X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M8 
[  7] Utilization = 0.04 | pJ/Compute =  144.122 | L5[WIO] Q13 M2 - L4[IO] M64 P13 C64 - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  0] Utilization = 0.07 | pJ/Compute =   19.635 | L5[WIO] Q13 C64 - L4[IO] M16 P13 - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  3] Utilization = 0.14 | pJ/Compute =  137.171 | L5[WIO] Q13 M8 - L4[IO] M4 P13 C64 M2X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  2] Utilization = 0.29 | pJ/Compute =  135.475 | L5[WIO] Q13 M2 C8 - L4[IO] P13 C2 M4X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M8 
[  5] Utilization = 0

[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    6.789
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 220
  Factorization options along problem dimension M = 210
  Factorization options along 

[  0] Utilization = 0.10 | pJ/Compute =  137.589 | L5[WIO] Q13 C8 - L4[IO] P13 C8 M2X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C8 - L0[O] M4 
[  3] Utilization = 0.09 | pJ/Compute =   46.755 | L5[WIO] C512 - L4[IO] M4 P13 Q13X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  4] Utilization = 0.25 | pJ/Compute =   35.837 | L5[WIO] M8 C128 - L4[IO] P13 Q13X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C4 - L0[O] M2 
[  1] Utilization = 0.31 | pJ/Compute =   37.285 | L5[WIO] M8 C4 - L4[IO] P13 C32 Q13X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C4 - L0[O] M2 
[  2] Utilization = 0.62 | pJ/Compute =   37.286 | L5[WIO] M8 C4 - L4[IO] P13 C32 Q13X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M2 
[  7] Utilization = 0.25 | pJ/Compute =   46.389 | L5[WIO] M8 C128 - L4[IO] P13 C2 Q13X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C2 - L0[O] M2 
[  5] Utilization = 0.01 | pJ/Compute =  140.282 | L5[WIO] Q13 M2 C2 - L4[IO] M2 P13 C128 - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  6] Utilization = 0.02 | pJ/Compute =  140.282 | L



Summary stats for best mapping found by mapper:
  Utilization = 0.62 | pJ/Compute =   10.332
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 84
  Factorization options along problem dimension M = 495
  Factorization options along p

[  1] Utilization = 0.10 | pJ/Compute =  137.909 | L5[WIO] Q13 - L4[IO] M8 P13 C8 M4X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  7] Utilization = 0.01 | pJ/Compute =  146.775 | L5[WIO] Q13 M2 - L4[IO] M64 P13 C64 - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  5] Utilization = 0.19 | pJ/Compute =  137.960 | L5[WIO] Q13 C2 - L4[IO] P13 C4 M8X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  0] Utilization = 0.38 | pJ/Compute =  137.964 | L5[WIO] Q13 C2 - L4[IO] P13 C2 M8X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  4] Utilization = 0.19 | pJ/Compute =  137.960 | L5[WIO] Q13 C2 - L4[IO] P13 C32 M8X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  2] Utilization = 0.10 | pJ/Compute =   38.279 | L5[WIO] Q13 M4 C32 - L4[IO] M2 P13 M2X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C2 - L0[O] M2 
[  6] Utilization = 0.38 | pJ/Compute =  137.964 | L5[WIO] Q13 C2 - L4[IO] P13 C4 M8X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  3] Utilization = 0.19 | pJ/Compute =  142



Summary stats for best mapping found by mapper:
  Utilization = 0.62 | pJ/Compute =   10.911
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 84
  Factorization options along problem dimension M = 495
  Factorization options along p

[  0] Utilization = 0.02 | pJ/Compute =   22.515 | L5[WIO] Q13 M8 C64 - L4[IO] M8 P13 - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  7] Utilization = 0.04 | pJ/Compute =  144.122 | L5[WIO] Q13 M2 - L4[IO] M64 P13 C64 - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  4] Utilization = 0.57 | pJ/Compute =  134.786 | L5[WIO] Q13 C2 - L4[IO] P13 C32 M8X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M8 
[  1] Utilization = 0.29 | pJ/Compute =  135.120 | L5[WIO] Q13 - L4[IO] M8 P13 C8 M4X - L3[] Q1 M2Y C2Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M4 
[  0] Utilization = 0.07 | pJ/Compute =   19.635 | L5[WIO] Q13 C64 - L4[IO] M16 P13 - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M4 
[  5] Utilization = 0.29 | pJ/Compute =  135.475 | L5[WIO] Q13 M2 C8 - L4[IO] P13 C4 M4X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C2 - L0[O] M8 
[  2] Utilization = 0.29 | pJ/Compute =  135.475 | L5[WIO] Q13 M2 C8 - L4[IO] P13 C2 M4X - L3[] Q1 M4Y S3Y - L2[I] Q1 - L1[W] R3 C4 - L0[O] M8 
[  3] Utilization =

[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 0.93 | pJ/Compute =    6.789
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 220
  Factorization options along problem dimension M = 1225
  Factorization options along

[  2] Utilization = 0.06 | pJ/Compute =  134.998 | L5[WIO] Q13 M20 - L4[IO] P13 C512 M5X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M5 
[  0] Utilization = 0.01 | pJ/Compute =  168.107 | L5[WIO] Q13 M100 C4 - L4[IO] M10 P13 C64 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  5] Utilization = 0.01 | pJ/Compute =  168.106 | L5[WIO] Q13 M100 C4 - L4[IO] M10 P13 C32 - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  1] Utilization = 0.01 | pJ/Compute =  168.106 | L5[WIO] Q13 M100 C4 - L4[IO] M10 P13 C16 - L3[] Q1 - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  6] Utilization = 0.01 | pJ/Compute =  168.107 | L5[WIO] Q13 M100 C4 - L4[IO] M10 P13 C32 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  4] Utilization = 0.45 | pJ/Compute =   28.080 | L5[WIO] M50 C8 - L4[IO] M4 P13 C4 Q13X - L3[] Q1 M5Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  3] Utilization = 0.02 | pJ/Compute =  168.110 | L5[WIO] Q13 M100 C4 - L4[IO] M10 P13 C16 - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  7] Utilization = 0.02 | pJ

[  3] Utilization = 0.77 | pJ/Compute =   18.023 | L5[WIO] M5 C4 - L4[IO] M8 P13 C8 Q13X - L3[] Q1 M5Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M5 
[  5] Utilization = 0.77 | pJ/Compute =   18.610 | L5[WIO] M10 C2 - L4[IO] M4 P13 C16 Q13X - L3[] Q1 M5Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M5 
[  5] Utilization = 0.77 | pJ/Compute =   17.778 | L5[WIO] M4 C8 - L4[IO] M5 P13 C8 Q13X - L3[] Q1 M5Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M10 
[  4] Utilization = 0.77 | pJ/Compute =   18.659 | L5[WIO] M10 C8 - L4[IO] M2 P13 C4 Q13X - L3[] Q1 M5Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M10 
[  7] Utilization = 0.77 | pJ/Compute =   17.877 | L5[WIO] M4 C4 - L4[IO] M10 P13 C8 Q13X - L3[] Q1 M5Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M5 
[  4] Utilization = 0.77 | pJ/Compute =   17.877 | L5[WIO] M4 C4 - L4[IO] M10 P13 C64 Q13X - L3[] Q1 M5Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M5 
[  2] Utilization = 0.77 | pJ/Compute =   17.482 | L5[WIO] M4 C4 - L4[IO] M5 P13 C32 Q13X - L3[] Q1 M5Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M10 
[  



Summary stats for best mapping found by mapper:
  Utilization = 0.77 | pJ/Compute =    9.245


In [7]:
squeeze_total_compute = 0
squeeze_total_cycle = 0
squeeze_total_energy = 0

for stat in squeeze_stat:
    compute, cycle, energy, energy_per_compute = parse_stat(stat)
    squeeze_total_compute += compute
    squeeze_total_cycle += cycle
    squeeze_total_energy += energy

print(f"Squeeze Net Total Compute:", squeeze_total_compute)
print(f"Squeeze Net Total Cycle:", squeeze_total_cycle)
print(f"Squeeze Net Total Energy:", squeeze_total_energy)
print(f"Squeeze Net Energy / Computer pJ / Compute", squeeze_total_energy / squeeze_total_compute * 10 ** (12))

Squeeze Net Total Compute: 349151936
Squeeze Net Total Cycle: 2626188
Squeeze Net Total Energy: 0.00307392
Squeeze Net Energy / Computer pJ / Compute 8.803960920898344


In [11]:
import json
with open("eyeriss-squeezenet.json", "w") as f:
    json.dump({"stats": squeeze_stat}, f)

### Mobile Nets

In [24]:
mobile_layers = ConfigRegistry.MOBILE_NET_LAYERS
mobile_stat = []
for i in range(len(mobile_layers)):
    layer_stats, layer_mapping = run_timeloop_mapper(
        ConfigRegistry.EYERISS_ARCH, ConfigRegistry.EYERISS_COMPONENTS_DIR,
        ConfigRegistry.EYERISS_ARCH_CONSTRAINTS,
        ConfigRegistry.EYERISS_MAP_CONSTRAINTS,
        mobile_layers[i+1], ConfigRegistry.DEFAULT_MAPPER_SETTING
    )
    mobile_stat.append(layer_stats)

input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 4
  Factorization options along problem dimension M = 126
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization opt

[  0] Utilization = 0.04 | pJ/Compute =   20.236 | L5[WIO] Q112 - L4[IO] M16 P112 - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] Q1 
[  2] Utilization = 0.04 | pJ/Compute =  151.805 | L5[WIO] Q112 - L4[IO] M16 P112 C3 - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  3] Utilization = 0.04 | pJ/Compute =   24.073 | L5[WIO] Q112 C3 - L4[IO] M16 P112 - L3[] Q1 M2Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  1] Utilization = 0.21 | pJ/Compute =   14.586 | L5[WIO] Q28 - L4[IO] M4 P112 Q4X - L3[] Q1 C3Y S3Y - L2[I] Q1 - L1[W] R3 - L0[O] M8 
[  7] Utilization = 0.04 | pJ/Compute =   24.073 | L5[WIO] Q112 C3 - L4[IO] M16 P112 M2X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  6] Utilization = 0.04 | pJ/Compute =  151.805 | L5[WIO] Q112 - L4[IO] M16 P112 C3 M2X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 - L0[O] Q1 
[  4] Utilization = 0.04 | pJ/Compute =   20.236 | L5[WIO] Q112 - L4[IO] M16 P112 M2X - L3[] Q1 S3Y - L2[I] Q1 - L1[W] R3 C3 - L0[O] Q1 
[  5] Utilization = 0.11 | pJ/Compute =   



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   13.153
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:03:56,313 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:03:56,314 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 56
  Factorization options along problem dimension M = 70
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 10
Mapspace Dimension [IndexFactorization] Size: 39200
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 4900 Residue: 0
Mapspace construction

[  6] Utilization = 0.05 | pJ/Compute =  151.013 | L5[WIO] Q112 - L4[IO] M2 P112 C32 M2X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  2] Utilization = 0.23 | pJ/Compute =   39.003 | L5[WIO] Q16 M2 C8 - L4[IO] P112 Q7X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  3] Utilization = 0.28 | pJ/Compute =   30.005 | L5[WIO] Q28 C8 - L4[IO] M2 P112 M2X Q4X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  5] Utilization = 0.23 | pJ/Compute =   58.697 | L5[WIO] Q16 M2 C8 - L4[IO] P112 C2 Q7X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  1] Utilization = 0.33 | pJ/Compute =   53.998 | L5[WIO] Q16 M2 C4 - L4[IO] P112 C8 Q7X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  0] Utilization = 0.38 | pJ/Compute =   58.561 | L5[WIO] Q28 C4 - L4[IO] M2 P112 C4 M2X Q4X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  4] Utilization = 0.05 | pJ/Compute =  159.373 | L5[WIO] Q56 M8 C2 - L4[IO] P112 C2 M2X Q2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  7] Utilization = 0

[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 0.67 | pJ/Compute =   20.073
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 35
  Factorization options along problem dimension M = 630
  Factorization options along p

[  0] Utilization = 0.02 | pJ/Compute =  149.942 | L5[WIO] Q112 C2 - L4[IO] M8 P112 C2 - L3[] Q1 M3Y - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  3] Utilization = 0.02 | pJ/Compute =  149.942 | L5[WIO] Q112 C2 - L4[IO] M8 P112 C8 - L3[] Q1 M3Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  4] Utilization = 0.02 | pJ/Compute =  149.942 | L5[WIO] Q112 C2 - L4[IO] M8 P112 C4 - L3[] Q1 M3Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  6] Utilization = 0.02 | pJ/Compute =   35.610 | L5[WIO] Q112 C8 - L4[IO] M8 P112 - L3[] Q1 M3Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  7] Utilization = 0.04 | pJ/Compute =   35.720 | L5[WIO] Q112 C8 - L4[IO] M8 P112 - L3[] Q1 M3Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  5] Utilization = 0.04 | pJ/Compute =  149.958 | L5[WIO] Q112 C2 - L4[IO] M8 P112 C4 - L3[] Q1 M3Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  2] Utilization = 0.07 | pJ/Compute =  149.989 | L5[WIO] Q112 C2 - L4[IO] M8 P112 C2 - L3[] Q1 M3Y C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  1] Utilization = 0.04 | pJ/Compute =  149.958 

[  5] Utilization = 0.57 | pJ/Compute =   18.164 | L5[WIO] Q14 M4 - L4[IO] M2 P112 Q8X - L3[] Q1 M3Y C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  5] Utilization = 0.67 | pJ/Compute =   38.710 | L5[WIO] Q16 M6 - L4[IO] M2 P112 C4 M2X Q7X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  5] Utilization = 0.67 | pJ/Compute =   36.700 | L5[WIO] Q16 M2 - L4[IO] P112 C4 M2X Q7X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M6 
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 0.67 | pJ/Compute =   17.373 | L5[WIO] Q16 M2 - L4[IO] M2 P112 M2X Q7X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C2 - L0[O] M12 
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrad



Summary stats for best mapping found by mapper:
  Utilization = 0.70 | pJ/Compute =   17.412
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:04:05,960 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:04:05,960 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 224
  Factorization options along problem dimension M = 175
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 8
Mapspace Dimension [IndexFactorization] Size: 313600
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 39200 Residue: 0
Mapspace construct

[  0] Utilization = 0.04 | pJ/Compute =  142.743 | L5[WIO] Q56 - L4[IO] P56 C32 M2X - L3[] Q1 C3Y - L2[I] Q1 - L1[W] Q1 - L0[O] M12 
[  4] Utilization = 0.05 | pJ/Compute =  142.743 | L5[WIO] Q56 - L4[IO] P56 C4 M2X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C6 - L0[O] M12 
[  2] Utilization = 0.02 | pJ/Compute =  142.743 | L5[WIO] Q56 - L4[IO] P56 C4 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C12 - L0[O] M12 
[  6] Utilization = 0.07 | pJ/Compute =  142.743 | L5[WIO] Q56 - L4[IO] P56 C4 M2X - L3[] Q1 C6Y - L2[I] Q1 - L1[W] C4 - L0[O] M12 
[  1] Utilization = 0.29 | pJ/Compute =   36.656 | L5[WIO] Q7 C16 - L4[IO] P56 C3 Q8X - L3[] Q1 M6Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  3] Utilization = 0.04 | pJ/Compute =  142.743 | L5[WIO] Q56 - L4[IO] P56 C4 M2X - L3[] Q1 C3Y - L2[I] Q1 - L1[W] C8 - L0[O] M12 
[  4] Utilization = 0.29 | pJ/Compute =   16.252 | L5[WIO] Q7 C8 - L4[IO] P56 Q8X - L3[] Q1 M6Y - L2[I] Q1 - L1[W] C12 - L0[O] M4 
[  7] Utilization = 0.10 | pJ/Compute =  142.743 | L5[WIO] Q56 - L4[IO] P

[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   13.937
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 80
  Factorization options along problem dimension M = 1050
  Factorization options along 

[  6] Utilization = 0.05 | pJ/Compute =   28.178 | L5[WIO] Q28 M4 C8 - L4[IO] P56 Q2X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C3 - L0[O] M9 
[  3] Utilization = 0.05 | pJ/Compute =   84.761 | L5[WIO] Q28 M4 C3 - L4[IO] P56 C8 Q2X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M9 
[  7] Utilization = 0.14 | pJ/Compute =   28.325 | L5[WIO] Q28 M4 C8 - L4[IO] P56 Q2X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] Q1 - L0[O] M9 
[  2] Utilization = 0.02 | pJ/Compute =  151.533 | L5[WIO] Q56 M4 - L4[IO] M18 P56 C3 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  4] Utilization = 0.05 | pJ/Compute =   84.761 | L5[WIO] Q28 M4 C3 - L4[IO] P56 C4 Q2X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C2 - L0[O] M9 
[  1] Utilization = 0.10 | pJ/Compute =   84.782 | L5[WIO] Q28 M4 C3 - L4[IO] P56 C2 Q2X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M9 
[  0] Utilization = 0.05 | pJ/Compute =   84.761 | L5[WIO] Q28 M4 C3 - L4[IO] P56 C2 Q2X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C4 - L0[O] M9 
[  5] Utilization = 0.10 | pJ/Compute 

[  3] Utilization = 0.86 | pJ/Compute =   46.430 | L5[WIO] Q14 M3 - L4[IO] P56 C2 M3X Q4X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 0.86 | pJ/Compute =   14.723 | L5[WIO] Q28 M8 - L4[IO] P56 M6X Q2X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C2 - L0[O] M3 
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 0.86 | pJ/Compute =   14.113 | L5[WIO] Q14 - L4[IO] M8 P56 M3X Q4X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C2 - L0[O] M6 
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 1.00 | pJ/Compute =   



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   13.353
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:04:17,252 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:04:17,253 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 350
  Factorization options along problem dimension M = 175
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 8
Mapspace Dimension [IndexFactorization] Size: 490000
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 61250 Residue: 0
Mapspace construct

[  3] Utilization = 0.01 | pJ/Compute =  142.168 | L5[WIO] Q56 - L4[IO] P56 C36 M2X - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] M12 
[  1] Utilization = 0.11 | pJ/Compute =  142.168 | L5[WIO] Q56 - L4[IO] P56 C4 M2X - L3[] Q1 C9Y - L2[I] Q1 - L1[W] C4 - L0[O] M12 
[  5] Utilization = 0.05 | pJ/Compute =  142.168 | L5[WIO] Q56 - L4[IO] P56 C36 M2X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M12 
[  4] Utilization = 0.02 | pJ/Compute =  142.168 | L5[WIO] Q56 - L4[IO] P56 C36 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M12 
[  2] Utilization = 0.14 | pJ/Compute =  142.168 | L5[WIO] Q56 - L4[IO] P56 C4 M2X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C3 - L0[O] M12 
[  7] Utilization = 0.02 | pJ/Compute =  142.168 | L5[WIO] Q56 - L4[IO] P56 C6 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C12 - L0[O] M12 
[  0] Utilization = 0.55 | pJ/Compute =   34.675 | L5[WIO] Q7 C18 - L4[IO] P56 C2 Q8X - L3[] Q1 M6Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  6] Utilization = 0.57 | pJ/Compute =   15.038 | L5[WIO] Q7 C9 - L4

[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   23.854
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 80
  Factorization options along problem dimension M = 1050
  Factorization options along 

[  6] Utilization = 0.05 | pJ/Compute =   28.178 | L5[WIO] Q28 M4 C8 - L4[IO] P56 Q2X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C3 - L0[O] M9 
[  0] Utilization = 0.05 | pJ/Compute =   84.761 | L5[WIO] Q28 M4 C3 - L4[IO] P56 C2 Q2X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C4 - L0[O] M9 
[  3] Utilization = 0.05 | pJ/Compute =   84.761 | L5[WIO] Q28 M4 C3 - L4[IO] P56 C8 Q2X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M9 
[  7] Utilization = 0.14 | pJ/Compute =   28.325 | L5[WIO] Q28 M4 C8 - L4[IO] P56 Q2X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] Q1 - L0[O] M9 
[  4] Utilization = 0.05 | pJ/Compute =   84.761 | L5[WIO] Q28 M4 C3 - L4[IO] P56 C4 Q2X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C2 - L0[O] M9 
[  2] Utilization = 0.02 | pJ/Compute =  151.533 | L5[WIO] Q56 M4 - L4[IO] M18 P56 C3 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  1] Utilization = 0.10 | pJ/Compute =   84.782 | L5[WIO] Q28 M4 C3 - L4[IO] P56 C2 Q2X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M9 
[  5] Utilization = 0.10 | pJ/Compute 

[  3] Utilization = 0.86 | pJ/Compute =   46.430 | L5[WIO] Q14 M3 - L4[IO] P56 C2 M3X Q4X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 0.86 | pJ/Compute =   14.723 | L5[WIO] Q28 M8 - L4[IO] P56 M6X Q2X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C2 - L0[O] M3 
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 0.86 | pJ/Compute =   14.113 | L5[WIO] Q14 - L4[IO] M8 P56 M3X Q4X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C2 - L0[O] M6 
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 1.00 | pJ/Compute =   



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   13.353
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:04:28,944 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:04:28,945 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 350
  Factorization options along problem dimension M = 126
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 6
Mapspace Dimension [IndexFactorization] Size: 264600
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 33075 Residue: 0
Mapspace construct

[  4] Utilization = 0.01 | pJ/Compute =  149.873 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C8 M2X - L3[] Q1 - L2[I] Q1 - L1[W] C6 - L0[O] Q1 
[  5] Utilization = 0.02 | pJ/Compute =  149.877 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C8 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C3 - L0[O] Q1 
[  2] Utilization = 0.01 | pJ/Compute =  149.873 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C24 M2X - L3[] Q1 - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  0] Utilization = 0.05 | pJ/Compute =  149.884 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C6 M2X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  3] Utilization = 0.02 | pJ/Compute =  149.877 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C24 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  6] Utilization = 0.04 | pJ/Compute =  149.880 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C8 M2X - L3[] Q1 C3Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  7] Utilization = 0.07 | pJ/Compute =  149.891 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C8 M2X - L3[] Q1 C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  1] Utilization = 0.10 | pJ/Compute = 

[  6] Utilization = 0.98 | pJ/Compute =   23.530 | L5[WIO] Q2 C6 - L4[IO] M2 P28 C4 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  3] Utilization = 1.00 | pJ/Compute =   21.863 | L5[WIO] Q2 C2 - L4[IO] P28 C6 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  0] Utilization = 0.78 | pJ/Compute =   33.681 | L5[WIO] Q4 C12 - L4[IO] P28 C2 M2X Q7X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  7] Utilization = 1.00 | pJ/Compute =   22.163 | L5[WIO] Q2 M2 - L4[IO] P28 C12 Q14X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] Utilization = 1.00 | pJ/Compute =   12.120 | L5[WIO] Q4 C3 - L4[IO] P28 



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   11.818
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 56
  Factorization options along problem dimension M = 1050
  Factorization options along 

[  1] Utilization = 0.38 | pJ/Compute =   14.430 | L5[WIO] Q14 C2 - L4[IO] M4 P28 M4X Q2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M6 
[  3] Utilization = 0.10 | pJ/Compute =   77.649 | L5[WIO] Q14 C2 - L4[IO] M4 P28 C2 M4X Q2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C8 - L0[O] M6 
[  4] Utilization = 0.19 | pJ/Compute =   77.657 | L5[WIO] Q14 C2 - L4[IO] M4 P28 C2 M4X Q2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M6 
[  6] Utilization = 0.16 | pJ/Compute =   33.366 | L5[WIO] Q7 M3 C16 - L4[IO] M8 P28 M2X Q4X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  5] Utilization = 0.38 | pJ/Compute =   77.673 | L5[WIO] Q14 C2 - L4[IO] M4 P28 C2 M4X Q2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M6 
[  0] Utilization = 0.10 | pJ/Compute =   49.188 | L5[WIO] Q7 M3 C2 - L4[IO] M8 P28 C4 M2X Q4X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C4 - L0[O] M2 
[  7] Utilization = 0.10 | pJ/Compute =   77.649 | L5[WIO] Q14 C2 - L4[IO] M4 P28 C16 M4X Q2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M

[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] Utilization = 1.00 | pJ/Compute =   21.994 | L5[WIO] Q2 M4 - L4[IO] M2 P28 C4 Q14X - L3[] Q1 M3Y C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   12.451
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:04:41,266 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:04:41,267 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 336
  Factorization options along problem dimension M = 126
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 6
Mapspace Dimension [IndexFactorization] Size: 254016
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 31752 Residue: 0
Mapspace construct

[  0] Utilization = 0.01 | pJ/Compute =  149.455 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C16 M2X - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  3] Utilization = 0.01 | pJ/Compute =   36.441 | L5[WIO] Q28 C64 - L4[IO] M16 P28 M2X - L3[] Q1 - L2[I] Q1 - L1[W] C3 - L0[O] Q1 
[  4] Utilization = 0.04 | pJ/Compute =   36.605 | L5[WIO] Q28 C64 - L4[IO] M16 P28 M2X - L3[] Q1 C3Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  1] Utilization = 0.02 | pJ/Compute =  149.458 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C16 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  5] Utilization = 0.01 | pJ/Compute =  161.398 | L5[WIO] Q28 C64 - L4[IO] M16 P28 C3 M2X - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  2] Utilization = 0.05 | pJ/Compute =  149.463 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C16 M2X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  6] Utilization = 0.19 | pJ/Compute =   51.122 | L5[WIO] Q7 M2 C4 - L4[IO] M2 P28 C6 M2X Q4X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  7] Utilization = 0.38 | pJ/Compute =  

[  2] Utilization = 0.67 | pJ/Compute =   28.065 | L5[WIO] Q2 M2 C3 - L4[IO] M2 P28 C16 Q14X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  1] Utilization = 0.76 | pJ/Compute =   32.280 | L5[WIO] Q4 - L4[IO] M4 P28 C8 M2X Q7X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] C4 - L0[O] M2 
[  2] Utilization = 0.67 | pJ/Compute =   22.905 | L5[WIO] Q2 C6 - L4[IO] M2 P28 C8 Q14X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  5] Utilization = 0.67 | pJ/Compute =   22.905 | L5[WIO] Q2 C6 - L4[IO] M2 P28 C16 Q14X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  2] Utilization = 0.67 | pJ/Compute =   22.175 | L5[WIO] Q2 C2 - L4[IO] M2 P28 C48 Q14X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C2 - L0[O] M2 
[  4] Utilization = 0.80 | pJ/Compute =   33.460 | L5[WIO] Q4 C16 - L4[IO] P28 C4 M2X Q7X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  5] Utilization = 0.67 | pJ/Compute =   21.508 | L5[WIO] Q2 C2 - L4[IO] P28 C6 Q14X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  0] Utiliza



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   22.515
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 56
  Factorization options along problem dimension M = 1050
  Factorization options along 

[  7] Utilization = 0.10 | pJ/Compute =   77.649 | L5[WIO] Q14 C2 - L4[IO] M4 P28 C16 M4X Q2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M6 
[  3] Utilization = 0.10 | pJ/Compute =   77.649 | L5[WIO] Q14 C2 - L4[IO] M4 P28 C2 M4X Q2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C8 - L0[O] M6 
[  4] Utilization = 0.19 | pJ/Compute =   77.657 | L5[WIO] Q14 C2 - L4[IO] M4 P28 C2 M4X Q2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M6 
[  1] Utilization = 0.38 | pJ/Compute =   14.430 | L5[WIO] Q14 C2 - L4[IO] M4 P28 M4X Q2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M6 
[  5] Utilization = 0.38 | pJ/Compute =   77.673 | L5[WIO] Q14 C2 - L4[IO] M4 P28 C2 M4X Q2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M6 
[  6] Utilization = 0.16 | pJ/Compute =   33.366 | L5[WIO] Q7 M3 C16 - L4[IO] M8 P28 M2X Q4X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  0] Utilization = 0.10 | pJ/Compute =   49.188 | L5[WIO] Q7 M3 C2 - L4[IO] M8 P28 C4 M2X Q4X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C4 - L0[O] M

[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   12.451
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:04:52,689 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:04:52,690 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 336
  Factorization options along problem dimension M = 126
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 6
Mapspace Dimension [IndexFactorization] Size: 254016
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 31752 Residue: 0
Mapspace construct

[  3] Utilization = 0.01 | pJ/Compute =   36.441 | L5[WIO] Q28 C64 - L4[IO] M16 P28 M2X - L3[] Q1 - L2[I] Q1 - L1[W] C3 - L0[O] Q1 
[  4] Utilization = 0.04 | pJ/Compute =   36.605 | L5[WIO] Q28 C64 - L4[IO] M16 P28 M2X - L3[] Q1 C3Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  5] Utilization = 0.01 | pJ/Compute =  161.398 | L5[WIO] Q28 C64 - L4[IO] M16 P28 C3 M2X - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  0] Utilization = 0.01 | pJ/Compute =  149.455 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C16 M2X - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  1] Utilization = 0.02 | pJ/Compute =  149.458 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C16 M2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  2] Utilization = 0.05 | pJ/Compute =  149.463 | L5[WIO] Q28 C3 - L4[IO] M16 P28 C16 M2X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  3] Utilization = 0.05 | pJ/Compute =   51.111 | L5[WIO] Q7 M2 C4 - L4[IO] M2 P28 C48 M2X Q4X - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  6] Utilization = 0.19 | pJ/Compute =   51

[  7] Utilization = 1.00 | pJ/Compute =   27.103 | L5[WIO] Q2 M2 C4 - L4[IO] P28 C4 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  2] Utilization = 0.67 | pJ/Compute =   28.065 | L5[WIO] Q2 M2 C3 - L4[IO] M2 P28 C16 Q14X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  1] Utilization = 0.76 | pJ/Compute =   32.280 | L5[WIO] Q4 - L4[IO] M4 P28 C8 M2X Q7X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] C4 - L0[O] M2 
[  5] Utilization = 0.67 | pJ/Compute =   22.905 | L5[WIO] Q2 C6 - L4[IO] M2 P28 C16 Q14X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  2] Utilization = 0.67 | pJ/Compute =   22.905 | L5[WIO] Q2 C6 - L4[IO] M2 P28 C8 Q14X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  5] Utilization = 0.67 | pJ/Compute =   21.508 | L5[WIO] Q2 C2 - L4[IO] P28 C6 Q14X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  2] Utilization = 0.67 | pJ/Compute =   22.175 | L5[WIO] Q2 C2 - L4[IO] M2 P28 C48 Q14X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C2 - L0[O] M2 
[  4] Utilizat



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   22.515
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 56
  Factorization options along problem dimension M = 1050
  Factorization options along 

[  1] Utilization = 0.38 | pJ/Compute =   14.430 | L5[WIO] Q14 C2 - L4[IO] M4 P28 M4X Q2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M6 
[  6] Utilization = 0.16 | pJ/Compute =   33.366 | L5[WIO] Q7 M3 C16 - L4[IO] M8 P28 M2X Q4X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  7] Utilization = 0.10 | pJ/Compute =   77.649 | L5[WIO] Q14 C2 - L4[IO] M4 P28 C16 M4X Q2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M6 
[  3] Utilization = 0.10 | pJ/Compute =   77.649 | L5[WIO] Q14 C2 - L4[IO] M4 P28 C2 M4X Q2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C8 - L0[O] M6 
[  4] Utilization = 0.19 | pJ/Compute =   77.657 | L5[WIO] Q14 C2 - L4[IO] M4 P28 C2 M4X Q2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M6 
[  5] Utilization = 0.38 | pJ/Compute =   77.673 | L5[WIO] Q14 C2 - L4[IO] M4 P28 C2 M4X Q2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M6 
[  2] Utilization = 0.38 | pJ/Compute =   49.212 | L5[WIO] Q7 M3 C2 - L4[IO] M8 P28 C4 M2X Q4X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] Q1 - L0[

[  0] Utilization = 1.00 | pJ/Compute =   21.994 | L5[WIO] Q2 M4 - L4[IO] M2 P28 C4 Q14X - L3[] Q1 M3Y C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   12.451
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:05:03,847 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:05:03,848 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 336
  Factorization options along problem dimension M = 210
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 4
Mapspace Dimension [IndexFactorization] Size: 282240
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 35280 Residue: 0
Mapspace construct

[  0] Utilization = 0.10 | pJ/Compute =   74.556 | L5[WIO] Q7 C6 - L4[IO] P14 C2 Q2X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  1] Utilization = 0.33 | pJ/Compute =   28.739 | L5[WIO] Q2 M4 - L4[IO] P14 C16 M2X Q7X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C3 - L0[O] M8 
[  0] Utilization = 0.25 | pJ/Compute =   28.739 | L5[WIO] Q2 M4 - L4[IO] P14 C16 M2X Q7X - L3[] Q1 C3Y - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  2] Utilization = 0.50 | pJ/Compute =   28.739 | L5[WIO] Q2 M4 - L4[IO] P14 C16 M2X Q7X - L3[] Q1 C6Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  5] Utilization = 0.10 | pJ/Compute =   74.556 | L5[WIO] Q7 C6 - L4[IO] P14 C4 Q2X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  3] Utilization = 1.00 | pJ/Compute =   28.739 | L5[WIO] Q2 M4 - L4[IO] P14 C16 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  7] Utilization = 0.29 | pJ/Compute =   28.898 | L5[WIO] Q14 M4 C2 - L4[IO] M4 P14 M4X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  4] Utilization = 0.67 | pJ/Com



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =    9.647
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 84
  Factorization options along problem dimension M = 1650
  Factorization options along 

[  0] Utilization = 0.09 | pJ/Compute =   48.147 | L5[WIO] Q2 M6 C64 - L4[IO] P14 Q7X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  6] Utilization = 0.50 | pJ/Compute =   27.476 | L5[WIO] Q2 - L4[IO] M2 P14 C32 M2X Q7X - L3[] Q1 M6Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  3] Utilization = 0.04 | pJ/Compute =  138.742 | L5[WIO] Q14 M8 - L4[IO] M4 P14 C64 M2X - L3[] Q1 M3Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  7] Utilization = 0.14 | pJ/Compute =  138.742 | L5[WIO] Q14 M8 - L4[IO] M4 P14 C2 M2X - L3[] Q1 M3Y C4Y - L2[I] Q1 - L1[W] C8 - L0[O] M2 
[  1] Utilization = 0.05 | pJ/Compute =   79.711 | L5[WIO] Q7 M6 C8 - L4[IO] M2 P14 C4 M4X Q2X - L3[] Q1 - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  2] Utilization = 0.10 | pJ/Compute =   79.738 | L5[WIO] Q7 M6 C8 - L4[IO] M2 P14 C4 M4X Q2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  5] Utilization = 0.35 | pJ/Compute =   37.767 | L5[WIO] Q2 M4 C16 - L4[IO] M2 P14 C4 Q7X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  7] Utilization = 1.00 



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   10.344
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:05:15,712 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:05:15,714 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 480
  Factorization options along problem dimension M = 210
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 4
Mapspace Dimension [IndexFactorization] Size: 403200
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 50400 Residue: 0
Mapspace construct

[  4] Utilization = 0.30 | pJ/Compute =   26.224 | L5[WIO] Q2 M4 C96 - L4[IO] P14 M2X Q7X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  5] Utilization = 0.08 | pJ/Compute =   44.489 | L5[WIO] Q2 M4 C96 - L4[IO] P14 C2 M2X Q7X - L3[] Q1 - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  7] Utilization = 0.02 | pJ/Compute =  148.684 | L5[WIO] Q14 M4 C2 - L4[IO] M4 P14 C192 M4X - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  2] Utilization = 0.24 | pJ/Compute =   33.980 | L5[WIO] M2 C128 - L4[IO] M2 P14 C3 Q14X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  6] Utilization = 0.17 | pJ/Compute =   44.551 | L5[WIO] Q2 M4 C96 - L4[IO] P14 C2 M2X Q7X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  7] Utilization = 0.08 | pJ/Compute =   44.489 | L5[WIO] Q2 M4 C96 - L4[IO] P14 C4 M2X Q7X - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  5] Utilization = 0.18 | pJ/Compute =  108.450 | L5[WIO] Q2 M32 C8 - L4[IO] P14 C6 M2X Q7X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  1] Utilization = 0.19 | pJ

[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] Utilization = 1.00 | pJ/Compute =   18.884 | L5[WIO] C4 - L4[IO] M2 P14 C16 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  1] Utilization = 1.00 | pJ/Compute =    9.985 | L5[WIO] Q2 C8 - L4[IO] M2 P14 M2X Q7X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  5] Utilization = 1.00 | pJ/Compute =   18.579 | L5[WIO] C4 - L4[IO] P14 C16 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =    9.985
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 84
  Factorization options along problem dimension M = 1650
  Factorization options along 

[  0] Utilization = 0.09 | pJ/Compute =   48.147 | L5[WIO] Q2 M6 C64 - L4[IO] P14 Q7X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  3] Utilization = 0.04 | pJ/Compute =  138.742 | L5[WIO] Q14 M8 - L4[IO] M4 P14 C64 M2X - L3[] Q1 M3Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  7] Utilization = 0.14 | pJ/Compute =  138.742 | L5[WIO] Q14 M8 - L4[IO] M4 P14 C2 M2X - L3[] Q1 M3Y C4Y - L2[I] Q1 - L1[W] C8 - L0[O] M2 
[  6] Utilization = 0.50 | pJ/Compute =   27.476 | L5[WIO] Q2 - L4[IO] M2 P14 C32 M2X Q7X - L3[] Q1 M6Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  1] Utilization = 0.05 | pJ/Compute =   79.711 | L5[WIO] Q7 M6 C8 - L4[IO] M2 P14 C4 M4X Q2X - L3[] Q1 - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  2] Utilization = 0.10 | pJ/Compute =   79.738 | L5[WIO] Q7 M6 C8 - L4[IO] M2 P14 C4 M4X Q2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  5] Utilization = 0.35 | pJ/Compute =   37.767 | L5[WIO] Q2 M4 C16 - L4[IO] M2 P14 C4 Q7X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  7] Utilization = 1.00 



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   10.344
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:05:27,851 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:05:27,852 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 480
  Factorization options along problem dimension M = 210
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 4
Mapspace Dimension [IndexFactorization] Size: 403200
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 50400 Residue: 0
Mapspace construct

[  0] Utilization = 0.24 | pJ/Compute =   24.018 | L5[WIO] M2 C128 - L4[IO] M2 P14 Q14X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C3 - L0[O] M2 
[  4] Utilization = 0.30 | pJ/Compute =   26.224 | L5[WIO] Q2 M4 C96 - L4[IO] P14 M2X Q7X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  6] Utilization = 0.17 | pJ/Compute =   44.551 | L5[WIO] Q2 M4 C96 - L4[IO] P14 C2 M2X Q7X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  5] Utilization = 0.08 | pJ/Compute =   44.489 | L5[WIO] Q2 M4 C96 - L4[IO] P14 C2 M2X Q7X - L3[] Q1 - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  7] Utilization = 0.02 | pJ/Compute =  148.684 | L5[WIO] Q14 M4 C2 - L4[IO] M4 P14 C192 M4X - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  2] Utilization = 0.24 | pJ/Compute =   33.980 | L5[WIO] M2 C128 - L4[IO] M2 P14 C3 Q14X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  3] Utilization = 0.29 | pJ/Compute =  148.691 | L5[WIO] Q14 M4 C2 - L4[IO] M4 P14 C2 M4X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  1] Utilization = 0.19 | pJ/

[  1] Utilization = 1.00 | pJ/Compute =   19.873 | L5[WIO] C8 - L4[IO] M4 P14 C8 Q14X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] Utilization = 1.00 | pJ/Compute =    9.985 | L5[WIO] Q2 C8 - L4[IO] M2 P14 M2X Q7X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  5] Utilization = 1.00 | pJ/Compute =   18.884 | L5[WIO] C4 - L4[IO] M2 P14 C16 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  5] Utilization = 1.00 | pJ/Compute =   18.579 | L5[WIO] C4 - L4[IO] P14 C16 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminatin



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =    9.985
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 84
  Factorization options along problem dimension M = 1650
  Factorization options along 

[  0] Utilization = 0.09 | pJ/Compute =   48.147 | L5[WIO] Q2 M6 C64 - L4[IO] P14 Q7X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  6] Utilization = 0.50 | pJ/Compute =   27.476 | L5[WIO] Q2 - L4[IO] M2 P14 C32 M2X Q7X - L3[] Q1 M6Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  3] Utilization = 0.04 | pJ/Compute =  138.742 | L5[WIO] Q14 M8 - L4[IO] M4 P14 C64 M2X - L3[] Q1 M3Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  1] Utilization = 0.05 | pJ/Compute =   79.711 | L5[WIO] Q7 M6 C8 - L4[IO] M2 P14 C4 M4X Q2X - L3[] Q1 - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  7] Utilization = 0.14 | pJ/Compute =  138.742 | L5[WIO] Q14 M8 - L4[IO] M4 P14 C2 M2X - L3[] Q1 M3Y C4Y - L2[I] Q1 - L1[W] C8 - L0[O] M2 
[  2] Utilization = 0.10 | pJ/Compute =   79.738 | L5[WIO] Q7 M6 C8 - L4[IO] M2 P14 C4 M4X Q2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  5] Utilization = 0.35 | pJ/Compute =   37.767 | L5[WIO] Q2 M4 C16 - L4[IO] M2 P14 C4 Q7X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  3] Utilization = 0.05 



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   10.344
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:05:40,318 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:05:40,319 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 480
  Factorization options along problem dimension M = 210
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 4
Mapspace Dimension [IndexFactorization] Size: 403200
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 50400 Residue: 0
Mapspace construct

[  0] Utilization = 0.24 | pJ/Compute =   24.018 | L5[WIO] M2 C128 - L4[IO] M2 P14 Q14X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C3 - L0[O] M2 
[  4] Utilization = 0.30 | pJ/Compute =   26.224 | L5[WIO] Q2 M4 C96 - L4[IO] P14 M2X Q7X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  2] Utilization = 0.24 | pJ/Compute =   33.980 | L5[WIO] M2 C128 - L4[IO] M2 P14 C3 Q14X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  5] Utilization = 0.08 | pJ/Compute =   44.489 | L5[WIO] Q2 M4 C96 - L4[IO] P14 C2 M2X Q7X - L3[] Q1 - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  7] Utilization = 0.02 | pJ/Compute =  148.684 | L5[WIO] Q14 M4 C2 - L4[IO] M4 P14 C192 M4X - L3[] Q1 - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  1] Utilization = 0.19 | pJ/Compute =  148.689 | L5[WIO] Q14 M4 C2 - L4[IO] M4 P14 C2 M4X - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C12 - L0[O] Q1 
[  6] Utilization = 0.17 | pJ/Compute =   44.551 | L5[WIO] Q2 M4 C96 - L4[IO] P14 C2 M2X Q7X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  3] Utilization = 0.29 | pJ/

[  1] Utilization = 1.00 | pJ/Compute =   19.873 | L5[WIO] C8 - L4[IO] M4 P14 C8 Q14X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] Utilization = 1.00 | pJ/Compute =   18.884 | L5[WIO] C4 - L4[IO] M2 P14 C16 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  5] Utilization = 1.00 | pJ/Compute =   18.579 | L5[WIO] C4 - L4[IO] P14 C16 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  1] Utilization = 1.00 | pJ/Compute =    9.985 | L5[WIO] Q2 C8 - L4[IO] M2 P14 M2X Q7X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminatin



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =    9.985
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 84
  Factorization options along problem dimension M = 1650
  Factorization options along 

[  0] Utilization = 0.09 | pJ/Compute =   48.147 | L5[WIO] Q2 M6 C64 - L4[IO] P14 Q7X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  3] Utilization = 0.04 | pJ/Compute =  138.742 | L5[WIO] Q14 M8 - L4[IO] M4 P14 C64 M2X - L3[] Q1 M3Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  6] Utilization = 0.50 | pJ/Compute =   27.476 | L5[WIO] Q2 - L4[IO] M2 P14 C32 M2X Q7X - L3[] Q1 M6Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  7] Utilization = 0.14 | pJ/Compute =  138.742 | L5[WIO] Q14 M8 - L4[IO] M4 P14 C2 M2X - L3[] Q1 M3Y C4Y - L2[I] Q1 - L1[W] C8 - L0[O] M2 
[  1] Utilization = 0.05 | pJ/Compute =   79.711 | L5[WIO] Q7 M6 C8 - L4[IO] M2 P14 C4 M4X Q2X - L3[] Q1 - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  2] Utilization = 0.10 | pJ/Compute =   79.738 | L5[WIO] Q7 M6 C8 - L4[IO] M2 P14 C4 M4X Q2X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  5] Utilization = 0.35 | pJ/Compute =   37.767 | L5[WIO] Q2 M4 C16 - L4[IO] M2 P14 C4 Q7X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  7] Utilization = 1.00 



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   10.344
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:05:52,454 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:05:52,456 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 480
  Factorization options along problem dimension M = 630
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 4
Mapspace Dimension [IndexFactorization] Size: 1209600
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 151200 Residue: 0
Mapspace constru

[  1] Utilization = 0.02 | pJ/Compute =   27.639 | L5[WIO] Q14 M6 C32 - L4[IO] P14 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C12 - L0[O] M4 
[  2] Utilization = 0.05 | pJ/Compute =   27.659 | L5[WIO] Q14 M6 C32 - L4[IO] P14 M2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C6 - L0[O] M4 
[  3] Utilization = 0.07 | pJ/Compute =   27.679 | L5[WIO] Q14 M6 C32 - L4[IO] P14 M2X - L3[] Q1 M2Y C3Y - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  4] Utilization = 0.10 | pJ/Compute =   27.699 | L5[WIO] Q14 M6 C32 - L4[IO] P14 M2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C3 - L0[O] M4 
[  5] Utilization = 0.14 | pJ/Compute =   27.740 | L5[WIO] Q14 M6 C32 - L4[IO] P14 M2X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  6] Utilization = 0.08 | pJ/Compute =   38.846 | L5[WIO] M6 C48 - L4[IO] M8 P14 C2 Q14X - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] M2 
[  2] Utilization = 0.08 | pJ/Compute =   28.884 | L5[WIO] M6 C48 - L4[IO] M8 P14 Q14X - L3[] Q1 - L2[I] Q1 - L1[W] C8 - L0[O] M2 
[  3] Utilization = 0.15 | pJ/Compute =   2

[  5] Utilization = 1.00 | pJ/Compute =   11.346 | L5[WIO] Q2 M2 C16 - L4[IO] P14 M2X Q7X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C8 - L0[O] M6 
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] Utilization = 1.00 | pJ/Compute =   18.147 | L5[WIO] C6 - L4[IO] M2 P14 C32 Q14X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  2] Utilization = 1.00 | pJ/Compute =   18.147 | L5[WIO] C6 - L4[IO] M2 P14 C64 Q14X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  2] Utilization = 1.00 | pJ/Compute =   17.913 | L5[WIO] C4 - L4[IO] M2 P14 C4 Q14X - L3[] Q1 M3Y C4Y - L2[I] Q1 - L1[W] C6 - L0[



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =    8.351
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 224
  Factorization options along problem dimension M = 3150
  Factorization options along

[  1] Utilization = 0.07 | pJ/Compute =   23.088 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 - L3[] Q1 C12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  0] Utilization = 0.04 | pJ/Compute =   22.978 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 - L3[] Q1 C6Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  2] Utilization = 0.01 | pJ/Compute =  143.216 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C2 - L3[] Q1 - L2[I] Q1 - L1[W] C6 - L0[O] M8 
[  6] Utilization = 0.01 | pJ/Compute =  143.216 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C3 - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  7] Utilization = 0.01 | pJ/Compute =  143.234 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C3 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  4] Utilization = 0.02 | pJ/Compute =  143.252 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C2 - L3[] Q1 C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  3] Utilization = 0.01 | pJ/Compute =  143.234 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C2 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C3 - L0[O] M8 
[  5] Utilization = 0.04 | pJ/Compute =  143.307 | L5[WIO] Q1

[  2] Utilization = 1.00 | pJ/Compute =   27.959 | L5[WIO] Q2 M16 - L4[IO] M3 P14 C8 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M6 
[  3] Utilization = 1.00 | pJ/Compute =    9.185 | L5[WIO] Q2 M2 C3 - L4[IO] M3 P14 M2X Q7X - L3[] Q1 M3Y C4Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  2] Utilization = 1.00 | pJ/Compute =    9.766 | L5[WIO] M4 C4 - L4[IO] M4 P14 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C8 - L0[O] M9 
[  0] Utilization = 1.00 | pJ/Compute =   27.959 | L5[WIO] Q2 M6 - L4[IO] M8 P14 C4 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C2 - L0[O] M6 
[  5] Utilization = 1.00 | pJ/Compute =   19.013 | L5[WIO] M4 - L4[IO] M12 P14 C8 Q14X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] C12 - L0[O] Q1 
[  0] Utilization = 1.00 | pJ/Compute =   23.179 | L5[WIO] M18 C2 - L4[IO] M2 P14 C8 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  3] Utilization = 1.00 | pJ/Compute =    8.715 | L5[WIO] Q2 M6 - L4[IO] M4 P14 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C8 - L0[O] M12 
[  1] STATEMENT:



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =    8.715
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:06:06,284 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:06:06,285 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 840
  Factorization options along problem dimension M = 630
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 4
Mapspace Dimension [IndexFactorization] Size: 2116800
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 264600 Residue: 0
Mapspace constru

[  2] Utilization = 0.02 | pJ/Compute =  145.899 | L5[WIO] Q14 M6 C18 - L4[IO] P14 C8 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  7] Utilization = 0.02 | pJ/Compute =  145.899 | L5[WIO] Q14 M6 C18 - L4[IO] P14 C32 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  1] Utilization = 0.16 | pJ/Compute =   27.268 | L5[WIO] M6 C48 - L4[IO] M8 P14 Q14X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C3 - L0[O] M2 
[  3] Utilization = 0.05 | pJ/Compute =  145.907 | L5[WIO] Q14 M6 C18 - L4[IO] P14 C8 M2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  6] Utilization = 0.05 | pJ/Compute =  145.907 | L5[WIO] Q14 M6 C18 - L4[IO] P14 C16 M2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  5] Utilization = 0.02 | pJ/Compute =  145.899 | L5[WIO] Q14 M6 C18 - L4[IO] P14 C16 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  0] Utilization = 0.10 | pJ/Compute =  145.921 | L5[WIO] Q14 M6 C18 - L4[IO] P14 C4 M2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  4] Utilization = 0.



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   14.027
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 224
  Factorization options along problem dimension M = 3150
  Factorization options along

[  0] Utilization = 0.04 | pJ/Compute =   22.978 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 - L3[] Q1 C6Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  1] Utilization = 0.07 | pJ/Compute =   23.088 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 - L3[] Q1 C12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  6] Utilization = 0.01 | pJ/Compute =  143.216 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C3 - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  2] Utilization = 0.01 | pJ/Compute =  143.216 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C2 - L3[] Q1 - L2[I] Q1 - L1[W] C6 - L0[O] M8 
[  4] Utilization = 0.02 | pJ/Compute =  143.252 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C2 - L3[] Q1 C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  7] Utilization = 0.01 | pJ/Compute =  143.234 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C3 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  5] Utilization = 0.04 | pJ/Compute =  143.307 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C2 - L3[] Q1 C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  3] Utilization = 0.01 | pJ/Compute =  143.234 | L5[WIO] Q1

[  7] Utilization = 1.00 | pJ/Compute =   10.627 | L5[WIO] Q2 M2 C4 - L4[IO] M18 P14 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  4] Utilization = 1.00 | pJ/Compute =   18.123 | L5[WIO] M12 - L4[IO] M2 P14 C96 Q14X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  2] Utilization = 1.00 | pJ/Compute =   27.959 | L5[WIO] Q2 M16 - L4[IO] M3 P14 C8 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M6 
[  3] Utilization = 1.00 | pJ/Compute =    9.185 | L5[WIO] Q2 M2 C3 - L4[IO] M3 P14 M2X Q7X - L3[] Q1 M3Y C4Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  0] Utilization = 1.00 | pJ/Compute =   27.959 | L5[WIO] Q2 M6 - L4[IO] M8 P14 C4 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C2 - L0[O] M6 
[  2] Utilization = 1.00 | pJ/Compute =    9.766 | L5[WIO] M4 C4 - L4[IO] M4 P14 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C8 - L0[O] M9 
[  5] Utilization = 1.00 | pJ/Compute =   19.013 | L5[WIO] M4 - L4[IO] M12 P14 C8 Q14X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] C12 - L0[O] Q1 
[  3] Utilization 



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =    8.715
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:06:18,634 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:06:18,635 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 840
  Factorization options along problem dimension M = 630
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 4
Mapspace Dimension [IndexFactorization] Size: 2116800
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 264600 Residue: 0
Mapspace constru

[  1] Utilization = 0.16 | pJ/Compute =   27.268 | L5[WIO] M6 C48 - L4[IO] M8 P14 Q14X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C3 - L0[O] M2 
[  7] Utilization = 0.02 | pJ/Compute =  145.899 | L5[WIO] Q14 M6 C18 - L4[IO] P14 C32 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  2] Utilization = 0.02 | pJ/Compute =  145.899 | L5[WIO] Q14 M6 C18 - L4[IO] P14 C8 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  5] Utilization = 0.02 | pJ/Compute =  145.899 | L5[WIO] Q14 M6 C18 - L4[IO] P14 C16 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  6] Utilization = 0.05 | pJ/Compute =  145.907 | L5[WIO] Q14 M6 C18 - L4[IO] P14 C16 M2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  4] Utilization = 0.10 | pJ/Compute =  145.921 | L5[WIO] Q14 M6 C18 - L4[IO] P14 C8 M2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  0] Utilization = 0.10 | pJ/Compute =  145.921 | L5[WIO] Q14 M6 C18 - L4[IO] P14 C4 M2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  3] Utilization = 0.



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   14.027
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 224
  Factorization options along problem dimension M = 3150
  Factorization options along

[  1] Utilization = 0.07 | pJ/Compute =   23.088 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 - L3[] Q1 C12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  2] Utilization = 0.01 | pJ/Compute =  143.216 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C2 - L3[] Q1 - L2[I] Q1 - L1[W] C6 - L0[O] M8 
[  0] Utilization = 0.04 | pJ/Compute =   22.978 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 - L3[] Q1 C6Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  7] Utilization = 0.01 | pJ/Compute =  143.234 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C3 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  4] Utilization = 0.02 | pJ/Compute =  143.252 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C2 - L3[] Q1 C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M8 
[  5] Utilization = 0.04 | pJ/Compute =  143.307 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C2 - L3[] Q1 C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] M8 
[  6] Utilization = 0.01 | pJ/Compute =  143.216 | L5[WIO] Q14 M9 C8 - L4[IO] M8 P14 C3 - L3[] Q1 - L2[I] Q1 - L1[W] C4 - L0[O] M8 
[  3] Utilization = 0.01 | pJ/Compute =  143.234 | L5[WIO] Q1

[  2] Utilization = 1.00 | pJ/Compute =   27.959 | L5[WIO] Q2 M16 - L4[IO] M3 P14 C8 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M6 
[  3] Utilization = 1.00 | pJ/Compute =    9.185 | L5[WIO] Q2 M2 C3 - L4[IO] M3 P14 M2X Q7X - L3[] Q1 M3Y C4Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  2] Utilization = 1.00 | pJ/Compute =    9.766 | L5[WIO] M4 C4 - L4[IO] M4 P14 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C8 - L0[O] M9 
[  0] Utilization = 1.00 | pJ/Compute =   27.959 | L5[WIO] Q2 M6 - L4[IO] M8 P14 C4 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C2 - L0[O] M6 
[  5] Utilization = 1.00 | pJ/Compute =   19.013 | L5[WIO] M4 - L4[IO] M12 P14 C8 Q14X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] C12 - L0[O] Q1 
[  0] Utilization = 1.00 | pJ/Compute =   23.179 | L5[WIO] M18 C2 - L4[IO] M2 P14 C8 Q14X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  3] Utilization = 1.00 | pJ/Compute =    8.715 | L5[WIO] Q2 M6 - L4[IO] M4 P14 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C8 - L0[O] M12 
[  1] STATEMENT:



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =    8.715
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:06:31,228 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:06:31,228 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 840
  Factorization options along problem dimension M = 630
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 2
Mapspace Dimension [IndexFactorization] Size: 1058400
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 132300 Residue: 0
Mapspace constru

[  0] Utilization = 0.04 | pJ/Compute =  136.488 | L5[WIO] Q7 M2 - L4[IO] M5 P7 C8 - L3[] Q1 C6Y - L2[I] Q1 - L1[W] C12 - L0[O] M16 
[  1] Utilization = 0.05 | pJ/Compute =  136.488 | L5[WIO] Q7 M2 - L4[IO] M5 P7 C8 - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C9 - L0[O] M16 
[  4] Utilization = 0.07 | pJ/Compute =  136.488 | L5[WIO] Q7 M2 - L4[IO] M5 P7 C8 - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C6 - L0[O] M16 
[  2] Utilization = 0.17 | pJ/Compute =   53.654 | L5[WIO] M20 C18 - L4[IO] M4 P7 C2 M2X Q7X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  6] Utilization = 0.08 | pJ/Compute =   53.647 | L5[WIO] M20 C18 - L4[IO] M4 P7 C4 M2X Q7X - L3[] Q1 - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  5] Utilization = 0.01 | pJ/Compute =  137.652 | L5[WIO] Q7 C8 - L4[IO] M16 P7 C6 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C6 - L0[O] M10 
[  3] Utilization = 0.05 | pJ/Compute =  136.488 | L5[WIO] Q7 M2 - L4[IO] M5 P7 C8 - L3[] Q1 C9Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  7] Utilization = 0.17 | pJ/Compute =   53.654 | L5[WIO] 

[  3] Utilization = 1.00 | pJ/Compute =   30.338 | L5[WIO] M4 C16 - L4[IO] M2 P7 C6 M2X Q7X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] M5 
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 1.00 | pJ/Compute =   26.551 | L5[WIO] Q1 - L4[IO] M8 P7 C6 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C8 - L0[O] M10 
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] Utilization = 1.00 | pJ/Compute =   26.305 | L5[WIO] C2 - L4[IO] M5 P7 C8 M2X Q7X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C12 - L0[O] M4 
[  3] STATEMENT: 500 suboptimal



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   11.754
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 224
  Factorization options along problem dimension M = 5250
  Factorization options along

[  7] Utilization = 0.09 | pJ/Compute =   62.234 | L5[WIO] Q7 M4 C160 - L4[IO] P7 M12X - L3[] Q1 M10Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  3] Utilization = 0.71 | pJ/Compute =  135.493 | L5[WIO] Q7 M4 - L4[IO] P7 C16 M12X - L3[] Q1 M10Y - L2[I] Q1 - L1[W] C10 - L0[O] M2 
[  5] Utilization = 0.05 | pJ/Compute =   30.456 | L5[WIO] Q7 M15 C8 - L4[IO] M8 P7 M2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C10 - L0[O] M2 
[  1] Utilization = 0.02 | pJ/Compute =   36.083 | L5[WIO] Q7 M15 C32 - L4[IO] M8 P7 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C5 - L0[O] M2 
[  6] Utilization = 0.10 | pJ/Compute =   30.478 | L5[WIO] Q7 M15 C8 - L4[IO] M8 P7 M2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C5 - L0[O] M2 
[  4] Utilization = 0.44 | pJ/Compute =   32.707 | L5[WIO] M8 C16 - L4[IO] M15 P7 C2 M2X Q7X - L3[] Q1 M2Y C5Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  1] Utilization = 0.44 | pJ/Compute =   15.674 | L5[WIO] M8 C16 - L4[IO] M15 P7 M2X Q7X - L3[] Q1 M2Y C5Y - L2[I] Q1 - L1[W] C2 - L0[O] M2 
[  2] Utilization = 0.1

[  1] Utilization = 1.00 | pJ/Compute =   26.173 | L5[WIO] M2 C2 - L4[IO] M4 P7 C40 M2X Q7X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] C2 - L0[O] M5 
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] Utilization = 1.00 | pJ/Compute =   26.195 | L5[WIO] M2 C2 - L4[IO] M5 P7 C5 M2X Q7X - L3[] Q1 M6Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] Utilization = 1.00 | pJ/Compute =   11.110 | L5[WIO] M4 C8 - L4[IO] M5 P7 M2X Q7X - L3[] Q1 M6Y C2Y - L2[I] Q1 - L1[W] C10 - L0[O] M4 
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptim



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   11.110
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:06:43,929 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:06:43,930 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 1344
  Factorization options along problem dimension M = 630
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 2
Mapspace Dimension [IndexFactorization] Size: 1693440
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 211680 Residue: 0
Mapspace constr

[  3] Utilization = 0.02 | pJ/Compute =  136.373 | L5[WIO] Q7 M2 - L4[IO] M5 P7 C60 - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  0] Utilization = 0.08 | pJ/Compute =   53.950 | L5[WIO] M20 C40 - L4[IO] M4 P7 C3 M2X Q7X - L3[] Q1 - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  2] Utilization = 0.01 | pJ/Compute =  136.373 | L5[WIO] Q7 M2 - L4[IO] M5 P7 C60 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  4] Utilization = 0.05 | pJ/Compute =  136.373 | L5[WIO] Q7 M2 - L4[IO] M5 P7 C60 - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  5] Utilization = 0.17 | pJ/Compute =   53.960 | L5[WIO] M20 C40 - L4[IO] M4 P7 C4 M2X Q7X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C3 - L0[O] Q1 
[  1] Utilization = 0.17 | pJ/Compute =   53.960 | L5[WIO] M20 C40 - L4[IO] M4 P7 C3 M2X Q7X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  6] Utilization = 0.18 | pJ/Compute =   53.970 | L5[WIO] M20 C40 - L4[IO] M4 P7 C4 M2X Q7X - L3[] Q1 C3Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  7] Utilization = 0.18 | pJ/Compute =  

[  1] Utilization = 0.87 | pJ/Compute =   31.883 | L5[WIO] M4 C80 - L4[IO] P7 C2 M2X Q7X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M5 
[  1] Utilization = 1.00 | pJ/Compute =   30.469 | L5[WIO] M5 C4 - L4[IO] M2 P7 C2 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C10 - L0[O] M8 
[  2] Utilization = 0.83 | pJ/Compute =   27.437 | L5[WIO] M2 C20 - L4[IO] P7 C6 M2X Q7X - L3[] Q1 M10Y - L2[I] Q1 - L1[W] C8 - L0[O] M4 
[  3] Utilization = 1.00 | pJ/Compute =   26.134 | L5[WIO] C2 - L4[IO] M5 P7 C10 M2X Q7X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  2] Utilization = 1.00 | pJ/Compute =   29.482 | L5[WIO] M4 C8 - L4[IO] M2 P7 C5 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C2 - L0[O] M10 
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] Utilization = 1.00 | pJ/Compute =   29.981 | L5[WIO] M4 C20 - L4[IO] M2 P7 C8 M2X Q7X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] M5 
[  1] Utilization = 1.00 | pJ/Compute =   27.040 | L5[WIO] C8 -



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   11.877
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 224
  Factorization options along problem dimension M = 5250
  Factorization options along

[  3] Utilization = 0.71 | pJ/Compute =  135.493 | L5[WIO] Q7 M4 - L4[IO] P7 C16 M12X - L3[] Q1 M10Y - L2[I] Q1 - L1[W] C10 - L0[O] M2 
[  1] Utilization = 0.02 | pJ/Compute =   36.083 | L5[WIO] Q7 M15 C32 - L4[IO] M8 P7 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C5 - L0[O] M2 
[  2] Utilization = 0.12 | pJ/Compute =   36.278 | L5[WIO] Q7 M15 C32 - L4[IO] M8 P7 M2X - L3[] Q1 M2Y C5Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  7] Utilization = 0.09 | pJ/Compute =   62.234 | L5[WIO] Q7 M4 C160 - L4[IO] P7 M12X - L3[] Q1 M10Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  6] Utilization = 0.10 | pJ/Compute =   30.478 | L5[WIO] Q7 M15 C8 - L4[IO] M8 P7 M2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C5 - L0[O] M2 
[  4] Utilization = 0.44 | pJ/Compute =   32.707 | L5[WIO] M8 C16 - L4[IO] M15 P7 C2 M2X Q7X - L3[] Q1 M2Y C5Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  0] Utilization = 0.05 | pJ/Compute =  140.819 | L5[WIO] Q7 M15 C5 - L4[IO] M8 P7 C16 M2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  5] Utilization = 0

[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] Utilization = 1.00 | pJ/Compute =   26.195 | L5[WIO] M2 C2 - L4[IO] M5 P7 C5 M2X Q7X - L3[] Q1 M6Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] Utilization = 1.00 | pJ/Compute =   11.110 | L5[WIO] M4 C8 - L4[IO] M5 P7 M2X Q7X - L3[] Q1 M6Y C2Y - L2[I] Q1 - L1[W] C10 - L0[O] M4 
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   11.110
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:06:56,141 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:06:56,142 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 1344
  Factorization options along problem dimension M = 630
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 2
Mapspace Dimension [IndexFactorization] Size: 1693440
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 211680 Residue: 0
Mapspace constr

[  3] Utilization = 0.02 | pJ/Compute =  136.373 | L5[WIO] Q7 M2 - L4[IO] M5 P7 C60 - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C4 - L0[O] M16 
[  2] Utilization = 0.01 | pJ/Compute =  136.373 | L5[WIO] Q7 M2 - L4[IO] M5 P7 C60 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M16 
[  4] Utilization = 0.05 | pJ/Compute =  136.373 | L5[WIO] Q7 M2 - L4[IO] M5 P7 C60 - L3[] Q1 C8Y - L2[I] Q1 - L1[W] C2 - L0[O] M16 
[  0] Utilization = 0.08 | pJ/Compute =   53.950 | L5[WIO] M20 C40 - L4[IO] M4 P7 C3 M2X Q7X - L3[] Q1 - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  1] Utilization = 0.17 | pJ/Compute =   53.960 | L5[WIO] M20 C40 - L4[IO] M4 P7 C3 M2X Q7X - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C4 - L0[O] Q1 
[  7] Utilization = 0.18 | pJ/Compute =   54.000 | L5[WIO] M20 C40 - L4[IO] M4 P7 C4 M2X Q7X - L3[] Q1 C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  6] Utilization = 0.18 | pJ/Compute =   53.970 | L5[WIO] M20 C40 - L4[IO] M4 P7 C4 M2X Q7X - L3[] Q1 C3Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  5] Utilization = 0.17 | pJ/Compute =  

[  1] Utilization = 0.87 | pJ/Compute =   31.883 | L5[WIO] M4 C80 - L4[IO] P7 C2 M2X Q7X - L3[] Q1 M4Y C3Y - L2[I] Q1 - L1[W] C2 - L0[O] M5 
[  1] Utilization = 1.00 | pJ/Compute =   30.469 | L5[WIO] M5 C4 - L4[IO] M2 P7 C2 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C10 - L0[O] M8 
[  2] Utilization = 0.83 | pJ/Compute =   27.437 | L5[WIO] M2 C20 - L4[IO] P7 C6 M2X Q7X - L3[] Q1 M10Y - L2[I] Q1 - L1[W] C8 - L0[O] M4 
[  3] Utilization = 1.00 | pJ/Compute =   26.134 | L5[WIO] C2 - L4[IO] M5 P7 C10 M2X Q7X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  2] Utilization = 1.00 | pJ/Compute =   29.482 | L5[WIO] M4 C8 - L4[IO] M2 P7 C5 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C2 - L0[O] M10 
[  1] Utilization = 1.00 | pJ/Compute =   29.981 | L5[WIO] M4 C20 - L4[IO] M2 P7 C8 M2X Q7X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] M5 
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  1] Utilization = 1.00 | pJ/Compute =   27.040 | L5[WIO] C8 -



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   11.877
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 224
  Factorization options along problem dimension M = 5250
  Factorization options along

[  7] Utilization = 0.09 | pJ/Compute =   62.234 | L5[WIO] Q7 M4 C160 - L4[IO] P7 M12X - L3[] Q1 M10Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  1] Utilization = 0.02 | pJ/Compute =   36.083 | L5[WIO] Q7 M15 C32 - L4[IO] M8 P7 M2X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C5 - L0[O] M2 
[  3] Utilization = 0.71 | pJ/Compute =  135.493 | L5[WIO] Q7 M4 - L4[IO] P7 C16 M12X - L3[] Q1 M10Y - L2[I] Q1 - L1[W] C10 - L0[O] M2 
[  5] Utilization = 0.05 | pJ/Compute =   30.456 | L5[WIO] Q7 M15 C8 - L4[IO] M8 P7 M2X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C10 - L0[O] M2 
[  2] Utilization = 0.12 | pJ/Compute =   36.278 | L5[WIO] Q7 M15 C32 - L4[IO] M8 P7 M2X - L3[] Q1 M2Y C5Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  6] Utilization = 0.10 | pJ/Compute =   30.478 | L5[WIO] Q7 M15 C8 - L4[IO] M8 P7 M2X - L3[] Q1 M2Y C4Y - L2[I] Q1 - L1[W] C5 - L0[O] M2 
[  4] Utilization = 0.44 | pJ/Compute =   32.707 | L5[WIO] M8 C16 - L4[IO] M15 P7 C2 M2X Q7X - L3[] Q1 M2Y C5Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  0] Utilization = 0.05

[  1] Utilization = 1.00 | pJ/Compute =   26.173 | L5[WIO] M2 C2 - L4[IO] M4 P7 C40 M2X Q7X - L3[] Q1 M12Y - L2[I] Q1 - L1[W] C2 - L0[O] M5 
[  2] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  0] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  6] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] Utilization = 1.00 | pJ/Compute =   26.195 | L5[WIO] M2 C2 - L4[IO] M5 P7 C5 M2X Q7X - L3[] Q1 M6Y C2Y - L2[I] Q1 - L1[W] C8 - L0[O] M8 
[  7] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  5] Utilization = 1.00 | pJ/Compute =   11.110 | L5[WIO] M4 C8 - L4[IO] M5 P7 M2X Q7X - L3[] Q1 M6Y C2Y - L2[I] Q1 - L1[W] C10 - L0[O] M4 
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] STATEMENT: 500 suboptim



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   11.110
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
[ERROR] 2023-05-05 16:07:08,619 - pytimeloop.app.mapper - Could not find timeloop-mapper.stats.txt
[ERROR] 2023-05-05 16:07:08,620 - pytimeloop.app.mapper - Could not find timeloop-mapp

ERROR: parsing permutation: NMCPQRS: dimension M not found in problem shape.


Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 1344
  Factorization options along problem dimension M = 1050
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension P = 1
  Factorization options along problem dimension Q = 2
Mapspace Dimension [IndexFactorization] Size: 2822400
Mapspace Dimension [LoopPermutation] Size: 1
Mapspace Dimension [Spatial] Size: 1
Mapspace Dimension [DatatypeBypass] Size: 1
Mapspace split! Per-split Mapping Dimension [IndexFactorization] Size: 352800 Residue: 0
Mapspace const

[  4] Utilization = 0.33 | pJ/Compute =   25.704 | L5[WIO] C2 - L4[IO] M10 P7 C120 Q7X - L3[] Q1 M8Y - L2[I] Q1 - L1[W] C4 - L0[O] M4 
[  0] Utilization = 0.42 | pJ/Compute =   41.189 | L5[WIO] M32 C2 - L4[IO] P7 C24 M2X Q7X - L3[] Q1 C5Y - L2[I] Q1 - L1[W] C4 - L0[O] M5 
[  3] Utilization = 0.08 | pJ/Compute =   41.188 | L5[WIO] M32 C2 - L4[IO] P7 C40 M2X Q7X - L3[] Q1 - L2[I] Q1 - L1[W] C12 - L0[O] M5 
[  5] Utilization = 0.25 | pJ/Compute =   41.189 | L5[WIO] M32 C2 - L4[IO] P7 C40 M2X Q7X - L3[] Q1 C3Y - L2[I] Q1 - L1[W] C4 - L0[O] M5 
[  6] Utilization = 0.33 | pJ/Compute =   41.189 | L5[WIO] M32 C2 - L4[IO] P7 C40 M2X Q7X - L3[] Q1 C4Y - L2[I] Q1 - L1[W] C3 - L0[O] M5 
[  7] Utilization = 0.50 | pJ/Compute =   41.189 | L5[WIO] M32 C2 - L4[IO] P7 C40 M2X Q7X - L3[] Q1 C6Y - L2[I] Q1 - L1[W] C2 - L0[O] M5 
[  1] Utilization = 0.83 | pJ/Compute =   41.190 | L5[WIO] M32 C2 - L4[IO] P7 C24 M2X Q7X - L3[] Q1 C10Y - L2[I] Q1 - L1[W] C2 - L0[O] M5 
[  2] Utilization = 0.10 | pJ/Compute =

[  0] Utilization = 1.00 | pJ/Compute =   11.042 | L5[WIO] M2 C40 - L4[IO] M8 P7 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C2 - L0[O] M10 
[  2] Utilization = 1.00 | pJ/Compute =   25.513 | L5[WIO] M2 - L4[IO] M4 P7 C40 M2X Q7X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] C4 - L0[O] M10 
[  7] Utilization = 1.00 | pJ/Compute =   28.075 | L5[WIO] M2 C40 - L4[IO] M8 P7 C2 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] Q1 - L0[O] M10 
[  5] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  4] Utilization = 1.00 | pJ/Compute =   25.675 | L5[WIO] C5 - L4[IO] M8 P7 C32 M2X Q7X - L3[] Q1 M2Y C6Y - L2[I] Q1 - L1[W] Q1 - L0[O] M10 
[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  3] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.
[  7] Utilization = 1.00 | pJ/Compute =    9.677 | L5[WIO] M2 C16 - L4[IO] M5 P7 M2X Q7X - L3[] Q1 C12Y - L2[I] Q1 - L1[W] C5 - L0[O] M16 
[  6] STATEMENT: 500 su



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =    9.677
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 336
  Factorization options along problem dimension M = 2475
  Factorization options along

[  2] Utilization = 0.17 | pJ/Compute =   28.541 | L5[WIO] M20 C2 - L4[IO] M4 P7 C32 M2X Q7X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C5 - L0[O] M4 
[  6] Utilization = 0.83 | pJ/Compute =   28.545 | L5[WIO] M20 C2 - L4[IO] M4 P7 C16 M2X Q7X - L3[] Q1 M2Y C5Y - L2[I] Q1 - L1[W] C2 - L0[O] M4 
[  5] Utilization = 0.33 | pJ/Compute =   28.542 | L5[WIO] M20 C2 - L4[IO] M4 P7 C16 M2X Q7X - L3[] Q1 M2Y C2Y - L2[I] Q1 - L1[W] C5 - L0[O] M4 
[  4] Utilization = 0.17 | pJ/Compute =   28.541 | L5[WIO] M20 C2 - L4[IO] M4 P7 C16 M2X Q7X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C10 - L0[O] M4 
[  0] Utilization = 0.10 | pJ/Compute =  136.713 | L5[WIO] Q7 M5 C2 - L4[IO] M16 P7 C20 M8X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  3] Utilization = 0.83 | pJ/Compute =   28.545 | L5[WIO] M20 C2 - L4[IO] M4 P7 C32 M2X Q7X - L3[] Q1 M2Y C5Y - L2[I] Q1 - L1[W] Q1 - L0[O] M4 
[  7] Utilization = 0.06 | pJ/Compute =  135.339 | L5[WIO] Q7 M2 C4 - L4[IO] M4 P7 C80 M5X - L3[] Q1 M2Y - L2[I] Q1 - L1[W] Q1 - L0[O] M16 
[ 



Summary stats for best mapping found by mapper:
  Utilization = 0.83 | pJ/Compute =    9.671
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension C = 660
  Factorization options along problem dimension M = 1225
  Factorization options along

[  1] Utilization = 0.05 | pJ/Compute =  155.143 | L5[WIO] M125 C8 - L4[IO] C80 M2X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C2 - L0[O] Q1 
[  7] Utilization = 0.03 | pJ/Compute =  143.772 | L5[WIO] Q1 - L4[IO] M500 C128 - L3[] Q1 C5Y - L2[I] Q1 - L1[W] C2 - L0[O] M2 
[  0] Utilization = 0.06 | pJ/Compute =  143.772 | L5[WIO] Q1 - L4[IO] M500 C128 - L3[] Q1 C10Y - L2[I] Q1 - L1[W] Q1 - L0[O] M2 
[  5] Utilization = 0.01 | pJ/Compute =  143.772 | L5[WIO] Q1 - L4[IO] M500 C128 - L3[] Q1 - L2[I] Q1 - L1[W] C10 - L0[O] M2 
[  2] Utilization = 0.10 | pJ/Compute =  155.143 | L5[WIO] M125 C8 - L4[IO] C80 M2X - L3[] Q1 M4Y C2Y - L2[I] Q1 - L1[W] Q1 - L0[O] Q1 
[  6] Utilization = 0.01 | pJ/Compute =  143.772 | L5[WIO] Q1 - L4[IO] M500 C128 - L3[] Q1 C2Y - L2[I] Q1 - L1[W] C5 - L0[O] M2 
[  3] Utilization = 0.05 | pJ/Compute =  155.143 | L5[WIO] M125 C8 - L4[IO] C20 M2X - L3[] Q1 M4Y - L2[I] Q1 - L1[W] C8 - L0[O] Q1 
[  4] Utilization = 0.30 | pJ/Compute =  137.459 | L5[WIO] M2 C80 - L4[IO] M10 C8 M10X



Summary stats for best mapping found by mapper:
  Utilization = 0.60 | pJ/Compute =  134.502


[  1] STATEMENT: 500 suboptimal mappings found since the last upgrade, terminating search.


In [26]:
mobile_total_compute = 0
mobile_total_cycle = 0
mobile_total_energy = 0

for stat in mobile_stat:
    compute, cycle, energy, energy_per_compute = parse_stat(stat)
    if compute is not None:
        mobile_total_compute += compute
        mobile_total_cycle += cycle
        mobile_total_energy += energy

print(f"Mobile Net Total Compute:", mobile_total_compute)
print(f"Mobile Net Total Cycle:", mobile_total_cycle)
print(f"Mobile Net Total Energy:", mobile_total_energy)
print(f"Mobile Net Energy / Computer pJ / Compute", mobile_total_energy / mobile_total_compute * 10 ** (12))

lines= 851
lines= 1
['']
lines= 849
lines= 852
lines= 1
['']
lines= 851
lines= 851
lines= 1
['']
lines= 851
lines= 851
lines= 1
['']
lines= 849
lines= 850
lines= 1
['']
lines= 852
lines= 850
lines= 1
['']
lines= 852
lines= 850
lines= 1
['']
lines= 850
lines= 852
lines= 1
['']
lines= 852
lines= 852
lines= 1
['']
lines= 852
lines= 852
lines= 1
['']
lines= 852
lines= 852
lines= 1
['']
lines= 850
lines= 851
lines= 1
['']
lines= 850
lines= 851
lines= 1
['']
lines= 850
lines= 851
lines= 1
['']
lines= 852
lines= 852
lines= 1
['']
lines= 852
lines= 852
lines= 1
['']
lines= 852
lines= 852
lines= 1
['']
lines= 851
lines= 851
lines= 849
Mobile Net Total Compute: 280057856
Mobile Net Total Cycle: 1763584
Mobile Net Total Energy: 0.0037043699999999994
Mobile Net Energy / Computer pJ / Compute 13.227159748020064


In [25]:
import json
with open("eyeriss-mobilenet.json", "w") as f:
    json.dump({"stats": mobile_stat}, f)

### Dense Net

In [None]:
dense_layers = ConfigRegistry.DENSE_NET_LAYERS 
dense_stat = []
for i in range(len(dense_layers)):
    layer_stats, layer_mapping = run_timeloop_mapper(
        ConfigRegistry.EYERISS_ARCH, ConfigRegistry.EYERISS_COMPONENTS_DIR,
        ConfigRegistry.EYERISS_ARCH_CONSTRAINTS,
        ConfigRegistry.EYERISS_MAP_CONSTRAINTS,
        dense_layers[i+1], ConfigRegistry.DEFAULT_MAPPER_SETTING
    )
    dense_stat.append(layer_stats)

In [None]:
dense_total_compute = 0
dense_total_cycle = 0
dense_total_energy = 0

for stat in dense_stat:
    compute, cycle, energy, energy_per_compute = parse_stat(stat)
    dense_total_compute += compute
    dense_total_cycle += cycle
    dense_total_energy += energy

print(f"Dense Net Total Compute:", dense_total_compute)
print(f"Dense Net Total Cycle:", dense_total_cycle)
print(f"Dense Net Total Energy:", dense_total_energy)
print(f"Dense Net Energy / Computer pJ / Compute", dense_total_energy / dense_total_compute * 10 ** (12))

In [None]:
import json
with open("eyeriss-densenet.json", "w") as f:
    json.dump({"stats": squeeze_stat}, f)