In [1]:
import pandas as pd
import numpy as np
from loaders import *
from energy_helpers import get_energy
import torch
import torch.nn as nn

In [4]:
class HamidaEtAl(nn.Module):
    """
    3-D CNN classifier for hyperspectral image patches.

    Reference:
        3-D Deep Learning Approach for Remote Sensing Image Classification
        Amina Ben Hamida, Alexandre Benoit, Patrick Lambert, Chokri Ben Amar
        IEEE TGRS, 2018
        https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8344565

    The network consumes tensors shaped
    ``(batch, 1, input_channels, patch_size, patch_size)`` and returns
    ``(batch, n_classes)`` scores (no softmax is applied).

    Args:
        input_channels: size of the depth axis (axis 2) of the input, i.e. the
            number of spectral bands in a patch.
        n_classes: number of output units of the final linear layer.
        patch_size: spatial side length of the input patch. ``3`` selects a
            padded first convolution; any other value uses no padding.
        dilation: dilation applied along the depth axis of every convolution
            (the two spatial axes are never dilated).
    """
    @staticmethod
    def weight_init(m):
        """Kaiming-normal weights and zero biases for Linear/Conv3d modules.

        Intended for use with ``module.apply(HamidaEtAl.weight_init)``; other
        module types are left untouched.
        """
        if isinstance(m, (nn.Linear, nn.Conv3d)):
            nn.init.kaiming_normal_(m.weight)
            # Layers created with bias=False expose ``bias`` as None.
            if m.bias is not None:
                nn.init.zeros_(m.bias)

    def __init__(self, input_channels, n_classes, patch_size=5, dilation=1):
        super(HamidaEtAl, self).__init__()
        self.patch_size = patch_size
        self.input_channels = input_channels
        # Kernel/stride/padding tuples below are ordered (depth, height, width).
        dilation = (dilation, 1, 1)

        # The first layer is a (3,3,3) kernel sized Conv characterized
        # by a stride equal to 1 and number of neurons equal to 20.
        # A 3x3 patch is padded so the spatial extent survives until conv2;
        # larger patches are left unpadded. Both variants must emit 20 feature
        # maps because pool1 expects 20 input channels.
        conv1_padding = 1 if patch_size == 3 else 0
        self.conv1 = nn.Conv3d(
            1, 20, (3, 3, 3), stride=(1, 1, 1), dilation=dilation,
            padding=conv1_padding)
        # Next, pooling is applied using a convolution with a 1D kernel
        # (3,1,1) along the depth axis and a stride of 2 in order to reduce
        # the spectral dimension.
        self.pool1 = nn.Conv3d(
            20, 20, (3, 1, 1), dilation=dilation, stride=(2, 1, 1), padding=(1, 0, 0))
        # Then, a duplicate of the first and second layers is created with
        # 35 hidden neurons per layer.
        self.conv2 = nn.Conv3d(
            20, 35, (3, 3, 3), dilation=dilation, stride=(1, 1, 1), padding=(1, 0, 0))
        self.pool2 = nn.Conv3d(
            35, 35, (3, 1, 1), dilation=dilation, stride=(2, 1, 1), padding=(1, 0, 0))
        # Finally, the depth dimension is progressively reduced
        # thanks to the use of two Conv layers, 35 neurons each,
        # with respective kernel sizes of (3,1,1) and (2,1,1) and strides
        # respectively equal to (1,1,1) and (2,1,1).
        self.conv3 = nn.Conv3d(
            35, 35, (3, 1, 1), dilation=dilation, stride=(1, 1, 1), padding=(1, 0, 0))
        self.conv4 = nn.Conv3d(
            35, 35, (2, 1, 1), dilation=dilation, stride=(2, 1, 1), padding=(1, 0, 0))

        #self.dropout = nn.Dropout(p=0.5)

        # Width of the flattened feature vector, measured with a dummy pass.
        self.features_size = self._get_final_flattened_size()
        # The architecture ends with a fully connected layer where the number
        # of neurons is equal to the number of input classes.
        self.fc = nn.Linear(self.features_size, n_classes)

        # self.apply(self.weight_init)

    def _get_final_flattened_size(self):
        """Return the per-sample feature count after the convolutional stack.

        A single all-zero sample is pushed through the convolutions (the ReLUs
        are skipped since they do not change shapes) and the sizes of all
        non-batch axes are multiplied together.
        """
        with torch.no_grad():
            x = torch.zeros((1, 1, self.input_channels,
                             self.patch_size, self.patch_size))
            x = self.pool1(self.conv1(x))
            x = self.pool2(self.conv2(x))
            x = self.conv3(x)
            x = self.conv4(x)
            _, maps, depth, height, width = x.size()
        return maps * depth * height * width

    def forward(self, x):
        """Compute class scores for a batch of patches.

        Args:
            x: tensor of shape
                ``(batch, 1, input_channels, patch_size, patch_size)``.

        Returns:
            Tensor of shape ``(batch, n_classes)``.
        """
        relu = nn.functional.relu
        x = relu(self.conv1(x))
        x = self.pool1(x)
        x = relu(self.conv2(x))
        x = self.pool2(x)
        x = relu(self.conv3(x))
        x = relu(self.conv4(x))
        # Flatten every sample to the width measured at construction time.
        x = x.view(-1, self.features_size)
        #x = self.dropout(x)
        x = self.fc(x)
        return x


In [2]:

class HamidaEtAl2d(nn.Module):
    """
    2-D convolutional counterpart of the ``HamidaEtAl`` 3-D network.

    The layer layout mirrors the 3-D model from
        3-D Deep Learning Approach for Remote Sensing Image Classification
        Amina Ben Hamida, Alexandre Benoit, Patrick Lambert, Chokri Ben Amar
        IEEE TGRS, 2018
        https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8344565
    but every ``Conv3d`` is replaced by a ``Conv2d`` and the spectral bands
    are fed in as the input channels of the first convolution.

    The network consumes tensors shaped
    ``(batch, input_channels, patch_size, patch_size)`` (a 5-D tensor with a
    singleton axis 1 is also accepted) and returns ``(batch, n_classes)``
    scores.

    Args:
        input_channels: number of input channels of the first convolution.
        n_classes: number of output units of the final linear layer.
        patch_size: spatial side length of the input patch. ``3`` selects a
            padded first convolution; any other value uses no padding.
        dilation: dilation passed unchanged to every convolution.
    """
    @staticmethod
    def weight_init(m):
        """Kaiming-normal weights and zero biases for Linear/Conv2d modules.

        Intended for use with ``module.apply(HamidaEtAl2d.weight_init)``; other
        module types are left untouched.
        """
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            nn.init.kaiming_normal_(m.weight)
            # Layers created with bias=False expose ``bias`` as None.
            if m.bias is not None:
                nn.init.zeros_(m.bias)

    def __init__(self, input_channels, n_classes, patch_size=5, dilation=1):
        super(HamidaEtAl2d, self).__init__()
        self.patch_size = patch_size
        self.input_channels = input_channels

        # The first layer is a (3,3) kernel sized Conv with stride 1 and 20
        # output maps. Its input width follows ``input_channels`` so that it
        # always matches the probe tensor built in
        # ``_get_final_flattened_size``. A 3x3 patch is padded; larger patches
        # are not.
        conv1_padding = 1 if patch_size == 3 else 0
        self.conv1 = nn.Conv2d(
            input_channels, 20, (3, 3), stride=(1, 1), dilation=dilation,
            padding=conv1_padding)
        # Next, pooling is applied using a convolution with a (3,1) kernel
        # and a stride of 2 along the first spatial axis.
        self.pool1 = nn.Conv2d(
            20, 20, (3, 1), dilation=dilation, stride=(2, 1), padding=(1, 0))
        # Then, a duplicate of the first and second layers is created with
        # 35 hidden neurons per layer.
        self.conv2 = nn.Conv2d(
            20, 35, (3, 3), dilation=dilation, stride=(1, 1), padding=(1, 0))
        self.pool2 = nn.Conv2d(
            35, 35, (3, 1), dilation=dilation, stride=(2, 1), padding=(1, 0))
        # Finally, two Conv layers, 35 neurons each, with respective kernel
        # sizes of (3,1) and (2,1) and strides respectively equal to (1,1)
        # and (2,1).
        self.conv3 = nn.Conv2d(
            35, 35, (3, 1), dilation=dilation, stride=(1, 1), padding=(1, 0))
        self.conv4 = nn.Conv2d(
            35, 35, (2, 1), dilation=dilation, stride=(2, 1), padding=(1, 0))

        #self.dropout = nn.Dropout(p=0.5)

        # Width of the flattened feature vector, measured with a dummy pass.
        self.features_size = self._get_final_flattened_size()
        # The architecture ends with a fully connected layer where the number
        # of neurons is equal to the number of input classes.
        self.fc = nn.Linear(self.features_size, n_classes)

        # self.apply(self.weight_init)

    def _get_final_flattened_size(self):
        """Return the per-sample feature count after the convolutional stack.

        A single all-zero sample is pushed through the convolutions (the ReLUs
        are skipped since they do not change shapes) and the sizes of all
        non-batch axes are multiplied together.
        """
        with torch.no_grad():
            x = torch.zeros((1, self.input_channels,
                             self.patch_size, self.patch_size))
            x = self.pool1(self.conv1(x))
            x = self.pool2(self.conv2(x))
            x = self.conv3(x)
            x = self.conv4(x)
            _, maps, height, width = x.size()
        return maps * height * width

    def forward(self, x):
        """Compute class scores for a batch of patches.

        Args:
            x: tensor of shape
                ``(batch, input_channels, patch_size, patch_size)``, or the
                5-D layout used by the 3-D model,
                ``(batch, 1, input_channels, patch_size, patch_size)``.

        Returns:
            Tensor of shape ``(batch, n_classes)``.
        """
        # Drop only the singleton axis of a 5-D input. A bare ``squeeze()``
        # would also remove the batch axis whenever the batch holds one sample.
        if x.dim() == 5:
            x = x.squeeze(1)

        relu = nn.functional.relu
        x = relu(self.conv1(x))
        x = self.pool1(x)
        x = relu(self.conv2(x))
        x = self.pool2(x)
        x = relu(self.conv3(x))
        x = relu(self.conv4(x))
        # Flatten every sample to the width measured at construction time.
        x = x.view(-1, self.features_size)
        #x = self.dropout(x)
        x = self.fc(x)
        return x


In [5]:
# Energy/MAC/parameter/cycle report for one (model, x) pair.
# Exactly one pair below is active; the rest are kept as commented-out
# alternatives that can be swapped in for debugging individual layer types.

# --- alternative: single Conv3D layer ---
# model = nn.Sequential(
#     nn.Conv3d(20, 20, (3, 1, 1), stride=(1, 1, 1))
# )
# x = torch.rand(8, 20, 220, 5, 5)

# --- alternative: single Conv2D layer ---
# model = nn.Sequential(
#     nn.Conv2d(3, 5, (2, 2), stride=(1, 1))
# )
# x = torch.rand(8, 3, 5, 5)

# --- alternative: single linear layer ---
# model = nn.Sequential(
#     nn.Linear(30, 10)
# )
# x = torch.rand(8, 30)

# --- alternative: Hamida 2D ---
# model = HamidaEtAl2d(input_channels=200, n_classes=16)
# x = torch.rand(8, 200, 5, 5)

# --- active: Hamida 3D on a batch of 8 random 220-band 5x5 patches ---
model = HamidaEtAl(n_classes=16, input_channels=220)
x = torch.rand(8, 1, 220, 5, 5)

# get_energy hands back the layer types followed by four (total, layerwise)
# pairs; how each figure is derived lives in energy_helpers, not here.
(types,
 energy, l_energy,
 mac, l_mac,
 param, l_param,
 cycle, l_cycle) = get_energy(model, x, verbose=False)

print("layer types:", types)
for label, total, layerwise in (
    ("energy:", energy, l_energy),
    ("mac:", mac, l_mac),
    ("param:", param, l_param),
    ("cycle:", cycle, l_cycle),
):
    print(label, total, f"(layerwise: {layerwise})")

input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorization options along problem dimension T = 1
  Factorization options along problem dimension P = 1
  Factorization optio

[  2] STATEMENT: search algorithm is done, terminating search.
[  4] STATEMENT: search algorithm is done, terminating search.
[  7] STATEMENT: search algorithm is done, terminating search.
[  3] STATEMENT: search algorithm is done, terminating search.
[  6] STATEMENT: search algorithm is done, terminating search.
[  5] STATEMENT: search algorithm is done, terminating search.
[  1] STATEMENT: search algorithm is done, terminating search.
[  0] Utilization = 1.00 | pJ/Compute =  157.866 | L5[WIO] M20 F3 Q3 P218 T3 - L4[] S3 R3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =   93.845 | L5[WIO] M20 F3 Q3 T3 P218 - L4[I] S3 R3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =   78.087 | L5[WIO] M20 F3 Q3 T3 P218 - L4[WI] S3 R3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =   78.038 | L5[WIO] M20 F3 T3 Q3 P218 - L4[WI] S3 R3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[ 



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   55.285
468.59 8475840 1059480
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factoriza

[  1] STATEMENT: search algorithm is done, terminating search.
[  5] STATEMENT: search algorithm is done, terminating search.
[  2] STATEMENT: search algorithm is done, terminating search.
[  7] STATEMENT: search algorithm is done, terminating search.
[  3] STATEMENT: search algorithm is done, terminating search.
[  4] STATEMENT: search algorithm is done, terminating search.
[  6] STATEMENT: search algorithm is done, terminating search.
[  0] Utilization = 1.00 | pJ/Compute =  236.634 | L5[WIO] M20 C20 F3 Q3 P109 - L4[] S3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =  220.818 | L5[WIO] M20 C20 F3 Q3 P109 - L4[W] S3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =  181.984 | L5[WIO] M20 C20 F3 P109 Q3 - L4[I] S3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =  166.169 | L5[WIO] M20 C20 F3 P109 Q3 - L4[WI] S3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utili



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =  116.458
1096.76 9417600 1177200
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factoriz

[  1] STATEMENT: search algorithm is done, terminating search.
[  2] STATEMENT: search algorithm is done, terminating search.
[  5] STATEMENT: search algorithm is done, terminating search.
[  6] STATEMENT: search algorithm is done, terminating search.
[  3] STATEMENT: search algorithm is done, terminating search.
[  4] STATEMENT: search algorithm is done, terminating search.
[  7] STATEMENT: search algorithm is done, terminating search.
[  0] Utilization = 1.00 | pJ/Compute =  162.404 | L5[WIO] M35 C20 P109 T3 - L4[] S3 R3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =  153.362 | L5[WIO] M35 P109 C20 T3 - L4[] S3 R3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =   98.779 | L5[WIO] M35 C20 T3 P109 - L4[I] S3 R3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =   83.096 | L5[WIO] M35 C20 T3 P109 - L4[WI] S3 R3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utiliz



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   58.486
963.9 16480800 2060100
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factoriza

[  1] STATEMENT: search algorithm is done, terminating search.
[  7] STATEMENT: search algorithm is done, terminating search.
[  5] STATEMENT: search algorithm is done, terminating search.
[  3] STATEMENT: search algorithm is done, terminating search.
[  2] STATEMENT: search algorithm is done, terminating search.
[  6] STATEMENT: search algorithm is done, terminating search.
[  4] STATEMENT: search algorithm is done, terminating search.
[  0] Utilization = 1.00 | pJ/Compute =  237.555 | L5[WIO] M35 C35 P55 - L4[] S3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =  222.017 | L5[WIO] M35 C35 P55 - L4[W] S3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =  154.344 | L5[WIO] M35 P55 C35 - L4[] S3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =  114.602 | L5[WIO] C35 P55 M35 - L4[I] S3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] STATEMENT: 200 suboptimal mappings 



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =  114.602
185.31 1617000 202125
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorizat

[  2] STATEMENT: search algorithm is done, terminating search.
[  4] STATEMENT: search algorithm is done, terminating search.
[  6] STATEMENT: search algorithm is done, terminating search.
[  1] STATEMENT: search algorithm is done, terminating search.
[  5] STATEMENT: search algorithm is done, terminating search.
[  3] STATEMENT: search algorithm is done, terminating search.
[  7] STATEMENT: search algorithm is done, terminating search.
[  0] Utilization = 1.00 | pJ/Compute =  237.555 | L5[WIO] M35 C35 P55 - L4[] S3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =  222.017 | L5[WIO] M35 C35 P55 - L4[W] S3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =  154.344 | L5[WIO] M35 P55 C35 - L4[] S3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =  114.602 | L5[WIO] C35 P55 M35 - L4[I] S3 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] STATEMENT: 200 suboptimal mappings 



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =  114.602
185.31 1617000 202125
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension R = 1
  Factorization options along problem dimension S = 1
  Factorizat

[  4] STATEMENT: search algorithm is done, terminating search.
[  2] STATEMENT: search algorithm is done, terminating search.
[  7] STATEMENT: search algorithm is done, terminating search.
[  6] STATEMENT: search algorithm is done, terminating search.
[  3] STATEMENT: search algorithm is done, terminating search.
[  1] STATEMENT: search algorithm is done, terminating search.
[  5] STATEMENT: search algorithm is done, terminating search.
[  0] Utilization = 1.00 | pJ/Compute =  279.770 | L5[WIO] M35 C35 P28 - L4[] S2 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =  264.517 | L5[WIO] M35 C35 P28 - L4[W] S2 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] Utilization = 1.00 | pJ/Compute =  154.953 | L5[WIO] M35 P28 C35 - L4[] S2 N8X - L3[] N1 - L2[W] N1 - L1[I] N1 - L0[O] N1 
[  0] STATEMENT: 200 suboptimal mappings found since the last upgrade, terminating search.




Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =  154.953
85.04 548800 68600
input file: tmp.yaml
  _______                __                
 /_  __(_)___ ___  ___  / /___  ____  ____ 
  / / / / __ `__ \/ _ \/ / __ \/ __ \/ __ \
 / / / / / / / / /  __/ / /_/ / /_/ / /_/ /
/_/ /_/_/ /_/ /_/\___/_/\____/\____/ .___/ 
                                  /_/      

Problem configuration complete.
execute:/usr/local/bin/accelergy tmp.yaml --oprefix timeloop-mapper. -o ./ > timeloop-mapper.accelergy.log 2>&1
Generate Accelergy ERT (energy reference table) to replace internal energy model.
Generate Accelergy ART (area reference table) to replace internal area model.
Architecture configuration complete.
Sparse optimization configuration complete.
Using threads = 8
Mapper configuration complete.
Initializing Index Factorization subspace.
  Factorization options along problem dimension N = 1
  Factorization options along problem dimension I = 5488
  Factorizat

[  1] Utilization = 1.00 | pJ/Compute =  154.304 | L5[WIO] O2 I5 - L4[] O8 I4 N8X - L3[] I49 - L2[W] O1 - L1[I] O1 - L0[O] O1 
[  7] Utilization = 1.00 | pJ/Compute =  154.304 | L5[WIO] O2 I5 - L4[] O8 I196 N8X - L3[] O1 - L2[W] O1 - L1[I] O1 - L0[O] O1 
[  1] Utilization = 1.00 | pJ/Compute =  153.275 | L5[WIO] O2 I5 - L4[O] O8 I4 N8X - L3[] I49 - L2[W] O1 - L1[I] O1 - L0[O] O1 
[  7] Utilization = 1.00 | pJ/Compute =  153.275 | L5[WIO] O2 I5 - L4[O] O8 I196 N8X - L3[] O1 - L2[W] O1 - L1[I] O1 - L0[O] O1 
[  7] Utilization = 1.00 | pJ/Compute =   44.332 | L5[WIO] O2 I5 - L4[O] I196 O8 N8X - L3[] O1 - L2[W] O1 - L1[I] O1 - L0[O] O1 
[  2] Utilization = 1.00 | pJ/Compute =  165.842 | L5[WIO] O2 I7 - L4[] O4 I7 N8X - L3[] O2 I20 - L2[W] O1 - L1[I] O1 - L0[O] O1 
[  2] Utilization = 1.00 | pJ/Compute =  102.242 | L5[WIO] O2 I7 - L4[] O4 I7 N8X - L3[I] O2 I20 - L2[W] O1 - L1[I] O1 - L0[O] O1 
[  2] Utilization = 1.00 | pJ/Compute =   91.298 | L5[WIO] O2 I7 - L4[] O4 I7 N8X - L3[IO] O2 I20 



Summary stats for best mapping found by mapper:
  Utilization = 1.00 | pJ/Compute =   34.520
4.33 125440 15680
layer types: ['conv3d', 'conv3d', 'conv3d', 'conv3d', 'conv3d', 'conv3d', 'linear']
energy: 2989.24 (layerwise: [468.59, 1096.76, 963.9, 185.31, 185.31, 85.04, 4.33])
mac: 38282480 (layerwise: [8475840, 9417600, 16480800, 1617000, 1617000, 548800, 125440])
param: 46120 (layerwise: [540, 1200, 18900, 3675, 3675, 2450, 15680])
cycle: 4785310 (layerwise: [1059480, 1177200, 2060100, 202125, 202125, 68600, 15680])
