In [27]:
import pandas as pd
import numpy as np
from loaders import *
import math
import random
from attn_masked import parse_timeloop_stats, aggregate_timeloop_runs
import os
import json

In [20]:
# Edge length used for the M, K and N dimensions of the matmul workload.
matrix_size = 2048 # CHANGE

# All six orderings of the three loop ranks, written compactly and expanded
# into lists of single-letter rank names.
dataflow_permuations = [
    list(order) for order in ("MNK", "MKN", "NKM", "NMK", "KNM", "KMN")
]
# NOTE(review): the tally below counts nine hierarchy levels, but the dicts
# produced by get_dataflow_permutations carry only seven level keys
# (GLB_Cluster and glb are commented out), so there are 6^7 = 279936
# distinct dataflow dicts.
'''
6 different dataflow permutations
9 components for dataflow
= 6^9 = 10077696 possible combinations
'''
# generate all the data flow permutations
def get_dataflow_permutations(
    loop_orders=None,
    levels=("DRAM", "ClusterArray", "PE_Cluster", "iact_spad", "weight_spad", "psum", "reg"),
):
    """Return every assignment of a loop order to each hierarchy level.

    Only the levels that actually appear as keys in the result are iterated.
    The GLB_Cluster and glb levels are disabled in this notebook; looping over
    them as well would add no new dicts, only 36 identical copies of each one
    (6^9 entries instead of 6^7). Uniform sampling with ``random.choice`` is
    unaffected by leaving those copies out.

    Args:
        loop_orders: Candidate loop orders (each a list of rank names). When
            omitted, the notebook-level ``dataflow_permuations`` list is used.
        levels: Names of the hierarchy levels; they become the dict keys, in
            this order.

    Returns:
        A list of ``len(loop_orders) ** len(levels)`` dicts mapping each level
        name to one of the ``loop_orders`` entries. The entries are shared
        references, not copies. The first level varies slowest and the last
        level varies fastest.
    """
    import itertools  # local import keeps this cell self-contained

    if loop_orders is None:
        loop_orders = dataflow_permuations

    return [
        dict(zip(levels, assignment))
        for assignment in itertools.product(loop_orders, repeat=len(levels))
    ]

# generate all the possible factor permutations for a specific rank (ie, M, N, or K)
def get_factor_permutations(size=None, iact_spad_depth=16, weight_spad_depth=192, psum_spad_depth=32):
    """Enumerate the power-of-two loop factorizations that fit the scratchpads.

    Each rank (M, N, K) is split into a DRAM factor and per-scratchpad factors.
    A combination is kept when

    * the per-rank tile sizes fit the scratchpads:
      ``M*K <= iact_spad_depth``, ``K*N <= weight_spad_depth`` and
      ``M*N <= psum_spad_depth``, and
    * for every rank, ``DRAM factor * tile size == size`` with the DRAM factor
      itself a power of two.

    The DRAM factors are fully determined by the tile sizes, so they are
    computed by division instead of being searched for in three extra nested
    loops. The result is then stable-sorted by (DRAM_M, DRAM_N, DRAM_K), which
    yields the same order as an outermost DRAM_M / DRAM_N / DRAM_K loop nest.

    Args:
        size: Positive integer edge length shared by M, N and K. Defaults to
            the notebook-level ``matrix_size``.
        iact_spad_depth: Capacity bound applied to the M x K tile.
        weight_spad_depth: Capacity bound applied to the K x N tile.
        psum_spad_depth: Capacity bound applied to the M x N tile.

    Returns:
        A list of dicts keyed ``"<level>_<rank>"``. ClusterArray, PE_Cluster
        and reg factors are always 1. The list is empty when no power-of-two
        factorization of ``size`` exists.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    if size is None:
        size = matrix_size
    if size < 1:
        raise ValueError(f"matrix size must be a positive integer, got {size}")

    # Loose per-rank upper bounds on the exponent; the capacity check below
    # does the real filtering.
    max_K = int(max(math.log2(weight_spad_depth), math.log2(iact_spad_depth)))
    max_M = int(max(math.log2(psum_spad_depth), math.log2(iact_spad_depth)))
    max_N = int(max(math.log2(weight_spad_depth), math.log2(psum_spad_depth)))

    m_factors = [2**i for i in range(max_M + 1)]
    n_factors = [2**i for i in range(max_N + 1)]
    k_factors = [2**i for i in range(max_K + 1)]

    def dram_factor(tile):
        """Return size // tile when it is an exact power of two, else None."""
        quotient, remainder = divmod(size, tile)
        if remainder != 0 or quotient < 1 or (quotient & (quotient - 1)) != 0:
            return None
        return quotient

    # Each scratchpad only tiles the two ranks of the tensor it holds.
    iact_spad_N = 1    # Inputs  -> M & K only
    weight_spad_M = 1  # Weights -> K & N only
    psum_K = 1         # Outputs -> M & N only

    factor_permutations = []
    for iact_spad_M in m_factors:
        for iact_spad_K in k_factors:
            for weight_spad_K in k_factors:
                for weight_spad_N in n_factors:
                    for psum_M in m_factors:
                        for psum_N in n_factors:
                            tile_M = iact_spad_M * weight_spad_M * psum_M
                            tile_N = iact_spad_N * weight_spad_N * psum_N
                            tile_K = iact_spad_K * weight_spad_K * psum_K

                            # tile size must fit into the iact_spad, weight_spad, and psum_spad.
                            # Tile sizes only grow with psum_N, so once one
                            # bound is exceeded the rest of this loop fails too.
                            if (
                                tile_M * tile_K > iact_spad_depth
                                or tile_K * tile_N > weight_spad_depth
                                or tile_M * tile_N > psum_spad_depth
                            ):
                                break

                            DRAM_M = dram_factor(tile_M)
                            DRAM_N = dram_factor(tile_N)
                            DRAM_K = dram_factor(tile_K)
                            if DRAM_M is None or DRAM_N is None or DRAM_K is None:
                                continue

                            factor_permutations.append({
                                "DRAM_M": DRAM_M,
                                "DRAM_N": DRAM_N,
                                "DRAM_K": DRAM_K,

                                "ClusterArray_M": 1,
                                "ClusterArray_N": 1,
                                "ClusterArray_K": 1,

                                # "GLB_Cluster_M": 1,
                                # "GLB_Cluster_N": 1,
                                # "GLB_Cluster_K": 1,

                                # "glb_M": 4,
                                # "glb_N": 4,
                                # "glb_K": 4,

                                "PE_Cluster_M": 1,
                                "PE_Cluster_N": 1,
                                "PE_Cluster_K": 1,

                                "iact_spad_M": iact_spad_M,
                                "iact_spad_N": iact_spad_N,
                                "iact_spad_K": iact_spad_K,

                                "weight_spad_M": weight_spad_M,
                                "weight_spad_N": weight_spad_N,
                                "weight_spad_K": weight_spad_K,

                                "psum_M": psum_M,
                                "psum_N": psum_N,
                                "psum_K": psum_K,

                                "reg_M": 1,
                                "reg_N": 1,
                                "reg_K": 1,
                            })

    # list.sort is stable, so entries sharing a DRAM triple keep the
    # inner-loop order in which they were generated.
    factor_permutations.sort(key=lambda p: (p["DRAM_M"], p["DRAM_N"], p["DRAM_K"]))
    return factor_permutations

In [21]:
# Build both candidate pools once. get_min_config_attention reads these
# notebook-level names when it draws random configurations.
factor_permutations = get_factor_permutations()      # loop-bound factorizations
dataflow_permutations = get_dataflow_permutations()  # per-level loop orders

In [22]:
def hash_config(config):
    """Turn a config dict into a hashable tuple of ``(key, value)`` pairs.

    List values are converted to tuples; every other value is kept as-is.
    Pairs follow the dict's iteration order, so two dicts with equal content
    but different insertion order produce different results.
    """
    frozen_items = []
    for key, value in config.items():
        if isinstance(value, list):
            value = tuple(value)
        frozen_items.append((key, value))
    return tuple(frozen_items)

def sample_config(dataflow_permutations, factor_permutations):
    """Draw one random mapping configuration.

    One entry is picked uniformly from ``factor_permutations`` and then one
    from ``dataflow_permutations``. The two are merged into a flat dict with
    these keys, in this order:

    * ``matrix_size_{M,K,N}_dim``, all set to the notebook-level ``matrix_size``
    * for each level: ``<level>_factor_{M,N,K}`` and ``<level>_permutation``
    * ``density_weights`` and ``density_inputs``, both 1

    The GLB_Cluster and glb levels are not emitted.
    """
    chosen_factors = random.choice(factor_permutations)
    chosen_orders = random.choice(dataflow_permutations)

    config = {}
    for rank in ("M", "K", "N"):
        config[f"matrix_size_{rank}_dim"] = matrix_size

    active_levels = (
        "DRAM",
        "ClusterArray",
        "PE_Cluster",
        "iact_spad",
        "weight_spad",
        "psum",
        "reg",
    )
    for level in active_levels:
        for rank in ("M", "N", "K"):
            config[f"{level}_factor_{rank}"] = chosen_factors[f"{level}_{rank}"]
        config[f"{level}_permutation"] = chosen_orders[level]

    config["density_weights"] = 1
    config["density_inputs"] = 1
    return config

In [35]:
def evaluate_config(config):
    """Run the baseline Timeloop model for ``config`` and return the stats text.

    ``run_timeloop_model`` is called with the baseline problem, mapping and
    sparse-optimization YAML files, and its return value is not used. The
    report is then read from ``./output_dir/timeloop-model.stats.txt``,
    relative to the current working directory.

    NOTE(review): this assumes ``run_timeloop_model`` (not defined in this
    notebook, presumably from ``loaders``) rewrites that file on every call;
    otherwise stale stats from an earlier run would be returned.

    Args:
        config: Configuration dict forwarded unchanged to ``run_timeloop_model``.

    Returns:
        The full contents of the stats file as a string.

    Raises:
        OSError: If the stats file cannot be opened.
    """
    run_timeloop_model(
        config,
        problem='designs/baseline/baseline_problem.yaml',
        mapping='designs/baseline/baseline_mapping.yaml',
        sparse_optimizations='designs/baseline/baseline_sparse_opt.yaml',
    )
    # Context manager closes the handle promptly; this function is called
    # several times per sampled configuration.
    with open('./output_dir/timeloop-model.stats.txt', 'r') as stats_file:
        return stats_file.read()

def get_min_config_attention(attributes, num_of_random_samples = 1):
    """Random-search for the best configuration per metric.

    Distinct configurations are drawn from the notebook-level
    ``dataflow_permutations`` / ``factor_permutations`` pools. Each one is
    evaluated at ``density_inputs`` 0.5, 0.001 and 1.0. The four runs handed
    to ``aggregate_timeloop_runs`` are (0.5, 0.001, 0.5, 1.0): the 0.5 result
    is reused for the third slot rather than evaluated again.

    ``total_energy_uJ`` and ``memory_traffic`` are minimised;
    ``utilization_percent`` and ``computes_per_cycle`` are maximised.

    NOTE(review): already-seen configurations are re-drawn without any retry
    limit, so asking for more samples than there are distinct configurations
    would never terminate.

    Args:
        attributes: Metric names to optimise; each must be one of the four above.
        num_of_random_samples: Number of distinct configurations to evaluate.

    Returns:
        ``(opt_config, curr_opt)``: two dicts keyed by metric name, holding the
        best sampled configuration (``None`` if nothing was sampled) and the
        best aggregated value (+/-inf if nothing was sampled).

    Raises:
        ValueError: If ``attributes`` contains an unknown metric name.
    """
    print(f"optimizing over: {attributes}, num_of_random_samples: {num_of_random_samples}")

    lower_is_better = ('total_energy_uJ', 'memory_traffic')
    higher_is_better = ('utilization_percent', 'computes_per_cycle')

    seen_hashes = set()

    # Seed each metric with its worst possible value; this also validates names.
    curr_opt = {}
    for attr in attributes:
        if attr in lower_is_better:
            curr_opt[attr] = float('inf')
        elif attr in higher_is_better:
            curr_opt[attr] = -float('inf')
        else:
            raise ValueError(f"Unexpected Attribute {attr}")

    opt_config = dict.fromkeys(attributes)

    # uses random sampling instead of a for loop
    for _ in range(num_of_random_samples):
        # Rejection-sample until a configuration not seen before turns up.
        while True:
            config = sample_config(dataflow_permutations, factor_permutations)
            config_key = hash_config(config)
            if config_key not in seen_hashes:
                break
        seen_hashes.add(config_key)

        # Same mapping, three input densities (evaluated in this order).
        half_stats, sparse_stats, dense_stats = (
            evaluate_config({**config, 'density_inputs': density})
            for density in (0.5, 0.001, 1.0)
        )

        run_reports = [half_stats, sparse_stats, half_stats, dense_stats]
        parsed_runs = list(map(parse_timeloop_stats, run_reports))
        combined_stats = aggregate_timeloop_runs(parsed_runs)

        for attr in attributes:
            # Reduce each metric to a single comparable number.
            if attr == 'utilization_percent':
                val = combined_stats[attr]['overall']
            elif attr == 'memory_traffic':
                val = sum(combined_stats[attr].values())
            elif attr in ('total_energy_uJ', 'computes_per_cycle'):
                val = combined_stats[attr]
            else:
                raise ValueError(f"{attr} not a valid attribute.")

            if attr in lower_is_better:
                improved = val < curr_opt[attr]
            elif attr in higher_is_better:
                improved = val > curr_opt[attr]
            else:
                improved = False

            if improved:
                opt_config[attr] = config
                curr_opt[attr] = val

    return opt_config, curr_opt

In [34]:
# Metrics to optimise. Energy and memory traffic are minimised; utilization
# and computes-per-cycle are maximised by get_min_config_attention.
attributes = [
    'total_energy_uJ',
    'utilization_percent',
    'computes_per_cycle',
    'memory_traffic',
]
num_of_random_samples = 10

# Despite the names, min_config / curr_min hold the best configuration and
# best value per metric, whether that metric is minimised or maximised.
min_config, curr_min = get_min_config_attention(attributes, num_of_random_samples)

print("==================OUTPUT==================")
print("min_config: ", min_config)
print("curr_min: ", curr_min)

optimizing over: ['total_energy_uJ', 'utilization_percent', 'computes_per_cycle', 'memory_traffic'], num_of_random_samples: 10
[INFO] 2025-05-02 23:08:03,897 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:08:06,939 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:08:09,938 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


parsed_runs[0]={'energy_per_compute_fJ': {'MAC': 15574.6, 'reg': 35.93, 'psum_spad': 745.16, 'weight_spad': 984.13, 'iact_spad': 51.74, 'DRAM': 68000.0, 'Total': 85391.56}, 'total_energy_uJ': 366753.97, 'memory_energy_pJ': {'MAC': 66892397648.28, 'reg': 154334925.32, 'psum_spad': 3200435812.83, 'weight_spad': 4226753976.34, 'iact_spad': 222215004.38, 'DRAM': 268435456.0}, 'memory_traffic': {'reg': 6442450944, 'psum_spad': 19318964224, 'weight_spad': 8594128896, 'iact_spad': 4831838208, 'DRAM': 4831838208}, 'utilization_percent': {'MAC': 1.0, 'overall': 0.39}, 'computes_per_cycle': 1.0, 'actual_computes': 4294967296, 'cycles': 4294967296}
combined_stats={'total_energy_uJ': 1467198.14, 'memory_energy_pJ': defaultdict(<class 'float'>, {'MAC': 267703375394.77, 'reg': 617648371.13, 'psum_spad': 12806679733.43, 'weight_spad': 16915452273.23, 'iact_spad': 889304447.54, 'DRAM': 1073741824.0}), 'memory_traffic': defaultdict(<class 'int'>, {'reg': 25782688677, 'psum_spad': 77275856896, 'weight_s

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:08:15,970 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:08:18,967 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


parsed_runs[0]={'energy_per_compute_fJ': {'MAC': 15574.6, 'reg': 24.14, 'psum_spad': 1257.6, 'weight_spad': 858.75, 'iact_spad': 11.0, 'DRAM': 287968.75, 'Total': 305694.83}, 'total_energy_uJ': 1312949.31, 'memory_energy_pJ': {'MAC': 66892397648.28, 'reg': 103689852.08, 'psum_spad': 5401318789.22, 'weight_spad': 3688258068.28, 'iact_spad': 47223345.36, 'DRAM': 137438953472.0}, 'memory_traffic': {'reg': 4831838208, 'psum_spad': 25761415168, 'weight_spad': 10737418240, 'iact_spad': 1077936128, 'DRAM': 19327352832}, 'utilization_percent': {'MAC': 1.0, 'overall': 0.39}, 'computes_per_cycle': 1.0, 'actual_computes': 4294967296, 'cycles': 4294967296}
combined_stats={'total_energy_uJ': 5251940.53, 'memory_energy_pJ': defaultdict(<class 'float'>, {'MAC': 267703375394.77, 'reg': 414966788.02, 'psum_spad': 21610211638.99, 'weight_spad': 14757035612.33, 'iact_spad': 188987828.12, 'DRAM': 549755813888.0}), 'memory_traffic': defaultdict(<class 'int'>, {'reg': 19337016508, 'psum_spad': 103045660672,

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:08:28,176 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:08:31,174 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


parsed_runs[0]={'energy_per_compute_fJ': {'MAC': 15574.6, 'reg': 22.18, 'psum_spad': 1257.6, 'weight_spad': 2036.82, 'iact_spad': 13.48, 'DRAM': 387937.5, 'Total': 406842.16}, 'total_energy_uJ': 1747373.79, 'memory_energy_pJ': {'MAC': 66892397648.28, 'reg': 95249006.54, 'psum_spad': 5401318789.22, 'weight_spad': 8748023748.23, 'iact_spad': 57873825.32, 'DRAM': 549755813888.0}, 'memory_traffic': {'reg': 4563402752, 'psum_spad': 25761415168, 'weight_spad': 17179869184, 'iact_spad': 1073741824, 'DRAM': 26302480384}, 'utilization_percent': {'MAC': 1.0, 'overall': 0.39}, 'computes_per_cycle': 1.0, 'actual_computes': 4294967296, 'cycles': 4294967296}
combined_stats={'total_energy_uJ': 6989672.5600000005, 'memory_energy_pJ': defaultdict(<class 'float'>, {'MAC': 267703375394.77, 'reg': 381186524.1700001, 'psum_spad': 21610211638.99, 'weight_spad': 34996098332.13, 'iact_spad': 231611048.93, 'DRAM': 2199023255552.0}), 'memory_traffic': defaultdict(<class 'int'>, {'reg': 18262737813, 'psum_spad':

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:08:37,205 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:08:40,212 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


parsed_runs[0]={'energy_per_compute_fJ': {'MAC': 15574.6, 'reg': 21.19, 'psum_spad': 1257.6, 'weight_spad': 3805.51, 'iact_spad': 2.8, 'DRAM': 383968.75, 'Total': 404630.45}, 'total_energy_uJ': 1737874.53, 'memory_energy_pJ': {'MAC': 66892397648.28, 'reg': 91028583.77, 'psum_spad': 5401318789.22, 'weight_spad': 16344501668.81, 'iact_spad': 12007378.41, 'DRAM': 549755813888.0}, 'memory_traffic': {'reg': 4429185024, 'psum_spad': 25761415168, 'weight_spad': 17179869184, 'iact_spad': 272629760, 'DRAM': 25769803776}, 'utilization_percent': {'MAC': 1.0, 'overall': 0.39}, 'computes_per_cycle': 1.0, 'actual_computes': 4294967296, 'cycles': 4294967296}
combined_stats={'total_energy_uJ': 6951643.91, 'memory_energy_pJ': defaultdict(<class 'float'>, {'MAC': 267703375394.77, 'reg': 364296392.25, 'psum_spad': 21610211638.99, 'weight_spad': 65384595948.06, 'iact_spad': 48053528.41, 'DRAM': 2199023255552.0}), 'memory_traffic': defaultdict(<class 'int'>, {'reg': 17725598466, 'psum_spad': 103045660672, 

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:08:46,161 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:08:49,142 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


parsed_runs[0]={'energy_per_compute_fJ': {'MAC': 15574.6, 'reg': 22.18, 'psum_spad': 1257.6, 'weight_spad': 1251.44, 'iact_spad': 13.48, 'DRAM': 323937.5, 'Total': 342056.79}, 'total_energy_uJ': 1469122.71, 'memory_energy_pJ': {'MAC': 66892397648.28, 'reg': 95249006.54, 'psum_spad': 5401318789.22, 'weight_spad': 5374846628.27, 'iact_spad': 57873825.32, 'DRAM': 274877906944.0}, 'memory_traffic': {'reg': 4563402752, 'psum_spad': 25761415168, 'weight_spad': 12884901888, 'iact_spad': 1073741824, 'DRAM': 22007513088}, 'utilization_percent': {'MAC': 1.0, 'overall': 0.39}, 'computes_per_cycle': 1.0, 'actual_computes': 4294967296, 'cycles': 4294967296}
combined_stats={'total_energy_uJ': 5876668.220000001, 'memory_energy_pJ': defaultdict(<class 'float'>, {'MAC': 267703375394.77, 'reg': 381186524.1700001, 'psum_spad': 21610211638.99, 'weight_spad': 21503389852.28, 'iact_spad': 231611048.93, 'DRAM': 1099511627776.0}), 'memory_traffic': defaultdict(<class 'int'>, {'reg': 18262737813, 'psum_spad': 

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:08:55,150 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:08:58,161 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


parsed_runs[0]={'energy_per_compute_fJ': {'MAC': 15574.6, 'reg': 51.66, 'psum_spad': 1257.6, 'weight_spad': 1251.44, 'iact_spad': 215.6, 'DRAM': 383937.5, 'Total': 402288.39}, 'total_energy_uJ': 1727815.47, 'memory_energy_pJ': {'MAC': 66892397648.28, 'reg': 221861689.64, 'psum_spad': 5401318789.22, 'weight_spad': 5374846628.27, 'iact_spad': 925981205.12, 'DRAM': 274877906944.0}, 'memory_traffic': {'reg': 8589934592, 'psum_spad': 25761415168, 'weight_spad': 12884901888, 'iact_spad': 17179869184, 'DRAM': 30060576768}, 'utilization_percent': {'MAC': 1.0, 'overall': 0.39}, 'computes_per_cycle': 1.0, 'actual_computes': 4294967296, 'cycles': 4294967296}
combined_stats={'total_energy_uJ': 6911956.649999999, 'memory_energy_pJ': defaultdict(<class 'float'>, {'MAC': 267703375394.77, 'reg': 887890481.93, 'psum_spad': 21610211638.99, 'weight_spad': 21503389852.28, 'iact_spad': 3705776782.8900003, 'DRAM': 1099511627776.0}), 'memory_traffic': defaultdict(<class 'int'>, {'reg': 34376918237, 'psum_spa

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:09:04,142 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:09:07,153 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


parsed_runs[0]={'energy_per_compute_fJ': {'MAC': 15574.6, 'reg': 35.93, 'psum_spad': 915.97, 'weight_spad': 2036.82, 'iact_spad': 107.8, 'DRAM': 287937.5, 'Total': 306608.62}, 'total_energy_uJ': 1316874.01, 'memory_energy_pJ': {'MAC': 66892397648.28, 'reg': 154334925.32, 'psum_spad': 3934063471.62, 'weight_spad': 8748023748.23, 'iact_spad': 462990602.56, 'DRAM': 549755813888.0}, 'memory_traffic': {'reg': 6442450944, 'psum_spad': 21466447872, 'weight_spad': 17179869184, 'iact_spad': 8589934592, 'DRAM': 21470642176}, 'utilization_percent': {'MAC': 1.0, 'overall': 0.39}, 'computes_per_cycle': 1.0, 'actual_computes': 4294967296, 'cycles': 4294967296}
combined_stats={'total_energy_uJ': 5267914.879999999, 'memory_energy_pJ': defaultdict(<class 'float'>, {'MAC': 267703375394.77, 'reg': 617648371.13, 'psum_spad': 15741190368.61, 'weight_spad': 34996098332.13, 'iact_spad': 1852888391.4499998, 'DRAM': 2199023255552.0}), 'memory_traffic': defaultdict(<class 'int'>, {'reg': 25782688677, 'psum_spad

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:09:13,116 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:09:16,099 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


parsed_runs[0]={'energy_per_compute_fJ': {'MAC': 15574.6, 'reg': 51.66, 'psum_spad': 1257.6, 'weight_spad': 466.83, 'iact_spad': 119.49, 'DRAM': 272000.0, 'Total': 289470.17}, 'total_energy_uJ': 1243264.92, 'memory_energy_pJ': {'MAC': 66892397648.28, 'reg': 221861689.64, 'psum_spad': 5401318789.22, 'weight_spad': 2004963626.58, 'iact_spad': 513223036.82, 'DRAM': 268435456.0}, 'memory_traffic': {'reg': 8589934592, 'psum_spad': 25761415168, 'weight_spad': 8594128896, 'iact_spad': 10737418240, 'DRAM': 19327352832}, 'utilization_percent': {'MAC': 1.0, 'overall': 0.39}, 'computes_per_cycle': 1.0, 'actual_computes': 4294967296, 'cycles': 4294967296}
combined_stats={'total_energy_uJ': 4973341.32, 'memory_energy_pJ': defaultdict(<class 'float'>, {'MAC': 267703375394.77, 'reg': 887890481.93, 'psum_spad': 21610211638.99, 'weight_spad': 8023857845.53, 'iact_spad': 2053918593.3400002, 'DRAM': 1073741824.0}), 'memory_traffic': defaultdict(<class 'int'>, {'reg': 34376918237, 'psum_spad': 10304566067

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:09:22,149 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:09:25,132 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


parsed_runs[0]={'energy_per_compute_fJ': {'MAC': 15574.6, 'reg': 51.66, 'psum_spad': 1257.6, 'weight_spad': 466.83, 'iact_spad': 215.6, 'DRAM': 320000.0, 'Total': 337566.28}, 'total_energy_uJ': 1449836.11, 'memory_energy_pJ': {'MAC': 66892397648.28, 'reg': 221861689.64, 'psum_spad': 5401318789.22, 'weight_spad': 2004963626.58, 'iact_spad': 925981205.12, 'DRAM': 268435456.0}, 'memory_traffic': {'reg': 8589934592, 'psum_spad': 25761415168, 'weight_spad': 8594128896, 'iact_spad': 17179869184, 'DRAM': 25769803776}, 'utilization_percent': {'MAC': 1.0, 'overall': 0.39}, 'computes_per_cycle': 1.0, 'actual_computes': 4294967296, 'cycles': 4294967296}
combined_stats={'total_energy_uJ': 5800039.22, 'memory_energy_pJ': defaultdict(<class 'float'>, {'MAC': 267703375394.77, 'reg': 887890481.93, 'psum_spad': 21610211638.99, 'weight_spad': 8023857845.53, 'iact_spad': 3705776782.8900003, 'DRAM': 1073741824.0}), 'memory_traffic': defaultdict(<class 'int'>, {'reg': 34376918237, 'psum_spad': 103045660672

INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:09:31,150 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


[INFO] 2025-05-02 23:09:34,197 - pytimeloop.accelergy_interface - Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


INFO:pytimeloop.accelergy_interface:Running Accelergy with command: accelergy /home/workspace/final_project/output_dir/parsed-processed-input.yaml -o ./output_dir/ -v


parsed_runs[0]={'energy_per_compute_fJ': {'MAC': 15574.6, 'reg': 22.18, 'psum_spad': 1257.6, 'weight_spad': 2036.82, 'iact_spad': 13.48, 'DRAM': 387937.5, 'Total': 406842.16}, 'total_energy_uJ': 1747373.79, 'memory_energy_pJ': {'MAC': 66892397648.28, 'reg': 95249006.54, 'psum_spad': 5401318789.22, 'weight_spad': 8748023748.23, 'iact_spad': 57873825.32, 'DRAM': 549755813888.0}, 'memory_traffic': {'reg': 4563402752, 'psum_spad': 25761415168, 'weight_spad': 17179869184, 'iact_spad': 1073741824, 'DRAM': 26302480384}, 'utilization_percent': {'MAC': 1.0, 'overall': 0.39}, 'computes_per_cycle': 1.0, 'actual_computes': 4294967296, 'cycles': 4294967296}
combined_stats={'total_energy_uJ': 6989672.5600000005, 'memory_energy_pJ': defaultdict(<class 'float'>, {'MAC': 267703375394.77, 'reg': 381186524.1700001, 'psum_spad': 21610211638.99, 'weight_spad': 34996098332.13, 'iact_spad': 231611048.93, 'DRAM': 2199023255552.0}), 'memory_traffic': defaultdict(<class 'int'>, {'reg': 18262737813, 'psum_spad':

In [None]:
# Persist the search results as JSON text under ./data (relative to the
# current working directory). The directory is created if it is missing.
folder = "data"
name = "saniya_1" #Change
os.makedirs(f"{folder}", exist_ok=True)

# NOTE(review): the "200_" prefix is hard-coded and the file names have no
# ".json" extension. The prefix presumably records a sample count; confirm it
# matches the num_of_random_samples actually used for this run.
# Existing files with the same name are overwritten.
with open(f"{folder}/200_min_config_{name}", "w") as f:
    # Best configuration found per metric.
    json.dump(min_config, f, indent=2)
with open(f"{folder}/200_curr_min_{name}", "w") as f:
    # Best aggregated value per metric.
    json.dump(curr_min, f, indent=2)

In [None]:
# !timeloop model ./output_dir/parsed-processed-input.yaml