In [296]:
import pandas as pd
import numpy as np
from loaders import *
import math
import random
from attn_masked import parse_timeloop_stats, aggregate_timeloop_runs

In [297]:
matrix_size = 2048 # CHANGE

dataflow_permuations = [['M', 'N', 'K'], ['M', 'K', 'N'], ['N', 'K', 'M'], ['N', 'M', 'K'], ['K', 'N', 'M'], ['K', 'M', 'N']]
'''
6 different dataflow permutations
9 components for dataflow
= 6^9 = 10077696 possible combinations
'''
# generate all the data flow permuations
def get_dataflow_permutations():
    dataflow_perumations = []
    
    for DRAM in dataflow_permuations:
        for ClusterArray in dataflow_permuations:
            for GLB_Cluster in dataflow_permuations:
                for glb in dataflow_permuations:
                    for PE_Cluster in dataflow_permuations:
                        for iact_spad in dataflow_permuations:
                            for weight_spad in dataflow_permuations:
                                for psum in dataflow_permuations:
                                    for reg in dataflow_permuations:
                                        permutation = {
                                            "DRAM": DRAM,
                                            "ClusterArray": ClusterArray,
                                            # "GLB_Cluster": GLB_Cluster,
                                            # "glb": glb,
                                            "PE_Cluster": PE_Cluster,
                                            "iact_spad": iact_spad,
                                            "weight_spad": weight_spad,
                                            "psum": psum,
                                            "reg": reg,
                                        }
                                        dataflow_perumations.append(permutation)
    return dataflow_perumations

# generate all the possible factor permutations for a specific rank (ie, M, N, or K)
def get_factor_permutations():
    factor_permutations = []
    iact_spad_depth = 16 # M, K
    weight_spad_depth = 192 # K, N
    psum_spad_depth = 32 # M, N

    max_K = int(max(math.log2(weight_spad_depth), math.log2(iact_spad_depth)))
    max_M = int(max(math.log2(psum_spad_depth), math.log2(iact_spad_depth)))
    max_N = int(max(math.log2(weight_spad_depth), math.log2(psum_spad_depth)))     
    
    for DRAM_M in [2**i for i in range(int(math.log2(matrix_size)) + 1)]:
        for DRAM_N in [2**i for i in range(int(math.log2(matrix_size)) + 1)]:
                for DRAM_K in [2**i for i in range(int(math.log2(matrix_size)) + 1)]:

                    for iact_spad_M in [2**i for i in range(max_M + 1)]:
                        for iact_spad_K in [2**j for j in range(max_K + 1)]:
                            iact_spad_N = 1  # Inputs → M & K only
                    
                            for weight_spad_K in [2**i for i in range(max_K + 1)]:
                                for weight_spad_N in [2**j for j in range(max_N + 1)]:
                                    weight_spad_M = 1  # Weights → K & N only
                    
                                    for psum_M in [2**i for i in range(max_M + 1)]:
                                        for psum_N in [2**j for j in range(max_N + 1)]:
                                            psum_K = 1  # Outputs → M & N only
                                            # tile size must fit into the iact_spad, weight_spad, and psum_spad
                                            if not (
                                                ((iact_spad_M * weight_spad_M * psum_M) * (iact_spad_K * weight_spad_K * psum_K) <=  iact_spad_depth) and
                                                ((iact_spad_K * weight_spad_K * psum_K) * (iact_spad_N * weight_spad_N * psum_N) <=  weight_spad_depth) and
                                                ((iact_spad_M * weight_spad_M * psum_M) * (iact_spad_N * weight_spad_N * psum_N) <=  psum_spad_depth)
                                            ):
                                                break

                                            if ((DRAM_M * iact_spad_M * weight_spad_M * psum_M == matrix_size) and
                                                (DRAM_N * iact_spad_N * weight_spad_N * psum_N == matrix_size) and
                                                (DRAM_K * iact_spad_K * weight_spad_K * psum_K == matrix_size)):
                                                permuation = {
                                                    "DRAM_M": DRAM_M,
                                                    "DRAM_N": DRAM_N,
                                                    "DRAM_K": DRAM_K,

                                                    "ClusterArray_M": 1,
                                                    "ClusterArray_N": 1,
                                                    "ClusterArray_K": 1,

                                                    # "GLB_Cluster_M": 1,
                                                    # "GLB_Cluster_N": 1,
                                                    # "GLB_Cluster_K": 1,

                                                    # "glb_M": 4,
                                                    # "glb_N": 4,
                                                    # "glb_K": 4,

                                                    "PE_Cluster_M": 1,
                                                    "PE_Cluster_N": 1,
                                                    "PE_Cluster_K": 1,

                                                    "iact_spad_M": iact_spad_M,
                                                    "iact_spad_N": iact_spad_N,
                                                    "iact_spad_K": iact_spad_K,

                                                    "weight_spad_M": weight_spad_M,
                                                    "weight_spad_N": weight_spad_N,
                                                    "weight_spad_K": weight_spad_K,

                                                    "psum_M": psum_M,
                                                    "psum_N": psum_N,
                                                    "psum_K": psum_K,

                                                    "reg_M": 1,
                                                    "reg_N": 1,
                                                    "reg_K": 1,
                                                }
                                                factor_permutations.append(permuation)                            
    return factor_permutations

In [298]:
dataflow_permutations = get_dataflow_permutations()
factor_permutations = get_factor_permutations()

In [299]:
def hash_config(config):
    return tuple((k, tuple(v) if isinstance(v, list) else v) for k, v in config.items())

def sample_config(dataflow_permutations, factor_permutations):
    factor_permutation = random.choice(factor_permutations)
    dataflow_permutation = random.choice(dataflow_permutations)
    
    config = dict(
        matrix_size_M_dim=matrix_size,
        matrix_size_K_dim=matrix_size,
        matrix_size_N_dim=matrix_size,
        
        DRAM_factor_M=factor_permutation["DRAM_M"],
        DRAM_factor_N=factor_permutation["DRAM_N"],
        DRAM_factor_K=factor_permutation["DRAM_K"],
        DRAM_permutation=dataflow_permutation["DRAM"],
    
        ClusterArray_factor_M=factor_permutation["ClusterArray_M"],
        ClusterArray_factor_N=factor_permutation["ClusterArray_N"],
        ClusterArray_factor_K=factor_permutation["ClusterArray_K"],
        ClusterArray_permutation=dataflow_permutation["ClusterArray"],
    
        # GLB_Cluster_factor_M=factor_permutation["GLB_Cluster_M"],
        # GLB_Cluster_factor_N=factor_permutation["GLB_Cluster_N"],
        # GLB_Cluster_factor_K=factor_permutation["GLB_Cluster_K"],
        # GLB_Cluster_permutation=dataflow_permutation["GLB_Cluster"],
    
        # glb_factor_M=factor_permutation["glb_M"],
        # glb_factor_N=factor_permutation["glb_N"],
        # glb_factor_K=factor_permutation["glb_K"],
        # glb_permutation=dataflow_permutation["glb"],
    
        PE_Cluster_factor_M=factor_permutation["PE_Cluster_M"],
        PE_Cluster_factor_N=factor_permutation["PE_Cluster_N"],
        PE_Cluster_factor_K=factor_permutation["PE_Cluster_K"],
        PE_Cluster_permutation=dataflow_permutation["PE_Cluster"],
    
        iact_spad_factor_M=factor_permutation["iact_spad_M"],
        iact_spad_factor_N=factor_permutation["iact_spad_N"],
        iact_spad_factor_K=factor_permutation["iact_spad_K"],
        iact_spad_permutation=dataflow_permutation["iact_spad"],
    
        weight_spad_factor_M=factor_permutation["weight_spad_M"],
        weight_spad_factor_N=factor_permutation["weight_spad_N"],
        weight_spad_factor_K=factor_permutation["weight_spad_K"],
        weight_spad_permutation=dataflow_permutation["weight_spad"],
    
        psum_factor_M=factor_permutation["psum_M"],
        psum_factor_N=factor_permutation["psum_N"],
        psum_factor_K=factor_permutation["psum_K"],
        psum_permutation=dataflow_permutation["psum"],
    
        reg_factor_M=factor_permutation["reg_M"],
        reg_factor_N=factor_permutation["reg_N"],
        reg_factor_K=factor_permutation["reg_K"],
        reg_permutation=dataflow_permutation["reg"],
    
        density_weights=1,
        density_inputs=1
    )
    return config

In [300]:
def evaluate_config(config):
    out = run_timeloop_model(
        config,
        problem='designs/baseline/baseline_problem.yaml',
        mapping='designs/baseline/baseline_mapping.yaml',
        sparse_optimizations='designs/baseline/baseline_sparse_opt.yaml', 
    )
    stats = open('./output_dir/timeloop-model.stats.txt', 'r').read()
    return stats

def get_min_config_attention(atribute, num_of_random_samples = 1):
    print("minimizing: ", atribute, "num_of_random_samples: ", num_of_random_samples)
    visited = set()
    curr_min = float('inf')
    min_config =  None
    
    # uses random sampling instead of a for loop
    for sample in range(num_of_random_samples):
        print("\nsample: ", sample, "curr_min", curr_min)
        
        config = sample_config(dataflow_permutations, factor_permutations)
        while hash_config(config) in visited:
            config = sample_config(dataflow_permutations, factor_permutations)
        visited.add(hash_config(config))
        print("config: ", config)

        q1_config, q2_config, q4_config = config.copy(), config.copy(), config.copy()
        q1_config['density_inputs'] = 0.5
        q2_config['density_inputs'] = 0.001
        q4_config['density_inputs'] = 1.0

        q1_stats = evaluate_config(q1_config)
        q2_stats = evaluate_config(q2_config)
        q4_stats = evaluate_config(q4_config)
        
        all_stats = [q1_stats, q2_stats, q1_stats, q4_stats]
        parsed_runs = [parse_timeloop_stats(stats) for stats in all_stats]
        combined_stats = aggregate_timeloop_runs(parsed_runs)
        
        if combined_stats[atribute] < curr_min:
            min_config = config
            curr_min = combined_stats[atribute]
        
    return min_config 

In [301]:
num_of_random_samples = 10
atribute = 'total_energy_uJ'
min_config = get_min_config_attention(atribute, num_of_random_samples)
print("min_config: ", min_config)

minimizing:  total_energy_uJ num_of_random_samples:  10

sample:  0 curr_min inf
config:  {'matrix_size_M_dim': 2048, 'matrix_size_K_dim': 2048, 'matrix_size_N_dim': 2048, 'DRAM_factor_M': 1024, 'DRAM_factor_N': 128, 'DRAM_factor_K': 2048, 'DRAM_permutation': ['K', 'N', 'M'], 'ClusterArray_factor_M': 1, 'ClusterArray_factor_N': 1, 'ClusterArray_factor_K': 1, 'ClusterArray_permutation': ['M', 'K', 'N'], 'PE_Cluster_factor_M': 1, 'PE_Cluster_factor_N': 1, 'PE_Cluster_factor_K': 1, 'PE_Cluster_permutation': ['M', 'K', 'N'], 'iact_spad_factor_M': 2, 'iact_spad_factor_N': 1, 'iact_spad_factor_K': 1, 'iact_spad_permutation': ['K', 'N', 'M'], 'weight_spad_factor_M': 1, 'weight_spad_factor_N': 1, 'weight_spad_factor_K': 1, 'weight_spad_permutation': ['N', 'K', 'M'], 'psum_factor_M': 1, 'psum_factor_N': 16, 'psum_factor_K': 1, 'psum_permutation': ['K', 'M', 'N'], 'reg_factor_M': 1, 'reg_factor_N': 1, 'reg_factor_K': 1, 'reg_permutation': ['N', 'K', 'M'], 'density_weights': 1, 'density_inputs': 

ValueError: Error loading YAML file /home/workspace/final_project/top.yaml.jinja2. Error loading YAML file /home/workspace/final_project/designs/baseline/baseline_mapping.yaml. 'GLB_Cluster_factor_M' is undefined

In [None]:
!timeloop model ./output_dir/parsed-processed-input.yaml