<a href="https://colab.research.google.com/github/mattanova/ELE6310E-Assignments/blob/main/HW2_Q1_MN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Loading problem data**

In [None]:
!git clone https://github.com/RedaBensaidDS/ELE6310E.git

In [None]:
%cd ELE6310E/A2/Q1

/content/ELE6310E/A2/Q1


# **Functions and classes**

In [None]:
import yaml
def read_yaml(path):
    """
    Open the YAML file located at ``path`` and return the
    Python object produced by ``yaml.safe_load``
    """
    with open(path, 'r') as stream:
        return yaml.safe_load(stream)

In [None]:
class main_memory:
    """
    Access-counting model of the main memory.

    > main memory is initialized based on the architecture file
    (first ``local`` entry of the first ``subtree`` entry)
    > every time .read / .write are performed on memory, the matching
    counter goes up by one
    > since main memory is initialized with all the data that will
    take space in it, checking for space limitation is done via
    fill, for the first time that memory is filled
    > create_space returns the overall capacity of memory for data (depth)
    and word_bits, computed as width * block-size
    """

    def __init__(self, architecture):
        self.arch = architecture
        self.capacity, self.word_bits = self.create_space()
        self.read_counter = 0
        self.write_counter = 0

    def create_space(self):
        """Return ``(capacity, word_bits)`` taken from the architecture attributes."""
        # look the attribute dictionary up once instead of once per field
        attributes = self.arch['architecture']['subtree'][0]['local'][0]['attributes']
        capacity = attributes['depth']
        word_bits = attributes['width'] * attributes['block-size']
        return capacity, word_bits

    def fill(self, blocks):
        """
        Check that ``blocks`` fits in the memory.

        Raises AssertionError("Not enough space") when ``blocks`` exceeds
        ``self.capacity``. The exception is raised explicitly (rather than
        with an ``assert`` statement) so the check is still performed when
        Python runs with -O.
        """
        if not blocks <= self.capacity:
            raise AssertionError("Not enough space")

    def read(self):
        """Record one read access."""
        self.read_counter += 1

    def write(self):
        """Record one write access."""
        self.write_counter += 1

In [None]:
class buffer:
    """
    FIFO model of the on-chip buffer.

    > buffer is initialized based on the architecture file
    > every time .read / .write are performed on the buffer, the matching
    counter goes up by one (a read miss counts two reads, see .read)
    > create_space returns the (initially empty) storage list, the overall
    capacity of the buffer for data (depth) and word_bits, computed as
    width * block-size
    > every time something is written in buffer, the available space is checked
    based on capacity, and if the buffer does not have space FIFO_remove removes
    the oldest element.
    > If the oldest element is an output (it carries both the 'M' and the 'P'
    dimension), FIFO_remove takes care to write it back to main memory.
    > Every stored element is a set of two (dimension, index) pairs, so that
    it is recognizable if requested again by MAC

    """
    def __init__(self, architecture, problem, main):
        self.main = main
        self.arch = architecture
        self.prob = problem
        self.space, self.capacity, self.word_bits = self.create_space()
        self.read_counter = 0
        self.write_counter = 0

    def create_space(self):
        """Return ``(space, capacity, word_bits)`` for a freshly created buffer."""
        attributes = self.arch['architecture']['subtree'][0]['subtree'][0]['local'][0]['attributes']
        capacity = attributes['depth']
        word_bits = attributes['width'] * attributes['block-size']
        space = []
        return space, capacity, word_bits

    def FIFO_remove(self):
        """
        Drop the oldest element of the buffer.

        If that element is an output, one main-memory write is recorded
        before it is removed. Raises IndexError when the buffer is empty.
        """
        # collect the dimension names of the oldest element
        dims = {dim for dim, _ in self.space[0]}
        # an output is indexed by BOTH 'M' and 'P'; each name has to be tested
        # on its own because `'M' and 'P' in dims` only tests for 'P'
        if 'M' in dims and 'P' in dims:
            self.main.write()
        self.space.pop(0)

    def write(self, dim1, index1, dim2, index2):
        """Store the element {(dim1, index1), (dim2, index2)}, evicting the oldest one if full."""
        if len(self.space) >= self.capacity:
            self.FIFO_remove()
        self.write_counter += 1
        self.space.append({(dim1, index1), (dim2, index2)})

    def read(self, dim1, index1, dim2, index2):
        """
        Read the element {(dim1, index1), (dim2, index2)} from the buffer.

        Every call counts one buffer read. On a miss the element is fetched
        from main memory (one main read), written into the buffer, and one
        more buffer read is counted.
        """
        self.read_counter += 1
        # if the block does not exist in the buffer,
        # it should be taken from main memory and written
        # in the buffer
        if {(dim1, index1), (dim2, index2)} not in self.space:
            self.main.read()
            self.write(dim1, index1, dim2, index2)
            self.read_counter += 1


In [None]:
class mac:
    """
    > models the MAC unit described in the architecture file
    > each call to perform() raises mac_counter by one
    > create_space looks up the datawidth attribute used for every
    MAC operation
    """
    def __init__(self, architecture):
        self.arch = architecture
        self.mac_counter = 0
        self.data_width = self.create_space()

    def create_space(self):
        # the MAC is the second ``local`` entry of the innermost subtree
        inner_level = self.arch['architecture']['subtree'][0]['subtree'][0]
        mac_attributes = inner_level['local'][1]['attributes']
        return mac_attributes['datawidth']

    def perform(self):
        self.mac_counter = self.mac_counter + 1

In [None]:
def energy_calculator(main, buff, mac, *, main_read_cost=7.95,
                      main_write_cost=5.45, buffer_read_cost=0.42,
                      buffer_write_cost=0.42, mac_cost=0.56):
    """
    Turn the access counters of the three units into energy values.

    Each value is ``bits * number_of_accesses * cost``, where ``bits`` is
    ``word_bits`` for the memories and ``data_width`` for the MAC.

    The function can represent the energy consumption of any given
    hardware by passing different cost coefficients as keyword arguments;
    the defaults are the values originally hard-coded here (their unit is
    not stated in this file).

    Parameters:
        main: object exposing ``word_bits``, ``read_counter``, ``write_counter``
        buff: object exposing ``word_bits``, ``read_counter``, ``write_counter``
        mac: object exposing ``data_width`` and ``mac_counter``
        main_read_cost, main_write_cost, buffer_read_cost,
        buffer_write_cost, mac_cost: keyword-only cost coefficients

    Returns:
        tuple ``(main_read, main_write, buffer_read, buffer_write, mac_use)``
    """
    main_read = main.word_bits * main.read_counter * main_read_cost
    main_write = main.word_bits * main.write_counter * main_write_cost
    buffer_read = buff.word_bits * buff.read_counter * buffer_read_cost
    buffer_write = buff.word_bits * buff.write_counter * buffer_write_cost
    mac_use = mac.data_width * mac.mac_counter * mac_cost
    return main_read, main_write, buffer_read, buffer_write, mac_use

In [None]:
def flow_simulator(architecture, mapping, problem):
    """
    This function is the data flow simulator.

    It builds the main memory, the buffer and the MAC from ``architecture``,
    walks the loop nest described by ``mapping`` (first entry: main-memory
    level, second entry: buffer level) while counting accesses, and returns
    the resulting energy values.

    Parameters:
        architecture: parsed architecture description
        mapping: parsed mapping; each of the first two entries of
            ``mapping['mapping']`` provides ``permutation`` and ``factors``
        problem: parsed problem description; ``problem['problem']['instance']``
            holds the dimension sizes

    Returns:
        dict with the keys 'main_read', 'main_write', 'buffer_read',
        'buffer_write' and 'mac_use'

    Raises:
        AssertionError: raised by ``main_memory.fill`` when the problem
            does not fit in main memory
        ValueError: when 'M' or 'P' is missing from the buffer permutation,
            or a buffer dimension is missing from the main permutation
    """
    def parse_factors(level):
        # entries look like "<dim>=<count>"; the dimension name is assumed
        # to be a single character followed by a one-character separator
        return {item[0]: int(item[2:]) for item in level['factors'].split()}

    main_level = mapping['mapping'][0]
    buffer_level = mapping['mapping'][1]
    # getting the permutation orders for the specific problem
    # (the permutation is reversed before being used as loop order)
    main_perm = list(reversed(main_level['permutation']))
    buffer_perm = list(reversed(buffer_level['permutation']))
    # getting the factors for the specific problem
    main_factors = parse_factors(main_level)
    buff_factors = parse_factors(buffer_level)
    # creating the memory/computation units
    main = main_memory(architecture)
    # the buffer receives the `problem` argument of this call, not a global
    buff = buffer(architecture, problem, main)
    macc = mac(architecture)
    # starting the data_flow simulation
    # checking if there is enough space in main_memory
    # NOTE(review): this sums the dimension sizes themselves - confirm that
    # this is the intended measure of required memory
    total_required_memory = sum(problem['problem']['instance'].values())
    main.fill(total_required_memory)

    # loop-invariant lookups, done once instead of on every iteration
    outer0, outer1, outer2 = main_perm[0], main_perm[1], main_perm[2]
    inner0, inner1, inner2 = buffer_perm[0], buffer_perm[1], buffer_perm[2]
    # position of every buffer-level dimension in the main-level loop order
    pos0 = main_perm.index(inner0)
    pos1 = main_perm.index(inner1)
    pos2 = main_perm.index(inner2)
    # for this architecture the projection of the output was -[M] -[P]
    # can be changed to match any other problem
    out_m = buffer_perm.index('M')
    out_p = buffer_perm.index('P')

    # for loops of the data projection/movement
    # filling the buffer while performing MAC
    for A in range(main_factors[outer0]):
        for B in range(main_factors[outer1]):
            main.read()
            # to read data, two space dimensions are required
            buff.write(outer0, A, outer1, B)
            for C in range(main_factors[outer2]):
                main.read()
                main.read()
                buff.write(outer1, B, outer2, C)
                buff.write(outer2, C, outer0, A)
                main_dimensions = [A, B, C]
                # NOTE(review): buffer-level indices are formed as
                # inner_index * outer_index - confirm this is the intended
                # tile addressing
                scale0 = main_dimensions[pos0]
                scale1 = main_dimensions[pos1]
                scale2 = main_dimensions[pos2]
                # moving on to reading from buffer and performing MAC
                for a in range(buff_factors[inner0]):
                    for b in range(buff_factors[inner1]):
                        buff.read(inner0, a * scale0, inner1, b * scale1)
                        for c in range(buff_factors[inner2]):
                            buff.read(inner1, b * scale1, inner2, c * scale2)
                            buff.read(inner2, c * scale2, inner0, a * scale0)
                            macc.perform()
                            dimensions = [a, b, c]
                            # writing the last generated output to the buffer
                            buff.write('M', dimensions[out_m], 'P', dimensions[out_p])
                            # no need writing to main memory. It is taken care of in FIFO_remove

    # writing all output related results to main_memory while
    # emptying the buffer by the end of the data_flow simulation
    while buff.space:
        buff.FIFO_remove()

    # generating the energy values
    keys = ['main_read', 'main_write', 'buffer_read', 'buffer_write', 'mac_use']
    stats = dict(zip(keys, energy_calculator(main, buff, macc)))

    return stats


# **Sample problem execution**

In [None]:
# these can be adjusted for any specific problem
# the three paths are relative to the current working directory
# NOTE(review): the name `map` shadows the Python builtin `map` for the
# rest of the session; it is kept because a later cell refers to it
arch = read_yaml("arch/Q1_arch.yaml")
prob = read_yaml("prob/Q1_prob.yaml")
map = read_yaml("map/Q1_os-tiled.map2.yaml")

In [None]:
# using the simulator to get energy_stats
# arguments are passed in the order (architecture, mapping, problem)
stat = flow_simulator(arch, map, prob)

In [None]:
# display the dictionary returned by flow_simulator
stat

{'main_read': 16281.6,
 'main_write': 74163.6,
 'buffer_read': 12465.6,
 'buffer_write': 6021.12,
 'mac_use': 6881.280000000001}