In [1]:
from sonos_accelerator import helpers

In [2]:
from sonos_accelerator.helpers.dataflow_plots import plotDataflow

In [3]:
import numpy as np
from sonos_accelerator.neural_networks.specifyModel_VGG import specifyModel_VGG
from sonos_accelerator.neural_networks.specifyModel_ResNet50 import specifyModel_ResNet50
from sonos_accelerator.neural_networks.specifyModel_ResNet50v15 import specifyModel_ResNet50v15
from sonos_accelerator.neural_networks.specifyModel_ResNet34 import specifyModel_ResNet34
from sonos_accelerator.neural_networks.specifyModel_custom import specifyModel_custom
from sonos_accelerator.buildAccelerator import allocateHardware, allocateTiles, calculateTileEnergyArea
from sonos_accelerator.dataflow import simulateDataflow
from sonos_accelerator.helpers.dataflow_plots import plotDataflow
from sonos_accelerator.helpers.eval_energy import eval_energy

def cli(model="VGG16"):
	########################################
	##
	##	Choose neural network model
	##
	########################################

	# Definitions:
	#	arrayDims : array dimension to be used in accelerator (not including negative array)
	#	Ncycles_target : target # compute cycles to finish a given conv layer, 
	#		used to set weight replication factor

	# model = "ResNet34"
	# model = "ResNet50"
	# model = "ResNet50v15"
	#model = "VGG16"
	# model = "custom"

	# Load previously saved dataflow results
	loadFromSaved = False
	Nimages = 1

	archParams = {}

	# Sliding window allocation order
	# Affects delay, latency, and buffering
	# 1: Nbx parallel to Ntx
	# 2: Nbx orthogonal to Ntx

	if model == "VGG16":
		NN_layers = specifyModel_VGG()
		arrayDims = np.array([1152,256])
		Ncycles_delay = 39

		Ncycles_target = 112

		if Ncycles_target == 112:
			archParams['SWorder'] = 2
			archParams['memorySizeKb'] = 64
			archParams['receiveBufferSizeKb'] = 8
			archParams['tileOutBufferSizeKb'] = 4
			archParams['maxMPoutputs'] = 1000
			archParams['Nbc_pool_factor'] = 50000
			archParams['A_router'] = 107526e-12
			archParams['P_router'] = 11.8e-3*2

	elif model == "ResNet50":
		NN_layers = specifyModel_ResNet50()
		arrayDims = np.array([1152,256])
		Ncycles_target = 28
		Ncycles_delay = 0
	elif model == "ResNet50v15":
		NN_layers = specifyModel_ResNet50v15()
		arrayDims = np.array([1152,256])
		Ncycles_target = 112
		archParams['SWorder'] = 1

		if Ncycles_target == 224:
			archParams['memorySizeKb'] = 64
			archParams['receiveBufferSizeKb'] = 6
			archParams['maxMPoutputs'] = int(np.ceil(2*384))
			archParams['mergeTileFactor'] = np.array([4,4,4,4,2,2,2,2,2,2,2,2,2,2,2,2])
			archParams['Nbc_pool_factor'] = 200000
			archParams['A_router'] = 64479.1e-12
			archParams['P_router'] = 11.8e-3
		elif Ncycles_target == 112:
			archParams['memorySizeKb'] = 64
			archParams['receiveBufferSizeKb'] = 4
			archParams['maxMPoutputs'] = 2*384
			archParams['mergeTileFactor'] = np.array([2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2])
			archParams['Nbc_pool_factor'] = 200000
			archParams['A_router'] = 64479.1e-12
			archParams['P_router'] = 11.8e-3
		elif Ncycles_target == 56:
			archParams['memorySizeKb'] = 64
			archParams['A_router'] = 107526e-12
			archParams['P_router'] = 11.8e-3*2
			archParams['receiveBufferSizeKb'] = 6
			archParams['maxMPoutputs'] = int(np.ceil(2*384))
			archParams['mergeTileFactor'] = np.array([1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1])
			archParams['Nbc_pool_factor'] = 100000
		elif Ncycles_target == 52:
			archParams['memorySizeKb'] = 64
			archParams['A_router'] = 107526e-12
			archParams['P_router'] = 11.8e-3*2
			archParams['receiveBufferSizeKb'] = 4
			archParams['maxMPoutputs'] = int(np.ceil(2*384))
			archParams['mergeTileFactor'] = np.array([1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1])
			archParams['Nbc_pool_factor'] = 100000
		elif Ncycles_target == 28:
			archParams['memorySizeKb'] = 64
			archParams['receiveBufferSizeKb'] = 6.125
			archParams['maxMPoutputs'] = int(np.ceil(1.5*384))
			archParams['mergeTileFactor'] = np.array([1,1,1,1,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,1,1,1])
			archParams['Nbc_pool_factor'] = 100000
			archParams['A_router'] = 107526e-12
			archParams['P_router'] = 11.8e-3*2

		Ncycles_delay = 105 # 39 if no shift reg, 34 with shift reg
	elif model == "ResNet34":
		NN_layers = specifyModel_ResNet34()
		arrayDims = np.array([1152,256])
		Ncycles_delay = 39

		Ncycles_target = 110

		archParams['SWorder'] = 1
		archParams['memorySizeKb'] = 64
		archParams['receiveBufferSizeKb'] = 2
		archParams['maxMPoutputs'] = 2*384
		archParams['mergeTileFactor'] = np.array([2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2])
		archParams['Nbc_pool_factor'] = 200000
		archParams['A_router'] = 64479.1e-12
		archParams['P_router'] = 11.8e-3

	elif model == "custom":
		NN_layers = specifyModel_custom()
		arrayDims = np.array([1152,256])
		Ncycles_target = 112
		# For this network,
		# SWorder = 2 is faster (126 cycles) but less efficient (59.1 TOPS/W)
		# SWorder = 1 is slower (172 cycles) but more efficient ()
		archParams['SWorder'] = 1
		archParams['memorySizeKb'] = 64
		archParams['receiveBufferSizeKb'] = 6
		archParams['tileOutBufferSizeKb'] = 1
		archParams['maxMPoutputs'] = 1000
		archParams['Nbc_pool_factor'] = 200000
		archParams['A_router'] = 107526e-12
		archParams['P_router'] = 11.8e-3*2
		Ncycles_delay = 0
	else:
		raise ValueError("This neural network is not yet supported")

	########################################
	##
	##	Set architecture parameters
	##
	########################################

	# Create architecture parameter object
	# Parameters
	#	heterogeneousTiling	: if True (deprecated), four different array sizes are used, set in allocateHardware
	#	NoutputsTile		: number of array output values per tile; 1024 recommended
	#							may be greater than the number of activations produced per tile, if partial sums are added in-tile
	#	Nbits 				: number of activation bits, same as ADC resolution						
	#	buffersizeKb 		: capacity of tile cache in Kbytes
	#	t_clk 				: clock period
	#	t_compCycle 		: length of compute cycle, i.e. pipeline stage
	#	I_read 				: maximum read current
	#	Vdd_digital 		: digital supply
	#	Vdd_analog 			: analog supply
	#	Vref 				: DAC reference voltage
	archParams['model'] = model
	archParams['ArrayDims'] = arrayDims
	archParams['NoutputsTile'] = 1024
	archParams['heterogeneousTiling'] = False
	archParams['Ncycles_target'] = Ncycles_target
	archParams['Nbits'] = 8
	archParams['Nbanks'] = 32
	archParams['Ncycles_machine'] = 295
	archParams['digital_bias'] = True
	archParams['t_clk'] = 1e-9
	archParams['I_read'] = 3200e-9
	archParams['Vdd_digital'] = 1.1
	archParams['Vdd_analog'] = 2.5
	archParams['Vref'] = 1.0 # ADC reference voltage

	archParams['CoreOutKb'] = 1.0

	# Process
	archParams['A_lowV'] = 0.15e-12
	archParams['A_highV'] = 0.653e-12

	# Area inflation factors:
	# 1) area_fraction_wiring is % of area dedicated to wiring/layout overhead in certain blocks
	#	Applied everywhere except SONOS array and SRAM
	# 2) area_fraction_control is % of tile area dedicated to control unit and instruction memory
	archParams['area_fraction_wiring'] = 0.2500
	archParams['area_fraction_control'] = 0.0500

	# Data widths
	archParams['NcoreOut_cycle'] = 8 # Number of outputs to send out of core each cycle
	archParams['Nwrite_inputReg'] = 8 # Number of values written to input buffer per cycle
	archParams['Nread_outputReg'] = 16 # Number of values read from output buffer per cycle
	archParams['Nread_tileOut'] = 16 # Number of values to send out of tile per cycle
	archParams['Nports_receiveBuffer'] = 16 # Number of values to read into the tile per cycle

	# Other settings
	archParams['useShiftRegisters'] = True
	archParams['weightReorder'] = True
	archParams['Nimages'] = Nimages
	archParams['Ncycles_delay'] = Ncycles_delay
		
	# Dataflow output file name
	if Nimages == 1:
		archName = model+"_"+str(archParams['ArrayDims'][0])+"x"+str(archParams['ArrayDims'][1])+"_"+str(Ncycles_target)+"cycles.npz"
	else:
		archName = model+"_"+str(archParams['ArrayDims'][0])+"x"+str(archParams['ArrayDims'][1])+"_"+str(Ncycles_target)+"cycles_"+str(Nimages)+"images.npz"


	########################################
	##
	##	Generate accelerator
	##
	########################################

	# Set overall hardware parameters
	NN_layers, layerParams, archParams = allocateHardware(NN_layers,archParams)

	# Allocate tiles  to layers
	NN_layers, layerParams, tiles = allocateTiles(NN_layers,layerParams,archParams)

	# Calculate the MVM energy consumption and area of the tiles
	layerParams, archParams, area_layers = calculateTileEnergyArea(layerParams,NN_layers,archParams)

	########################################`
	##
	##	Simulate neural network dataflow
	##
	########################################

	if loadFromSaved:
		# Load previously saved outputs
		outputs = np.load("./dataflow_outputs/"+archName,allow_pickle=True)
		activity = outputs["activity"]
		processing = outputs["processing"]
		buffersizes = outputs["buffersizes"]
		traffic = outputs["traffic"]
		energies = outputs["energies"]
		speed = outputs["speed"]

	else:
		# Simulate data flow
		activity, processing, buffersizes, traffic, energies, speed = simulateDataflow(NN_layers,layerParams,tiles,archParams)
		np.savez("./dataflow_outputs/"+archName,activity=activity,buffersizes=buffersizes,traffic=traffic,energies=energies,processing=processing,speed=speed)

	# Plot dataflow
	eval_energy(NN_layers,layerParams,archParams,energies,speed[1],Nimages=Nimages,concise=False)
	plotDataflow(activity,processing,buffersizes,traffic,speed,layerParams,NN_layers,Nimages,saveFigs=True,concise=True)
	return layerParams, archParams, area_layers




In [4]:
cli()

Layer name:	array:	tile:	all:	Nbc:


Compilation is falling back to object mode WITH looplifting enabled because Function "findBlock" failed type inference due to: [1mCan't unify return type from the following types: Literal[bool](False), UniTuple(int64 x 2)
[1mReturn of: IR name '$62return_value.6', type 'UniTuple(int64 x 2)', location: [1m
File "sonos_accelerator/helpers/mathematical.py", line 16:[0m
[1mdef findBlock(N):
    <source elided>
		F1 -= 1
[1m	return F1, intDiv(N,F1)
[0m [1m^[0m[0m[0m
[1mReturn of: IR name '$12return_value.1', type 'Literal[bool](False)', location: [1m
File "sonos_accelerator/helpers/mathematical.py", line 12:[0m
[1mdef findBlock(N):
    <source elided>
	if N < 1:
[1m		return False
[0m  [1m^[0m[0m[0m[0m[0m
  @jit
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "findBlock" failed type inference due to: [1m[1mCannot determine Numba type of <class 'numba.core.dispatcher.LiftedLoop'>[0m
[1m
File "sonos_accelerator/helpers/mathemat

Conv1:		4, 4	28, 16	1
Conv2:	2, 2	4, 4	28, 16	1
Conv3:		2, 4	14, 8	1
Conv4:		2, 2	14, 8	1
Conv5:		2, 2	14, 2	1
Conv6:		1, 2	7, 4	1
Conv7:		1, 2	7, 4	1
Conv8:		1, 1	7, 1	1
Conv9:		1, 1	7, 1	2
Conv10:		1, 1	7, 1	2
Conv11:		1, 1	2, 1	2
Conv12:		1, 1	2, 1	2
Conv13:		1, 1	2, 1	2
----
Effective tile area in cmesh: 2.316 mm^2
Tile area: 1.844 mm^2
  Cores + ramp: 0.731 mm^2 (39.645%)
  Core: 0.727 mm^2 (39.406%)
     per core: 0.182mm^2
  RAM: 0.181 mm^2 (9.813%)
  Buffers: 0.671 mm^2 (36.372%)
     Receive FIFOs: 0.213 mm^2
     MVMin buffer: 0.143 mm^2
     ALUin buffer: 0.158 mm^2
     TileOut buffer: 0.157 mm^2
  Ramp generator: 0.004 mm^2 (0.238%)
  ALU: 0.169 mm^2 (9.171%)
     Adder1: 0.0020 mm^2
     Adder2: 0.0012 mm^2
     Bias Adder: 0.0232 mm^2
     ReLU: 0.0115 mm^2
     Range converter: 0.0791 mm^2
     MaxPool: 0.0033 mm^2
     AvgPool: 0.0064 mm^2
  Control unit: 0.0922 mm^2 (5.000%)
Total area: 769.030 mm^2
  0: Conv1 (28 tiles), MVM: 54 x 64, area: 64.858 mm^2 (8.434%), util

  print("   Conv1 tiles: {:.3f}".format(100*util_devices_breakdown[0]/devices_breakdown[0])+'%')


Cycle 0, started 1 layers, finished 0 layers
Cycle 1, started 1 layers, finished 0 layers
Cycle 2, started 1 layers, finished 0 layers
Cycle 3, started 1 layers, finished 0 layers
Cycle 4, started 3 layers, finished 0 layers
Cycle 5, started 3 layers, finished 0 layers
Cycle 6, started 3 layers, finished 0 layers
Cycle 7, started 3 layers, finished 0 layers
Cycle 8, started 3 layers, finished 0 layers
Cycle 9, started 4 layers, finished 0 layers
Cycle 10, started 4 layers, finished 0 layers
Cycle 11, started 4 layers, finished 0 layers
Cycle 12, started 4 layers, finished 0 layers
Cycle 13, started 4 layers, finished 0 layers
Cycle 14, started 6 layers, finished 0 layers
Cycle 15, started 6 layers, finished 0 layers
Cycle 16, started 6 layers, finished 0 layers
Cycle 17, started 6 layers, finished 0 layers
Cycle 18, started 6 layers, finished 0 layers
Cycle 19, started 7 layers, finished 0 layers
Cycle 20, started 7 layers, finished 0 layers
Cycle 21, started 7 layers, finished 0 layer

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


FileNotFoundError: [Errno 2] No such file or directory: './dataflow_outputs/VGG16_1152x256_112cycles.npz'