# Imports

In [2]:
import pandas
#import ConfigParser
import os
import numpy as np
%load_ext autoreload
%autoreload 2
from bitfusion.graph_plot.barchart import BarChart

%matplotlib inline
import matplotlib

import warnings
warnings.filterwarnings('ignore')

import sys
sys.path.insert(0, '../dnnweaver2')
print(sys.version_info)
import dnnweaver2

import bitfusion.src.benchmarks.benchmarks as benchmarks
from bitfusion.src.simulator.stats import Stats
from bitfusion.src.simulator.simulator import Simulator
from bitfusion.src.sweep.sweep import SimulatorSweep, check_pandas_or_run
from bitfusion.src.utils.utils import *
from bitfusion.src.optimizer.optimizer import optimize_for_order, get_stats_fast



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
sys.version_info(major=3, minor=6, micro=2, releaselevel='final', serial=0)


ModuleNotFoundError: No module named 'layer'

## Constants
### We use a batch size of 16

In [2]:
# Batch size used for every simulation in this notebook.
batch_size = 16

# Directory holding the simulator sweep results (CSV files).
# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate os.path.exists() test.
results_dir = './results'
os.makedirs(results_dir, exist_ok=True)

# If a previous result file exists, the simulator would not generate new
# results, so remove any stale file to force a fresh sweep.
result_file = "bitfusion-abs-sim-sweep.csv"
stale_result_path = os.path.join(results_dir, result_file)
if os.path.exists(stale_result_path):
    os.remove(stale_result_path)

# Directory for generated figures.
fig_dir = './fig'
os.makedirs(fig_dir, exist_ok=True)

### Create a simulator object using the configuration from BitFusion-Eyeriss (bf_e_conf.ini)

In [3]:
# Configuration file describing the BitFusion-Eyeriss setup
config_file = 'bf_e_conf.ini'

# Build the simulator from that configuration (verbose flag off) and
# fetch its energy costs once; `energy_tuple` is an alias of the same object.
verbose = False
bf_e_sim = Simulator(config_file, verbose)
bf_e_energy_costs = bf_e_sim.get_energy_cost()
energy_tuple = bf_e_energy_costs

# Show the simulator summary, then a blank line, a separator and the energy costs.
print(bf_e_sim)
print('', '*' * 50, energy_tuple, sep='\n')

Can't find binary file /workspace/git/bitfusion/bitfusion/src/simulator/../../sram/cacti/cacti. Please clone and compile cacti first
Simulator object
	Max supported precision: 8
	Min supported precision: 2
	Systolic array size: 16 -inputs x 32 -outputs
	Wbuf size: 65,536 Bytes
	Ibuf size: 32,768 Bytes
	Obuf size: 16,384 Bytes
Double buffering enabled. Sizes of SRAM are halved

**************************************************
Energy costs for BitFusion
Core dynamic energy : 235.104 pJ/cycle (for entire systolic array)
WBUF Read energy    : 0.021 pJ/bit
WBUF Write energy   : 0.029 pJ/bit
IBUF Read energy    : 0.059 pJ/bit
IBUF Write energy   : 0.114 pJ/bit
OBUF Read energy    : 0.033 pJ/bit
OBUF Write energy   : 0.073 pJ/bit



### Generate BitFusion numbers

In [4]:
# Column names used for an empty sweep-results frame when no CSV exists yet.
sim_sweep_columns = [
    'N',
    'M',
    'Max Precision (bits)',
    'Min Precision (bits)',
    'Network',
    'Layer',
    'Cycles',
    'Memory wait cycles',
    'WBUF Read',
    'WBUF Write',
    'OBUF Read',
    'OBUF Write',
    'IBUF Read',
    'IBUF Write',
    'DRAM Read',
    'DRAM Write',
    'Bandwidth (bits/cycle)',
    'WBUF Size (bits)',
    'OBUF Size (bits)',
    'IBUF Size (bits)',
    'Batch size',
]

# Start from the saved sweep CSV when present, otherwise from an empty frame.
bf_e_sim_sweep_csv = os.path.join(results_dir, result_file)
bf_e_sim_sweep_df = (
    pandas.read_csv(bf_e_sim_sweep_csv)
    if os.path.exists(bf_e_sim_sweep_csv)
    else pandas.DataFrame(columns=sim_sweep_columns)
)
print('Got BitFusion Eyeriss, Numbers')

# Obtain the sweep results for this batch size, then sum the rows of each network.
bf_e_results = check_pandas_or_run(
    bf_e_sim,
    bf_e_sim_sweep_df,
    bf_e_sim_sweep_csv,
    batch_size=batch_size,
).groupby('Network', as_index=False).agg(np.sum)
area_stats = bf_e_sim.get_area()

INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: AlexNet
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 2
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 16
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 192


Got BitFusion Eyeriss, Numbers
Can't find binary file /workspace/git/bitfusion/bitfusion/src/simulator/../../sram/cacti/cacti. Please clone and compile cacti first


INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: SVHN
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 2
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 16
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 192
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: CIFAR10
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 2
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 16
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 192
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: LeNet-5
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min

### Get Eyeriss numbers

In [5]:
def get_eyeriss_energy(df):
    """Return the total Eyeriss energy for one entry of Eyeriss results.

    Reads the 'ALU', 'DRAM', 'Buffer', 'Array' and 'RF' fields of ``df``
    (each converted with ``float``), scales the DRAM field by 0.15, adds
    the five components and multiplies the sum by the per-MAC energy.

    Args:
        df: mapping-like object indexable by the five field names above.

    Returns:
        The scaled sum as a float (nJ, per the per-MAC constant below).
    """
    nj_per_mac = 16 * 0.2 * 1.e-3  # energy in nJ
    alu, dram, buf, array, rf = (
        float(df[field]) for field in ('ALU', 'DRAM', 'Buffer', 'Array', 'RF')
    )
    dram = dram * 0.15  # scaling due to technology
    total = alu + dram + buf + array + rf
    return total * nj_per_mac

def get_eyeriss_energy_breakdown(df):
    """Return the Eyeriss energy split into four components.

    Reads the 'ALU', 'DRAM', 'Buffer', 'Array' and 'RF' fields of ``df``
    (each converted with ``float``) and multiplies each component by the
    per-MAC energy.

    Args:
        df: mapping-like object indexable by the five field names above.

    Returns:
        A list ``[ALU + Array, Buffer, RF, DRAM]`` of floats (nJ, per the
        per-MAC constant below).
    """
    # NOTE(review): unlike get_eyeriss_energy, the DRAM term here is not
    # multiplied by 0.15 -- confirm whether that difference is intended.
    nj_per_mac = 16 * 0.2 * 1.e-3  # energy in nJ
    alu = float(df['ALU'])
    dram = float(df['DRAM'])
    buf = float(df['Buffer'])
    array = float(df['Array'])
    rf = float(df['RF'])
    components = (alu + array, buf, rf, dram)
    return [part * nj_per_mac for part in components]

def df_to_stats(df):
    """Build a ``Stats`` object from one entry of simulator results.

    Copies the 'Cycles' and 'Memory wait cycles' fields into
    ``total_cycles`` and ``mem_stall_cycles``, and the IBUF/OBUF/WBUF/DRAM
    read and write fields into ``reads`` and ``writes`` under the keys
    'act', 'out', 'wgt' and 'dram'. Every value is converted with ``float``.

    Args:
        df: mapping-like object indexable by the column names used below.

    Returns:
        The populated ``Stats`` instance.
    """
    stats = Stats()
    stats.total_cycles = float(df['Cycles'])
    stats.mem_stall_cycles = float(df['Memory wait cycles'])

    # (stats key, source column) pairs, in the original assignment order.
    read_columns = (
        ('act', 'IBUF Read'),
        ('out', 'OBUF Read'),
        ('wgt', 'WBUF Read'),
        ('dram', 'DRAM Read'),
    )
    write_columns = (
        ('act', 'IBUF Write'),
        ('out', 'OBUF Write'),
        ('wgt', 'WBUF Write'),
        ('dram', 'DRAM Write'),
    )
    for key, column in read_columns:
        stats.reads[key] = float(df[column])
    for key, column in write_columns:
        stats.writes[key] = float(df[column])
    return stats

In [6]:
# Eyeriss-Simulator: load the Eyeriss results and sum the rows of each benchmark
eyeriss_csv = os.path.join(results_dir, 'eyeriss_results.csv')
eyeriss_data = pandas.read_csv(eyeriss_csv)
eyeriss_data_bench = eyeriss_data.groupby('Benchmark', as_index=False).agg(np.sum)
eyeriss_data_bench['Platform'] = 'Eyeriss (16-bit)'

print('BitFusion-Eyeriss comparison')
# NOTE(review): presumably a 3.5 x 3.5 die area rescaled from 65 to 45 (nm) -- confirm
eyeriss_area = 3.5*3.5*45*45/65./65.
print('Area budget = {}'.format(eyeriss_area))

# Compare the summed BitFusion area components against the Eyeriss budget
print(area_stats)
bf_e_area = sum(area_stats)
area_mismatch = abs(bf_e_area - eyeriss_area) / eyeriss_area
if area_mismatch > 0.1:
    print('Warning: BitFusion Area is outside 10% of eyeriss')
print('total_area = {}, budget = {}'.format(bf_e_area, eyeriss_area))

banner = '*' * 50
baseline_data = []
for bench in benchmarks.benchlist:
    lookup_dict = {'Benchmark': bench}

    # --- Eyeriss baseline (currently disabled) ---
    #eyeriss_cycles = float(lookup_pandas_dataframe(eyeriss_data_bench, lookup_dict)['time(ms)'])
    #eyeriss_time = eyeriss_cycles / 500.e3 / 16
    #eyeriss_energy = get_eyeriss_energy(lookup_pandas_dataframe(eyeriss_data_bench, lookup_dict))
    #eyeriss_power = eyeriss_energy / eyeriss_time * 1.e-9

    #eyeriss_speedup = eyeriss_time / eyeriss_time
    #eyeriss_energy_efficiency = eyeriss_energy / eyeriss_energy

    #eyeriss_ppa = eyeriss_speedup / eyeriss_area / (eyeriss_speedup / eyeriss_area)
    #eyeriss_ppw = eyeriss_speedup / eyeriss_power / (eyeriss_speedup / eyeriss_power)

    # BitFusion numbers for this benchmark, scaled relative to a batch of 16
    bench_rows = bf_e_results.loc[bf_e_results['Network'] == bench]
    bf_e_stats = df_to_stats(bench_rows)
    batch_scale = batch_size / 16.
    bf_e_cycles = bf_e_stats.total_cycles * batch_scale
    # NOTE(review): 500.e3 looks like cycles per ms and 16 like the batch size -- confirm
    bf_e_time = bf_e_cycles / 500.e3 / 16
    bf_e_energy = bf_e_stats.get_energy(bf_e_sim.get_energy_cost()) * batch_scale
    bf_e_power = bf_e_energy / bf_e_time * 1.e-9

    # --- Relative metrics against Eyeriss (currently disabled) ---
    #bf_e_speedup = eyeriss_time / bf_e_time
    #bf_e_energy_efficiency = eyeriss_energy / bf_e_energy

    #bf_e_ppa = bf_e_speedup / bf_e_area / (eyeriss_speedup / eyeriss_area)
    #bf_e_ppw = bf_e_speedup / bf_e_power / (eyeriss_speedup / eyeriss_power)

    #baseline_data.append(['Performance', bench, bf_e_speedup])
    #baseline_data.append(['Energy reduction', bench, bf_e_energy_efficiency])
    #baseline_data.append(['Performance-per-Watt', bench, bf_e_ppw])
    #baseline_data.append(['Performance-per-Area', bench, bf_e_ppa])

    print(banner)
    print('Benchmark: {}'.format(bench))
    #print('Eyeriss time: {} ms'.format(eyeriss_time))
    print('BitFusion time: {} ms'.format(bf_e_time))
    #print('Eyeriss power: {} mWatt'.format(eyeriss_power*1.e3*16))
    print('BitFusion power: {} mWatt'.format(bf_e_power*1.e3*16))
    print(banner)

# baseline_data stays empty while the appends above are disabled
eyeriss_comparison_df = pandas.DataFrame(baseline_data, columns=['Metric', 'Network', 'Value'])

BitFusion-Eyeriss comparison
Area budget = 5.87130177515
(0.8880907385600001, 0.76747776, 0.242112, 0.24330592)
total_area = 2.14098641856, budget = 5.87130177515
**************************************************
Benchmark: AlexNet
BitFusion time: 4.708311375 ms
BitFusion power: 448.386241718 mWatt
**************************************************
**************************************************
Benchmark: SVHN
BitFusion time: 0.164690375 ms
BitFusion power: 410.014092738 mWatt
**************************************************
**************************************************
Benchmark: CIFAR10
BitFusion time: 0.411713 ms
BitFusion power: 450.464576981 mWatt
**************************************************
**************************************************
Benchmark: LeNet-5
BitFusion time: 0.051294 ms
BitFusion power: 356.024253811 mWatt
**************************************************
**************************************************
Benchmark: VGG-7
BitFusion time: 0.3528