In [None]:
import numpy as np
from nntool.api import NNGraph
from nntool.api.utils import quantization_options, model_settings

In [None]:

def representative_dataset(num_samples=1, shape=(3, 32, 32), low=-1, high=1, seed=None):
    """Yield random tensors to feed statistics collection (calibration).

    Called with no arguments this yields exactly one array of shape
    (3, 32, 32) drawn uniformly from [low, high) = [-1, 1) using NumPy's
    global random state.

    Args:
        num_samples: Number of tensors to yield.
        shape: Shape of every yielded tensor.
        low: Lower bound of the uniform distribution (inclusive).
        high: Upper bound of the uniform distribution (exclusive).
        seed: Optional seed. When given, samples are drawn from a private
            ``np.random.default_rng(seed)`` so the sequence is reproducible;
            when ``None`` the global ``np.random`` state is used.

    Yields:
        numpy.ndarray: one float array of the requested shape per sample.
    """
    # Both the np.random module and a Generator expose uniform(low, high, size),
    # so the sampling call below is the same for either source.
    sampler = np.random if seed is None else np.random.default_rng(seed)
    for _ in range(num_samples):
        yield sampler.uniform(low, high, shape)

In [None]:
# Load the ONNX model from disk; load_quantization=False is passed, so
# presumably no quantization information is loaded here.
G = NNGraph.load_graph("checkpoint/shufflenetv2.onnx", load_quantization=False)
# Both calls below act on G; they are kept in this order
# (adjust_order first, then the "scaled_match_group" fusions).
G.adjust_order()
G.fusions("scaled_match_group")

In [None]:
# Report the graph's memory usage figures and operation count.
max_activ_size, total_params = G.total_memory_usage
ops = G.total_ops

# A single print call; sep="\n" produces the same four output lines.
print(
    f"{G.name}:",
    f"\tMax Active Size:\t{max_activ_size} elements",
    f"\tTotal # Parameters:\t{total_params} elements",
    f"\tTotal # Operations:\t{ops / 1e6:.2f} MOps",
    sep="\n",
)
# G.draw()

In [None]:
# Collect statistics on G from the samples produced by representative_dataset();
# the result is kept in `stats` and handed to G.quantize() in a later cell.
print("Calibrating...")
stats = G.collect_statistics(representative_dataset())

In [None]:
# Quantize G using the statistics collected during calibration.
# Alternative kept for reference (float scheme with bfloat16):
# G.quantize(graph_options=quantization_options(scheme="FLOAT", float_type="bfloat16"))
G.quantize(
    statistics=stats,
    # Alternative kept for reference: a plain dict of options with use_ne16 disabled.
    # graph_options={
    #     'use_ne16': False,
    #     'hwc': True
    # },
    # NOTE(review): use_ne16 presumably selects the NE16 accelerator kernels and
    # hwc presumably selects channel-last tensor layout; confirm against the NNTool docs.
    graph_options=quantization_options(use_ne16=True, hwc=True)
)

In [None]:

# NOTE(review): this cell repeats the load / summary / adjust_order / fusions /
# calibrate / quantize steps of the preceding cells in one place. It rebinds G
# to a freshly loaded graph, so when the notebook is run top to bottom the
# quantization done in the earlier cell (use_ne16=True) is discarded and the
# one below (use_ne16=False) is what later cells see.
G = NNGraph.load_graph("checkpoint/shufflenetv2.onnx", load_quantization=False)
#G.draw(filepath="draw", view=True)
# Memory usage figures and operation count, read before adjust_order()/fusions().
max_activ_size, total_params = G.total_memory_usage
ops = G.total_ops

print(f"{G.name}:")
print(f"\tMax Active Size:\t{max_activ_size} elements")
print(f"\tTotal # Parameters:\t{total_params} elements")
print(f"\tTotal # Operations:\t{ops / 1e6:.2f} MOps")
G.adjust_order()
G.fusions('scaled_match_group')
# G.draw()

# Collect statistics from the samples yielded by representative_dataset().
print("Calibrating...")
stats = G.collect_statistics(representative_dataset())
# Alternative kept for reference (float scheme with bfloat16):
# G.quantize(graph_options=quantization_options(scheme="FLOAT", float_type="bfloat16"))
G.quantize(
    statistics=stats,
    # Options given as a plain dict here, whereas the earlier cell builds them
    # with quantization_options(); use_ne16 is False in this cell.
    graph_options={
        'use_ne16': False,
        'hwc': True
    },
)

In [None]:
# Draw the current state of G (after the quantization cell above, when run in order).
G.draw()

In [None]:
# Fixed-seed random input so the two runs below give the same numbers after a
# kernel restart (previously drawn from the unseeded global NumPy state).
# NOTE(review): the input here is (32, 32, 3) while representative_dataset()
# yields (3, 32, 32); presumably the hwc=True quantization option changes the
# expected input layout. Confirm.
test_input = np.random.default_rng(seed=0).uniform(-1, 1, (32, 32, 3))
# Run with no quantization arguments (presumably the float reference run).
fout = G.execute([test_input])
# Same input with quantize=True and dequantize=True, for comparison with fout.
# Note that `qout` is reassigned by a later cell.
qout = G.execute([test_input], dequantize=True, quantize=True)

In [None]:
# G.dict_qsnrs(fout, qout)

In [None]:

# Autotiler option: have the autotiler allocate the network input itself so that
# the space can be reused after the first layer, leaving more L2 for the rest
# of the network.
G[0].at_options.allocate = 1
# Fixed-seed random input so the tensors later passed to the target run are
# reproducible (previously drawn from the unseeded global NumPy state).
test_qinput = np.random.default_rng(seed=1).uniform(-1, 1, (32, 32, 3))
# test_qinput = np.random.uniform(-1, 1, (3, 32, 32))
# Run with quantize=True and dequantize=False; this overwrites the `qout`
# assigned by the earlier comparison cell.
qout = G.execute([test_qinput], quantize=True, dequantize=False)


In [None]:
# qout

In [None]:
# Run G on the "gvsoc" platform, using "test_run2" as the working directory.
# The returned object is kept in `res` and used by the cells below
# (output_tensors, plots, at_graph_struct).
res = G.execute_on_target(
    platform="gvsoc",
    directory="test_run2",
    # First entry of `qout`, i.e. of whichever G.execute() result was assigned
    # last (the quantize=True, dequantize=False run when cells run in order).
    input_tensors=qout[0],
    # NOTE(review): presumably makes the target run check its results against
    # nntool's own; confirm against the NNTool docs.
    check_on_target=True,
    print_output=True,
    # NOTE(review): presumably keeps the generated build directory; confirm.
    do_clean=False,
    # output_tensors=4,
    settings=model_settings(
        # Memory sizes passed to the model settings (units not shown here;
        # presumably bytes, confirm).
        l1_size=128000,
        l2_size=1200000, 
        tensor_directory='./tensors',
        graph_const_exec_from_flash=True,
        graph_l1_promotion=2,
        # Tensor-dump options kept for reference:
        # graph_dump_tensor=4,
        # graph_dump_tensor_to_file=True
    ),
    at_loglevel=2,
)


In [None]:
# Compare the nntool quantized run (`qout`) with the tensors returned by the
# target run (`res.output_tensors`); the result is shown as the cell output.
G.dict_qsnrs(qout, res.output_tensors)

In [None]:
# Draw G again, this time with quant_labels=True.
G.draw(quant_labels=True)

In [None]:
import matplotlib.pyplot as plt
%matplotlib widget
# Plot the memory boxes recorded in the target-run result `res`.
res.plot_memory_boxes()

In [None]:
# Plot the performance data recorded in the target-run result `res`.
res.plot_performance()

In [None]:
# Pie chart of performance per layer from the target-run result `res`.
res.pieplot_perf_per_layer()

In [10]:
# Show the graph structure stored on `res`: as the output below shows, a dict
# keyed by integer index whose entries carry name, type, memory/move counts,
# operations and alive size.
res.at_graph_struct

{0: {'channel': 0,
  'channel_depth': 0,
  'type': 'GraphEntry',
  'name': '__GraphEntry__',
  'Total Memory': 0,
  'L3 Moves': 0,
  'L2 Moves': 0,
  'Move overhead (L2/Total)': 1.0,
  'Operations': 0,
  'L3 Move/Oper': 0.0,
  'L2 Move/Oper': 0.0,
  'Alive Size': 3072},
 1: {'channel': 17,
  'channel_depth': 20,
  'type': 'UKer',
  'name': 'S3__conv1_Conv_fusion',
  'Total Memory': 28453,
  'L3 Moves': 0,
  'L2 Moves': 28453,
  'Move overhead (L2/Total)': 1.0,
  'Operations': 663552,
  'L3 Move/Oper': 0.0,
  'L2 Move/Oper': 0.04288,
  'Alive Size': 28453},
 2: {'channel': 16,
  'channel_depth': 4,
  'type': 'UKer',
  'name': 'S4__conv1_Conv_fusion_qout0',
  'Total Memory': 49160,
  'L3 Moves': 0,
  'L2 Moves': 49160,
  'Move overhead (L2/Total)': 1.0,
  'Operations': 24576,
  'L3 Move/Oper': 0.0,
  'L2 Move/Oper': 2.000325,
  'Alive Size': 49160},
 3: {'channel': 16,
  'channel_depth': 4,
  'type': 'UKer',
  'name': 'S7__layer1_layer1_0_conv1_Conv',
  'Total Memory': 31093,
  'L3 Moves